1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use crate::config::{ 13 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 14 VdpaConfig, VhostMode, VmConfig, VsockConfig, 15 }; 16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; 17 use crate::device_tree::{DeviceNode, DeviceTree}; 18 use crate::interrupt::LegacyUserspaceInterruptManager; 19 use crate::interrupt::MsiInterruptManager; 20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 21 use crate::pci_segment::PciSegment; 22 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 23 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 24 use crate::sigwinch_listener::start_sigwinch_listener; 25 use crate::GuestRegionMmap; 26 use crate::PciDeviceInfo; 27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; 28 use acpi_tables::sdt::GenericAddress; 29 use acpi_tables::{aml, Aml}; 30 use anyhow::anyhow; 31 use arch::layout; 32 #[cfg(target_arch = "x86_64")] 33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 34 use arch::NumaNodes; 35 #[cfg(target_arch = "aarch64")] 36 use arch::{DeviceType, MmioDeviceInfo}; 37 use block::{ 38 async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type, 39 fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio, 40 raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType, 41 }; 42 #[cfg(feature = "io_uring")] 43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; 44 #[cfg(target_arch = "aarch64")] 45 use 
devices::gic; 46 #[cfg(target_arch = "x86_64")] 47 use devices::ioapic; 48 #[cfg(target_arch = "aarch64")] 49 use devices::legacy::Pl011; 50 #[cfg(target_arch = "x86_64")] 51 use devices::legacy::Serial; 52 use devices::{ 53 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 54 }; 55 use hypervisor::{HypervisorType, IoEventAddress}; 56 use libc::{ 57 cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, 58 O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, 59 }; 60 use pci::{ 61 DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping, 62 VfioUserPciDevice, VfioUserPciDeviceError, 63 }; 64 use rate_limiter::group::RateLimiterGroup; 65 use seccompiler::SeccompAction; 66 use serde::{Deserialize, Serialize}; 67 use std::collections::{BTreeSet, HashMap}; 68 use std::fs::{read_link, File, OpenOptions}; 69 use std::io::{self, stdout, Seek, SeekFrom}; 70 use std::mem::zeroed; 71 use std::num::Wrapping; 72 use std::os::unix::fs::OpenOptionsExt; 73 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 74 use std::path::PathBuf; 75 use std::result; 76 use std::sync::{Arc, Mutex}; 77 use std::time::Instant; 78 use tracer::trace_scoped; 79 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 80 use virtio_devices::transport::VirtioTransport; 81 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 82 use virtio_devices::vhost_user::VhostUserConfig; 83 use virtio_devices::{ 84 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 85 }; 86 use virtio_devices::{Endpoint, IommuMapping}; 87 use vm_allocator::{AddressAllocator, SystemAllocator}; 88 use vm_device::dma_mapping::vfio::VfioDmaMapping; 89 use vm_device::dma_mapping::ExternalDmaMapping; 90 use vm_device::interrupt::{ 91 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 92 }; 93 use vm_device::{Bus, BusDevice, Resource}; 94 use 
vm_memory::guest_memory::FileOffset; 95 use vm_memory::GuestMemoryRegion; 96 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion}; 97 #[cfg(target_arch = "x86_64")] 98 use vm_memory::{GuestAddressSpace, GuestMemory}; 99 use vm_migration::{ 100 protocol::MemoryRangeTable, snapshot_from_id, versioned_state_from_id, Migratable, 101 MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, Transportable, 102 }; 103 use vm_virtio::AccessPlatform; 104 use vm_virtio::VirtioDeviceType; 105 use vmm_sys_util::eventfd::EventFd; 106 107 #[cfg(target_arch = "aarch64")] 108 const MMIO_LEN: u64 = 0x1000; 109 110 // Singleton devices / devices the user cannot name 111 #[cfg(target_arch = "x86_64")] 112 const IOAPIC_DEVICE_NAME: &str = "__ioapic"; 113 const SERIAL_DEVICE_NAME: &str = "__serial"; 114 #[cfg(target_arch = "aarch64")] 115 const GPIO_DEVICE_NAME: &str = "__gpio"; 116 const RNG_DEVICE_NAME: &str = "__rng"; 117 const IOMMU_DEVICE_NAME: &str = "__iommu"; 118 const BALLOON_DEVICE_NAME: &str = "__balloon"; 119 const CONSOLE_DEVICE_NAME: &str = "__console"; 120 const PVPANIC_DEVICE_NAME: &str = "__pvpanic"; 121 122 // Devices that the user may name and for which we generate 123 // identifiers if the user doesn't give one 124 const DISK_DEVICE_NAME_PREFIX: &str = "_disk"; 125 const FS_DEVICE_NAME_PREFIX: &str = "_fs"; 126 const NET_DEVICE_NAME_PREFIX: &str = "_net"; 127 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem"; 128 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa"; 129 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock"; 130 const WATCHDOG_DEVICE_NAME: &str = "__watchdog"; 131 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio"; 132 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user"; 133 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci"; 134 135 /// Errors associated with device manager 136 #[derive(Debug)] 137 pub enum DeviceManagerError { 138 /// Cannot create EventFd. 
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Cannot create tpm device
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// No socket option support for console device
    NoSocketOptionSupportForConsoleDevice,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier
    InvalidIdentifier(String),

    /// Error activating virtio device
    VirtioActivate(ActivateError),

    /// Failed retrieving device state from snapshot
    RestoreGetState(MigratableError),

    /// Cannot create a PvPanic device
    PvPanicCreate(devices::pvpanic::PvPanicError),

    /// Cannot create a RateLimiterGroup
    RateLimiterGroupCreate(rate_limiter::group::Error),
}

/// Convenience alias: every fallible `DeviceManager` operation returns this.
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// Size of the MMIO window reserved for the DeviceManager's own ACPI
// (GED/hotplug) register block, allocated in `DeviceManager::new`.
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

// Linux pty ioctl request numbers (see ioctl_tty(2)):
// TIOCSPTLCK unlocks the pty sub side; TIOCGTPEER opens the peer end
// of the pty main fd without going through the filesystem path.
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
const TIOCGTPEER: libc::c_int = 0x5441;

/// Creates a pty pair, returning (main fd, sub fd, sub path).
pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
    // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
    // This is done to try and use the devpts filesystem that
    // could be available for use in the process's namespace first.
    // Ideally these are all the same file though but different
    // kernels could have things setup differently.
    // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
    // for further details.
503 504 let custom_flags = libc::O_NONBLOCK; 505 let main = match OpenOptions::new() 506 .read(true) 507 .write(true) 508 .custom_flags(custom_flags) 509 .open("/dev/pts/ptmx") 510 { 511 Ok(f) => f, 512 _ => OpenOptions::new() 513 .read(true) 514 .write(true) 515 .custom_flags(custom_flags) 516 .open("/dev/ptmx")?, 517 }; 518 let mut unlock: libc::c_ulong = 0; 519 // SAFETY: FFI call into libc, trivially safe 520 unsafe { libc::ioctl(main.as_raw_fd(), TIOCSPTLCK as _, &mut unlock) }; 521 522 // SAFETY: FFI call into libc, trivially safe 523 let sub_fd = unsafe { 524 libc::ioctl( 525 main.as_raw_fd(), 526 TIOCGTPEER as _, 527 libc::O_NOCTTY | libc::O_RDWR, 528 ) 529 }; 530 if sub_fd == -1 { 531 return vmm_sys_util::errno::errno_result().map_err(|e| e.into()); 532 } 533 534 let proc_path = PathBuf::from(format!("/proc/self/fd/{sub_fd}")); 535 let path = read_link(proc_path)?; 536 537 // SAFETY: sub_fd is checked to be valid before being wrapped in File 538 Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path)) 539 } 540 541 #[derive(Default)] 542 pub struct Console { 543 console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>, 544 } 545 546 impl Console { 547 pub fn need_resize(&self) -> bool { 548 if let Some(_resizer) = self.console_resizer.as_ref() { 549 return true; 550 } 551 552 false 553 } 554 555 pub fn update_console_size(&self) { 556 if let Some(resizer) = self.console_resizer.as_ref() { 557 resizer.update_console_size() 558 } 559 } 560 } 561 562 pub(crate) struct AddressManager { 563 pub(crate) allocator: Arc<Mutex<SystemAllocator>>, 564 #[cfg(target_arch = "x86_64")] 565 pub(crate) io_bus: Arc<Bus>, 566 pub(crate) mmio_bus: Arc<Bus>, 567 pub(crate) vm: Arc<dyn hypervisor::Vm>, 568 device_tree: Arc<Mutex<DeviceTree>>, 569 pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 570 pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 571 } 572 573 impl DeviceRelocation for AddressManager { 574 fn move_bar( 575 &self, 576 old_base: u64, 577 
new_base: u64, 578 len: u64, 579 pci_dev: &mut dyn PciDevice, 580 region_type: PciBarRegionType, 581 ) -> std::result::Result<(), std::io::Error> { 582 match region_type { 583 PciBarRegionType::IoRegion => { 584 #[cfg(target_arch = "x86_64")] 585 { 586 // Update system allocator 587 self.allocator 588 .lock() 589 .unwrap() 590 .free_io_addresses(GuestAddress(old_base), len as GuestUsize); 591 592 self.allocator 593 .lock() 594 .unwrap() 595 .allocate_io_addresses( 596 Some(GuestAddress(new_base)), 597 len as GuestUsize, 598 None, 599 ) 600 .ok_or_else(|| { 601 io::Error::new(io::ErrorKind::Other, "failed allocating new IO range") 602 })?; 603 604 // Update PIO bus 605 self.io_bus 606 .update_range(old_base, len, new_base, len) 607 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 608 } 609 #[cfg(target_arch = "aarch64")] 610 error!("I/O region is not supported"); 611 } 612 PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => { 613 let allocators = if region_type == PciBarRegionType::Memory32BitRegion { 614 &self.pci_mmio32_allocators 615 } else { 616 &self.pci_mmio64_allocators 617 }; 618 619 // Find the specific allocator that this BAR was allocated from and use it for new one 620 for allocator in allocators { 621 let allocator_base = allocator.lock().unwrap().base(); 622 let allocator_end = allocator.lock().unwrap().end(); 623 624 if old_base >= allocator_base.0 && old_base <= allocator_end.0 { 625 allocator 626 .lock() 627 .unwrap() 628 .free(GuestAddress(old_base), len as GuestUsize); 629 630 allocator 631 .lock() 632 .unwrap() 633 .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len)) 634 .ok_or_else(|| { 635 io::Error::new( 636 io::ErrorKind::Other, 637 "failed allocating new MMIO range", 638 ) 639 })?; 640 641 break; 642 } 643 } 644 645 // Update MMIO bus 646 self.mmio_bus 647 .update_range(old_base, len, new_base, len) 648 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 649 } 650 } 651 652 // Update the 
device_tree resources associated with the device 653 if let Some(id) = pci_dev.id() { 654 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) { 655 let mut resource_updated = false; 656 for resource in node.resources.iter_mut() { 657 if let Resource::PciBar { base, type_, .. } = resource { 658 if PciBarRegionType::from(*type_) == region_type && *base == old_base { 659 *base = new_base; 660 resource_updated = true; 661 break; 662 } 663 } 664 } 665 666 if !resource_updated { 667 return Err(io::Error::new( 668 io::ErrorKind::Other, 669 format!( 670 "Couldn't find a resource with base 0x{old_base:x} for device {id}" 671 ), 672 )); 673 } 674 } else { 675 return Err(io::Error::new( 676 io::ErrorKind::Other, 677 format!("Couldn't find device {id} from device tree"), 678 )); 679 } 680 } 681 682 let any_dev = pci_dev.as_any(); 683 if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() { 684 let bar_addr = virtio_pci_dev.config_bar_addr(); 685 if bar_addr == new_base { 686 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) { 687 let io_addr = IoEventAddress::Mmio(addr); 688 self.vm.unregister_ioevent(event, &io_addr).map_err(|e| { 689 io::Error::new( 690 io::ErrorKind::Other, 691 format!("failed to unregister ioevent: {e:?}"), 692 ) 693 })?; 694 } 695 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) { 696 let io_addr = IoEventAddress::Mmio(addr); 697 self.vm 698 .register_ioevent(event, &io_addr, None) 699 .map_err(|e| { 700 io::Error::new( 701 io::ErrorKind::Other, 702 format!("failed to register ioevent: {e:?}"), 703 ) 704 })?; 705 } 706 } else { 707 let virtio_dev = virtio_pci_dev.virtio_device(); 708 let mut virtio_dev = virtio_dev.lock().unwrap(); 709 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() { 710 if shm_regions.addr.raw_value() == old_base { 711 let mem_region = self.vm.make_user_memory_region( 712 shm_regions.mem_slot, 713 old_base, 714 shm_regions.len, 715 shm_regions.host_addr, 716 false, 717 false, 718 
); 719 720 self.vm.remove_user_memory_region(mem_region).map_err(|e| { 721 io::Error::new( 722 io::ErrorKind::Other, 723 format!("failed to remove user memory region: {e:?}"), 724 ) 725 })?; 726 727 // Create new mapping by inserting new region to KVM. 728 let mem_region = self.vm.make_user_memory_region( 729 shm_regions.mem_slot, 730 new_base, 731 shm_regions.len, 732 shm_regions.host_addr, 733 false, 734 false, 735 ); 736 737 self.vm.create_user_memory_region(mem_region).map_err(|e| { 738 io::Error::new( 739 io::ErrorKind::Other, 740 format!("failed to create user memory regions: {e:?}"), 741 ) 742 })?; 743 744 // Update shared memory regions to reflect the new mapping. 745 shm_regions.addr = GuestAddress(new_base); 746 virtio_dev.set_shm_regions(shm_regions).map_err(|e| { 747 io::Error::new( 748 io::ErrorKind::Other, 749 format!("failed to update shared memory regions: {e:?}"), 750 ) 751 })?; 752 } 753 } 754 } 755 } 756 757 pci_dev.move_bar(old_base, new_base) 758 } 759 } 760 761 #[derive(Serialize, Deserialize)] 762 struct DeviceManagerState { 763 device_tree: DeviceTree, 764 device_id_cnt: Wrapping<usize>, 765 } 766 767 #[derive(Debug)] 768 pub struct PtyPair { 769 pub main: File, 770 pub path: PathBuf, 771 } 772 773 impl Clone for PtyPair { 774 fn clone(&self) -> Self { 775 PtyPair { 776 main: self.main.try_clone().unwrap(), 777 path: self.path.clone(), 778 } 779 } 780 } 781 782 #[derive(Clone)] 783 pub enum PciDeviceHandle { 784 Vfio(Arc<Mutex<VfioPciDevice>>), 785 Virtio(Arc<Mutex<VirtioPciDevice>>), 786 VfioUser(Arc<Mutex<VfioUserPciDevice>>), 787 } 788 789 #[derive(Clone)] 790 struct MetaVirtioDevice { 791 virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 792 iommu: bool, 793 id: String, 794 pci_segment: u16, 795 dma_handler: Option<Arc<dyn ExternalDmaMapping>>, 796 } 797 798 #[derive(Default)] 799 pub struct AcpiPlatformAddresses { 800 pub pm_timer_address: Option<GenericAddress>, 801 pub reset_reg_address: Option<GenericAddress>, 802 pub 
    sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

pub struct DeviceManager {
    // The underlying hypervisor
    hypervisor_type: HypervisorType,

    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // console PTY
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // serial PTY
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // pty foreground status,
    console_resize_pipe: Option<Arc<File>>,

    // To restore on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller (IOAPIC on x86_64, GIC on aarch64)
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    exit_evt: EventFd,
    reset_evt: EventFd,

    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    acpi_address: GuestAddress,

    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // aio availability if detected
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    snapshot: Option<Snapshot>,

    rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,
}

impl DeviceManager {
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
        mmio_bus: Arc<Bus>,
        hypervisor_type: HypervisorType,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        cpu_manager: Arc<Mutex<CpuManager>>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
        snapshot: Option<Snapshot>,
        dynamic: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        // On restore, the device tree and ID counter come from the snapshot;
        // on cold boot they start empty.
        let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
            let state: DeviceManagerState = snapshot.to_state().unwrap();
            (
                Arc::new(Mutex::new(state.device_tree.clone())),
                state.device_id_cnt,
            )
        } else {
            (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
        };

        // Default to a single PCI segment unless the platform config says otherwise.
        let num_pci_segments =
            if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
                platform_config.num_pci_segments
            } else {
                1
            };

        // Splits [start, end] into one aligned, equally sized MMIO window
        // per PCI segment and returns an AddressAllocator for each.
        let create_mmio_allocators = |start, end, num_pci_segments, alignment| {
            // Start each PCI segment mmio range on an aligned boundary
            let pci_segment_mmio_size =
                (end - start + 1) / (alignment * num_pci_segments as u64) * alignment;

            let mut mmio_allocators = vec![];
            for i in 0..num_pci_segments as u64 {
                let mmio_start = start + i * pci_segment_mmio_size;
                let allocator = Arc::new(Mutex::new(
                    AddressAllocator::new(GuestAddress(mmio_start), pci_segment_mmio_size).unwrap(),
                ));
                mmio_allocators.push(allocator)
            }
            mmio_allocators
        };

        // 32-bit PCI MMIO windows: 4 KiB aligned, carved out of the 32-bit
        // device area defined by the arch layout.
        let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0;
        let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE;
        let pci_mmio32_allocators = create_mmio_allocators(
            start_of_mmio32_area,
            end_of_mmio32_area,
            num_pci_segments,
            4 << 10,
        );

        // 64-bit PCI MMIO windows: 4 GiB aligned, carved out of the device
        // area the MemoryManager reserved above guest RAM.
        let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0;
        let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0;
        let pci_mmio64_allocators = create_mmio_allocators(
            start_of_mmio64_area,
            end_of_mmio64_area,
            num_pci_segments,
            4 << 30,
        );

        let address_manager = Arc::new(AddressManager {
            allocator: memory_manager.lock().unwrap().allocator(),
            #[cfg(target_arch = "x86_64")]
            io_bus,
            mmio_bus,
            vm: vm.clone(),
            device_tree: Arc::clone(&device_tree),
            pci_mmio32_allocators,
            pci_mmio64_allocators,
        });

        // First we create the MSI interrupt manager, the legacy one is created
        // later, after the IOAPIC device creation.
        // The reason we create the MSI one first is because the IOAPIC needs it,
        // and then the legacy interrupt manager needs an IOAPIC. So we're
        // handling a linear dependency chain:
        // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
        let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
            Arc::new(MsiInterruptManager::new(
                Arc::clone(&address_manager.allocator),
                vm,
            ));

        // Reserve the DeviceManager's own ACPI register window.
        // NOTE(review): an MMIO allocation failure here is reported as
        // AllocateIoPort rather than AllocateMmioAddress — possibly a
        // misnomer; confirm before changing, callers may match on it.
        let acpi_address = address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        let mut pci_irq_slots = [0; 32];
        PciSegment::reserve_legacy_interrupts_for_pci_devices(
            &address_manager,
            &mut pci_irq_slots,
        )?;

        // Segment 0 is the default segment; additional segments get their
        // own NUMA node mapping and MMIO allocators.
        let mut pci_segments = vec![PciSegment::new_default_segment(
            &address_manager,
            Arc::clone(&address_manager.pci_mmio32_allocators[0]),
            Arc::clone(&address_manager.pci_mmio64_allocators[0]),
            &pci_irq_slots,
        )?];

        for i in 1..num_pci_segments as usize {
            pci_segments.push(PciSegment::new(
                i as u16,
                numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
                &address_manager,
                Arc::clone(&address_manager.pci_mmio32_allocators[i]),
                Arc::clone(&address_manager.pci_mmio64_allocators[i]),
                &pci_irq_slots,
            )?);
        }

        // When dynamic (hotplug-capable), expose the CpuManager's ACPI
        // register block on the MMIO bus as well.
        if dynamic {
            let acpi_address = address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
                .ok_or(DeviceManagerError::AllocateMmioAddress)?;

            address_manager
                .mmio_bus
                .insert(
                    cpu_manager.clone(),
                    acpi_address.0,
                    CPU_MANAGER_ACPI_SIZE as u64,
                )
                .map_err(DeviceManagerError::BusError)?;

            cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
        }

        // Build the named rate-limiter groups declared in the VM config.
        let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new();
        if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() {
            for rate_limit_group_cfg in rate_limit_groups_cfg {
                let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config;
                let bw
= rate_limit_cfg.bandwidth.unwrap_or_default(); 1111 let ops = rate_limit_cfg.ops.unwrap_or_default(); 1112 let mut rate_limit_group = RateLimiterGroup::new( 1113 &rate_limit_group_cfg.id, 1114 bw.size, 1115 bw.one_time_burst.unwrap_or(0), 1116 bw.refill_time, 1117 ops.size, 1118 ops.one_time_burst.unwrap_or(0), 1119 ops.refill_time, 1120 ) 1121 .map_err(DeviceManagerError::RateLimiterGroupCreate)?; 1122 1123 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?; 1124 1125 rate_limit_group.start_thread(exit_evt).unwrap(); 1126 rate_limit_groups 1127 .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group)); 1128 } 1129 } 1130 1131 let device_manager = DeviceManager { 1132 hypervisor_type, 1133 address_manager: Arc::clone(&address_manager), 1134 console: Arc::new(Console::default()), 1135 interrupt_controller: None, 1136 #[cfg(target_arch = "aarch64")] 1137 cmdline_additions: Vec::new(), 1138 ged_notification_device: None, 1139 config, 1140 memory_manager, 1141 cpu_manager, 1142 virtio_devices: Vec::new(), 1143 bus_devices: Vec::new(), 1144 device_id_cnt, 1145 msi_interrupt_manager, 1146 legacy_interrupt_manager: None, 1147 passthrough_device: None, 1148 vfio_container: None, 1149 iommu_device: None, 1150 iommu_mapping: None, 1151 iommu_attached_devices: None, 1152 pci_segments, 1153 device_tree, 1154 exit_evt, 1155 reset_evt, 1156 #[cfg(target_arch = "aarch64")] 1157 id_to_dev_info: HashMap::new(), 1158 seccomp_action, 1159 numa_nodes, 1160 balloon: None, 1161 activate_evt: activate_evt 1162 .try_clone() 1163 .map_err(DeviceManagerError::EventFd)?, 1164 acpi_address, 1165 selected_segment: 0, 1166 serial_pty: None, 1167 serial_manager: None, 1168 console_pty: None, 1169 console_resize_pipe: None, 1170 original_termios_opt: Arc::new(Mutex::new(None)), 1171 virtio_mem_devices: Vec::new(), 1172 #[cfg(target_arch = "aarch64")] 1173 gpio_device: None, 1174 pvpanic_device: None, 1175 force_iommu, 1176 io_uring_supported: None, 1177 
            aio_supported: None,
            boot_id_list,
            timestamp,
            pending_activations: Arc::new(Mutex::new(Vec::default())),
            acpi_platform_addresses: AcpiPlatformAddresses::default(),
            snapshot,
            rate_limit_groups,
        };

        let device_manager = Arc::new(Mutex::new(device_manager));

        // Expose the DeviceManager itself on the MMIO bus at the ACPI address
        // allocated earlier so the guest can drive device hotplug via ACPI.
        address_manager
            .mmio_bus
            .insert(
                Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
                acpi_address.0,
                DEVICE_MANAGER_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(device_manager)
    }

    /// Returns a clone of the serial PTY pair, if a serial PTY was created.
    pub fn serial_pty(&self) -> Option<PtyPair> {
        self.serial_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    /// Returns a clone of the console PTY pair, if a console PTY was created.
    pub fn console_pty(&self) -> Option<PtyPair> {
        self.console_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    /// Returns the pipe used to signal console (SIGWINCH) resizes, if any.
    pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
        self.console_resize_pipe.as_ref().map(Arc::clone)
    }

    /// Creates every device for the VM: the interrupt controller, legacy and
    /// ACPI devices, the console/serial stack, all virtio devices and the PCI
    /// topology they attach to.
    ///
    /// The pty/pipe arguments let a restore path hand back the endpoints that
    /// were created before the snapshot, so device paths remain stable.
    pub fn create_devices(
        &mut self,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        console_resize_pipe: Option<File>,
        original_termios_opt: Arc<Mutex<Option<termios>>>,
    ) -> DeviceManagerResult<()> {
        trace_scoped!("create_devices");

        let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();

        let interrupt_controller = self.add_interrupt_controller()?;

        self.cpu_manager
            .lock()
            .unwrap()
            .set_interrupt_controller(interrupt_controller.clone());

        // Now we can create the legacy interrupt manager, which needs the freshly
        // formed IOAPIC device.
        let legacy_interrupt_manager: Arc<
            dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
        > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
            &interrupt_controller,
        )));

        {
            // Expose the memory manager's ACPI window (memory hotplug) if it has one.
            if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
                self.address_manager
                    .mmio_bus
                    .insert(
                        Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
                        acpi_address.0,
                        MEMORY_MANAGER_ACPI_SIZE as u64,
                    )
                    .map_err(DeviceManagerError::BusError)?;
            }
        }

        #[cfg(target_arch = "x86_64")]
        self.add_legacy_devices(
            self.reset_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )?;

        #[cfg(target_arch = "aarch64")]
        self.add_legacy_devices(&legacy_interrupt_manager)?;

        {
            self.ged_notification_device = self.add_acpi_devices(
                &legacy_interrupt_manager,
                self.reset_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )?;
        }

        self.original_termios_opt = original_termios_opt;

        self.console = self.add_console_device(
            &legacy_interrupt_manager,
            &mut virtio_devices,
            serial_pty,
            console_pty,
            console_resize_pipe,
        )?;

        if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
            let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
            self.bus_devices
                .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>)
        }
        self.legacy_interrupt_manager = Some(legacy_interrupt_manager);

        virtio_devices.append(&mut self.make_virtio_devices()?);

        self.add_pci_devices(virtio_devices.clone())?;

        self.virtio_devices = virtio_devices;

        if self.config.clone().lock().unwrap().pvpanic {
            self.pvpanic_device = self.add_pvpanic_device()?;
        }

        Ok(())
    }

    /// Captures the serializable state: the device tree plus the device-id counter.
    fn state(&self) ->
DeviceManagerState {
        DeviceManagerState {
            device_tree: self.device_tree.lock().unwrap().clone(),
            device_id_cnt: self.device_id_cnt,
        }
    }

    /// Returns the inclusive guest-address range usable for MSI doorbells:
    /// derived from the vGIC layout on aarch64, the fixed APIC range on x86_64.
    fn get_msi_iova_space(&mut self) -> (u64, u64) {
        #[cfg(target_arch = "aarch64")]
        {
            let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
            let vgic_config = gic::Gic::create_default_config(vcpus.into());
            (
                vgic_config.msi_addr,
                vgic_config.msi_addr + vgic_config.msi_size - 1,
            )
        }
        #[cfg(target_arch = "x86_64")]
        (0xfee0_0000, 0xfeef_ffff)
    }

    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }

    /// Places every virtio device (and VFIO/vfio-user devices) on the PCI
    /// buses, optionally creating a virtio-iommu and recording which devices
    /// sit behind it. Finally registers the per-segment config-space devices.
    #[allow(unused_variables)]
    fn add_pci_devices(
        &mut self,
        virtio_devices: Vec<MetaVirtioDevice>,
    ) -> DeviceManagerResult<()> {
        let iommu_id = String::from(IOMMU_DEVICE_NAME);

        // Create the virtio-iommu device first (if requested) so its mapping
        // can be handed to the devices attached to it below.
        let iommu_device = if self.config.lock().unwrap().iommu {
            let (device, mapping) = virtio_devices::Iommu::new(
                iommu_id.clone(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.get_msi_iova_space(),
                versioned_state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioIommu)?;
            let device = Arc::new(Mutex::new(device));
            self.iommu_device = Some(Arc::clone(&device));
            self.iommu_mapping = Some(mapping);

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(iommu_id.clone(), device_node!(iommu_id, device));

            Some(device)
        } else {
            None
        };

        let mut iommu_attached_devices = Vec::new();
        {
            for handle in virtio_devices {
                // Only devices flagged for the iommu receive the mapping.
                let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
                    self.iommu_mapping.clone()
                } else {
                    None
                };

                let dev_id = self.add_virtio_pci_device(
                    handle.virtio_device,
                    &mapping,
                    handle.id,
                    handle.pci_segment,
                    handle.dma_handler,
                )?;

                if handle.iommu {
                    iommu_attached_devices.push(dev_id);
                }
            }

            let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
            iommu_attached_devices.append(&mut vfio_iommu_device_ids);

            let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
            iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);

            // Add all devices from forced iommu segments
            if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
                if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
                    for segment in iommu_segments {
                        // All 32 possible device slots on the segment are attached.
                        for device in 0..32 {
                            let bdf = PciBdf::new(*segment, 0, device, 0);
                            if !iommu_attached_devices.contains(&bdf) {
                                iommu_attached_devices.push(bdf);
                            }
                        }
                    }
                }
            }

            // The iommu device itself always lands on segment 0 and is never
            // behind itself.
            if let Some(iommu_device) = iommu_device {
                let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
                self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
            }
        }

        // Track each segment's config-space accessors so they participate in
        // the BusDevice bookkeeping.
        for segment in &self.pci_segments {
            #[cfg(target_arch = "x86_64")]
            if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
                self.bus_devices
                    .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
            }

            self.bus_devices
                .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
        }

        Ok(())
    }
    /// Creates the aarch64 interrupt controller (vGIC), restoring its state
    /// (and optionally the PMU) when running from a snapshot.
    #[cfg(target_arch = "aarch64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
            gic::Gic::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
                self.address_manager.vm.clone(),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        // Restore the vGic if this is in the process of restoration
        let id = String::from(gic::GIC_SNAPSHOT_ID);
        if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
            // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
            if self
                .cpu_manager
                .lock()
                .unwrap()
                .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
                .is_err()
            {
                info!("Failed to initialize PMU");
            }

            let vgic_state = vgic_snapshot
                .to_state()
                .map_err(DeviceManagerError::RestoreGetState)?;
            let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
            interrupt_controller
                .lock()
                .unwrap()
                .restore_vgic(vgic_state, &saved_vcpu_states)
                .unwrap();
        }

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// Returns the vGIC, if it has been created already.
    #[cfg(target_arch = "aarch64")]
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }

    /// Creates the x86_64 interrupt controller (userspace IOAPIC) and maps it
    /// at the fixed IOAPIC MMIO range.
    #[cfg(target_arch = "x86_64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let id = String::from(IOAPIC_DEVICE_NAME);

        // Create IOAPIC
        let interrupt_controller = Arc::new(Mutex::new(
            ioapic::Ioapic::new(
                id.clone(),
                APIC_START,
                Arc::clone(&self.msi_interrupt_manager),
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        self.address_manager
            .mmio_bus
            .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
            .map_err(DeviceManagerError::BusError)?;

        self.bus_devices
            .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// Creates the ACPI platform devices: the shutdown/reset device, the GED
    /// notification device (returned for hotplug signalling) and the PM timer.
    /// On x86_64 these are additionally wired to fixed PIO ports and recorded
    /// in `acpi_platform_addresses` for the ACPI tables.
    fn add_acpi_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        reset_evt: EventFd,
        exit_evt: EventFd,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
            exit_evt,
            reset_evt,
            vcpus_kill_signalled,
        )));

        self.bus_devices
            .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            let shutdown_pio_address: u16 = 0x600;

            // NOTE(review): 8 ports are reserved here but only 4 are inserted
            // on the bus below — confirm the asymmetry is intentional.
            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            // Sleep control/status and reset registers all share the same port.
            self.acpi_platform_addresses.sleep_control_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.sleep_status_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.reset_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
        }

        // GED device: legacy IRQ + its own platform MMIO window.
        let ged_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();
        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: ged_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;
        let ged_address = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(
                None,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
                None,
            )
            .ok_or(DeviceManagerError::AllocateMmioAddress)?;
        let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
            interrupt_group,
            ged_irq,
            ged_address,
        )));
        self.address_manager
            .mmio_bus
            .insert(
                ged_device.clone(),
                ged_address.0,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;
        self.bus_devices
            .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);

        let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));

        self.bus_devices
            .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            let pm_timer_pio_address: u16 = 0x608;

            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            self.acpi_platform_addresses.pm_timer_address =
                Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
        }

        Ok(Some(ged_device))
    }

    /// Creates the x86_64 legacy PIO devices: i8042 (reset), CMOS (memory
    /// sizing at ports 0x70/0x71), the firmware debug port (0x402) and the
    /// classic 0x80 debug port.
    #[cfg(target_arch = "x86_64")]
    fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        // Add a shutdown device (i8042)
        let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
            reset_evt.try_clone().unwrap(),
            vcpus_kill_signalled.clone(),
        )));

        self.bus_devices
            .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .io_bus
            .insert(i8042, 0x61, 0x4)
            .map_err(DeviceManagerError::BusError)?;
        {
            // Add a CMOS emulated device
            // Guest memory size is derived from the highest mapped address.
            let mem_size = self
                .memory_manager
                .lock()
                .unwrap()
                .guest_memory()
                .memory()
                .last_addr()
                .0
                + 1;
            let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
            let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);

            let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
                mem_below_4g,
                mem_above_4g,
                reset_evt,
                Some(vcpus_kill_signalled),
            )));

            self.bus_devices
                .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(cmos, 0x70, 0x2)
                .map_err(DeviceManagerError::BusError)?;

            let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));

            self.bus_devices
                .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(fwdebug, 0x402, 0x1)
                .map_err(DeviceManagerError::BusError)?;
        }

        // 0x80 debug port
        let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
        self.bus_devices
            .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
        self.address_manager
            .io_bus
            .insert(debug_port, 0x80, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        Ok(())
    }

    /// Creates the aarch64 legacy MMIO devices: an RTC and a GPIO controller,
    /// each with a dynamically allocated IRQ, recorded in `id_to_dev_info` for
    /// FDT generation.
    #[cfg(target_arch = "aarch64")]
    fn add_legacy_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
    ) -> DeviceManagerResult<()> {
        // Add a RTC device
        let rtc_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: rtc_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));

        self.bus_devices
            .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(rtc_device, addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Rtc, "rtc".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: rtc_irq,
            },
        );

        // Add a GPIO device
        let id = String::from(GPIO_DEVICE_NAME);
        let gpio_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: gpio_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
            id.clone(),
            interrupt_group,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(gpio_device.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.gpio_device = Some(gpio_device.clone());

        self.id_to_dev_info.insert(
            (DeviceType::Gpio, "gpio".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: gpio_irq,
            },
        );

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, gpio_device));

        Ok(())
    }

    /// Creates the x86_64 16550-style serial device at the legacy COM1
    /// location (ports 0x3f8-0x3ff, IRQ 4), restoring its state on snapshot
    /// restore, and records it in the device tree.
    #[cfg(target_arch = "x86_64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
        // Serial is tied to IRQ #4
        let serial_irq = 4;

        let id = String::from(SERIAL_DEVICE_NAME);

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(Serial::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(serial.clone(), 0x3f8, 0x8)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// Creates the aarch64 PL011 serial device at the legacy MMIO location
    /// with a dynamically allocated IRQ, records it for FDT generation and
    /// appends an `earlycon` kernel command line hint.
    #[cfg(target_arch = "aarch64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
        let id = String::from(SERIAL_DEVICE_NAME);

        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            self.timestamp,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: serial_irq,
            },
        );

        self.cmdline_additions
            .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// Applies `f` to the termios settings of `fd` if (and only if) `fd` is a
    /// TTY, saving the first-seen original settings in `original_termios_opt`
    /// so they can be restored later. Non-TTY fds are a silent no-op.
    fn modify_mode<F: FnOnce(&mut termios)>(
        &mut self,
        fd: RawFd,
        f: F,
    ) -> vmm_sys_util::errno::Result<()> {
        // SAFETY: safe because we check the return value of isatty.
        if unsafe { isatty(fd) } != 1 {
            return Ok(());
        }

        // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr
        // and we check the return result.
        let mut termios: termios = unsafe { zeroed() };
        // SAFETY: see above
        let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }
        // Only the very first captured termios is kept as "original".
        let mut original_termios_opt = self.original_termios_opt.lock().unwrap();
        if original_termios_opt.is_none() {
            *original_termios_opt = Some(termios);
        }
        f(&mut termios);
        // SAFETY: Safe because the syscall will only read the extent of termios and we check
        // the return result.
        let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }

        Ok(())
    }

    /// Puts the given fd into raw mode (no-op for non-TTY fds).
    fn set_raw_mode(&mut self, f: &dyn AsRawFd) -> vmm_sys_util::errno::Result<()> {
        // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
        self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
    }

    /// Spawns the seccomp-confined SIGWINCH listener for `pty_sub` and stores
    /// the read end of its pipe in `console_resize_pipe`.
    fn listen_for_sigwinch_on_tty(&mut self, pty_sub: File) -> std::io::Result<()> {
        let seccomp_filter = get_seccomp_filter(
            &self.seccomp_action,
            Thread::PtyForeground,
            self.hypervisor_type,
        )
        .unwrap();

        self.console_resize_pipe =
            Some(Arc::new(start_sigwinch_listener(seccomp_filter, pty_sub)?));

        Ok(())
    }

    /// Creates the virtio-console device according to the configured output
    /// mode (file / pty / tty / null / off) and appends it to `virtio_devices`.
    /// Returns the resizer only when the console is bound to the TTY; Socket
    /// mode is rejected for the virtio console.
    fn add_virtio_console_device(
        &mut self,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_pty: Option<PtyPair>,
        resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
        let console_config = self.config.lock().unwrap().console.clone();
        let endpoint = match console_config.mode {
            ConsoleOutputMode::File => {
                let file = File::create(console_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
                Endpoint::File(file)
            }
            ConsoleOutputMode::Pty => {
                // Reuse a pre-existing pty (restore path) or create a fresh one.
                if let Some(pty) = console_pty {
                    self.config.lock().unwrap().console.file = Some(pty.path.clone());
                    let file = pty.main.try_clone().unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(pty)));
                    self.console_resize_pipe = resize_pipe.map(Arc::new);
                    Endpoint::PtyPair(file.try_clone().unwrap(), file)
                } else {
                    let (main, sub, path) =
                        create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?;
                    self.set_raw_mode(&sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().console.file = Some(path.clone());
                    let file = main.try_clone().unwrap();
                    assert!(resize_pipe.is_none());
                    self.listen_for_sigwinch_on_tty(sub).unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
                    Endpoint::PtyPair(file.try_clone().unwrap(), file)
                }
            }
            ConsoleOutputMode::Tty => {
                // Duplicating the file descriptors like this is needed as otherwise
                // they will be closed on a reboot and the numbers reused

                // SAFETY: FFI call to dup. Trivially safe.
                let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
                if stdout == -1 {
                    return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
                }
                // SAFETY: stdout is valid and owned solely by us.
                let stdout = unsafe { File::from_raw_fd(stdout) };

                // Make sure stdout is in raw mode, if it's a terminal.
                let _ = self.set_raw_mode(&stdout);

                // SAFETY: FFI call. Trivially safe.
                if unsafe { libc::isatty(libc::STDOUT_FILENO) } == 1 {
                    self.listen_for_sigwinch_on_tty(stdout.try_clone().unwrap())
                        .unwrap();
                }

                // If an interactive TTY then we can accept input
                // SAFETY: FFI call. Trivially safe.
                if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
                    // SAFETY: FFI call to dup. Trivially safe.
                    let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
                    if stdin == -1 {
                        return vmm_sys_util::errno::errno_result()
                            .map_err(DeviceManagerError::DupFd);
                    }
                    // SAFETY: stdin is valid and owned solely by us.
                    let stdin = unsafe { File::from_raw_fd(stdin) };

                    Endpoint::FilePair(stdout, stdin)
                } else {
                    Endpoint::File(stdout)
                }
            }
            ConsoleOutputMode::Socket => {
                return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
            }
            ConsoleOutputMode::Null => Endpoint::Null,
            ConsoleOutputMode::Off => return Ok(None),
        };
        let id = String::from(CONSOLE_DEVICE_NAME);

        let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
            id.clone(),
            endpoint,
            self.console_resize_pipe
                .as_ref()
                .map(|p| p.try_clone().unwrap()),
            self.force_iommu | console_config.iommu,
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateVirtioConsole)?;
        let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
        virtio_devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_console_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: console_config.iommu,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_console_device));

        // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
        Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
            Some(console_resizer)
        } else {
            None
        })
    }

    /// Creates the serial device (per the configured serial output mode), the
    /// serial manager thread where needed, and the virtio console, returning
    /// the combined `Console` handle.
    fn add_console_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        console_resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
            ConsoleOutputMode::File => Some(Box::new(
                File::create(serial_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::SerialOutputFileOpen)?,
            )),
            ConsoleOutputMode::Pty => {
                // Writer stays None: the SerialManager drives pty I/O itself.
                if let Some(pty) = serial_pty {
                    self.config.lock().unwrap().serial.file = Some(pty.path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(pty)));
                } else {
                    let (main, sub, path) =
                        create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
                    self.set_raw_mode(&sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().serial.file = Some(path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
                }
                None
            }
            ConsoleOutputMode::Tty => {
                let out = stdout();
                let _ = self.set_raw_mode(&out);
                Some(Box::new(out))
            }
            ConsoleOutputMode::Off | ConsoleOutputMode::Null | ConsoleOutputMode::Socket => None,
        };
        if serial_config.mode != ConsoleOutputMode::Off {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            self.serial_manager = match serial_config.mode {
ConsoleOutputMode::Pty | ConsoleOutputMode::Tty | ConsoleOutputMode::Socket => { 2128 let serial_manager = SerialManager::new( 2129 serial, 2130 self.serial_pty.clone(), 2131 serial_config.mode, 2132 serial_config.socket, 2133 ) 2134 .map_err(DeviceManagerError::CreateSerialManager)?; 2135 if let Some(mut serial_manager) = serial_manager { 2136 serial_manager 2137 .start_thread( 2138 self.exit_evt 2139 .try_clone() 2140 .map_err(DeviceManagerError::EventFd)?, 2141 ) 2142 .map_err(DeviceManagerError::SpawnSerialManager)?; 2143 Some(Arc::new(serial_manager)) 2144 } else { 2145 None 2146 } 2147 } 2148 _ => None, 2149 }; 2150 } 2151 2152 let console_resizer = 2153 self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?; 2154 2155 Ok(Arc::new(Console { console_resizer })) 2156 } 2157 2158 fn add_tpm_device( 2159 &mut self, 2160 tpm_path: PathBuf, 2161 ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> { 2162 // Create TPM Device 2163 let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| { 2164 DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e)) 2165 })?; 2166 let tpm = Arc::new(Mutex::new(tpm)); 2167 2168 // Add TPM Device to mmio 2169 self.address_manager 2170 .mmio_bus 2171 .insert( 2172 tpm.clone(), 2173 arch::layout::TPM_START.0, 2174 arch::layout::TPM_SIZE, 2175 ) 2176 .map_err(DeviceManagerError::BusError)?; 2177 2178 Ok(tpm) 2179 } 2180 2181 fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2182 let mut devices: Vec<MetaVirtioDevice> = Vec::new(); 2183 2184 // Create "standard" virtio devices (net/block/rng) 2185 devices.append(&mut self.make_virtio_block_devices()?); 2186 devices.append(&mut self.make_virtio_net_devices()?); 2187 devices.append(&mut self.make_virtio_rng_devices()?); 2188 2189 // Add virtio-fs if required 2190 devices.append(&mut self.make_virtio_fs_devices()?); 2191 2192 // Add virtio-pmem if required 2193 
        devices.append(&mut self.make_virtio_pmem_devices()?);

        // Add virtio-vsock if required
        devices.append(&mut self.make_virtio_vsock_devices()?);

        devices.append(&mut self.make_virtio_mem_devices()?);

        // Add virtio-balloon if required
        devices.append(&mut self.make_virtio_balloon_devices()?);

        // Add virtio-watchdog device
        devices.append(&mut self.make_virtio_watchdog_devices()?);

        // Add vDPA devices if required
        devices.append(&mut self.make_vdpa_devices()?);

        Ok(devices)
    }

    // Cache whether aio is supported to avoid checking for every block device
    fn aio_is_supported(&mut self) -> bool {
        if let Some(supported) = self.aio_supported {
            return supported;
        }

        let supported = block_aio_is_supported();
        self.aio_supported = Some(supported);
        supported
    }

    // Cache whether io_uring is supported to avoid probing for every block device
    fn io_uring_is_supported(&mut self) -> bool {
        if let Some(supported) = self.io_uring_supported {
            return supported;
        }

        let supported = block_io_uring_is_supported();
        self.io_uring_supported = Some(supported);
        supported
    }

    /// Creates a single virtio-block (or vhost-user-blk) device from its
    /// configuration, picking the fastest supported disk backend for the
    /// detected image type. Assigns a generated id when the config has none.
    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        // Present only when restoring; passed down so the device can rebuild
        // its versioned state.
        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let
            vhost_user_block = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(
                    id.clone(),
                    vu_cfg,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            // Select a DiskFile backend: async (io_uring/aio) when available
            // and not disabled, synchronous otherwise.
            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");

                        // The runtime cfg! check above guarantees this arm is
                        // only reached with the feature compiled in.
                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(
                                FixedVhdDiskAsync::new(file)
                                    .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                            ) as Box<dyn DiskFile>
                        }
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous RAW disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                        }
                    } else if !disk_cfg.disable_aio && self.aio_is_supported() {
                        info!("Using asynchronous RAW disk file (aio)");
                        Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            let rate_limit_group =
                if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
                    // Create an anonymous RateLimiterGroup that is dropped when the Disk
                    // is dropped.
                    let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
                    let ops = rate_limiter_cfg.ops.unwrap_or_default();
                    let mut rate_limit_group = RateLimiterGroup::new(
                        disk_cfg.id.as_ref().unwrap(),
                        bw.size,
                        bw.one_time_burst.unwrap_or(0),
                        bw.refill_time,
                        ops.size,
                        ops.one_time_burst.unwrap_or(0),
                        ops.refill_time,
                    )
                    .map_err(DeviceManagerError::RateLimiterGroupCreate)?;

                    // NOTE(review): start_thread failure aborts via unwrap()
                    // instead of being mapped to a DeviceManagerError —
                    // consider propagating this like the errors above.
                    rate_limit_group
                        .start_thread(
                            self.exit_evt
                                .try_clone()
                                .map_err(DeviceManagerError::EventFd)?,
                        )
                        .unwrap();

                    Some(Arc::new(rate_limit_group))
                } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
                    // Reuse a named, shared rate limiter group created earlier.
                    self.rate_limit_groups.get(rate_limit_group).cloned()
                } else {
                    None
                };

            let virtio_block = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    disk_cfg.serial.clone(),
                    self.seccomp_action.clone(),
                    rate_limit_group,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Instantiates one virtio-block device per configured disk, writing any
    /// generated ids back into the VM config.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }

    /// Creates a single virtio-net (or vhost-user-net) device from its
    /// configuration, backed by a named TAP interface, pre-opened TAP fds,
    /// or a newly created TAP device.
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    net_cfg.mtu,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                ) {
                    Ok(vun_device) => vun_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserNet(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_net as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            // Versioned state is only present on restore.
            let state = snapshot
                .map(|s| s.to_versioned_state())
                .transpose()
                .map_err(DeviceManagerError::RestoreGetState)?;

            // Three backends: a named TAP interface, pre-opened TAP fds, or
            // a TAP device created from ip/mask.
            let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        Some(tap_if_name),
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else if let Some(fds) = &net_cfg.fds {
                let net = virtio_devices::Net::from_tap_fds(
                    id.clone(),
                    fds,
                    Some(net_cfg.mac),
                    net_cfg.mtu,
                    self.force_iommu | net_cfg.iommu,
                    net_cfg.queue_size,
                    self.seccomp_action.clone(),
                    net_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                )
                .map_err(DeviceManagerError::CreateVirtioNet)?;

                // SAFETY: 'fds' are valid because TAP devices are created successfully
                unsafe {
                    self.config.lock().unwrap().add_preserved_fds(fds.clone());
                }

                Arc::new(Mutex::new(net))
            } else {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        None,
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            };

            (
                Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_net as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: net_cfg.iommu,
            id,
            pci_segment: net_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add virtio-net and vhost-user-net devices
    fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        let mut net_devices = self.config.lock().unwrap().net.clone();
        if let Some(net_list_cfg) = &mut net_devices {
            for net_cfg in net_list_cfg.iter_mut() {
                devices.push(self.make_virtio_net_device(net_cfg)?);
            }
        }
        self.config.lock().unwrap().net = net_devices;

        Ok(devices)
    }

    /// Creates the virtio-rng device when the configured entropy source path
    /// is valid UTF-8.
    fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        if let Some(rng_path) = rng_config.src.to_str()
        {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_rng_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: rng_config.iommu,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_rng_device));
        }

        Ok(devices)
    }

    /// Creates a single vhost-user-fs (virtiofs) device from its
    /// configuration; fails if the backend socket path is not valid UTF-8.
    fn make_virtio_fs_device(
        &mut self,
        fs_cfg: &mut FsConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &fs_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
            fs_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-fs device: {:?}", fs_cfg);

        let mut node = device_node!(id);

        if let Some(fs_socket) = fs_cfg.socket.to_str() {
            let virtio_fs_device = Arc::new(Mutex::new(
                virtio_devices::vhost_user::Fs::new(
                    id.clone(),
                    fs_socket,
                    &fs_cfg.tag,
                    fs_cfg.num_queues,
                    fs_cfg.queue_size,
                    None,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioFs)?,
            ));

            // Update the device tree with the migratable device.
            node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_fs_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id,
                pci_segment: fs_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            Err(DeviceManagerError::NoVirtioFsSock)
        }
    }

    /// Instantiates one virtio-fs device per configured fs entry.
    fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut fs_devices = self.config.lock().unwrap().fs.clone();
        if let Some(fs_list_cfg) = &mut fs_devices {
            for fs_cfg in fs_list_cfg.iter_mut() {
                devices.push(self.make_virtio_fs_device(fs_cfg)?);
            }
        }
        self.config.lock().unwrap().fs = fs_devices;

        Ok(devices)
    }

    /// Creates a single virtio-pmem device, mmap-ing the backing file and
    /// registering the mapping with the memory manager. On restore, reuses
    /// the MMIO range recorded in the device tree.
    fn make_virtio_pmem_device(
        &mut self,
        pmem_cfg: &mut PmemConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &pmem_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
            pmem_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-pmem device: {:?}", pmem_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
            info!("Restoring virtio-pmem {} resources", id);

            // Exactly one MmioAddressRange resource is expected per device.
            let mut region_range: Option<(u64, u64)> = None;
            for resource in node.resources.iter() {
                match resource {
                    Resource::MmioAddressRange { base, size } => {
                        if region_range.is_some() {
                            return Err(DeviceManagerError::ResourceAlreadyExists);
                        }

                        region_range = Some((*base, *size));
                    }
                    _ => {
                        error!("Unexpected resource {:?} for {}", resource, id);
                    }
                }
            }

            if region_range.is_none() {
                return Err(DeviceManagerError::MissingVirtioPmemResources);
            }

            region_range
        } else {
            None
        };

        // Backing by a directory creates an unnamed temporary file
        // (O_TMPFILE) that must be explicitly sized.
        let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
            if pmem_cfg.size.is_none() {
                return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
            }
            (O_TMPFILE, true)
        } else {
            (0, false)
        };

        let mut file = OpenOptions::new()
            .read(true)
            .write(!pmem_cfg.discard_writes)
            .custom_flags(custom_flags)
            .open(&pmem_cfg.file)
            .map_err(DeviceManagerError::PmemFileOpen)?;

        let size = if let Some(size) = pmem_cfg.size {
            if set_len {
                file.set_len(size)
                    .map_err(DeviceManagerError::PmemFileSetLen)?;
            }
            size
        } else {
            // No explicit size: use the current file length.
            file.seek(SeekFrom::End(0))
                .map_err(DeviceManagerError::PmemFileSetLen)?
        };

        // Region size must be a multiple of 2MiB.
        if size % 0x20_0000 != 0 {
            return Err(DeviceManagerError::PmemSizeNotAligned);
        }

        let (region_base, region_size) = if let Some((base, size)) = region_range {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(
                    Some(GuestAddress(base)),
                    size as GuestUsize,
                    Some(0x0020_0000),
                )
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base, size)
        } else {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            let base = self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(None, size as GuestUsize, Some(0x0020_0000))
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base.raw_value(), size)
        };

        let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
        // MAP_PRIVATE keeps guest writes out of the backing file when
        // discard_writes is requested.
        let mmap_region = MmapRegion::build(
            Some(FileOffset::new(cloned_file, 0)),
            region_size as usize,
            PROT_READ | PROT_WRITE,
            MAP_NORESERVE
                | if pmem_cfg.discard_writes {
                    MAP_PRIVATE
                } else {
                    MAP_SHARED
                },
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;
        let host_addr: u64 = mmap_region.as_ptr() as u64;

        let mem_slot = self
            .memory_manager
            .lock()
            .unwrap()
            .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
            .map_err(DeviceManagerError::MemoryManager)?;

        let mapping = virtio_devices::UserspaceMapping {
            host_addr,
            mem_slot,
            addr: GuestAddress(region_base),
            len: region_size,
            mergeable: false,
        };

        let virtio_pmem_device = Arc::new(Mutex::new(
            virtio_devices::Pmem::new(
                id.clone(),
                file,
                GuestAddress(region_base),
                mapping,
                mmap_region,
                self.force_iommu | pmem_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioPmem)?,
        ));

        // Update the device tree with correct resource information and with
        // the migratable device.
        node.resources.push(Resource::MmioAddressRange {
            base: region_base,
            size: region_size,
        });
        node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_pmem_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: pmem_cfg.iommu,
            id,
            pci_segment: pmem_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Instantiates one virtio-pmem device per configured pmem entry.
    fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add virtio-pmem if required
        let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
        if let Some(pmem_list_cfg) = &mut pmem_devices {
            for pmem_cfg in pmem_list_cfg.iter_mut() {
                devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
            }
        }
        self.config.lock().unwrap().pmem = pmem_devices;

        Ok(devices)
    }

    /// Creates the virtio-vsock device backed by a host Unix socket.
    fn make_virtio_vsock_device(
        &mut self,
        vsock_cfg: &mut VsockConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vsock_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
            vsock_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-vsock device: {:?}", vsock_cfg);

        let socket_path = vsock_cfg
            .socket
            .to_str()
            .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
        let backend =
            virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                .map_err(DeviceManagerError::CreateVsockBackend)?;

        let vsock_device = Arc::new(Mutex::new(
            virtio_devices::Vsock::new(
                id.clone(),
                vsock_cfg.cid,
                vsock_cfg.socket.clone(),
                backend,
                self.force_iommu | vsock_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioVsock)?,
        ));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vsock_device));

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&vsock_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vsock_cfg.iommu,
            id,
            pci_segment: vsock_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Creates the vsock device; the configuration allows at most one.
    fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut vsock = self.config.lock().unwrap().vsock.clone();
        if let Some(ref mut vsock_cfg) = &mut vsock {
            devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
        }
        self.config.lock().unwrap().vsock = vsock;

        Ok(devices)
    }

    /// Creates a virtio-mem device for every memory zone that has a
    /// virtio-mem region, wiring each device back into its zone for
    /// later resize requests.
    fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mm = self.memory_manager.clone();
        let mut mm = mm.lock().unwrap();
        for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
            if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
                info!("Creating virtio-mem device: id = {}", memory_zone_id);

                let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
                    .map(|i| i as u16);

                let virtio_mem_device = Arc::new(Mutex::new(
                    virtio_devices::Mem::new(
                        memory_zone_id.clone(),
                        virtio_mem_zone.region(),
                        self.seccomp_action.clone(),
                        node_id,
                        virtio_mem_zone.hotplugged_size(),
                        virtio_mem_zone.hugepages(),
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        virtio_mem_zone.blocks_state().clone(),
                        versioned_state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
                            .map_err(DeviceManagerError::RestoreGetState)?,
                    )
                    .map_err(DeviceManagerError::CreateVirtioMem)?,
                ));

                // Update the virtio-mem zone so that it has a handle onto the
                // virtio-mem device, which will be used for triggering a resize
                // if needed.
                virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));

                self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));

                devices.push(MetaVirtioDevice {
                    virtio_device: Arc::clone(&virtio_mem_device)
                        as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                    iommu: false,
                    id: memory_zone_id.clone(),
                    pci_segment: 0,
                    dma_handler: None,
                });

                // Fill the device tree with a new node. In case of restore, we
                // know there is nothing to do, so we can simply override the
                // existing entry.
                self.device_tree.lock().unwrap().insert(
                    memory_zone_id.clone(),
                    device_node!(memory_zone_id, virtio_mem_device),
                );
            }
        }

        Ok(devices)
    }

    /// Creates the optional virtio-balloon device and stores a handle in
    /// `self.balloon` for later resize operations.
    fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
            let id = String::from(BALLOON_DEVICE_NAME);
            info!("Creating virtio-balloon device: id = {}", id);

            let virtio_balloon_device = Arc::new(Mutex::new(
                virtio_devices::Balloon::new(
                    id.clone(),
                    balloon_config.size,
                    balloon_config.deflate_on_oom,
                    balloon_config.free_page_reporting,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBalloon)?,
            ));

            self.balloon = Some(virtio_balloon_device.clone());

            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_balloon_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_balloon_device));
        }

        Ok(devices)
    }

    /// Creates the virtio-watchdog device when enabled in the configuration;
    /// the watchdog fires the VM reset event on guest hang.
    fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if !self.config.lock().unwrap().watchdog {
            return Ok(devices);
        }

        let id = String::from(WATCHDOG_DEVICE_NAME);
        info!("Creating virtio-watchdog device: id = {}", id);

        let virtio_watchdog_device = Arc::new(Mutex::new(
            virtio_devices::Watchdog::new(
                id.clone(),
                self.reset_evt.try_clone().unwrap(),
self.seccomp_action.clone(), 3125 self.exit_evt 3126 .try_clone() 3127 .map_err(DeviceManagerError::EventFd)?, 3128 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3129 .map_err(DeviceManagerError::RestoreGetState)?, 3130 ) 3131 .map_err(DeviceManagerError::CreateVirtioWatchdog)?, 3132 )); 3133 devices.push(MetaVirtioDevice { 3134 virtio_device: Arc::clone(&virtio_watchdog_device) 3135 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3136 iommu: false, 3137 id: id.clone(), 3138 pci_segment: 0, 3139 dma_handler: None, 3140 }); 3141 3142 self.device_tree 3143 .lock() 3144 .unwrap() 3145 .insert(id.clone(), device_node!(id, virtio_watchdog_device)); 3146 3147 Ok(devices) 3148 } 3149 3150 fn make_vdpa_device( 3151 &mut self, 3152 vdpa_cfg: &mut VdpaConfig, 3153 ) -> DeviceManagerResult<MetaVirtioDevice> { 3154 let id = if let Some(id) = &vdpa_cfg.id { 3155 id.clone() 3156 } else { 3157 let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?; 3158 vdpa_cfg.id = Some(id.clone()); 3159 id 3160 }; 3161 3162 info!("Creating vDPA device: {:?}", vdpa_cfg); 3163 3164 let device_path = vdpa_cfg 3165 .path 3166 .to_str() 3167 .ok_or(DeviceManagerError::CreateVdpaConvertPath)?; 3168 3169 let vdpa_device = Arc::new(Mutex::new( 3170 virtio_devices::Vdpa::new( 3171 id.clone(), 3172 device_path, 3173 self.memory_manager.lock().unwrap().guest_memory(), 3174 vdpa_cfg.num_queues as u16, 3175 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3176 .map_err(DeviceManagerError::RestoreGetState)?, 3177 ) 3178 .map_err(DeviceManagerError::CreateVdpa)?, 3179 )); 3180 3181 // Create the DMA handler that is required by the vDPA device 3182 let vdpa_mapping = Arc::new(VdpaDmaMapping::new( 3183 Arc::clone(&vdpa_device), 3184 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3185 )); 3186 3187 self.device_tree 3188 .lock() 3189 .unwrap() 3190 .insert(id.clone(), device_node!(id, vdpa_device)); 3191 3192 Ok(MetaVirtioDevice { 3193 virtio_device: vdpa_device as 
Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3194 iommu: vdpa_cfg.iommu, 3195 id, 3196 pci_segment: vdpa_cfg.pci_segment, 3197 dma_handler: Some(vdpa_mapping), 3198 }) 3199 } 3200 3201 fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3202 let mut devices = Vec::new(); 3203 // Add vdpa if required 3204 let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone(); 3205 if let Some(vdpa_list_cfg) = &mut vdpa_devices { 3206 for vdpa_cfg in vdpa_list_cfg.iter_mut() { 3207 devices.push(self.make_vdpa_device(vdpa_cfg)?); 3208 } 3209 } 3210 self.config.lock().unwrap().vdpa = vdpa_devices; 3211 3212 Ok(devices) 3213 } 3214 3215 fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> { 3216 let start_id = self.device_id_cnt; 3217 loop { 3218 // Generate the temporary name. 3219 let name = format!("{}{}", prefix, self.device_id_cnt); 3220 // Increment the counter. 3221 self.device_id_cnt += Wrapping(1); 3222 // Check if the name is already in use. 3223 if !self.boot_id_list.contains(&name) 3224 && !self.device_tree.lock().unwrap().contains_key(&name) 3225 { 3226 return Ok(name); 3227 } 3228 3229 if self.device_id_cnt == start_id { 3230 // We went through a full loop and there's nothing else we can 3231 // do. 3232 break; 3233 } 3234 } 3235 Err(DeviceManagerError::NoAvailableDeviceName) 3236 } 3237 3238 fn add_passthrough_device( 3239 &mut self, 3240 device_cfg: &mut DeviceConfig, 3241 ) -> DeviceManagerResult<(PciBdf, String)> { 3242 // If the passthrough device has not been created yet, it is created 3243 // here and stored in the DeviceManager structure for future needs. 
        if self.passthrough_device.is_none() {
            self.passthrough_device = Some(
                self.address_manager
                    .vm
                    .create_passthrough_device()
                    .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
            );
        }

        self.add_vfio_device(device_cfg)
    }

    /// Create a new VFIO container backed by a duplicate of the hypervisor
    /// passthrough device file descriptor.
    ///
    /// Requires `add_passthrough_device()` to have created the passthrough
    /// device first, otherwise `NoDevicePassthroughSupport` is returned.
    fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
        let passthrough_device = self
            .passthrough_device
            .as_ref()
            .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

        let dup = passthrough_device
            .try_clone()
            .map_err(DeviceManagerError::VfioCreate)?;

        Ok(Arc::new(
            VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
        ))
    }

    /// Create a VFIO PCI device from `device_cfg`, place it on the PCI bus
    /// and set up its DMA mappings. Returns the assigned BDF and device name.
    fn add_vfio_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't let all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device. That's simply because the map/unmap operations happen at the
        // VFIO container level.
        let vfio_container = if device_cfg.iommu {
            let vfio_container = self.create_vfio_container()?;

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
            ));

            // A device behind the vIOMMU delegates its DMA mappings to the
            // virtual IOMMU; it must therefore exist.
            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }

            vfio_container
        } else if let Some(vfio_container) = &self.vfio_container {
            Arc::clone(vfio_container)
        } else {
            let vfio_container = self.create_vfio_container()?;
            needs_dma_mapping = true;
            self.vfio_container = Some(Arc::clone(&vfio_container));

            vfio_container
        };

        let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
            .map_err(DeviceManagerError::VfioCreate)?;

        if needs_dma_mapping {
            // Register DMA mapping in IOMMU.
            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem device itself.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    vfio_container
                        .vfio_dma_map(
                            region.start_addr().raw_value(),
                            region.len(),
                            region.as_ptr() as u64,
                        )
                        .map_err(DeviceManagerError::VfioDmaMap)?;
                }
            }

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
            ));

            // virtio-mem devices take over map/unmap for their own regions.
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Container,
                        vfio_mapping.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }
        }

        // Legacy (INTx) interrupt support, when a legacy interrupt manager
        // has been set up. The IRQ is picked from the per-segment slot table.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let memory_manager = self.memory_manager.clone();

        let vfio_pci_device = VfioPciDevice::new(
            vfio_name.clone(),
            &self.address_manager.vm,
            vfio_device,
            vfio_container,
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            device_cfg.iommu,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
        )
        .map_err(DeviceManagerError::VfioPciCreate)?;

        let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

        let new_resources = self.add_pci_device(
            vfio_pci_device.clone(),
            vfio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        vfio_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioMapRegion)?;

        let mut node = device_node!(vfio_name, vfio_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_name.clone(), node);

        Ok((pci_device_bdf, vfio_name))
    }

    /// Allocate BARs for `pci_device`, add it to the PCI bus of `segment_id`
    /// and register its BAR mappings on the I/O (x86 only) and MMIO buses.
    ///
    /// `resources` carries previously-allocated BAR resources on the restore
    /// path; `None` lets the allocators pick fresh ranges. Returns the
    /// resources actually assigned, to be stored in the device tree.
    fn add_pci_device(
        &mut self,
        bus_device: Arc<Mutex<dyn BusDevice>>,
        pci_device: Arc<Mutex<dyn PciDevice>>,
        segment_id: u16,
        bdf: PciBdf,
        resources: Option<Vec<Resource>>,
    ) -> DeviceManagerResult<Vec<Resource>> {
        let bars = pci_device
            .lock()
            .unwrap()
            .allocate_bars(
                &self.address_manager.allocator,
                &mut self.pci_segments[segment_id as usize]
                    .mem32_allocator
                    .lock()
                    .unwrap(),
                &mut self.pci_segments[segment_id as usize]
                    .mem64_allocator
                    .lock()
                    .unwrap(),
                resources,
            )
            .map_err(DeviceManagerError::AllocateBars)?;

        let mut pci_bus = self.pci_segments[segment_id as usize]
            .pci_bus
            .lock()
            .unwrap();

        pci_bus
            .add_device(bdf.device() as u32, pci_device)
            .map_err(DeviceManagerError::AddPciDevice)?;

        // Keep the device alive and reachable for bus-level operations.
        self.bus_devices.push(Arc::clone(&bus_device));

        pci_bus
            .register_mapping(
                bus_device,
                #[cfg(target_arch = "x86_64")]
                self.address_manager.io_bus.as_ref(),
                self.address_manager.mmio_bus.as_ref(),
                bars.clone(),
            )
            .map_err(DeviceManagerError::AddPciDevice)?;

        let mut new_resources = Vec::new();
        for bar in bars {
            new_resources.push(Resource::PciBar {
                index: bar.idx(),
                base: bar.addr(),
                size: bar.size(),
                type_: bar.region_type().into(),
                prefetchable: bar.prefetchable().into(),
            });
        }

        Ok(new_resources)
    }

    /// Create all VFIO devices listed in the VM configuration, returning the
    /// BDFs of those attached to the virtual IOMMU.
    fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut iommu_attached_device_ids = Vec::new();
        let mut devices = self.config.lock().unwrap().devices.clone();

        if let Some(device_list_cfg) = &mut devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (device_id, _) = self.add_passthrough_device(device_cfg)?;
                if device_cfg.iommu && self.iommu_device.is_some() {
                    iommu_attached_device_ids.push(device_id);
                }
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().devices = devices;

        Ok(iommu_attached_device_ids)
    }

    /// Create a vfio-user PCI device from `device_cfg` (a device served over
    /// a vfio-user socket), place it on the PCI bus and set up DMA mappings.
    fn add_vfio_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_user_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;

        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let client = Arc::new(Mutex::new(
            vfio_user::Client::new(&device_cfg.socket)
                .map_err(DeviceManagerError::VfioUserCreateClient)?,
        ));

        let memory_manager = self.memory_manager.clone();

        let mut vfio_user_pci_device = VfioUserPciDevice::new(
            vfio_user_name.clone(),
            &self.address_manager.vm,
            client.clone(),
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
        )
        .map_err(DeviceManagerError::VfioUserCreate)?;

        let memory = self.memory_manager.lock().unwrap().guest_memory();
        let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
        // Let virtio-mem devices drive map/unmap for their own regions.
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Device(pci_device_bdf.into()),
                    vfio_user_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }

        // Map all current guest memory regions for DMA.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_user_pci_device
                    .dma_map(region)
                    .map_err(DeviceManagerError::VfioUserDmaMap)?;
            }
        }

        let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

        let new_resources = self.add_pci_device(
            vfio_user_pci_device.clone(),
            vfio_user_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Note it is required to call 'add_pci_device()' in advance to have the list of
        // mmio regions provisioned correctly
        vfio_user_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioUserMapRegion)?;

        let mut node = device_node!(vfio_user_name, vfio_user_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_user_name.clone(), node);

        Ok((pci_device_bdf, vfio_user_name))
    }

    /// Create all vfio-user devices listed in the VM configuration.
    ///
    /// Returns an empty vector: vfio-user devices are never attached to the
    /// virtual IOMMU (unlike `add_vfio_devices`).
    fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut user_devices = self.config.lock().unwrap().user_devices.clone();

        if let Some(device_list_cfg) = &mut user_devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().user_devices = user_devices;

        Ok(vec![])
    }

    /// Wrap a virtio device into a virtio-pci transport device and place it
    /// on the PCI bus, wiring up MSI-X, ioeventfds and DMA handling.
    fn add_virtio_pci_device(
        &mut self,
        virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu_mapping: &Option<Arc<IommuMapping>>,
        virtio_device_id: String,
        pci_segment_id: u16,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> DeviceManagerResult<PciBdf> {
        let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");

        // Add the new virtio-pci node to the device tree.
        let mut node = device_node!(id);
        node.children = vec![virtio_device_id.clone()];

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        // Update the existing virtio node by setting the parent.
        if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
            node.parent = Some(id.clone());
        } else {
            return Err(DeviceManagerError::MissingNode);
        }

        // Allows support for one MSI-X vector per queue. It also adds 1
        // as we need to take into account the dedicated vector to notify
        // about a virtio config change.
        let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;

        // Create the AccessPlatform trait from the implementation IommuMapping.
        // This will provide address translation for any virtio device sitting
        // behind a vIOMMU.
        let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
        {
            Some(Arc::new(AccessPlatformMapping::new(
                pci_device_bdf.into(),
                mapping.clone(),
            )))
        } else {
            None
        };

        let memory = self.memory_manager.lock().unwrap().guest_memory();

        // Map DMA ranges if a DMA handler is available and if the device is
        // not attached to a virtual IOMMU.
        if let Some(dma_handler) = &dma_handler {
            if iommu_mapping.is_some() {
                // Device sits behind the vIOMMU: delegate mappings to it.
                if let Some(iommu) = &self.iommu_device {
                    iommu
                        .lock()
                        .unwrap()
                        .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
                } else {
                    return Err(DeviceManagerError::MissingVirtualIommu);
                }
            } else {
                // Let every virtio-mem device handle the DMA map/unmap through the
                // DMA handler provided.
                for virtio_mem_device in self.virtio_mem_devices.iter() {
                    virtio_mem_device
                        .lock()
                        .unwrap()
                        .add_dma_mapping_handler(
                            VirtioMemMappingSource::Device(pci_device_bdf.into()),
                            dma_handler.clone(),
                        )
                        .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
                }

                // Do not register virtio-mem regions, as they are handled directly by
                // virtio-mem devices.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        // Identity mapping: IOVA == GPA when no vIOMMU is involved.
                        let gpa = region.start_addr().0;
                        let size = region.len();
                        dma_handler
                            .map(gpa, gpa, size)
                            .map_err(DeviceManagerError::VirtioDmaMap)?;
                    }
                }
            }
        }

        let device_type = virtio_device.lock().unwrap().device_type();
        let virtio_pci_device = Arc::new(Mutex::new(
            VirtioPciDevice::new(
                id.clone(),
                memory,
                virtio_device,
                msix_num,
                access_platform,
                &self.msi_interrupt_manager,
                pci_device_bdf.into(),
                self.activate_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                // All device types *except* virtio block devices should be allocated a 64-bit bar
                // The block devices should be given a 32-bit BAR so that they are easily accessible
                // to firmware without requiring excessive identity mapping.
                // The exception being if not on the default PCI segment.
                pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
                dma_handler,
                self.pending_activations.clone(),
                vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
            )
            .map_err(DeviceManagerError::VirtioDevice)?,
        ));

        let new_resources = self.add_pci_device(
            virtio_pci_device.clone(),
            virtio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Register one ioeventfd per queue notification address so guest
        // kicks bypass the VM exit handler path.
        let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
        for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
            let io_addr = IoEventAddress::Mmio(addr);
            self.address_manager
                .vm
                .register_ioevent(event, &io_addr, None)
                .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
        }

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
        self.device_tree.lock().unwrap().insert(id, node);

        Ok(pci_device_bdf)
    }

    /// Create the pvpanic PCI device (guest panic notification) on segment 0
    /// and register it in the device tree.
    fn add_pvpanic_device(
        &mut self,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
        let id = String::from(PVPANIC_DEVICE_NAME);
        // Always placed on the default PCI segment; pci_resources() may
        // override this on the restore path (shadowed below).
        let pci_segment_id = 0x0_u16;

        info!("Creating pvpanic device {}", id);

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
            .map_err(DeviceManagerError::PvPanicCreate)?;

        let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));

        let new_resources = self.add_pci_device(
            pvpanic_device.clone(),
            pvpanic_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let mut node = device_node!(id, pvpanic_device);

        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = None;

        self.device_tree.lock().unwrap().insert(id, node);

        Ok(Some(pvpanic_device))
    }

    /// Resolve the (segment, BDF, resources) triple for device `id`.
    fn pci_resources(
        &self,
        id: &str,
        pci_segment_id: u16,
    ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        Ok(
            if let Some(node) = self.device_tree.lock().unwrap().get(id) {
                info!("Restoring virtio-pci {} resources", id);
                let pci_device_bdf: PciBdf = node
                    .pci_bdf
                    .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
                let pci_segment_id = pci_device_bdf.segment();

                // Re-claim the same device slot on the bus as before.
                self.pci_segments[pci_segment_id as usize]
                    .pci_bus
                    .lock()
                    .unwrap()
                    .get_device_id(pci_device_bdf.device() as usize)
                    .map_err(DeviceManagerError::GetPciDeviceId)?;

                (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
            } else {
                let pci_device_bdf =
                    self.pci_segments[pci_segment_id as usize].next_device_bdf()?;

                (pci_segment_id, pci_device_bdf, None)
            },
        )
    }

    /// Port I/O bus (x86 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }

    /// Memory-mapped I/O bus.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }

    /// System-wide address allocator.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }

    /// Interrupt controller, if one has been created.
    pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
        self.interrupt_controller
            .as_ref()
            .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
    }

    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }

    pub fn console(&self) -> &Arc<Console> {
        &self.console
    }

    /// Extra kernel command line fragments collected while creating devices
    /// (aarch64 only).
    #[cfg(target_arch = "aarch64")]
    pub fn cmdline_additions(&self) -> &[String] {
        self.cmdline_additions.as_slice()
    }

    /// Propagate a newly hotplugged memory region to every device that needs
    /// to know about guest memory: virtio devices, their DMA handlers, the
    /// shared VFIO container and all vfio-user devices.
    pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
        for handle in self.virtio_devices.iter() {
            handle
                .virtio_device
                .lock()
                .unwrap()
                .add_memory_region(new_region)
                .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;

            // Devices behind the vIOMMU get mappings on demand; only
            // identity-map for devices not attached to it.
            if let Some(dma_handler) = &handle.dma_handler {
                if !handle.iommu {
                    let gpa = new_region.start_addr().0;
                    let size = new_region.len();
                    dma_handler
                        .map(gpa, gpa, size)
                        .map_err(DeviceManagerError::VirtioDmaMap)?;
                }
            }
        }

        // Take care of updating the memory for VFIO PCI devices.
        if let Some(vfio_container) = &self.vfio_container {
            vfio_container
                .vfio_dma_map(
                    new_region.start_addr().raw_value(),
                    new_region.len(),
                    new_region.as_ptr() as u64,
                )
                .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
        }

        // Take care of updating the memory for vfio-user devices.
        {
            let device_tree = self.device_tree.lock().unwrap();
            for pci_device_node in device_tree.pci_devices() {
                if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
                    .pci_device_handle
                    .as_ref()
                    .ok_or(DeviceManagerError::MissingPciDevice)?
                {
                    vfio_user_pci_device
                        .lock()
                        .unwrap()
                        .dma_map(new_region)
                        .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
                }
            }
        }

        Ok(())
    }

    /// Activate every virtio device whose activation was deferred (queued by
    /// the transport when the guest driver signalled readiness).
    pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
        for mut activator in self.pending_activations.lock().unwrap().drain(..)
{ 3910 activator 3911 .activate() 3912 .map_err(DeviceManagerError::VirtioActivate)?; 3913 } 3914 Ok(()) 3915 } 3916 3917 pub fn notify_hotplug( 3918 &self, 3919 _notification_type: AcpiNotificationFlags, 3920 ) -> DeviceManagerResult<()> { 3921 return self 3922 .ged_notification_device 3923 .as_ref() 3924 .unwrap() 3925 .lock() 3926 .unwrap() 3927 .notify(_notification_type) 3928 .map_err(DeviceManagerError::HotPlugNotification); 3929 } 3930 3931 pub fn add_device( 3932 &mut self, 3933 device_cfg: &mut DeviceConfig, 3934 ) -> DeviceManagerResult<PciDeviceInfo> { 3935 self.validate_identifier(&device_cfg.id)?; 3936 3937 if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) { 3938 return Err(DeviceManagerError::InvalidIommuHotplug); 3939 } 3940 3941 let (bdf, device_name) = self.add_passthrough_device(device_cfg)?; 3942 3943 // Update the PCIU bitmap 3944 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3945 3946 Ok(PciDeviceInfo { 3947 id: device_name, 3948 bdf, 3949 }) 3950 } 3951 3952 pub fn add_user_device( 3953 &mut self, 3954 device_cfg: &mut UserDeviceConfig, 3955 ) -> DeviceManagerResult<PciDeviceInfo> { 3956 self.validate_identifier(&device_cfg.id)?; 3957 3958 let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?; 3959 3960 // Update the PCIU bitmap 3961 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3962 3963 Ok(PciDeviceInfo { 3964 id: device_name, 3965 bdf, 3966 }) 3967 } 3968 3969 pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> { 3970 // The node can be directly a PCI node in case the 'id' refers to a 3971 // VFIO device or a virtio-pci one. 3972 // In case the 'id' refers to a virtio device, we must find the PCI 3973 // node by looking at the parent. 
3974 let device_tree = self.device_tree.lock().unwrap(); 3975 let node = device_tree 3976 .get(&id) 3977 .ok_or(DeviceManagerError::UnknownDeviceId(id))?; 3978 3979 let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() { 3980 node 3981 } else { 3982 let parent = node 3983 .parent 3984 .as_ref() 3985 .ok_or(DeviceManagerError::MissingNode)?; 3986 device_tree 3987 .get(parent) 3988 .ok_or(DeviceManagerError::MissingNode)? 3989 }; 3990 3991 let pci_device_bdf: PciBdf = pci_device_node 3992 .pci_bdf 3993 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3994 let pci_segment_id = pci_device_bdf.segment(); 3995 3996 let pci_device_handle = pci_device_node 3997 .pci_device_handle 3998 .as_ref() 3999 .ok_or(DeviceManagerError::MissingPciDevice)?; 4000 #[allow(irrefutable_let_patterns)] 4001 if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle { 4002 let device_type = VirtioDeviceType::from( 4003 virtio_pci_device 4004 .lock() 4005 .unwrap() 4006 .virtio_device() 4007 .lock() 4008 .unwrap() 4009 .device_type(), 4010 ); 4011 match device_type { 4012 VirtioDeviceType::Net 4013 | VirtioDeviceType::Block 4014 | VirtioDeviceType::Pmem 4015 | VirtioDeviceType::Fs 4016 | VirtioDeviceType::Vsock => {} 4017 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)), 4018 } 4019 } 4020 4021 // Update the PCID bitmap 4022 self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device(); 4023 4024 Ok(()) 4025 } 4026 4027 pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> { 4028 info!( 4029 "Ejecting device_id = {} on segment_id={}", 4030 device_id, pci_segment_id 4031 ); 4032 4033 // Convert the device ID into the corresponding b/d/f. 4034 let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0); 4035 4036 // Give the PCI device ID back to the PCI bus. 
4037 self.pci_segments[pci_segment_id as usize] 4038 .pci_bus 4039 .lock() 4040 .unwrap() 4041 .put_device_id(device_id as usize) 4042 .map_err(DeviceManagerError::PutPciDeviceId)?; 4043 4044 // Remove the device from the device tree along with its children. 4045 let mut device_tree = self.device_tree.lock().unwrap(); 4046 let pci_device_node = device_tree 4047 .remove_node_by_pci_bdf(pci_device_bdf) 4048 .ok_or(DeviceManagerError::MissingPciDevice)?; 4049 4050 // For VFIO and vfio-user the PCI device id is the id. 4051 // For virtio we overwrite it later as we want the id of the 4052 // underlying device. 4053 let mut id = pci_device_node.id; 4054 let pci_device_handle = pci_device_node 4055 .pci_device_handle 4056 .ok_or(DeviceManagerError::MissingPciDevice)?; 4057 if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) { 4058 // The virtio-pci device has a single child 4059 if !pci_device_node.children.is_empty() { 4060 assert_eq!(pci_device_node.children.len(), 1); 4061 let child_id = &pci_device_node.children[0]; 4062 id = child_id.clone(); 4063 } 4064 } 4065 for child in pci_device_node.children.iter() { 4066 device_tree.remove(child); 4067 } 4068 4069 let mut iommu_attached = false; 4070 if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices { 4071 if iommu_attached_devices.contains(&pci_device_bdf) { 4072 iommu_attached = true; 4073 } 4074 } 4075 4076 let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { 4077 // No need to remove any virtio-mem mapping here as the container outlives all devices 4078 PciDeviceHandle::Vfio(vfio_pci_device) => ( 4079 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4080 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>, 4081 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 4082 false, 4083 ), 4084 PciDeviceHandle::Virtio(virtio_pci_device) => { 4085 let dev = virtio_pci_device.lock().unwrap(); 4086 let bar_addr = dev.config_bar_addr(); 
4087 for (event, addr) in dev.ioeventfds(bar_addr) { 4088 let io_addr = IoEventAddress::Mmio(addr); 4089 self.address_manager 4090 .vm 4091 .unregister_ioevent(event, &io_addr) 4092 .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; 4093 } 4094 4095 if let Some(dma_handler) = dev.dma_handler() { 4096 if !iommu_attached { 4097 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4098 for region in zone.regions() { 4099 let iova = region.start_addr().0; 4100 let size = region.len(); 4101 dma_handler 4102 .unmap(iova, size) 4103 .map_err(DeviceManagerError::VirtioDmaUnmap)?; 4104 } 4105 } 4106 } 4107 } 4108 4109 ( 4110 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4111 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>, 4112 Some(dev.virtio_device()), 4113 dev.dma_handler().is_some() && !iommu_attached, 4114 ) 4115 } 4116 PciDeviceHandle::VfioUser(vfio_user_pci_device) => { 4117 let mut dev = vfio_user_pci_device.lock().unwrap(); 4118 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4119 for region in zone.regions() { 4120 dev.dma_unmap(region) 4121 .map_err(DeviceManagerError::VfioUserDmaUnmap)?; 4122 } 4123 } 4124 4125 ( 4126 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>, 4127 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>, 4128 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 4129 true, 4130 ) 4131 } 4132 }; 4133 4134 if remove_dma_handler { 4135 for virtio_mem_device in self.virtio_mem_devices.iter() { 4136 virtio_mem_device 4137 .lock() 4138 .unwrap() 4139 .remove_dma_mapping_handler(VirtioMemMappingSource::Device( 4140 pci_device_bdf.into(), 4141 )) 4142 .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?; 4143 } 4144 } 4145 4146 // Free the allocated BARs 4147 pci_device 4148 .lock() 4149 .unwrap() 4150 .free_bars( 4151 &mut self.address_manager.allocator.lock().unwrap(), 4152 &mut self.pci_segments[pci_segment_id as 
usize]
                .mem32_allocator
                .lock()
                .unwrap(),
            &mut self.pci_segments[pci_segment_id as usize]
                .mem64_allocator
                .lock()
                .unwrap(),
        )
        .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager. Pointer equality is used so only this exact
        // Arc instance is dropped from the list.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present.
        // Userspace mappings must be unregistered from the memory manager
        // before the device is shut down, otherwise stale mappings would
        // outlive the device.
        if let Some(virtio_device) = virtio_device {
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the list and
        // buses where it was stored. At the end of this function, after
        // any_device, bus_device and pci_device are released, the actual
        // device will be dropped.
        Ok(())
    }

    /// Plug an already-created virtio device into the guest as a hotplugged
    /// PCI function and record it in the DeviceManager bookkeeping.
    ///
    /// Returns the id and BDF assigned to the new PCI device.
    fn hotplug_virtio_pci_device(
        &mut self,
        handle: MetaVirtioDevice,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        // Add the virtio device to the device manager list. This is important
        // as the list is used to notify virtio devices about memory updates
        // for instance.
        self.virtio_devices.push(handle.clone());

        // Only attach the device to the virtual IOMMU when requested by the
        // handle; otherwise no IOMMU mapping is passed along.
        let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
            self.iommu_mapping.clone()
        } else {
            None
        };

        let bdf = self.add_virtio_pci_device(
            handle.virtio_device,
            &mapping,
            handle.id.clone(),
            handle.pci_segment,
            handle.dma_handler,
        )?;

        // Update the PCIU bitmap: mark the slot as freshly plugged so the
        // guest picks it up on the next ACPI hotplug scan.
        self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo { id: handle.id, bdf })
    }

    /// Whether the given PCI segment is one of the segments the platform
    /// config places behind the virtual IOMMU. Returns false when no
    /// platform config or no iommu_segments list is present.
    fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
        self.config
            .lock()
            .as_ref()
            .unwrap()
            .platform
            .as_ref()
            .map(|pc| {
                pc.iommu_segments
                    .as_ref()
                    .map(|v| v.contains(&pci_segment_id))
                    .unwrap_or_default()
            })
            .unwrap_or_default()
    }

    /// Hotplug a virtio-block device described by `disk_cfg`.
    ///
    /// Fails with `InvalidIommuHotplug` if the config asks for IOMMU
    /// attachment on a segment that is not an IOMMU segment.
    pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&disk_cfg.id)?;

        if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_block_device(disk_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplug a virtio-fs device described by `fs_cfg`.
    // NOTE(review): unlike the other add_* methods there is no iommu/segment
    // check here — presumably virtio-fs does not support IOMMU attachment;
    // confirm against FsConfig.
    pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&fs_cfg.id)?;

        let device = self.make_virtio_fs_device(fs_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplug a virtio-pmem device described by `pmem_cfg`.
    pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&pmem_cfg.id)?;

        if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_pmem_device(pmem_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplug a virtio-net device described by `net_cfg`.
    pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&net_cfg.id)?;

        if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_net_device(net_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplug a vDPA device described by `vdpa_cfg`.
    pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vdpa_cfg.id)?;

        if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_vdpa_device(vdpa_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplug a virtio-vsock device described by `vsock_cfg`.
    pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vsock_cfg.id)?;

        if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_vsock_device(vsock_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Collect per-device statistics counters, keyed by device id.
    /// Devices that expose no counters are skipped.
    pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
        let mut counters = HashMap::new();

        for handle in &self.virtio_devices {
            let virtio_device = handle.virtio_device.lock().unwrap();
            if let Some(device_counters) = virtio_device.counters() {
                counters.insert(handle.id.clone(), device_counters.clone());
            }
        }

        counters
    }

    /// Ask the virtio-balloon device to resize to `size` bytes.
    ///
    /// Returns `MissingVirtioBalloon` (after logging a warning) when no
    /// balloon device was configured for this VM.
    pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
        if let Some(balloon) = &self.balloon {
            return balloon
                .lock()
                .unwrap()
                .resize(size)
                .map_err(DeviceManagerError::VirtioBalloonResize);
        }

        warn!("No balloon setup: Can't resize the balloon");
        Err(DeviceManagerError::MissingVirtioBalloon)
    }

    /// Current actual size reported by the balloon device, or 0 when no
    /// balloon device is present.
    pub fn balloon_size(&self) -> u64 {
        if let Some(balloon) = &self.balloon {
            return balloon.lock().unwrap().get_actual();
        }

        0
    }

    /// Shared handle to the device tree tracking every managed device.
    pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
        self.device_tree.clone()
    }

    /// Signal a power-button press to the guest via an ACPI GED notification.
    // NOTE(review): unwrap() assumes the GED notification device was created
    // during boot; a power-button event before device creation would panic.
    #[cfg(target_arch = "x86_64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    /// Signal a power-button press to the guest, covering both boot flows
    /// supported on AArch64.
    #[cfg(target_arch = "aarch64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // There are two use cases:
        // 1. Users will use direct kernel boot with device tree.
        // 2. Users will use ACPI+UEFI boot.

        // Trigger a GPIO pin 3 event to satisfy use case 1.
        self.gpio_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .trigger_key(3)
            .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
        // Trigger a GED power button event to satisfy use case 2.
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification);
    }

    /// IOMMU device BDF plus the BDFs of all devices attached to it, when a
    /// virtual IOMMU is present.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }

    /// Validate a user-supplied device identifier: ids starting with "__"
    /// are reserved for internally generated names, and ids must be unique
    /// across the device tree. `None` is always accepted (an id will be
    /// generated elsewhere).
    fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
        if let Some(id) = id {
            // "__" prefix is reserved for internal identifiers.
            if id.starts_with("__") {
                return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
            }

            if self.device_tree.lock().unwrap().contains_key(id) {
                return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
            }
        }

        Ok(())
    }

    /// MMIO addresses of platform devices needed when building ACPI tables.
    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
}

/// Find the NUMA node that owns the given memory zone id, if any.
fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
            return Some(*numa_node_id);
        }
    }

    None
}

/// Find the NUMA node that owns the given PCI segment, defaulting to node 0
/// when the segment is not listed in any node.
fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.pci_segments.contains(&pci_segment_id) {
            return *numa_node_id;
        }
    }

    0
}

/// Marker type used only to emit the ACPI description of the TPM device.
struct TpmDevice {}

impl Aml for TpmDevice {
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        // TPM 2.0 device (_HID MSFT0101), always present and enabled
        // (_STA = 0xF), with its fixed MMIO window.
        aml::Device::new(
            "TPM2".into(),
            vec![
                &aml::Name::new("_HID".into(), &"MSFT0101"),
                &aml::Name::new("_STA".into(), &(0xF_usize)),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                        true,
                        layout::TPM_START.0 as u32,
                        layout::TPM_SIZE as u32,
                    )]),
                ),
            ],
        )
        .to_aml_bytes(sink)
    }
}

impl Aml for DeviceManager {
    /// Emit the ACPI (AML) description of everything the DeviceManager owns:
    /// the PCI hotplug controller, each PCI segment, the motherboard
    /// resources, the serial port, the power button, the optional TPM and
    /// the GED notification device. Emission order is guest-visible; do not
    /// reorder.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // One PCNT (re-scan) method call per PCI segment; PSCN below invokes
        // them all so the guest re-enumerates every segment on hotplug.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values,
                // backed by the BusDevice read/write implementation below.
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                // PCEJ(slot, segment): eject a device. Serialized by BLCK.
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Store (1 << Arg0) into the B0EJ field, i.e. write
                        // the eject bitmap for the slot given in the first
                        // argument. NOTE(review): the previous comment here
                        // said "PCI bus number ... I/O port" which looks
                        // stale — the region is SystemMemory and Arg0 is a
                        // slot number per the B0EJ handling in BusDevice::write.
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                // PSCN: rescan every PCI segment.
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        // Per-segment PCI host bridge descriptions.
        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        // Motherboard resources: reserve each segment's MMIO config window.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            // COM1: 16550-compatible on x86_64 (PNP0501, legacy I/O ports),
            // PL011 on aarch64 (ARMH0011, fixed MMIO window).
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        // S5 (soft-off) sleep state.
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        // ACPI power button device.
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        // Generic Event Device used for hotplug/power-button notifications.
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}

impl Pausable for DeviceManager {
    /// Pause every migratable device registered in the device tree, then
    /// (AArch64 only) the interrupt controller.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    /// Resume every migratable device registered in the device tree.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    /// Snapshot the DeviceManager state plus the snapshot of every
    /// migratable device it manages.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let mut migratable = migratable.lock().unwrap();
                snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
            }
        }

        Ok(snapshot)
    }
}

impl Transportable for DeviceManager {}

// Migration hooks simply fan out to every migratable device in the tree.
impl Migratable for DeviceManager {
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    /// Merge the dirty-page ranges reported by every device into one table.
    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_migration()?;
            }
        }
        Ok(())
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}

// Layout of the PCI hotplug controller MMIO region. These offsets/sizes
// mirror the PCIU/PCID/B0EJ/PSEG 32-bit fields declared in the AML above;
// keep both in sync.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;

// MMIO backend of the PCI hotplug controller exposed to the guest via the
// PCST operation region in the AML.
impl BusDevice for DeviceManager {
    /// Guest read of a hotplug register. PCIU/PCID are read-and-clear
    /// bitmaps of slots plugged/unplugged on the currently selected segment.
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_up
                        .to_le_bytes(),
                );
                // Clear the PCIU bitmap
                self.pci_segments[self.selected_segment].pci_devices_up = 0;
            }
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_down
                        .to_le_bytes(),
                );
                // Clear the PCID bitmap
                self.pci_segments[self.selected_segment].pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty bitmap since the eject is always
                // taken care of right away during a write access.
                data.fill(0);
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        )
    }

    /// Guest write of a hotplug register. B0EJ ejects every slot set in the
    /// written bitmap; PSEG selects which PCI segment subsequent accesses
    /// operate on. Never requests a barrier (always returns None).
    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
        match offset {
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let mut slot_bitmap = u32::from_le_bytes(data_array);

                // Eject each requested slot, lowest bit first; a failed
                // eject is logged but does not stop the remaining ejects.
                while slot_bitmap > 0 {
                    let slot_id = slot_bitmap.trailing_zeros();
                    if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
                        error!("Failed ejecting device {}: {:?}", slot_id, e);
                    }
                    slot_bitmap &= !(1 << slot_id);
                }
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let selected_segment = u32::from_le_bytes(data_array) as usize;
                // Ignore (with an error log) out-of-range segment selections
                // rather than panicking on a hostile guest value.
                if selected_segment >= self.pci_segments.len() {
                    error!(
                        "Segment selection out of range: {} >= {}",
                        selected_segment,
                        self.pci_segments.len()
                    );
                    return None;
                }
                self.selected_segment = selected_segment;
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        );

        None
    }
}

impl Drop for DeviceManager {
    /// Shut down every virtio device and restore the host terminal settings
    /// that were saved before the console was put into raw mode.
    fn drop(&mut self) {
        for handle in self.virtio_devices.drain(..) {
            handle.virtio_device.lock().unwrap().shutdown();
        }

        if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
            // SAFETY: FFI call
            let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
        }
    }
}