1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use crate::config::{ 13 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 14 VdpaConfig, VhostMode, VmConfig, VsockConfig, 15 }; 16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; 17 use crate::device_tree::{DeviceNode, DeviceTree}; 18 use crate::interrupt::LegacyUserspaceInterruptManager; 19 use crate::interrupt::MsiInterruptManager; 20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 21 use crate::pci_segment::PciSegment; 22 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 23 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 24 use crate::sigwinch_listener::start_sigwinch_listener; 25 use crate::GuestRegionMmap; 26 use crate::PciDeviceInfo; 27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; 28 use acpi_tables::sdt::GenericAddress; 29 use acpi_tables::{aml, Aml}; 30 use anyhow::anyhow; 31 use arch::layout; 32 #[cfg(target_arch = "x86_64")] 33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 34 use arch::NumaNodes; 35 #[cfg(target_arch = "aarch64")] 36 use arch::{DeviceType, MmioDeviceInfo}; 37 use block::{ 38 async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type, 39 fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio, 40 raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType, 41 }; 42 #[cfg(feature = "io_uring")] 43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; 44 #[cfg(target_arch = "aarch64")] 45 use 
devices::gic; 46 #[cfg(target_arch = "x86_64")] 47 use devices::ioapic; 48 #[cfg(target_arch = "aarch64")] 49 use devices::legacy::Pl011; 50 #[cfg(target_arch = "x86_64")] 51 use devices::legacy::Serial; 52 use devices::{ 53 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 54 }; 55 use hypervisor::{HypervisorType, IoEventAddress}; 56 use libc::{ 57 cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, 58 O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, 59 }; 60 use pci::{ 61 DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping, 62 VfioUserPciDevice, VfioUserPciDeviceError, 63 }; 64 use seccompiler::SeccompAction; 65 use serde::{Deserialize, Serialize}; 66 use std::collections::{BTreeSet, HashMap}; 67 use std::fs::{read_link, File, OpenOptions}; 68 use std::io::{self, stdout, Seek, SeekFrom}; 69 use std::mem::zeroed; 70 use std::num::Wrapping; 71 use std::os::unix::fs::OpenOptionsExt; 72 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 73 use std::path::PathBuf; 74 use std::result; 75 use std::sync::{Arc, Mutex}; 76 use std::time::Instant; 77 use tracer::trace_scoped; 78 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 79 use virtio_devices::transport::VirtioTransport; 80 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 81 use virtio_devices::vhost_user::VhostUserConfig; 82 use virtio_devices::{ 83 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 84 }; 85 use virtio_devices::{Endpoint, IommuMapping}; 86 use vm_allocator::{AddressAllocator, SystemAllocator}; 87 use vm_device::dma_mapping::vfio::VfioDmaMapping; 88 use vm_device::dma_mapping::ExternalDmaMapping; 89 use vm_device::interrupt::{ 90 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 91 }; 92 use vm_device::{Bus, BusDevice, Resource}; 93 use vm_memory::guest_memory::FileOffset; 94 use 
vm_memory::GuestMemoryRegion;
use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
#[cfg(target_arch = "x86_64")]
use vm_memory::{GuestAddressSpace, GuestMemory};
use vm_migration::{
    protocol::MemoryRangeTable, snapshot_from_id, versioned_state_from_id, Migratable,
    MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, Transportable,
};
use vm_virtio::AccessPlatform;
use vm_virtio::VirtioDeviceType;
use vmm_sys_util::eventfd::EventFd;

// Size of each MMIO device slot on AArch64.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name.
// The double-underscore prefix keeps these internal identifiers from
// colliding with user-supplied device ids.
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
const PVPANIC_DEVICE_NAME: &str = "__pvpanic";

// Devices that the user may name and for which we generate
// identifiers if the user doesn't give one.
// A numeric counter is appended to these prefixes (see device_id_cnt).
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";

/// Errors associated with device manager operations: device creation,
/// hotplug/unplug, resource allocation and snapshot/restore.
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Cannot create tpm device
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// No socket option support for console device
    NoSocketOptionSupportForConsoleDevice,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
463 VirtioDmaUnmap(std::io::Error), 464 465 /// Cannot hotplug device behind vIOMMU 466 InvalidIommuHotplug, 467 468 /// Invalid identifier as it is not unique. 469 IdentifierNotUnique(String), 470 471 /// Invalid identifier 472 InvalidIdentifier(String), 473 474 /// Error activating virtio device 475 VirtioActivate(ActivateError), 476 477 /// Failed retrieving device state from snapshot 478 RestoreGetState(MigratableError), 479 480 /// Cannot create a PvPanic device 481 PvPanicCreate(devices::pvpanic::PvPanicError), 482 } 483 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>; 484 485 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10; 486 487 const TIOCSPTLCK: libc::c_int = 0x4004_5431; 488 const TIOCGTPEER: libc::c_int = 0x5441; 489 490 pub fn create_pty() -> io::Result<(File, File, PathBuf)> { 491 // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx 492 // This is done to try and use the devpts filesystem that 493 // could be available for use in the process's namespace first. 494 // Ideally these are all the same file though but different 495 // kernels could have things setup differently. 496 // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt 497 // for further details. 
498 499 let custom_flags = libc::O_NONBLOCK; 500 let main = match OpenOptions::new() 501 .read(true) 502 .write(true) 503 .custom_flags(custom_flags) 504 .open("/dev/pts/ptmx") 505 { 506 Ok(f) => f, 507 _ => OpenOptions::new() 508 .read(true) 509 .write(true) 510 .custom_flags(custom_flags) 511 .open("/dev/ptmx")?, 512 }; 513 let mut unlock: libc::c_ulong = 0; 514 // SAFETY: FFI call into libc, trivially safe 515 unsafe { libc::ioctl(main.as_raw_fd(), TIOCSPTLCK as _, &mut unlock) }; 516 517 // SAFETY: FFI call into libc, trivially safe 518 let sub_fd = unsafe { 519 libc::ioctl( 520 main.as_raw_fd(), 521 TIOCGTPEER as _, 522 libc::O_NOCTTY | libc::O_RDWR, 523 ) 524 }; 525 if sub_fd == -1 { 526 return vmm_sys_util::errno::errno_result().map_err(|e| e.into()); 527 } 528 529 let proc_path = PathBuf::from(format!("/proc/self/fd/{sub_fd}")); 530 let path = read_link(proc_path)?; 531 532 // SAFETY: sub_fd is checked to be valid before being wrapped in File 533 Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path)) 534 } 535 536 #[derive(Default)] 537 pub struct Console { 538 console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>, 539 } 540 541 impl Console { 542 pub fn need_resize(&self) -> bool { 543 if let Some(_resizer) = self.console_resizer.as_ref() { 544 return true; 545 } 546 547 false 548 } 549 550 pub fn update_console_size(&self) { 551 if let Some(resizer) = self.console_resizer.as_ref() { 552 resizer.update_console_size() 553 } 554 } 555 } 556 557 pub(crate) struct AddressManager { 558 pub(crate) allocator: Arc<Mutex<SystemAllocator>>, 559 #[cfg(target_arch = "x86_64")] 560 pub(crate) io_bus: Arc<Bus>, 561 pub(crate) mmio_bus: Arc<Bus>, 562 pub(crate) vm: Arc<dyn hypervisor::Vm>, 563 device_tree: Arc<Mutex<DeviceTree>>, 564 pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 565 } 566 567 impl DeviceRelocation for AddressManager { 568 fn move_bar( 569 &self, 570 old_base: u64, 571 new_base: u64, 572 len: u64, 573 pci_dev: &mut dyn PciDevice, 574 
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        // Relocating a BAR means: release the old address range, claim the
        // new one, then patch every piece of VMM state that referenced the
        // old address (buses, device tree, ioeventfds, user memory regions).
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                // Port I/O only exists on x86; nothing to relocate elsewhere.
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                // Update system allocator
                if region_type == PciBarRegionType::Memory32BitRegion {
                    // 32-bit BARs live in the MMIO hole managed by the
                    // system allocator.
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_mmio_hole_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            Some(len),
                        )
                        .ok_or_else(|| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                "failed allocating new 32 bits MMIO range",
                            )
                        })?;
                } else {
                    // Find the specific allocator that this BAR was allocated from and use it for new one
                    // (64-bit BARs are allocated from the per-PCI-segment
                    // allocators, so the old base identifies the segment).
                    for allocator in &self.pci_mmio_allocators {
                        let allocator_base = allocator.lock().unwrap().base();
                        let allocator_end = allocator.lock().unwrap().end();

                        if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                            allocator
                                .lock()
                                .unwrap()
                                .free(GuestAddress(old_base), len as GuestUsize);

                            allocator
                                .lock()
                                .unwrap()
                                .allocate(
                                    Some(GuestAddress(new_base)),
                                    len as GuestUsize,
                                    Some(len),
                                )
                                .ok_or_else(|| {
                                    io::Error::new(
                                        io::ErrorKind::Other,
                                        "failed allocating new 64 bits MMIO range",
                                    )
                                })?;

                            break;
                        }
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    // Only patch the PciBar resource matching both the BAR
                    // type and the old base address.
                    if let Resource::PciBar { base, type_, .. } = resource {
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {id} from device tree"),
                ));
            }
        }

        // virtio-pci devices need extra fixups depending on which BAR moved.
        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The config BAR moved: re-register the queue notification
                // ioeventfds at the new guest addresses.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {e:?}"),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {e:?}"),
                            )
                        })?;
                }
            } else {
                // Some other BAR moved: if it backs the device's shared
                // memory regions, remap the corresponding user memory region.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        // Remove the old mapping from the hypervisor first.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {e:?}"),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {e:?}"),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {e:?}"),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the device itself update its internal BAR bookkeeping.
        pci_dev.move_bar(old_base, new_base)
    }
}

/// Subset of DeviceManager state serialized into snapshots and restored on
/// resume (see `DeviceManager::new` reading it back from the snapshot).
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

/// Main side of a pty along with the path of its sub side.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    fn clone(&self) -> Self {
        PtyPair {
            // NOTE(review): try_clone() duplicates the fd and can fail
            // (e.g. fd limit reached); this unwrap would then panic.
            main: self.main.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

/// Handle to any of the PCI device flavors managed here.
#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}

/// A virtio device together with the placement/IOMMU information needed to
/// plug it onto a PCI segment.
#[derive(Clone)]
struct MetaVirtioDevice {
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // Whether the device sits behind the virtual IOMMU.
    iommu: bool,
    id: String,
    pci_segment: u16,
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}

/// Addresses of ACPI platform registers, used when building ACPI tables.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

pub struct DeviceManager {
    // The underlying hypervisor
    hypervisor_type: HypervisorType,

    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // console PTY
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // serial PTY
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // pty foreground status,
    console_resize_pipe: Option<Arc<File>>,

    // To restore on
exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    // One PciSegment per configured PCI segment (segment 0 is the default).
    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    exit_evt: EventFd,
    // Reset event
    reset_evt: EventFd,

    // Map from (device type, device id) to MMIO device info, used to build
    // the AArch64 device tree / command line.
    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    // MMIO address allocated for the DeviceManager ACPI device.
    acpi_address: GuestAddress,

    // Index of the PCI segment new devices are placed on — presumably the
    // "currently selected" segment; confirm against device-addition paths.
    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // aio availability if detected
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot
through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    snapshot: Option<Snapshot>,
}

impl DeviceManager {
    /// Builds the `DeviceManager` skeleton: carves per-segment PCI MMIO
    /// windows out of the device area, creates the MSI interrupt manager and
    /// the PCI segments, reserves the device-manager ACPI MMIO region, and
    /// registers the manager itself on the MMIO bus at that region.
    ///
    /// Devices themselves are added later by `create_devices()`. On restore,
    /// `snapshot` seeds the device tree and the device-id counter so that
    /// device names/BDFs are reproduced.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
        mmio_bus: Arc<Bus>,
        hypervisor_type: HypervisorType,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        cpu_manager: Arc<Mutex<CpuManager>>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
        snapshot: Option<Snapshot>,
        dynamic: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        // On restore, resume the device tree and id counter from the snapshot
        // so device naming stays stable across save/restore.
        let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
            let state: DeviceManagerState = snapshot.to_state().unwrap();
            (
                Arc::new(Mutex::new(state.device_tree.clone())),
                state.device_id_cnt,
            )
        } else {
            (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
        };

        let num_pci_segments =
            if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
                platform_config.num_pci_segments
            } else {
                1
            };

        let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
        let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;

        // Start each PCI segment range on a 4GiB boundary: divide the device
        // area evenly between segments, rounding each share down to a
        // multiple of 4GiB.
        let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
            / ((4 << 30) * num_pci_segments as u64)
            * (4 << 30);

        let mut pci_mmio_allocators = vec![];
        for i in 0..num_pci_segments as u64 {
            let mmio_start = start_of_device_area + i * pci_segment_size;
            let allocator = Arc::new(Mutex::new(
                AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
            ));
            pci_mmio_allocators.push(allocator)
        }

        let address_manager = Arc::new(AddressManager {
            allocator: memory_manager.lock().unwrap().allocator(),
            #[cfg(target_arch = "x86_64")]
            io_bus,
            mmio_bus,
            vm: vm.clone(),
            device_tree: Arc::clone(&device_tree),
            pci_mmio_allocators,
        });

        // First we create the MSI interrupt manager, the legacy one is created
        // later, after the IOAPIC device creation.
        // The reason we create the MSI one first is because the IOAPIC needs it,
        // and then the legacy interrupt manager needs an IOAPIC. So we're
        // handling a linear dependency chain:
        // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
        let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
            Arc::new(MsiInterruptManager::new(
                Arc::clone(&address_manager.allocator),
                vm,
            ));

        // Reserve the MMIO window through which the DeviceManager exposes its
        // own ACPI interface (registered on the mmio_bus at the end of this
        // function). This is a platform-MMIO allocation, so report its
        // failure as AllocateMmioAddress (previously mis-reported as
        // AllocateIoPort), consistent with the other platform-MMIO
        // allocations below.
        let acpi_address = address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
            .ok_or(DeviceManagerError::AllocateMmioAddress)?;

        // Legacy IRQ slots shared by all PCI segments (INTx swizzling).
        let mut pci_irq_slots = [0; 32];
        PciSegment::reserve_legacy_interrupts_for_pci_devices(
            &address_manager,
            &mut pci_irq_slots,
        )?;

        // Segment 0 is special (hosts the default config spaces); the rest
        // are plain segments bound to their NUMA node.
        let mut pci_segments = vec![PciSegment::new_default_segment(
            &address_manager,
            Arc::clone(&address_manager.pci_mmio_allocators[0]),
            &pci_irq_slots,
        )?];

        for i in 1..num_pci_segments as usize {
            pci_segments.push(PciSegment::new(
                i as u16,
                numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
                &address_manager,
                Arc::clone(&address_manager.pci_mmio_allocators[i]),
                &pci_irq_slots,
            )?);
        }

        // With dynamic (hotplug-capable) configuration the CpuManager gets
        // its own ACPI MMIO region and a slot on the MMIO bus.
        if dynamic {
            let acpi_address = address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
                .ok_or(DeviceManagerError::AllocateMmioAddress)?;

            address_manager
                .mmio_bus
                .insert(
                    cpu_manager.clone(),
                    acpi_address.0,
                    CPU_MANAGER_ACPI_SIZE as u64,
                )
                .map_err(DeviceManagerError::BusError)?;

            cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
        }

        let device_manager = DeviceManager {
            hypervisor_type,
            address_manager: Arc::clone(&address_manager),
            console: Arc::new(Console::default()),
            interrupt_controller: None,
            #[cfg(target_arch = "aarch64")]
            cmdline_additions: Vec::new(),
            ged_notification_device: None,
            config,
            memory_manager,
            cpu_manager,
            virtio_devices: Vec::new(),
            bus_devices: Vec::new(),
            device_id_cnt,
            msi_interrupt_manager,
            legacy_interrupt_manager: None,
            passthrough_device: None,
            vfio_container: None,
            iommu_device: None,
            iommu_mapping: None,
            iommu_attached_devices: None,
            pci_segments,
            device_tree,
            exit_evt,
            reset_evt,
            #[cfg(target_arch = "aarch64")]
            id_to_dev_info: HashMap::new(),
            seccomp_action,
            numa_nodes,
            balloon: None,
            activate_evt: activate_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            acpi_address,
            selected_segment: 0,
            serial_pty: None,
            serial_manager: None,
            console_pty: None,
            console_resize_pipe: None,
            original_termios_opt: Arc::new(Mutex::new(None)),
            virtio_mem_devices: Vec::new(),
            #[cfg(target_arch = "aarch64")]
            gpio_device: None,
            pvpanic_device: None,
            force_iommu,
            io_uring_supported: None,
            aio_supported: None,
            boot_id_list,
            timestamp,
            pending_activations: Arc::new(Mutex::new(Vec::default())),
            acpi_platform_addresses: AcpiPlatformAddresses::default(),
            snapshot,
        };

        let device_manager = Arc::new(Mutex::new(device_manager));

        // Expose the DeviceManager itself as an ACPI MMIO device at the
        // region reserved above.
        address_manager
            .mmio_bus
            .insert(
                Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
                acpi_address.0,
                DEVICE_MANAGER_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(device_manager)
    }

    /// Returns a clone of the serial device's PTY pair, if the serial output
    /// mode uses a PTY.
    pub fn serial_pty(&self) -> Option<PtyPair> {
        self.serial_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    /// Returns a clone of the virtio-console PTY pair, if the console output
    /// mode uses a PTY.
    pub fn console_pty(&self) -> Option<PtyPair> {
        self.console_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    /// Returns the pipe used to notify the console device of terminal
    /// resize (SIGWINCH) events, if one was set up.
    pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
        self.console_resize_pipe.as_ref().map(Arc::clone)
    }

    /// Instantiates every device for the VM: interrupt controller, legacy
    /// and ACPI platform devices, serial/console, optional TPM and pvpanic,
    /// all virtio devices, and finally places everything on the PCI
    /// segments. Pre-existing PTYs/pipes may be passed in (reboot path) so
    /// the same host endpoints are reused.
    pub fn create_devices(
        &mut self,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        console_resize_pipe: Option<File>,
        original_termios_opt: Arc<Mutex<Option<termios>>>,
    ) -> DeviceManagerResult<()> {
        trace_scoped!("create_devices");

        let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();

        let interrupt_controller = self.add_interrupt_controller()?;

        self.cpu_manager
            .lock()
            .unwrap()
            .set_interrupt_controller(interrupt_controller.clone());

        // Now we can create the legacy interrupt manager, which needs the freshly
        // formed IOAPIC device.
        let legacy_interrupt_manager: Arc<
            dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
        > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
            &interrupt_controller,
        )));

        {
            // If the MemoryManager exposes an ACPI interface, map it on the
            // MMIO bus at its reserved address.
            if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
                self.address_manager
                    .mmio_bus
                    .insert(
                        Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
                        acpi_address.0,
                        MEMORY_MANAGER_ACPI_SIZE as u64,
                    )
                    .map_err(DeviceManagerError::BusError)?;
            }
        }

        #[cfg(target_arch = "x86_64")]
        self.add_legacy_devices(
            self.reset_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )?;

        #[cfg(target_arch = "aarch64")]
        self.add_legacy_devices(&legacy_interrupt_manager)?;

        {
            self.ged_notification_device = self.add_acpi_devices(
                &legacy_interrupt_manager,
                self.reset_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )?;
        }

        // Must be stored before the console is created: set_raw_mode saves
        // the original terminal settings into this shared slot.
        self.original_termios_opt = original_termios_opt;

        self.console = self.add_console_device(
            &legacy_interrupt_manager,
            &mut virtio_devices,
            serial_pty,
            console_pty,
            console_resize_pipe,
        )?;

        if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
            let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
            self.bus_devices
                .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>)
        }
        self.legacy_interrupt_manager = Some(legacy_interrupt_manager);

        virtio_devices.append(&mut self.make_virtio_devices()?);

        self.add_pci_devices(virtio_devices.clone())?;

        self.virtio_devices = virtio_devices;

        if self.config.clone().lock().unwrap().pvpanic {
            self.pvpanic_device = self.add_pvpanic_device()?;
        }

        Ok(())
    }

    /// Captures the snapshot-relevant state: the device tree and the
    /// monotonically increasing device-id counter.
    fn state(&self) -> DeviceManagerState {
        DeviceManagerState {
            device_tree: self.device_tree.lock().unwrap().clone(),
            device_id_cnt: self.device_id_cnt,
        }
    }

    /// Returns the inclusive guest-physical range used for MSI doorbells:
    /// the vGIC ITS window on aarch64, the fixed APIC window on x86_64.
    fn get_msi_iova_space(&mut self) -> (u64, u64) {
        #[cfg(target_arch = "aarch64")]
        {
            let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
            let vgic_config = gic::Gic::create_default_config(vcpus.into());
            (
                vgic_config.msi_addr,
                vgic_config.msi_addr + vgic_config.msi_size - 1,
            )
        }
        #[cfg(target_arch = "x86_64")]
        (0xfee0_0000, 0xfeef_ffff)
    }

    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }

    /// Places all PCI devices on their segments: the optional virtio-iommu,
    /// every virtio device, VFIO and vfio-user devices, then records which
    /// BDFs are iommu-attached. Finally registers each segment's config
    /// spaces as bus devices.
    #[allow(unused_variables)]
    fn add_pci_devices(
        &mut self,
        virtio_devices: Vec<MetaVirtioDevice>,
    ) -> DeviceManagerResult<()> {
        let iommu_id = String::from(IOMMU_DEVICE_NAME);

        let iommu_device = if self.config.lock().unwrap().iommu {
            let (device, mapping) = virtio_devices::Iommu::new(
                iommu_id.clone(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.get_msi_iova_space(),
                versioned_state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioIommu)?;
            let device = Arc::new(Mutex::new(device));
            self.iommu_device = Some(Arc::clone(&device));
            self.iommu_mapping = Some(mapping);

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(iommu_id.clone(), device_node!(iommu_id, device));

            Some(device)
        } else {
            None
        };

        let mut iommu_attached_devices = Vec::new();
        {
            for handle in virtio_devices {
                let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
                    self.iommu_mapping.clone()
                } else {
                    None
                };

                let dev_id = self.add_virtio_pci_device(
                    handle.virtio_device,
                    &mapping,
                    handle.id,
                    handle.pci_segment,
                    handle.dma_handler,
                )?;

                if handle.iommu {
                    iommu_attached_devices.push(dev_id);
                }
            }

            let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
            iommu_attached_devices.append(&mut vfio_iommu_device_ids);

            let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
            iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);

            // Add all devices from forced iommu segments: every possible
            // device slot (0..32) on those segments is treated as attached.
            if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
                if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
                    for segment in iommu_segments {
                        for device in 0..32 {
                            let bdf = PciBdf::new(*segment, 0, device, 0);
                            if !iommu_attached_devices.contains(&bdf) {
                                iommu_attached_devices.push(bdf);
                            }
                        }
                    }
                }
            }

            // The iommu device itself is always placed on segment 0 and is
            // never behind the iommu (mapping is None).
            if let Some(iommu_device) = iommu_device {
                let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
                self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
            }
        }

        for segment in &self.pci_segments {
            #[cfg(target_arch = "x86_64")]
            if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
                self.bus_devices
                    .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
            }

            self.bus_devices
                .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
        }

        Ok(())
    }

#[cfg(target_arch = "aarch64")] 1399 fn add_interrupt_controller( 1400 &mut self, 1401 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1402 let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new( 1403 gic::Gic::new( 1404 self.config.lock().unwrap().cpus.boot_vcpus, 1405 Arc::clone(&self.msi_interrupt_manager), 1406 self.address_manager.vm.clone(), 1407 ) 1408 .map_err(DeviceManagerError::CreateInterruptController)?, 1409 )); 1410 1411 self.interrupt_controller = Some(interrupt_controller.clone()); 1412 1413 // Restore the vGic if this is in the process of restoration 1414 let id = String::from(gic::GIC_SNAPSHOT_ID); 1415 if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) { 1416 // PMU support is optional. Nothing should be impacted if the PMU initialization failed. 1417 if self 1418 .cpu_manager 1419 .lock() 1420 .unwrap() 1421 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16) 1422 .is_err() 1423 { 1424 info!("Failed to initialize PMU"); 1425 } 1426 1427 let vgic_state = vgic_snapshot 1428 .to_state() 1429 .map_err(DeviceManagerError::RestoreGetState)?; 1430 let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states(); 1431 interrupt_controller 1432 .lock() 1433 .unwrap() 1434 .restore_vgic(vgic_state, &saved_vcpu_states) 1435 .unwrap(); 1436 } 1437 1438 self.device_tree 1439 .lock() 1440 .unwrap() 1441 .insert(id.clone(), device_node!(id, interrupt_controller)); 1442 1443 Ok(interrupt_controller) 1444 } 1445 1446 #[cfg(target_arch = "aarch64")] 1447 pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> { 1448 self.interrupt_controller.as_ref() 1449 } 1450 1451 #[cfg(target_arch = "x86_64")] 1452 fn add_interrupt_controller( 1453 &mut self, 1454 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1455 let id = String::from(IOAPIC_DEVICE_NAME); 1456 1457 // Create IOAPIC 1458 let interrupt_controller = Arc::new(Mutex::new( 1459 ioapic::Ioapic::new( 1460 
id.clone(), 1461 APIC_START, 1462 Arc::clone(&self.msi_interrupt_manager), 1463 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1464 .map_err(DeviceManagerError::RestoreGetState)?, 1465 ) 1466 .map_err(DeviceManagerError::CreateInterruptController)?, 1467 )); 1468 1469 self.interrupt_controller = Some(interrupt_controller.clone()); 1470 1471 self.address_manager 1472 .mmio_bus 1473 .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE) 1474 .map_err(DeviceManagerError::BusError)?; 1475 1476 self.bus_devices 1477 .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>); 1478 1479 // Fill the device tree with a new node. In case of restore, we 1480 // know there is nothing to do, so we can simply override the 1481 // existing entry. 1482 self.device_tree 1483 .lock() 1484 .unwrap() 1485 .insert(id.clone(), device_node!(id, interrupt_controller)); 1486 1487 Ok(interrupt_controller) 1488 } 1489 1490 fn add_acpi_devices( 1491 &mut self, 1492 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1493 reset_evt: EventFd, 1494 exit_evt: EventFd, 1495 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> { 1496 let vcpus_kill_signalled = self 1497 .cpu_manager 1498 .lock() 1499 .unwrap() 1500 .vcpus_kill_signalled() 1501 .clone(); 1502 let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new( 1503 exit_evt, 1504 reset_evt, 1505 vcpus_kill_signalled, 1506 ))); 1507 1508 self.bus_devices 1509 .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>); 1510 1511 #[cfg(target_arch = "x86_64")] 1512 { 1513 let shutdown_pio_address: u16 = 0x600; 1514 1515 self.address_manager 1516 .allocator 1517 .lock() 1518 .unwrap() 1519 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None) 1520 .ok_or(DeviceManagerError::AllocateIoPort)?; 1521 1522 self.address_manager 1523 .io_bus 1524 .insert(shutdown_device, shutdown_pio_address.into(), 0x4) 1525 
.map_err(DeviceManagerError::BusError)?; 1526 1527 self.acpi_platform_addresses.sleep_control_reg_address = 1528 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1529 self.acpi_platform_addresses.sleep_status_reg_address = 1530 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1531 self.acpi_platform_addresses.reset_reg_address = 1532 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1533 } 1534 1535 let ged_irq = self 1536 .address_manager 1537 .allocator 1538 .lock() 1539 .unwrap() 1540 .allocate_irq() 1541 .unwrap(); 1542 let interrupt_group = interrupt_manager 1543 .create_group(LegacyIrqGroupConfig { 1544 irq: ged_irq as InterruptIndex, 1545 }) 1546 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1547 let ged_address = self 1548 .address_manager 1549 .allocator 1550 .lock() 1551 .unwrap() 1552 .allocate_platform_mmio_addresses( 1553 None, 1554 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1555 None, 1556 ) 1557 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 1558 let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new( 1559 interrupt_group, 1560 ged_irq, 1561 ged_address, 1562 ))); 1563 self.address_manager 1564 .mmio_bus 1565 .insert( 1566 ged_device.clone(), 1567 ged_address.0, 1568 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1569 ) 1570 .map_err(DeviceManagerError::BusError)?; 1571 self.bus_devices 1572 .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>); 1573 1574 let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new())); 1575 1576 self.bus_devices 1577 .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>); 1578 1579 #[cfg(target_arch = "x86_64")] 1580 { 1581 let pm_timer_pio_address: u16 = 0x608; 1582 1583 self.address_manager 1584 .allocator 1585 .lock() 1586 .unwrap() 1587 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None) 1588 .ok_or(DeviceManagerError::AllocateIoPort)?; 1589 1590 self.address_manager 1591 .io_bus 
1592 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4) 1593 .map_err(DeviceManagerError::BusError)?; 1594 1595 self.acpi_platform_addresses.pm_timer_address = 1596 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address)); 1597 } 1598 1599 Ok(Some(ged_device)) 1600 } 1601 1602 #[cfg(target_arch = "x86_64")] 1603 fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> { 1604 let vcpus_kill_signalled = self 1605 .cpu_manager 1606 .lock() 1607 .unwrap() 1608 .vcpus_kill_signalled() 1609 .clone(); 1610 // Add a shutdown device (i8042) 1611 let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new( 1612 reset_evt.try_clone().unwrap(), 1613 vcpus_kill_signalled.clone(), 1614 ))); 1615 1616 self.bus_devices 1617 .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>); 1618 1619 self.address_manager 1620 .io_bus 1621 .insert(i8042, 0x61, 0x4) 1622 .map_err(DeviceManagerError::BusError)?; 1623 { 1624 // Add a CMOS emulated device 1625 let mem_size = self 1626 .memory_manager 1627 .lock() 1628 .unwrap() 1629 .guest_memory() 1630 .memory() 1631 .last_addr() 1632 .0 1633 + 1; 1634 let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size); 1635 let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0); 1636 1637 let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new( 1638 mem_below_4g, 1639 mem_above_4g, 1640 reset_evt, 1641 Some(vcpus_kill_signalled), 1642 ))); 1643 1644 self.bus_devices 1645 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>); 1646 1647 self.address_manager 1648 .io_bus 1649 .insert(cmos, 0x70, 0x2) 1650 .map_err(DeviceManagerError::BusError)?; 1651 1652 let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new())); 1653 1654 self.bus_devices 1655 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>); 1656 1657 self.address_manager 1658 .io_bus 1659 .insert(fwdebug, 0x402, 0x1) 1660 .map_err(DeviceManagerError::BusError)?; 1661 } 1662 1663 // 0x80 
debug port 1664 let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp))); 1665 self.bus_devices 1666 .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>); 1667 self.address_manager 1668 .io_bus 1669 .insert(debug_port, 0x80, 0x1) 1670 .map_err(DeviceManagerError::BusError)?; 1671 1672 Ok(()) 1673 } 1674 1675 #[cfg(target_arch = "aarch64")] 1676 fn add_legacy_devices( 1677 &mut self, 1678 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1679 ) -> DeviceManagerResult<()> { 1680 // Add a RTC device 1681 let rtc_irq = self 1682 .address_manager 1683 .allocator 1684 .lock() 1685 .unwrap() 1686 .allocate_irq() 1687 .unwrap(); 1688 1689 let interrupt_group = interrupt_manager 1690 .create_group(LegacyIrqGroupConfig { 1691 irq: rtc_irq as InterruptIndex, 1692 }) 1693 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1694 1695 let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group))); 1696 1697 self.bus_devices 1698 .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>); 1699 1700 let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START; 1701 1702 self.address_manager 1703 .mmio_bus 1704 .insert(rtc_device, addr.0, MMIO_LEN) 1705 .map_err(DeviceManagerError::BusError)?; 1706 1707 self.id_to_dev_info.insert( 1708 (DeviceType::Rtc, "rtc".to_string()), 1709 MmioDeviceInfo { 1710 addr: addr.0, 1711 len: MMIO_LEN, 1712 irq: rtc_irq, 1713 }, 1714 ); 1715 1716 // Add a GPIO device 1717 let id = String::from(GPIO_DEVICE_NAME); 1718 let gpio_irq = self 1719 .address_manager 1720 .allocator 1721 .lock() 1722 .unwrap() 1723 .allocate_irq() 1724 .unwrap(); 1725 1726 let interrupt_group = interrupt_manager 1727 .create_group(LegacyIrqGroupConfig { 1728 irq: gpio_irq as InterruptIndex, 1729 }) 1730 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1731 1732 let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new( 1733 id.clone(), 1734 interrupt_group, 1735 
versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1736 .map_err(DeviceManagerError::RestoreGetState)?, 1737 ))); 1738 1739 self.bus_devices 1740 .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>); 1741 1742 let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START; 1743 1744 self.address_manager 1745 .mmio_bus 1746 .insert(gpio_device.clone(), addr.0, MMIO_LEN) 1747 .map_err(DeviceManagerError::BusError)?; 1748 1749 self.gpio_device = Some(gpio_device.clone()); 1750 1751 self.id_to_dev_info.insert( 1752 (DeviceType::Gpio, "gpio".to_string()), 1753 MmioDeviceInfo { 1754 addr: addr.0, 1755 len: MMIO_LEN, 1756 irq: gpio_irq, 1757 }, 1758 ); 1759 1760 self.device_tree 1761 .lock() 1762 .unwrap() 1763 .insert(id.clone(), device_node!(id, gpio_device)); 1764 1765 Ok(()) 1766 } 1767 1768 #[cfg(target_arch = "x86_64")] 1769 fn add_serial_device( 1770 &mut self, 1771 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1772 serial_writer: Option<Box<dyn io::Write + Send>>, 1773 ) -> DeviceManagerResult<Arc<Mutex<Serial>>> { 1774 // Serial is tied to IRQ #4 1775 let serial_irq = 4; 1776 1777 let id = String::from(SERIAL_DEVICE_NAME); 1778 1779 let interrupt_group = interrupt_manager 1780 .create_group(LegacyIrqGroupConfig { 1781 irq: serial_irq as InterruptIndex, 1782 }) 1783 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1784 1785 let serial = Arc::new(Mutex::new(Serial::new( 1786 id.clone(), 1787 interrupt_group, 1788 serial_writer, 1789 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1790 .map_err(DeviceManagerError::RestoreGetState)?, 1791 ))); 1792 1793 self.bus_devices 1794 .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>); 1795 1796 self.address_manager 1797 .allocator 1798 .lock() 1799 .unwrap() 1800 .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None) 1801 .ok_or(DeviceManagerError::AllocateIoPort)?; 1802 1803 self.address_manager 1804 .io_bus 1805 .insert(serial.clone(), 0x3f8, 0x8) 1806 
.map_err(DeviceManagerError::BusError)?; 1807 1808 // Fill the device tree with a new node. In case of restore, we 1809 // know there is nothing to do, so we can simply override the 1810 // existing entry. 1811 self.device_tree 1812 .lock() 1813 .unwrap() 1814 .insert(id.clone(), device_node!(id, serial)); 1815 1816 Ok(serial) 1817 } 1818 1819 #[cfg(target_arch = "aarch64")] 1820 fn add_serial_device( 1821 &mut self, 1822 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1823 serial_writer: Option<Box<dyn io::Write + Send>>, 1824 ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> { 1825 let id = String::from(SERIAL_DEVICE_NAME); 1826 1827 let serial_irq = self 1828 .address_manager 1829 .allocator 1830 .lock() 1831 .unwrap() 1832 .allocate_irq() 1833 .unwrap(); 1834 1835 let interrupt_group = interrupt_manager 1836 .create_group(LegacyIrqGroupConfig { 1837 irq: serial_irq as InterruptIndex, 1838 }) 1839 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1840 1841 let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new( 1842 id.clone(), 1843 interrupt_group, 1844 serial_writer, 1845 self.timestamp, 1846 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1847 .map_err(DeviceManagerError::RestoreGetState)?, 1848 ))); 1849 1850 self.bus_devices 1851 .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>); 1852 1853 let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START; 1854 1855 self.address_manager 1856 .mmio_bus 1857 .insert(serial.clone(), addr.0, MMIO_LEN) 1858 .map_err(DeviceManagerError::BusError)?; 1859 1860 self.id_to_dev_info.insert( 1861 (DeviceType::Serial, DeviceType::Serial.to_string()), 1862 MmioDeviceInfo { 1863 addr: addr.0, 1864 len: MMIO_LEN, 1865 irq: serial_irq, 1866 }, 1867 ); 1868 1869 self.cmdline_additions 1870 .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0)); 1871 1872 // Fill the device tree with a new node. 
In case of restore, we 1873 // know there is nothing to do, so we can simply override the 1874 // existing entry. 1875 self.device_tree 1876 .lock() 1877 .unwrap() 1878 .insert(id.clone(), device_node!(id, serial)); 1879 1880 Ok(serial) 1881 } 1882 1883 fn modify_mode<F: FnOnce(&mut termios)>( 1884 &mut self, 1885 fd: RawFd, 1886 f: F, 1887 ) -> vmm_sys_util::errno::Result<()> { 1888 // SAFETY: safe because we check the return value of isatty. 1889 if unsafe { isatty(fd) } != 1 { 1890 return Ok(()); 1891 } 1892 1893 // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr 1894 // and we check the return result. 1895 let mut termios: termios = unsafe { zeroed() }; 1896 // SAFETY: see above 1897 let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) }; 1898 if ret < 0 { 1899 return vmm_sys_util::errno::errno_result(); 1900 } 1901 let mut original_termios_opt = self.original_termios_opt.lock().unwrap(); 1902 if original_termios_opt.is_none() { 1903 *original_termios_opt = Some(termios); 1904 } 1905 f(&mut termios); 1906 // SAFETY: Safe because the syscall will only read the extent of termios and we check 1907 // the return result. 1908 let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) }; 1909 if ret < 0 { 1910 return vmm_sys_util::errno::errno_result(); 1911 } 1912 1913 Ok(()) 1914 } 1915 1916 fn set_raw_mode(&mut self, f: &dyn AsRawFd) -> vmm_sys_util::errno::Result<()> { 1917 // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode. 
1918 self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) }) 1919 } 1920 1921 fn listen_for_sigwinch_on_tty(&mut self, pty_sub: File) -> std::io::Result<()> { 1922 let seccomp_filter = get_seccomp_filter( 1923 &self.seccomp_action, 1924 Thread::PtyForeground, 1925 self.hypervisor_type, 1926 ) 1927 .unwrap(); 1928 1929 self.console_resize_pipe = 1930 Some(Arc::new(start_sigwinch_listener(seccomp_filter, pty_sub)?)); 1931 1932 Ok(()) 1933 } 1934 1935 fn add_virtio_console_device( 1936 &mut self, 1937 virtio_devices: &mut Vec<MetaVirtioDevice>, 1938 console_pty: Option<PtyPair>, 1939 resize_pipe: Option<File>, 1940 ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> { 1941 let console_config = self.config.lock().unwrap().console.clone(); 1942 let endpoint = match console_config.mode { 1943 ConsoleOutputMode::File => { 1944 let file = File::create(console_config.file.as_ref().unwrap()) 1945 .map_err(DeviceManagerError::ConsoleOutputFileOpen)?; 1946 Endpoint::File(file) 1947 } 1948 ConsoleOutputMode::Pty => { 1949 if let Some(pty) = console_pty { 1950 self.config.lock().unwrap().console.file = Some(pty.path.clone()); 1951 let file = pty.main.try_clone().unwrap(); 1952 self.console_pty = Some(Arc::new(Mutex::new(pty))); 1953 self.console_resize_pipe = resize_pipe.map(Arc::new); 1954 Endpoint::PtyPair(file.try_clone().unwrap(), file) 1955 } else { 1956 let (main, sub, path) = 1957 create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?; 1958 self.set_raw_mode(&sub) 1959 .map_err(DeviceManagerError::SetPtyRaw)?; 1960 self.config.lock().unwrap().console.file = Some(path.clone()); 1961 let file = main.try_clone().unwrap(); 1962 assert!(resize_pipe.is_none()); 1963 self.listen_for_sigwinch_on_tty(sub).unwrap(); 1964 self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path }))); 1965 Endpoint::PtyPair(file.try_clone().unwrap(), file) 1966 } 1967 } 1968 ConsoleOutputMode::Tty => { 1969 // Duplicating the file descriptors like this is needed as 
otherwise 1970 // they will be closed on a reboot and the numbers reused 1971 1972 // SAFETY: FFI call to dup. Trivially safe. 1973 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) }; 1974 if stdout == -1 { 1975 return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd); 1976 } 1977 // SAFETY: stdout is valid and owned solely by us. 1978 let stdout = unsafe { File::from_raw_fd(stdout) }; 1979 1980 // Make sure stdout is in raw mode, if it's a terminal. 1981 let _ = self.set_raw_mode(&stdout); 1982 1983 // SAFETY: FFI call. Trivially safe. 1984 if unsafe { libc::isatty(libc::STDOUT_FILENO) } == 1 { 1985 self.listen_for_sigwinch_on_tty(stdout.try_clone().unwrap()) 1986 .unwrap(); 1987 } 1988 1989 // If an interactive TTY then we can accept input 1990 // SAFETY: FFI call. Trivially safe. 1991 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } { 1992 // SAFETY: FFI call to dup. Trivially safe. 1993 let stdin = unsafe { libc::dup(libc::STDIN_FILENO) }; 1994 if stdin == -1 { 1995 return vmm_sys_util::errno::errno_result() 1996 .map_err(DeviceManagerError::DupFd); 1997 } 1998 // SAFETY: stdin is valid and owned solely by us. 
                // NOTE(review): continuation of the console endpoint selection
                // started above this chunk — presumably the TTY arm duplicating
                // stdout/stdin fds; confirm against the lines preceding L36.
                let stdin = unsafe { File::from_raw_fd(stdin) };

                    Endpoint::FilePair(stdout, stdin)
                } else {
                    Endpoint::File(stdout)
                }
            }
            ConsoleOutputMode::Socket => {
                // Socket mode is only supported for the serial device, not virtio-console.
                return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
            }
            ConsoleOutputMode::Null => Endpoint::Null,
            // No console requested: nothing to create, and no resizer to return.
            ConsoleOutputMode::Off => return Ok(None),
        };
        let id = String::from(CONSOLE_DEVICE_NAME);

        let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
            id.clone(),
            endpoint,
            self.console_resize_pipe
                .as_ref()
                .map(|p| p.try_clone().unwrap()),
            self.force_iommu | console_config.iommu,
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateVirtioConsole)?;
        let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
        virtio_devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_console_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: console_config.iommu,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_console_device));

        // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
        Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
            Some(console_resizer)
        } else {
            None
        })
    }

    /// Creates the legacy serial device (plus its `SerialManager` thread when
    /// the output mode needs one) and the virtio-console device, based on the
    /// VM configuration. Returns the composite `Console` handle holding the
    /// optional resizer from the virtio-console.
    ///
    /// `serial_pty` / `console_pty` are pre-existing PTYs to reuse (restore
    /// path); when absent and PTY mode is requested, fresh PTYs are created
    /// and their paths written back into the config.
    fn add_console_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        console_resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        // Pick the output sink for the serial device according to the mode.
        let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
            ConsoleOutputMode::File => Some(Box::new(
                File::create(serial_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::SerialOutputFileOpen)?,
            )),
            ConsoleOutputMode::Pty => {
                if let Some(pty) = serial_pty {
                    // Reuse the provided PTY (e.g. on restore).
                    self.config.lock().unwrap().serial.file = Some(pty.path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(pty)));
                } else {
                    // Create a new PTY pair and record its path in the config.
                    let (main, sub, path) =
                        create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
                    self.set_raw_mode(&sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().serial.file = Some(path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
                }
                // PTY output is driven by the SerialManager, not a writer here.
                None
            }
            ConsoleOutputMode::Tty => {
                let out = stdout();
                // Best-effort: raw mode may fail if stdout is not a TTY.
                let _ = self.set_raw_mode(&out);
                Some(Box::new(out))
            }
            ConsoleOutputMode::Off | ConsoleOutputMode::Null | ConsoleOutputMode::Socket => None,
        };
        if serial_config.mode != ConsoleOutputMode::Off {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            // Modes with an interactive endpoint need a SerialManager thread
            // to pump input into the device.
            self.serial_manager = match serial_config.mode {
                ConsoleOutputMode::Pty | ConsoleOutputMode::Tty | ConsoleOutputMode::Socket => {
                    let serial_manager = SerialManager::new(
                        serial,
                        self.serial_pty.clone(),
                        serial_config.mode,
                        serial_config.socket,
                    )
                    .map_err(DeviceManagerError::CreateSerialManager)?;
                    if let Some(mut serial_manager) = serial_manager {
                        serial_manager
                            .start_thread(
                                self.exit_evt
                                    .try_clone()
                                    .map_err(DeviceManagerError::EventFd)?,
                            )
                            .map_err(DeviceManagerError::SpawnSerialManager)?;
                        Some(Arc::new(serial_manager))
                    } else {
                        None
                    }
                }
                _ => None,
            };
        }

        let console_resizer =
            self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;

        Ok(Arc::new(Console { console_resizer }))
    }

    /// Creates a TPM device backed by the swtpm socket at `tpm_path` and
    /// registers it on the MMIO bus at the architecture-defined TPM range.
    fn add_tpm_device(
        &mut self,
        tpm_path: PathBuf,
    ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
        // Create TPM Device
        let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
            DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
        })?;
        let tpm = Arc::new(Mutex::new(tpm));

        // Add TPM Device to mmio
        self.address_manager
            .mmio_bus
            .insert(
                tpm.clone(),
                arch::layout::TPM_START.0,
                arch::layout::TPM_SIZE,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(tpm)
    }

    /// Instantiates every configured virtio device (block, net, rng, fs,
    /// pmem, vsock, mem, balloon, watchdog, vDPA) and returns them as a
    /// single list for later PCI attachment.
    fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices: Vec<MetaVirtioDevice> = Vec::new();

        // Create "standard" virtio devices (net/block/rng)
        devices.append(&mut self.make_virtio_block_devices()?);
        devices.append(&mut self.make_virtio_net_devices()?);
        devices.append(&mut self.make_virtio_rng_devices()?);

        // Add virtio-fs if required
        devices.append(&mut self.make_virtio_fs_devices()?);

        // Add virtio-pmem if required
        devices.append(&mut self.make_virtio_pmem_devices()?);

        // Add virtio-vsock if required
        devices.append(&mut self.make_virtio_vsock_devices()?);

        devices.append(&mut self.make_virtio_mem_devices()?);

        // Add virtio-balloon if required
        devices.append(&mut self.make_virtio_balloon_devices()?);

        // Add virtio-watchdog device
        devices.append(&mut self.make_virtio_watchdog_devices()?);

        // Add vDPA devices if required
        devices.append(&mut self.make_vdpa_devices()?);

        Ok(devices)
    }

    // Cache whether aio is supported to avoid checking for every block device
    fn aio_is_supported(&mut self) -> bool {
        if let Some(supported) = self.aio_supported {
            return supported;
        }

        let supported = block_aio_is_supported();
        self.aio_supported = Some(supported);
        supported
    }

    // Cache whether io_uring is supported to avoid probing for every block device
    fn io_uring_is_supported(&mut self) -> bool {
        if let Some(supported) = self.io_uring_supported {
            return supported;
        }

        let supported = block_io_uring_is_supported();
        self.io_uring_supported = Some(supported);
        supported
    }

    /// Builds one virtio-block device from `disk_cfg`: either a
    /// vhost-user-blk backend (when `vhost_user` is set) or a file-backed
    /// disk whose image format (fixed VHD, raw, QCOW2, VHDX) is detected
    /// from the file itself, picking an io_uring/aio/sync backend as
    /// supported. Assigns a generated id when the config has none and
    /// records the device in the device tree.
    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let vhost_user_block = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(
                    id.clone(),
                    vu_cfg,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                // O_DIRECT bypasses the host page cache.
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(
                                FixedVhdDiskAsync::new(file)
                                    .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                            ) as Box<dyn DiskFile>
                        }
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous RAW disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                        }
                    } else if !disk_cfg.disable_aio && self.aio_is_supported() {
                        info!("Using asynchronous RAW disk file (aio)");
                        Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            let virtio_block = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    disk_cfg.serial.clone(),
                    self.seccomp_action.clone(),
                    disk_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Builds a virtio-block device for each configured disk, writing any
    /// generated ids back into the VM configuration.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }

    /// Builds one virtio-net device from `net_cfg`: a vhost-user-net
    /// backend, a named TAP interface, pre-opened TAP fds, or a TAP created
    /// from ip/mask. Assigns a generated id when the config has none and
    /// records the device in the device tree.
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    net_cfg.mtu,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                ) {
                    Ok(vun_device) => vun_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserNet(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_net as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let state = snapshot
                .map(|s| s.to_versioned_state())
                .transpose()
                .map_err(DeviceManagerError::RestoreGetState)?;

            // Three ways to build a TAP-backed device: by interface name,
            // from pre-opened fds, or by creating a TAP from ip/mask.
            let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        Some(tap_if_name),
                        None,
                        None,
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else if let Some(fds) = &net_cfg.fds {
                let net = virtio_devices::Net::from_tap_fds(
                    id.clone(),
                    fds,
                    Some(net_cfg.mac),
                    net_cfg.mtu,
                    self.force_iommu | net_cfg.iommu,
                    net_cfg.queue_size,
                    self.seccomp_action.clone(),
                    net_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                )
                .map_err(DeviceManagerError::CreateVirtioNet)?;

                // SAFETY: 'fds' are valid because TAP devices are created successfully
                unsafe {
                    self.config.lock().unwrap().add_preserved_fds(fds.clone());
                }

                Arc::new(Mutex::new(net))
            } else {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        None,
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            };

            (
                Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_net as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: net_cfg.iommu,
            id,
            pci_segment: net_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add virtio-net and vhost-user-net devices
    fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        let mut net_devices = self.config.lock().unwrap().net.clone();
        if let Some(net_list_cfg) = &mut net_devices {
            for net_cfg in net_list_cfg.iter_mut() {
                devices.push(self.make_virtio_net_device(net_cfg)?);
            }
        }
        self.config.lock().unwrap().net = net_devices;

        Ok(devices)
    }

    /// Builds the (at most one) virtio-rng device backed by the configured
    /// entropy source path.
    fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        if let Some(rng_path) = rng_config.src.to_str() {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_rng_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: rng_config.iommu,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Fill the device tree with a new node.
            // In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_rng_device));
        }

        Ok(devices)
    }

    /// Builds one vhost-user-fs (virtio-fs) device from `fs_cfg`. Requires a
    /// valid backend socket path; fails with `NoVirtioFsSock` otherwise.
    /// Assigns a generated id when the config has none and records the
    /// migratable device in the device tree.
    fn make_virtio_fs_device(
        &mut self,
        fs_cfg: &mut FsConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &fs_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
            fs_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-fs device: {:?}", fs_cfg);

        let mut node = device_node!(id);

        if let Some(fs_socket) = fs_cfg.socket.to_str() {
            let virtio_fs_device = Arc::new(Mutex::new(
                virtio_devices::vhost_user::Fs::new(
                    id.clone(),
                    fs_socket,
                    &fs_cfg.tag,
                    fs_cfg.num_queues,
                    fs_cfg.queue_size,
                    None,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioFs)?,
            ));

            // Update the device tree with the migratable device.
            node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_fs_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id,
                pci_segment: fs_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            Err(DeviceManagerError::NoVirtioFsSock)
        }
    }

    /// Builds a virtio-fs device for each configured filesystem, writing any
    /// generated ids back into the VM configuration.
    fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut fs_devices = self.config.lock().unwrap().fs.clone();
        if let Some(fs_list_cfg) = &mut fs_devices {
            for fs_cfg in fs_list_cfg.iter_mut() {
                devices.push(self.make_virtio_fs_device(fs_cfg)?);
            }
        }
        self.config.lock().unwrap().fs = fs_devices;

        Ok(devices)
    }

    /// Builds one virtio-pmem device from `pmem_cfg`: opens (or creates via
    /// O_TMPFILE, for directory backends) the backing file, mmaps it, maps
    /// it into the guest and records the resulting MMIO range plus the
    /// migratable device in the device tree. On restore, the guest address
    /// range is recovered from the existing device tree node.
    fn make_virtio_pmem_device(
        &mut self,
        pmem_cfg: &mut PmemConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &pmem_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
            pmem_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-pmem device: {:?}", pmem_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
            info!("Restoring virtio-pmem {} resources", id);

            let mut region_range: Option<(u64, u64)> = None;
            for resource in node.resources.iter() {
                match resource {
                    Resource::MmioAddressRange { base, size } => {
                        // A pmem device owns exactly one MMIO range.
                        if region_range.is_some() {
                            return Err(DeviceManagerError::ResourceAlreadyExists);
                        }

                        region_range = Some((*base, *size));
                    }
                    _ => {
                        error!("Unexpected resource {:?} for {}", resource, id);
                    }
                }
            }

            if region_range.is_none() {
                return Err(DeviceManagerError::MissingVirtioPmemResources);
            }

            region_range
        } else {
            None
        };

        // Directory backend: create an anonymous temp file (O_TMPFILE) that
        // must then be sized explicitly from the config.
        let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
            if pmem_cfg.size.is_none() {
                return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
            }
            (O_TMPFILE, true)
        } else {
            (0, false)
        };

        let mut file = OpenOptions::new()
            .read(true)
            .write(!pmem_cfg.discard_writes)
            .custom_flags(custom_flags)
            .open(&pmem_cfg.file)
            .map_err(DeviceManagerError::PmemFileOpen)?;

        let size = if let Some(size) = pmem_cfg.size {
            if set_len {
                file.set_len(size)
                    .map_err(DeviceManagerError::PmemFileSetLen)?;
            }
            size
        } else {
            // No explicit size: use the current file length.
            file.seek(SeekFrom::End(0))
                .map_err(DeviceManagerError::PmemFileSetLen)?
        };

        // Size must be a multiple of 2MiB.
        if size % 0x20_0000 != 0 {
            return Err(DeviceManagerError::PmemSizeNotAligned);
        }

        let (region_base, region_size) = if let Some((base, size)) = region_range {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            // Restore path: re-reserve the previously used guest range.
            self.pci_segments[pmem_cfg.pci_segment as usize]
                .allocator
                .lock()
                .unwrap()
                .allocate(
                    Some(GuestAddress(base)),
                    size as GuestUsize,
                    Some(0x0020_0000),
                )
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base, size)
        } else {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            let base = self.pci_segments[pmem_cfg.pci_segment as usize]
                .allocator
                .lock()
                .unwrap()
                .allocate(None, size as GuestUsize, Some(0x0020_0000))
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base.raw_value(), size)
        };

        let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
        // MAP_PRIVATE when guest writes must not reach the backing file
        // (discard_writes), MAP_SHARED otherwise.
        let mmap_region = MmapRegion::build(
            Some(FileOffset::new(cloned_file, 0)),
            region_size as usize,
            PROT_READ | PROT_WRITE,
            MAP_NORESERVE
                | if pmem_cfg.discard_writes {
                    MAP_PRIVATE
                } else {
                    MAP_SHARED
                },
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;
        let host_addr: u64 = mmap_region.as_ptr() as u64;

        let mem_slot = self
            .memory_manager
            .lock()
            .unwrap()
            .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
            .map_err(DeviceManagerError::MemoryManager)?;

        let mapping = virtio_devices::UserspaceMapping {
            host_addr,
            mem_slot,
            addr: GuestAddress(region_base),
            len: region_size,
            mergeable: false,
        };

        let virtio_pmem_device = Arc::new(Mutex::new(
            virtio_devices::Pmem::new(
                id.clone(),
                file,
                GuestAddress(region_base),
                mapping,
                mmap_region,
                self.force_iommu | pmem_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioPmem)?,
        ));

        // Update the device tree with correct resource information and with
        // the migratable device.
        node.resources.push(Resource::MmioAddressRange {
            base: region_base,
            size: region_size,
        });
        node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_pmem_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: pmem_cfg.iommu,
            id,
            pci_segment: pmem_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Builds a virtio-pmem device for each configured pmem backend, writing
    /// any generated ids back into the VM configuration.
    fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add virtio-pmem if required
        let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
        if let Some(pmem_list_cfg) = &mut pmem_devices {
            for pmem_cfg in pmem_list_cfg.iter_mut() {
                devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
            }
        }
        self.config.lock().unwrap().pmem = pmem_devices;

        Ok(devices)
    }

    /// Builds one virtio-vsock device from `vsock_cfg`, backed by a Unix
    /// socket. Assigns a generated id when the config has none and records
    /// the device in the device tree.
    fn make_virtio_vsock_device(
        &mut self,
        vsock_cfg: &mut VsockConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vsock_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
            vsock_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-vsock device: {:?}", vsock_cfg);

        let socket_path = vsock_cfg
            .socket
            .to_str()
            .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
        let backend =
            virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                .map_err(DeviceManagerError::CreateVsockBackend)?;

        let vsock_device = Arc::new(Mutex::new(
            virtio_devices::Vsock::new(
                id.clone(),
                vsock_cfg.cid,
                vsock_cfg.socket.clone(),
                backend,
                self.force_iommu | vsock_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioVsock)?,
        ));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vsock_device));

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&vsock_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vsock_cfg.iommu,
            id,
            pci_segment: vsock_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Builds the (at most one) configured virtio-vsock device, writing any
    /// generated id back into the VM configuration.
    fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut vsock = self.config.lock().unwrap().vsock.clone();
        if let Some(ref mut vsock_cfg) = &mut vsock {
            devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
        }
        self.config.lock().unwrap().vsock = vsock;

        Ok(devices)
    }

    /// Builds one virtio-mem device per memory zone that carries a
    /// virtio-mem zone, wiring each device back into its zone so hotplug
    /// resize requests can reach it.
    fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mm = self.memory_manager.clone();
        let mut mm = mm.lock().unwrap();
        for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
            if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
                info!("Creating virtio-mem device: id = {}", memory_zone_id);

                let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
                    .map(|i| i as u16);

                let virtio_mem_device = Arc::new(Mutex::new(
                    virtio_devices::Mem::new(
                        memory_zone_id.clone(),
                        virtio_mem_zone.region(),
                        self.seccomp_action.clone(),
                        node_id,
                        virtio_mem_zone.hotplugged_size(),
                        virtio_mem_zone.hugepages(),
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        virtio_mem_zone.blocks_state().clone(),
                        versioned_state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
                            .map_err(DeviceManagerError::RestoreGetState)?,
                    )
                    .map_err(DeviceManagerError::CreateVirtioMem)?,
                ));

                // Update the virtio-mem zone so that it has a handle onto the
                // virtio-mem device, which will be used for triggering a resize
                // if needed.
                virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));

                self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));

                devices.push(MetaVirtioDevice {
                    virtio_device: Arc::clone(&virtio_mem_device)
                        as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                    iommu: false,
                    id: memory_zone_id.clone(),
                    pci_segment: 0,
                    dma_handler: None,
                });

                // Fill the device tree with a new node. In case of restore, we
                // know there is nothing to do, so we can simply override the
                // existing entry.
                self.device_tree.lock().unwrap().insert(
                    memory_zone_id.clone(),
                    device_node!(memory_zone_id, virtio_mem_device),
                );
            }
        }

        Ok(devices)
    }

    /// Builds the (at most one) virtio-balloon device when a balloon is
    /// configured, keeping a handle in `self.balloon` for later resizing.
    fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
            let id = String::from(BALLOON_DEVICE_NAME);
            info!("Creating virtio-balloon device: id = {}", id);

            let virtio_balloon_device = Arc::new(Mutex::new(
                virtio_devices::Balloon::new(
                    id.clone(),
                    balloon_config.size,
                    balloon_config.deflate_on_oom,
                    balloon_config.free_page_reporting,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBalloon)?,
            ));

            self.balloon = Some(virtio_balloon_device.clone());

            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_balloon_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_balloon_device));
        }

        Ok(devices)
    }

    /// Builds the virtio-watchdog device when enabled in the configuration;
    /// a watchdog expiry triggers the VM reset event.
    fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if !self.config.lock().unwrap().watchdog {
            return Ok(devices);
        }

        let id = String::from(WATCHDOG_DEVICE_NAME);
        info!("Creating virtio-watchdog device: id = {}", id);

        let virtio_watchdog_device = Arc::new(Mutex::new(
            virtio_devices::Watchdog::new(
                id.clone(),
                self.reset_evt.try_clone().unwrap(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
        ));
        devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_watchdog_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: false,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_watchdog_device));

        Ok(devices)
    }

    /// Builds one vDPA device from `vdpa_cfg`, including the dedicated DMA
    /// mapping handler the device requires. Assigns a generated id when the
    /// config has none and records the device in the device tree.
    fn make_vdpa_device(
        &mut self,
        vdpa_cfg: &mut VdpaConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vdpa_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
            vdpa_cfg.id = Some(id.clone());
            id
        };

        info!("Creating vDPA device: {:?}", vdpa_cfg);

        let device_path = vdpa_cfg
            .path
            .to_str()
            .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;

        let vdpa_device = Arc::new(Mutex::new(
            virtio_devices::Vdpa::new(
                id.clone(),
                device_path,
                self.memory_manager.lock().unwrap().guest_memory(),
                vdpa_cfg.num_queues as u16,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVdpa)?,
        ));

        // Create the DMA handler that is required by the vDPA device
        let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
            Arc::clone(&vdpa_device),
            Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
        ));

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vdpa_device));

        Ok(MetaVirtioDevice {
            virtio_device: vdpa_device as
                Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vdpa_cfg.iommu,
            id,
            pci_segment: vdpa_cfg.pci_segment,
            dma_handler: Some(vdpa_mapping),
        })
    }

    /// Builds a vDPA device for each configured entry, writing any generated
    /// ids back into the VM configuration.
    fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add vdpa if required
        let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
        if let Some(vdpa_list_cfg) = &mut vdpa_devices {
            for vdpa_cfg in vdpa_list_cfg.iter_mut() {
                devices.push(self.make_vdpa_device(vdpa_cfg)?);
            }
        }
        self.config.lock().unwrap().vdpa = vdpa_devices;

        Ok(devices)
    }

    /// Returns the next free device name of the form `<prefix><counter>`,
    /// skipping names already present in the boot id list or the device
    /// tree. The counter wraps; if a full cycle finds nothing free, the
    /// name space is exhausted and `NoAvailableDeviceName` is returned.
    fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
        let start_id = self.device_id_cnt;
        loop {
            // Generate the temporary name.
            let name = format!("{}{}", prefix, self.device_id_cnt);
            // Increment the counter.
            self.device_id_cnt += Wrapping(1);
            // Check if the name is already in use.
            if !self.boot_id_list.contains(&name)
                && !self.device_tree.lock().unwrap().contains_key(&name)
            {
                return Ok(name);
            }

            if self.device_id_cnt == start_id {
                // We went through a full loop and there's nothing else we can
                // do.
                break;
            }
        }
        Err(DeviceManagerError::NoAvailableDeviceName)
    }

    /// Adds a VFIO passthrough device, lazily creating the hypervisor-level
    /// passthrough device the first time one is needed.
    fn add_passthrough_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // If the passthrough device has not been created yet, it is created
        // here and stored in the DeviceManager structure for future needs.
        if self.passthrough_device.is_none() {
            self.passthrough_device = Some(
                self.address_manager
                    .vm
                    .create_passthrough_device()
                    .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
            );
        }

        self.add_vfio_device(device_cfg)
    }

    /// Creates a new VFIO container from a duplicate of the passthrough
    /// device fd. Fails if no passthrough device has been created yet.
    fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
        let passthrough_device = self
            .passthrough_device
            .as_ref()
            .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

        let dup = passthrough_device
            .try_clone()
            .map_err(DeviceManagerError::VfioCreate)?;

        Ok(Arc::new(
            VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
        ))
    }

    /// Adds one VFIO device: allocates its PCI resources, picks or creates
    /// the appropriate VFIO container (dedicated when behind the vIOMMU,
    /// shared otherwise) and opens the VFIO device.
    /// NOTE(review): definition continues past this chunk.
    fn add_vfio_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't let all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device. That's simply because the map/unmap operations happen at the
        // VFIO container level.
        let vfio_container = if device_cfg.iommu {
            let vfio_container = self.create_vfio_container()?;

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
            ));

            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }

            vfio_container
        } else if let Some(vfio_container) = &self.vfio_container {
            Arc::clone(vfio_container)
        } else {
            let vfio_container = self.create_vfio_container()?;
            needs_dma_mapping = true;
            self.vfio_container = Some(Arc::clone(&vfio_container));

            vfio_container
        };

        let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
            .map_err(DeviceManagerError::VfioCreate)?;

        if needs_dma_mapping {
            // Register DMA mapping in IOMMU.
            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem device itself.
3265 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3266 for region in zone.regions() { 3267 vfio_container 3268 .vfio_dma_map( 3269 region.start_addr().raw_value(), 3270 region.len(), 3271 region.as_ptr() as u64, 3272 ) 3273 .map_err(DeviceManagerError::VfioDmaMap)?; 3274 } 3275 } 3276 3277 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3278 Arc::clone(&vfio_container), 3279 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3280 )); 3281 3282 for virtio_mem_device in self.virtio_mem_devices.iter() { 3283 virtio_mem_device 3284 .lock() 3285 .unwrap() 3286 .add_dma_mapping_handler( 3287 VirtioMemMappingSource::Container, 3288 vfio_mapping.clone(), 3289 ) 3290 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3291 } 3292 } 3293 3294 let legacy_interrupt_group = 3295 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3296 Some( 3297 legacy_interrupt_manager 3298 .create_group(LegacyIrqGroupConfig { 3299 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3300 [pci_device_bdf.device() as usize] 3301 as InterruptIndex, 3302 }) 3303 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3304 ) 3305 } else { 3306 None 3307 }; 3308 3309 let memory_manager = self.memory_manager.clone(); 3310 3311 let vfio_pci_device = VfioPciDevice::new( 3312 vfio_name.clone(), 3313 &self.address_manager.vm, 3314 vfio_device, 3315 vfio_container, 3316 self.msi_interrupt_manager.clone(), 3317 legacy_interrupt_group, 3318 device_cfg.iommu, 3319 pci_device_bdf, 3320 Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()), 3321 vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()), 3322 ) 3323 .map_err(DeviceManagerError::VfioPciCreate)?; 3324 3325 let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device)); 3326 3327 let new_resources = self.add_pci_device( 3328 vfio_pci_device.clone(), 3329 vfio_pci_device.clone(), 3330 pci_segment_id, 3331 pci_device_bdf, 3332 resources, 
3333 )?; 3334 3335 vfio_pci_device 3336 .lock() 3337 .unwrap() 3338 .map_mmio_regions() 3339 .map_err(DeviceManagerError::VfioMapRegion)?; 3340 3341 let mut node = device_node!(vfio_name, vfio_pci_device); 3342 3343 // Update the device tree with correct resource information. 3344 node.resources = new_resources; 3345 node.pci_bdf = Some(pci_device_bdf); 3346 node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device)); 3347 3348 self.device_tree 3349 .lock() 3350 .unwrap() 3351 .insert(vfio_name.clone(), node); 3352 3353 Ok((pci_device_bdf, vfio_name)) 3354 } 3355 3356 fn add_pci_device( 3357 &mut self, 3358 bus_device: Arc<Mutex<dyn BusDevice>>, 3359 pci_device: Arc<Mutex<dyn PciDevice>>, 3360 segment_id: u16, 3361 bdf: PciBdf, 3362 resources: Option<Vec<Resource>>, 3363 ) -> DeviceManagerResult<Vec<Resource>> { 3364 let bars = pci_device 3365 .lock() 3366 .unwrap() 3367 .allocate_bars( 3368 &self.address_manager.allocator, 3369 &mut self.pci_segments[segment_id as usize] 3370 .allocator 3371 .lock() 3372 .unwrap(), 3373 resources, 3374 ) 3375 .map_err(DeviceManagerError::AllocateBars)?; 3376 3377 let mut pci_bus = self.pci_segments[segment_id as usize] 3378 .pci_bus 3379 .lock() 3380 .unwrap(); 3381 3382 pci_bus 3383 .add_device(bdf.device() as u32, pci_device) 3384 .map_err(DeviceManagerError::AddPciDevice)?; 3385 3386 self.bus_devices.push(Arc::clone(&bus_device)); 3387 3388 pci_bus 3389 .register_mapping( 3390 bus_device, 3391 #[cfg(target_arch = "x86_64")] 3392 self.address_manager.io_bus.as_ref(), 3393 self.address_manager.mmio_bus.as_ref(), 3394 bars.clone(), 3395 ) 3396 .map_err(DeviceManagerError::AddPciDevice)?; 3397 3398 let mut new_resources = Vec::new(); 3399 for bar in bars { 3400 new_resources.push(Resource::PciBar { 3401 index: bar.idx(), 3402 base: bar.addr(), 3403 size: bar.size(), 3404 type_: bar.region_type().into(), 3405 prefetchable: bar.prefetchable().into(), 3406 }); 3407 } 3408 3409 Ok(new_resources) 3410 } 3411 3412 fn 
add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3413 let mut iommu_attached_device_ids = Vec::new(); 3414 let mut devices = self.config.lock().unwrap().devices.clone(); 3415 3416 if let Some(device_list_cfg) = &mut devices { 3417 for device_cfg in device_list_cfg.iter_mut() { 3418 let (device_id, _) = self.add_passthrough_device(device_cfg)?; 3419 if device_cfg.iommu && self.iommu_device.is_some() { 3420 iommu_attached_device_ids.push(device_id); 3421 } 3422 } 3423 } 3424 3425 // Update the list of devices 3426 self.config.lock().unwrap().devices = devices; 3427 3428 Ok(iommu_attached_device_ids) 3429 } 3430 3431 fn add_vfio_user_device( 3432 &mut self, 3433 device_cfg: &mut UserDeviceConfig, 3434 ) -> DeviceManagerResult<(PciBdf, String)> { 3435 let vfio_user_name = if let Some(id) = &device_cfg.id { 3436 id.clone() 3437 } else { 3438 let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?; 3439 device_cfg.id = Some(id.clone()); 3440 id 3441 }; 3442 3443 let (pci_segment_id, pci_device_bdf, resources) = 3444 self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?; 3445 3446 let legacy_interrupt_group = 3447 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3448 Some( 3449 legacy_interrupt_manager 3450 .create_group(LegacyIrqGroupConfig { 3451 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3452 [pci_device_bdf.device() as usize] 3453 as InterruptIndex, 3454 }) 3455 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3456 ) 3457 } else { 3458 None 3459 }; 3460 3461 let client = Arc::new(Mutex::new( 3462 vfio_user::Client::new(&device_cfg.socket) 3463 .map_err(DeviceManagerError::VfioUserCreateClient)?, 3464 )); 3465 3466 let memory_manager = self.memory_manager.clone(); 3467 3468 let mut vfio_user_pci_device = VfioUserPciDevice::new( 3469 vfio_user_name.clone(), 3470 &self.address_manager.vm, 3471 client.clone(), 3472 self.msi_interrupt_manager.clone(), 3473 legacy_interrupt_group, 3474 
pci_device_bdf, 3475 Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()), 3476 vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()), 3477 ) 3478 .map_err(DeviceManagerError::VfioUserCreate)?; 3479 3480 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3481 let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory))); 3482 for virtio_mem_device in self.virtio_mem_devices.iter() { 3483 virtio_mem_device 3484 .lock() 3485 .unwrap() 3486 .add_dma_mapping_handler( 3487 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3488 vfio_user_mapping.clone(), 3489 ) 3490 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3491 } 3492 3493 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3494 for region in zone.regions() { 3495 vfio_user_pci_device 3496 .dma_map(region) 3497 .map_err(DeviceManagerError::VfioUserDmaMap)?; 3498 } 3499 } 3500 3501 let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device)); 3502 3503 let new_resources = self.add_pci_device( 3504 vfio_user_pci_device.clone(), 3505 vfio_user_pci_device.clone(), 3506 pci_segment_id, 3507 pci_device_bdf, 3508 resources, 3509 )?; 3510 3511 // Note it is required to call 'add_pci_device()' in advance to have the list of 3512 // mmio regions provisioned correctly 3513 vfio_user_pci_device 3514 .lock() 3515 .unwrap() 3516 .map_mmio_regions() 3517 .map_err(DeviceManagerError::VfioUserMapRegion)?; 3518 3519 let mut node = device_node!(vfio_user_name, vfio_user_pci_device); 3520 3521 // Update the device tree with correct resource information. 
3522 node.resources = new_resources; 3523 node.pci_bdf = Some(pci_device_bdf); 3524 node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device)); 3525 3526 self.device_tree 3527 .lock() 3528 .unwrap() 3529 .insert(vfio_user_name.clone(), node); 3530 3531 Ok((pci_device_bdf, vfio_user_name)) 3532 } 3533 3534 fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3535 let mut user_devices = self.config.lock().unwrap().user_devices.clone(); 3536 3537 if let Some(device_list_cfg) = &mut user_devices { 3538 for device_cfg in device_list_cfg.iter_mut() { 3539 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?; 3540 } 3541 } 3542 3543 // Update the list of devices 3544 self.config.lock().unwrap().user_devices = user_devices; 3545 3546 Ok(vec![]) 3547 } 3548 3549 fn add_virtio_pci_device( 3550 &mut self, 3551 virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3552 iommu_mapping: &Option<Arc<IommuMapping>>, 3553 virtio_device_id: String, 3554 pci_segment_id: u16, 3555 dma_handler: Option<Arc<dyn ExternalDmaMapping>>, 3556 ) -> DeviceManagerResult<PciBdf> { 3557 let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}"); 3558 3559 // Add the new virtio-pci node to the device tree. 3560 let mut node = device_node!(id); 3561 node.children = vec![virtio_device_id.clone()]; 3562 3563 let (pci_segment_id, pci_device_bdf, resources) = 3564 self.pci_resources(&id, pci_segment_id)?; 3565 3566 // Update the existing virtio node by setting the parent. 3567 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) { 3568 node.parent = Some(id.clone()); 3569 } else { 3570 return Err(DeviceManagerError::MissingNode); 3571 } 3572 3573 // Allows support for one MSI-X vector per queue. It also adds 1 3574 // as we need to take into account the dedicated vector to notify 3575 // about a virtio config change. 
3576 let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16; 3577 3578 // Create the AccessPlatform trait from the implementation IommuMapping. 3579 // This will provide address translation for any virtio device sitting 3580 // behind a vIOMMU. 3581 let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping 3582 { 3583 Some(Arc::new(AccessPlatformMapping::new( 3584 pci_device_bdf.into(), 3585 mapping.clone(), 3586 ))) 3587 } else { 3588 None 3589 }; 3590 3591 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3592 3593 // Map DMA ranges if a DMA handler is available and if the device is 3594 // not attached to a virtual IOMMU. 3595 if let Some(dma_handler) = &dma_handler { 3596 if iommu_mapping.is_some() { 3597 if let Some(iommu) = &self.iommu_device { 3598 iommu 3599 .lock() 3600 .unwrap() 3601 .add_external_mapping(pci_device_bdf.into(), dma_handler.clone()); 3602 } else { 3603 return Err(DeviceManagerError::MissingVirtualIommu); 3604 } 3605 } else { 3606 // Let every virtio-mem device handle the DMA map/unmap through the 3607 // DMA handler provided. 3608 for virtio_mem_device in self.virtio_mem_devices.iter() { 3609 virtio_mem_device 3610 .lock() 3611 .unwrap() 3612 .add_dma_mapping_handler( 3613 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3614 dma_handler.clone(), 3615 ) 3616 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3617 } 3618 3619 // Do not register virtio-mem regions, as they are handled directly by 3620 // virtio-mem devices. 
3621 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3622 for region in zone.regions() { 3623 let gpa = region.start_addr().0; 3624 let size = region.len(); 3625 dma_handler 3626 .map(gpa, gpa, size) 3627 .map_err(DeviceManagerError::VirtioDmaMap)?; 3628 } 3629 } 3630 } 3631 } 3632 3633 let device_type = virtio_device.lock().unwrap().device_type(); 3634 let virtio_pci_device = Arc::new(Mutex::new( 3635 VirtioPciDevice::new( 3636 id.clone(), 3637 memory, 3638 virtio_device, 3639 msix_num, 3640 access_platform, 3641 &self.msi_interrupt_manager, 3642 pci_device_bdf.into(), 3643 self.activate_evt 3644 .try_clone() 3645 .map_err(DeviceManagerError::EventFd)?, 3646 // All device types *except* virtio block devices should be allocated a 64-bit bar 3647 // The block devices should be given a 32-bit BAR so that they are easily accessible 3648 // to firmware without requiring excessive identity mapping. 3649 // The exception being if not on the default PCI segment. 3650 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32, 3651 dma_handler, 3652 self.pending_activations.clone(), 3653 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()), 3654 ) 3655 .map_err(DeviceManagerError::VirtioDevice)?, 3656 )); 3657 3658 let new_resources = self.add_pci_device( 3659 virtio_pci_device.clone(), 3660 virtio_pci_device.clone(), 3661 pci_segment_id, 3662 pci_device_bdf, 3663 resources, 3664 )?; 3665 3666 let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr(); 3667 for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) { 3668 let io_addr = IoEventAddress::Mmio(addr); 3669 self.address_manager 3670 .vm 3671 .register_ioevent(event, &io_addr, None) 3672 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?; 3673 } 3674 3675 // Update the device tree with correct resource information. 
3676 node.resources = new_resources; 3677 node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>); 3678 node.pci_bdf = Some(pci_device_bdf); 3679 node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device)); 3680 self.device_tree.lock().unwrap().insert(id, node); 3681 3682 Ok(pci_device_bdf) 3683 } 3684 3685 fn add_pvpanic_device( 3686 &mut self, 3687 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> { 3688 let id = String::from(PVPANIC_DEVICE_NAME); 3689 let pci_segment_id = 0x0_u16; 3690 3691 info!("Creating pvpanic device {}", id); 3692 3693 let (pci_segment_id, pci_device_bdf, resources) = 3694 self.pci_resources(&id, pci_segment_id)?; 3695 3696 let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); 3697 3698 let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot) 3699 .map_err(DeviceManagerError::PvPanicCreate)?; 3700 3701 let pvpanic_device = Arc::new(Mutex::new(pvpanic_device)); 3702 3703 let new_resources = self.add_pci_device( 3704 pvpanic_device.clone(), 3705 pvpanic_device.clone(), 3706 pci_segment_id, 3707 pci_device_bdf, 3708 resources, 3709 )?; 3710 3711 let mut node = device_node!(id, pvpanic_device); 3712 3713 node.resources = new_resources; 3714 node.pci_bdf = Some(pci_device_bdf); 3715 node.pci_device_handle = None; 3716 3717 self.device_tree.lock().unwrap().insert(id, node); 3718 3719 Ok(Some(pvpanic_device)) 3720 } 3721 3722 fn pci_resources( 3723 &self, 3724 id: &str, 3725 pci_segment_id: u16, 3726 ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> { 3727 // Look for the id in the device tree. If it can be found, that means 3728 // the device is being restored, otherwise it's created from scratch. 
3729 Ok( 3730 if let Some(node) = self.device_tree.lock().unwrap().get(id) { 3731 info!("Restoring virtio-pci {} resources", id); 3732 let pci_device_bdf: PciBdf = node 3733 .pci_bdf 3734 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3735 let pci_segment_id = pci_device_bdf.segment(); 3736 3737 self.pci_segments[pci_segment_id as usize] 3738 .pci_bus 3739 .lock() 3740 .unwrap() 3741 .get_device_id(pci_device_bdf.device() as usize) 3742 .map_err(DeviceManagerError::GetPciDeviceId)?; 3743 3744 (pci_segment_id, pci_device_bdf, Some(node.resources.clone())) 3745 } else { 3746 let pci_device_bdf = 3747 self.pci_segments[pci_segment_id as usize].next_device_bdf()?; 3748 3749 (pci_segment_id, pci_device_bdf, None) 3750 }, 3751 ) 3752 } 3753 3754 #[cfg(target_arch = "x86_64")] 3755 pub fn io_bus(&self) -> &Arc<Bus> { 3756 &self.address_manager.io_bus 3757 } 3758 3759 pub fn mmio_bus(&self) -> &Arc<Bus> { 3760 &self.address_manager.mmio_bus 3761 } 3762 3763 pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> { 3764 &self.address_manager.allocator 3765 } 3766 3767 pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> { 3768 self.interrupt_controller 3769 .as_ref() 3770 .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>) 3771 } 3772 3773 pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> { 3774 &self.pci_segments 3775 } 3776 3777 pub fn console(&self) -> &Arc<Console> { 3778 &self.console 3779 } 3780 3781 #[cfg(target_arch = "aarch64")] 3782 pub fn cmdline_additions(&self) -> &[String] { 3783 self.cmdline_additions.as_slice() 3784 } 3785 3786 pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> { 3787 for handle in self.virtio_devices.iter() { 3788 handle 3789 .virtio_device 3790 .lock() 3791 .unwrap() 3792 .add_memory_region(new_region) 3793 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?; 3794 3795 if let Some(dma_handler) = &handle.dma_handler { 3796 if !handle.iommu { 3797 
let gpa = new_region.start_addr().0; 3798 let size = new_region.len(); 3799 dma_handler 3800 .map(gpa, gpa, size) 3801 .map_err(DeviceManagerError::VirtioDmaMap)?; 3802 } 3803 } 3804 } 3805 3806 // Take care of updating the memory for VFIO PCI devices. 3807 if let Some(vfio_container) = &self.vfio_container { 3808 vfio_container 3809 .vfio_dma_map( 3810 new_region.start_addr().raw_value(), 3811 new_region.len(), 3812 new_region.as_ptr() as u64, 3813 ) 3814 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?; 3815 } 3816 3817 // Take care of updating the memory for vfio-user devices. 3818 { 3819 let device_tree = self.device_tree.lock().unwrap(); 3820 for pci_device_node in device_tree.pci_devices() { 3821 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node 3822 .pci_device_handle 3823 .as_ref() 3824 .ok_or(DeviceManagerError::MissingPciDevice)? 3825 { 3826 vfio_user_pci_device 3827 .lock() 3828 .unwrap() 3829 .dma_map(new_region) 3830 .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?; 3831 } 3832 } 3833 } 3834 3835 Ok(()) 3836 } 3837 3838 pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> { 3839 for mut activator in self.pending_activations.lock().unwrap().drain(..) 
{ 3840 activator 3841 .activate() 3842 .map_err(DeviceManagerError::VirtioActivate)?; 3843 } 3844 Ok(()) 3845 } 3846 3847 pub fn notify_hotplug( 3848 &self, 3849 _notification_type: AcpiNotificationFlags, 3850 ) -> DeviceManagerResult<()> { 3851 return self 3852 .ged_notification_device 3853 .as_ref() 3854 .unwrap() 3855 .lock() 3856 .unwrap() 3857 .notify(_notification_type) 3858 .map_err(DeviceManagerError::HotPlugNotification); 3859 } 3860 3861 pub fn add_device( 3862 &mut self, 3863 device_cfg: &mut DeviceConfig, 3864 ) -> DeviceManagerResult<PciDeviceInfo> { 3865 self.validate_identifier(&device_cfg.id)?; 3866 3867 if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) { 3868 return Err(DeviceManagerError::InvalidIommuHotplug); 3869 } 3870 3871 let (bdf, device_name) = self.add_passthrough_device(device_cfg)?; 3872 3873 // Update the PCIU bitmap 3874 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3875 3876 Ok(PciDeviceInfo { 3877 id: device_name, 3878 bdf, 3879 }) 3880 } 3881 3882 pub fn add_user_device( 3883 &mut self, 3884 device_cfg: &mut UserDeviceConfig, 3885 ) -> DeviceManagerResult<PciDeviceInfo> { 3886 self.validate_identifier(&device_cfg.id)?; 3887 3888 let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?; 3889 3890 // Update the PCIU bitmap 3891 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3892 3893 Ok(PciDeviceInfo { 3894 id: device_name, 3895 bdf, 3896 }) 3897 } 3898 3899 pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> { 3900 // The node can be directly a PCI node in case the 'id' refers to a 3901 // VFIO device or a virtio-pci one. 3902 // In case the 'id' refers to a virtio device, we must find the PCI 3903 // node by looking at the parent. 
3904 let device_tree = self.device_tree.lock().unwrap(); 3905 let node = device_tree 3906 .get(&id) 3907 .ok_or(DeviceManagerError::UnknownDeviceId(id))?; 3908 3909 let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() { 3910 node 3911 } else { 3912 let parent = node 3913 .parent 3914 .as_ref() 3915 .ok_or(DeviceManagerError::MissingNode)?; 3916 device_tree 3917 .get(parent) 3918 .ok_or(DeviceManagerError::MissingNode)? 3919 }; 3920 3921 let pci_device_bdf: PciBdf = pci_device_node 3922 .pci_bdf 3923 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3924 let pci_segment_id = pci_device_bdf.segment(); 3925 3926 let pci_device_handle = pci_device_node 3927 .pci_device_handle 3928 .as_ref() 3929 .ok_or(DeviceManagerError::MissingPciDevice)?; 3930 #[allow(irrefutable_let_patterns)] 3931 if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle { 3932 let device_type = VirtioDeviceType::from( 3933 virtio_pci_device 3934 .lock() 3935 .unwrap() 3936 .virtio_device() 3937 .lock() 3938 .unwrap() 3939 .device_type(), 3940 ); 3941 match device_type { 3942 VirtioDeviceType::Net 3943 | VirtioDeviceType::Block 3944 | VirtioDeviceType::Pmem 3945 | VirtioDeviceType::Fs 3946 | VirtioDeviceType::Vsock => {} 3947 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)), 3948 } 3949 } 3950 3951 // Update the PCID bitmap 3952 self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device(); 3953 3954 Ok(()) 3955 } 3956 3957 pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> { 3958 info!( 3959 "Ejecting device_id = {} on segment_id={}", 3960 device_id, pci_segment_id 3961 ); 3962 3963 // Convert the device ID into the corresponding b/d/f. 3964 let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0); 3965 3966 // Give the PCI device ID back to the PCI bus. 
3967 self.pci_segments[pci_segment_id as usize] 3968 .pci_bus 3969 .lock() 3970 .unwrap() 3971 .put_device_id(device_id as usize) 3972 .map_err(DeviceManagerError::PutPciDeviceId)?; 3973 3974 // Remove the device from the device tree along with its children. 3975 let mut device_tree = self.device_tree.lock().unwrap(); 3976 let pci_device_node = device_tree 3977 .remove_node_by_pci_bdf(pci_device_bdf) 3978 .ok_or(DeviceManagerError::MissingPciDevice)?; 3979 3980 // For VFIO and vfio-user the PCI device id is the id. 3981 // For virtio we overwrite it later as we want the id of the 3982 // underlying device. 3983 let mut id = pci_device_node.id; 3984 let pci_device_handle = pci_device_node 3985 .pci_device_handle 3986 .ok_or(DeviceManagerError::MissingPciDevice)?; 3987 if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) { 3988 // The virtio-pci device has a single child 3989 if !pci_device_node.children.is_empty() { 3990 assert_eq!(pci_device_node.children.len(), 1); 3991 let child_id = &pci_device_node.children[0]; 3992 id = child_id.clone(); 3993 } 3994 } 3995 for child in pci_device_node.children.iter() { 3996 device_tree.remove(child); 3997 } 3998 3999 let mut iommu_attached = false; 4000 if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices { 4001 if iommu_attached_devices.contains(&pci_device_bdf) { 4002 iommu_attached = true; 4003 } 4004 } 4005 4006 let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { 4007 // No need to remove any virtio-mem mapping here as the container outlives all devices 4008 PciDeviceHandle::Vfio(vfio_pci_device) => ( 4009 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4010 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>, 4011 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 4012 false, 4013 ), 4014 PciDeviceHandle::Virtio(virtio_pci_device) => { 4015 let dev = virtio_pci_device.lock().unwrap(); 4016 let bar_addr = dev.config_bar_addr(); 
4017 for (event, addr) in dev.ioeventfds(bar_addr) { 4018 let io_addr = IoEventAddress::Mmio(addr); 4019 self.address_manager 4020 .vm 4021 .unregister_ioevent(event, &io_addr) 4022 .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; 4023 } 4024 4025 if let Some(dma_handler) = dev.dma_handler() { 4026 if !iommu_attached { 4027 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4028 for region in zone.regions() { 4029 let iova = region.start_addr().0; 4030 let size = region.len(); 4031 dma_handler 4032 .unmap(iova, size) 4033 .map_err(DeviceManagerError::VirtioDmaUnmap)?; 4034 } 4035 } 4036 } 4037 } 4038 4039 ( 4040 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4041 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>, 4042 Some(dev.virtio_device()), 4043 dev.dma_handler().is_some() && !iommu_attached, 4044 ) 4045 } 4046 PciDeviceHandle::VfioUser(vfio_user_pci_device) => { 4047 let mut dev = vfio_user_pci_device.lock().unwrap(); 4048 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4049 for region in zone.regions() { 4050 dev.dma_unmap(region) 4051 .map_err(DeviceManagerError::VfioUserDmaUnmap)?; 4052 } 4053 } 4054 4055 ( 4056 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>, 4057 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>, 4058 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 4059 true, 4060 ) 4061 } 4062 }; 4063 4064 if remove_dma_handler { 4065 for virtio_mem_device in self.virtio_mem_devices.iter() { 4066 virtio_mem_device 4067 .lock() 4068 .unwrap() 4069 .remove_dma_mapping_handler(VirtioMemMappingSource::Device( 4070 pci_device_bdf.into(), 4071 )) 4072 .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?; 4073 } 4074 } 4075 4076 // Free the allocated BARs 4077 pci_device 4078 .lock() 4079 .unwrap() 4080 .free_bars( 4081 &mut self.address_manager.allocator.lock().unwrap(), 4082 &mut self.pci_segments[pci_segment_id as 
                    usize]
                    .allocator
                    .lock()
                    .unwrap(),
            )
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            // Unmap any userspace mappings the device registered with the
            // memory manager before shutting it down.
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the list and
        // buses where it was stored. At the end of this function, after
        // any_device, bus_device and pci_device are released, the actual
        // device will be dropped.
        Ok(())
    }

    /// Hotplugs an already-built virtio device behind a virtio-pci transport
    /// and signals its arrival through the per-segment PCIU bitmap.
    fn hotplug_virtio_pci_device(
        &mut self,
        handle: MetaVirtioDevice,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        // Add the virtio device to the device manager list. This is important
        // as the list is used to notify virtio devices about memory updates
        // for instance.
        self.virtio_devices.push(handle.clone());

        // Attach the device to the virtual IOMMU only when requested.
        let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
            self.iommu_mapping.clone()
        } else {
            None
        };

        let bdf = self.add_virtio_pci_device(
            handle.virtio_device,
            &mapping,
            handle.id.clone(),
            handle.pci_segment,
            handle.dma_handler,
        )?;

        // Update the PCIU bitmap
        self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo { id: handle.id, bdf })
    }

    /// Returns true when `pci_segment_id` is one of the segments configured
    /// (via the platform config) to be placed behind the virtual IOMMU.
    fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
        self.config
            .lock()
            .as_ref()
            .unwrap()
            .platform
            .as_ref()
            .map(|pc| {
                pc.iommu_segments
                    .as_ref()
                    .map(|v| v.contains(&pci_segment_id))
                    .unwrap_or_default()
            })
            .unwrap_or_default()
    }

    /// Hotplugs a virtio-block device described by `disk_cfg`.
    pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&disk_cfg.id)?;

        // IOMMU attachment is only allowed on a segment that sits behind the
        // virtual IOMMU.
        if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_block_device(disk_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-fs device described by `fs_cfg`.
    pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&fs_cfg.id)?;

        let device = self.make_virtio_fs_device(fs_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-pmem device described by `pmem_cfg`.
    pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&pmem_cfg.id)?;

        if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_pmem_device(pmem_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-net device described by `net_cfg`.
    pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&net_cfg.id)?;

        if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_net_device(net_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a vDPA device described by `vdpa_cfg`.
    pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vdpa_cfg.id)?;

        if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_vdpa_device(vdpa_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-vsock device described by `vsock_cfg`.
    pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vsock_cfg.id)?;

        if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_vsock_device(vsock_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Collects the counters exposed by every virtio device, keyed by the
    /// device identifier. Devices without counters are skipped.
    pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
        let mut counters = HashMap::new();

        for handle in &self.virtio_devices {
            let virtio_device = handle.virtio_device.lock().unwrap();
            if let Some(device_counters) = virtio_device.counters() {
                counters.insert(handle.id.clone(), device_counters.clone());
            }
        }

        counters
    }

    /// Asks the virtio-balloon device (if configured) to resize to `size`
    /// bytes; errors when no balloon device exists.
    pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
        if let Some(balloon) =
        &self.balloon {
            return balloon
                .lock()
                .unwrap()
                .resize(size)
                .map_err(DeviceManagerError::VirtioBalloonResize);
        }

        warn!("No balloon setup: Can't resize the balloon");
        Err(DeviceManagerError::MissingVirtioBalloon)
    }

    /// Returns the balloon's actual size as reported by the device, or 0
    /// when no balloon device is configured.
    pub fn balloon_size(&self) -> u64 {
        if let Some(balloon) = &self.balloon {
            return balloon.lock().unwrap().get_actual();
        }

        0
    }

    /// Returns a shared handle to the device tree.
    pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
        self.device_tree.clone()
    }

    /// Notifies the guest of a power-button event through the ACPI GED
    /// device.
    #[cfg(target_arch = "x86_64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    /// Notifies the guest of a power-button event, covering both the
    /// device-tree and the ACPI+UEFI boot flows.
    #[cfg(target_arch = "aarch64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // There are two use cases:
        // 1. Users will use direct kernel boot with device tree.
        // 2. Users will use ACPI+UEFI boot.

        // Trigger a GPIO pin 3 event to satisfy use case 1.
        self.gpio_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .trigger_key(3)
            .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
        // Trigger a GED power button event to satisfy use case 2.
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification);
    }

    /// Returns the virtual IOMMU's BDF and the BDFs of the devices attached
    /// to it, if an IOMMU was created.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }

    /// Validates a user-supplied device identifier: "__"-prefixed names are
    /// reserved for internal use, and the id must not already be present in
    /// the device tree.
    fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
        if let Some(id) = id {
            // "__" prefix is reserved for internally generated identifiers.
            if id.starts_with("__") {
                return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
            }

            if self.device_tree.lock().unwrap().contains_key(id) {
                return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
            }
        }

        Ok(())
    }

    /// Returns the ACPI platform addresses collected during device creation.
    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
}

/// Finds the NUMA node owning the given memory zone, if any.
fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
            return Some(*numa_node_id);
        }
    }

    None
}

/// Finds the NUMA node owning the given PCI segment, defaulting to node 0
/// when no node claims it.
fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.pci_segments.contains(&pci_segment_id) {
            return *numa_node_id;
        }
    }

    0
}

/// Marker type used to emit the ACPI description of the guest TPM device.
struct TpmDevice {}

impl Aml for TpmDevice {
    // Emits a "TPM2" ACPI device with its hardware id, status and the fixed
    // MMIO window reserved for the TPM in the guest memory layout.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        aml::Device::new(
            "TPM2".into(),
            vec![
                &aml::Name::new("_HID".into(), &"MSFT0101"),
                // 0xF: device present, enabled, shown in UI and functioning.
                &aml::Name::new("_STA".into(), &(0xF_usize)),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                        true,
                        layout::TPM_START.0 as u32,
                        layout::TPM_SIZE as u32,
                    )]),
                ),
            ],
        )
        .to_aml_bytes(sink)
    }
}

impl Aml for DeviceManager {
    // Emits the DSDT fragments owned by the device manager: the PCI hotplug
    // controller (PHPR), the per-segment PCI hierarchies, motherboard
    // resources, the serial port, sleep state, power button, the optional
    // TPM and the GED notification device.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // Build one PCNT method call per PCI segment; PSCN invokes them all
        // so the guest rescans every segment.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write PCI bus number (in first argument) to I/O port via field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        // Emit every PCI segment's own AML description.
        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        let mut mbrd_memory = Vec::new();

        // Reserve each segment's MMIO config window under the motherboard
        // resources device.
        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        // _S5_ sleep state (soft-off).
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        // ACPI power button device.
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        // Finally, emit the GED notification device itself.
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}

impl Pausable for DeviceManager {
    // Pauses every migratable device registered in the device tree.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    // Resumes every migratable device registered in the device tree.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    // Snapshots the device manager's own state plus the snapshot of every
    // migratable device, keyed by each device's id.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let mut migratable = migratable.lock().unwrap();
                snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
            }
        }

        Ok(snapshot)
    }
}

impl Transportable for DeviceManager {}

// Each Migratable hook simply fans out to every migratable device held in
// the device tree.
impl Migratable for DeviceManager {
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    // Merges the dirty-range tables reported by all migratable devices into
    // a single table.
    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_migration()?;
            }
        }
        Ok(())
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}

// Register layout of the PCI hotplug controller MMIO window. These offsets
// and sizes match the PCST field declarations emitted in the ACPI tables.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;

impl BusDevice for DeviceManager {
    // Handles guest reads of the PCI hotplug register file. PCIU/PCID are
    // read-and-clear bitmaps of devices plugged/unplugged on the currently
    // selected segment.
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_up
                        .to_le_bytes(),
                );
                // Clear the PCIU bitmap
                self.pci_segments[self.selected_segment].pci_devices_up = 0;
            }
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_down
                        .to_le_bytes(),
                );
                // Clear the PCID bitmap
                self.pci_segments[self.selected_segment].pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty bitmap since the eject is always
                // taken care of right away during a write access.
                data.fill(0);
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        )
    }

    // Handles guest writes: B0EJ ejects the devices whose slot bits are set
    // on the selected segment; PSEG selects the segment subsequent accesses
    // operate on.
    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
        match offset {
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let mut slot_bitmap = u32::from_le_bytes(data_array);

                // Eject every slot whose bit is set, clearing each bit as it
                // is processed.
                while slot_bitmap > 0 {
                    let slot_id = slot_bitmap.trailing_zeros();
                    if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
                        error!("Failed ejecting device {}: {:?}", slot_id, e);
                    }
                    slot_bitmap &= !(1 << slot_id);
                }
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let selected_segment = u32::from_le_bytes(data_array) as usize;
                // Ignore out-of-range selections, keeping the previous one.
                if selected_segment >= self.pci_segments.len() {
                    error!(
                        "Segment selection out of range: {} >= {}",
                        selected_segment,
                        self.pci_segments.len()
                    );
                    return None;
                }
                self.selected_segment = selected_segment;
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        );

        None
    }
}

impl Drop for DeviceManager {
    // Shuts all virtio devices down and restores the host terminal state.
    fn drop(&mut self) {
        for handle in self.virtio_devices.drain(..)
{ 4781 handle.virtio_device.lock().unwrap().shutdown(); 4782 } 4783 4784 if let Some(termios) = *self.original_termios_opt.lock().unwrap() { 4785 // SAFETY: FFI call 4786 let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) }; 4787 } 4788 } 4789 } 4790