1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use crate::config::{ 13 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 14 VdpaConfig, VhostMode, VmConfig, VsockConfig, 15 }; 16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; 17 use crate::device_tree::{DeviceNode, DeviceTree}; 18 use crate::interrupt::LegacyUserspaceInterruptManager; 19 use crate::interrupt::MsiInterruptManager; 20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 21 use crate::pci_segment::PciSegment; 22 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 23 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 24 use crate::sigwinch_listener::start_sigwinch_listener; 25 use crate::GuestRegionMmap; 26 use crate::PciDeviceInfo; 27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; 28 use acpi_tables::sdt::GenericAddress; 29 use acpi_tables::{aml, Aml}; 30 use anyhow::anyhow; 31 use arch::layout; 32 #[cfg(target_arch = "x86_64")] 33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 34 use arch::NumaNodes; 35 #[cfg(target_arch = "aarch64")] 36 use arch::{DeviceType, MmioDeviceInfo}; 37 use block::{ 38 async_io::DiskFile, block_io_uring_is_supported, detect_image_type, 39 fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_sync::RawFileDiskSync, 40 vhdx, vhdx_sync::VhdxDiskSync, ImageType, 41 }; 42 #[cfg(feature = "io_uring")] 43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; 44 #[cfg(target_arch = "aarch64")] 45 use devices::gic; 46 #[cfg(target_arch = "x86_64")] 47 use devices::ioapic; 48 #[cfg(target_arch = "aarch64")] 49 use devices::legacy::Pl011; 50 #[cfg(target_arch = "x86_64")] 51 use devices::legacy::Serial; 52 use devices::{ 53 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 54 }; 55 use hypervisor::{HypervisorType, IoEventAddress}; 56 use libc::{ 57 cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, 58 O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, 59 }; 60 use pci::{ 61 DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping, 62 VfioUserPciDevice, VfioUserPciDeviceError, 63 }; 64 use seccompiler::SeccompAction; 65 use serde::{Deserialize, Serialize}; 66 use std::collections::{BTreeSet, HashMap}; 67 use std::fs::{read_link, File, OpenOptions}; 68 use std::io::{self, stdout, Seek, SeekFrom}; 69 use std::mem::zeroed; 70 use std::num::Wrapping; 71 use std::os::unix::fs::OpenOptionsExt; 72 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 73 use std::path::PathBuf; 74 use std::result; 75 use std::sync::{Arc, Mutex}; 76 use std::time::Instant; 77 use tracer::trace_scoped; 78 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 79 use virtio_devices::transport::VirtioTransport; 80 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 81 use virtio_devices::vhost_user::VhostUserConfig; 82 use virtio_devices::{ 83 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 84 }; 85 use virtio_devices::{Endpoint, IommuMapping}; 86 use vm_allocator::{AddressAllocator, SystemAllocator}; 87 use vm_device::dma_mapping::vfio::VfioDmaMapping; 88 use vm_device::dma_mapping::ExternalDmaMapping; 89 use vm_device::interrupt::{ 90 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 91 }; 92 use vm_device::{Bus, BusDevice, Resource}; 93 use vm_memory::guest_memory::FileOffset; 94 use vm_memory::GuestMemoryRegion; 95 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion}; 96 #[cfg(target_arch = "x86_64")] 97 use vm_memory::{GuestAddressSpace, GuestMemory}; 98 use vm_migration::{ 99 protocol::MemoryRangeTable, snapshot_from_id, versioned_state_from_id, Migratable, 100 MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, Transportable, 101 }; 102 use vm_virtio::AccessPlatform; 103 use vm_virtio::VirtioDeviceType; 104 use vmm_sys_util::eventfd::EventFd; 105 106 #[cfg(target_arch = "aarch64")] 107 const MMIO_LEN: u64 = 0x1000; 108 109 // Singleton devices / devices the user cannot name 110 #[cfg(target_arch = "x86_64")] 111 const IOAPIC_DEVICE_NAME: &str = "__ioapic"; 112 const SERIAL_DEVICE_NAME: &str = "__serial"; 113 #[cfg(target_arch = "aarch64")] 114 const GPIO_DEVICE_NAME: &str = "__gpio"; 115 const RNG_DEVICE_NAME: &str = "__rng"; 116 const IOMMU_DEVICE_NAME: &str = "__iommu"; 117 const BALLOON_DEVICE_NAME: &str = "__balloon"; 118 const CONSOLE_DEVICE_NAME: &str = "__console"; 119 const PVPANIC_DEVICE_NAME: &str = "__pvpanic"; 120 121 // Devices that the user may name and for which we generate 122 // identifiers if the user doesn't give one 123 const DISK_DEVICE_NAME_PREFIX: &str = "_disk"; 124 const FS_DEVICE_NAME_PREFIX: &str = "_fs"; 125 const NET_DEVICE_NAME_PREFIX: &str = "_net"; 126 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem"; 127 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa"; 128 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock"; 129 const WATCHDOG_DEVICE_NAME: &str = "__watchdog"; 130 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio"; 131 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user"; 132 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci"; 133 134 /// Errors associated with device manager 135 #[derive(Debug)] 136 pub enum DeviceManagerError { 137 /// Cannot create EventFd. 138 EventFd(io::Error), 139 140 /// Cannot open disk path 141 Disk(io::Error), 142 143 /// Cannot create vhost-user-net device 144 CreateVhostUserNet(virtio_devices::vhost_user::Error), 145 146 /// Cannot create virtio-blk device 147 CreateVirtioBlock(io::Error), 148 149 /// Cannot create virtio-net device 150 CreateVirtioNet(virtio_devices::net::Error), 151 152 /// Cannot create virtio-console device 153 CreateVirtioConsole(io::Error), 154 155 /// Cannot create virtio-rng device 156 CreateVirtioRng(io::Error), 157 158 /// Cannot create virtio-fs device 159 CreateVirtioFs(virtio_devices::vhost_user::Error), 160 161 /// Virtio-fs device was created without a socket. 162 NoVirtioFsSock, 163 164 /// Cannot create vhost-user-blk device 165 CreateVhostUserBlk(virtio_devices::vhost_user::Error), 166 167 /// Cannot create virtio-pmem device 168 CreateVirtioPmem(io::Error), 169 170 /// Cannot create vDPA device 171 CreateVdpa(virtio_devices::vdpa::Error), 172 173 /// Cannot create virtio-vsock device 174 CreateVirtioVsock(io::Error), 175 176 /// Cannot create tpm device 177 CreateTpmDevice(anyhow::Error), 178 179 /// Failed to convert Path to &str for the vDPA device. 180 CreateVdpaConvertPath, 181 182 /// Failed to convert Path to &str for the virtio-vsock device. 183 CreateVsockConvertPath, 184 185 /// Cannot create virtio-vsock backend 186 CreateVsockBackend(virtio_devices::vsock::VsockUnixError), 187 188 /// Cannot create virtio-iommu device 189 CreateVirtioIommu(io::Error), 190 191 /// Cannot create virtio-balloon device 192 CreateVirtioBalloon(io::Error), 193 194 /// Cannot create virtio-watchdog device 195 CreateVirtioWatchdog(io::Error), 196 197 /// Failed to parse disk image format 198 DetectImageType(io::Error), 199 200 /// Cannot open qcow disk path 201 QcowDeviceCreate(qcow::Error), 202 203 /// Cannot create serial manager 204 CreateSerialManager(SerialManagerError), 205 206 /// Cannot spawn the serial manager thread 207 SpawnSerialManager(SerialManagerError), 208 209 /// Cannot open tap interface 210 OpenTap(net_util::TapError), 211 212 /// Cannot allocate IRQ. 213 AllocateIrq, 214 215 /// Cannot configure the IRQ. 216 Irq(vmm_sys_util::errno::Error), 217 218 /// Cannot allocate PCI BARs 219 AllocateBars(pci::PciDeviceError), 220 221 /// Could not free the BARs associated with a PCI device. 222 FreePciBars(pci::PciDeviceError), 223 224 /// Cannot register ioevent. 225 RegisterIoevent(anyhow::Error), 226 227 /// Cannot unregister ioevent. 228 UnRegisterIoevent(anyhow::Error), 229 230 /// Cannot create virtio device 231 VirtioDevice(virtio_devices::transport::VirtioPciDeviceError), 232 233 /// Cannot add PCI device 234 AddPciDevice(pci::PciRootError), 235 236 /// Cannot open persistent memory file 237 PmemFileOpen(io::Error), 238 239 /// Cannot set persistent memory file size 240 PmemFileSetLen(io::Error), 241 242 /// Cannot find a memory range for persistent memory 243 PmemRangeAllocation, 244 245 /// Cannot find a memory range for virtio-fs 246 FsRangeAllocation, 247 248 /// Error creating serial output file 249 SerialOutputFileOpen(io::Error), 250 251 /// Error creating console output file 252 ConsoleOutputFileOpen(io::Error), 253 254 /// Error creating serial pty 255 SerialPtyOpen(io::Error), 256 257 /// Error creating console pty 258 ConsolePtyOpen(io::Error), 259 260 /// Error setting pty raw mode 261 SetPtyRaw(vmm_sys_util::errno::Error), 262 263 /// Error getting pty peer 264 GetPtyPeer(vmm_sys_util::errno::Error), 265 266 /// Cannot create a VFIO device 267 VfioCreate(vfio_ioctls::VfioError), 268 269 /// Cannot create a VFIO PCI device 270 VfioPciCreate(pci::VfioPciError), 271 272 /// Failed to map VFIO MMIO region. 273 VfioMapRegion(pci::VfioPciError), 274 275 /// Failed to DMA map VFIO device. 276 VfioDmaMap(vfio_ioctls::VfioError), 277 278 /// Failed to DMA unmap VFIO device. 279 VfioDmaUnmap(pci::VfioPciError), 280 281 /// Failed to create the passthrough device. 282 CreatePassthroughDevice(anyhow::Error), 283 284 /// Failed to memory map. 285 Mmap(io::Error), 286 287 /// Cannot add legacy device to Bus. 288 BusError(vm_device::BusError), 289 290 /// Failed to allocate IO port 291 AllocateIoPort, 292 293 /// Failed to allocate MMIO address 294 AllocateMmioAddress, 295 296 /// Failed to make hotplug notification 297 HotPlugNotification(io::Error), 298 299 /// Error from a memory manager operation 300 MemoryManager(MemoryManagerError), 301 302 /// Failed to create new interrupt source group. 303 CreateInterruptGroup(io::Error), 304 305 /// Failed to update interrupt source group. 306 UpdateInterruptGroup(io::Error), 307 308 /// Failed to create interrupt controller. 309 CreateInterruptController(interrupt_controller::Error), 310 311 /// Failed to create a new MmapRegion instance. 312 NewMmapRegion(vm_memory::mmap::MmapRegionError), 313 314 /// Failed to clone a File. 315 CloneFile(io::Error), 316 317 /// Failed to create socket file 318 CreateSocketFile(io::Error), 319 320 /// Failed to spawn the network backend 321 SpawnNetBackend(io::Error), 322 323 /// Failed to spawn the block backend 324 SpawnBlockBackend(io::Error), 325 326 /// Missing PCI bus. 327 NoPciBus, 328 329 /// Could not find an available device name. 330 NoAvailableDeviceName, 331 332 /// Missing PCI device. 333 MissingPciDevice, 334 335 /// Failed to remove a PCI device from the PCI bus. 336 RemoveDeviceFromPciBus(pci::PciRootError), 337 338 /// Failed to remove a bus device from the IO bus. 339 RemoveDeviceFromIoBus(vm_device::BusError), 340 341 /// Failed to remove a bus device from the MMIO bus. 342 RemoveDeviceFromMmioBus(vm_device::BusError), 343 344 /// Failed to find the device corresponding to a specific PCI b/d/f. 345 UnknownPciBdf(u32), 346 347 /// Not allowed to remove this type of device from the VM. 348 RemovalNotAllowed(vm_virtio::VirtioDeviceType), 349 350 /// Failed to find device corresponding to the given identifier. 351 UnknownDeviceId(String), 352 353 /// Failed to find an available PCI device ID. 354 NextPciDeviceId(pci::PciRootError), 355 356 /// Could not reserve the PCI device ID. 357 GetPciDeviceId(pci::PciRootError), 358 359 /// Could not give the PCI device ID back. 360 PutPciDeviceId(pci::PciRootError), 361 362 /// No disk path was specified when one was expected 363 NoDiskPath, 364 365 /// Failed to update guest memory for virtio device. 366 UpdateMemoryForVirtioDevice(virtio_devices::Error), 367 368 /// Cannot create virtio-mem device 369 CreateVirtioMem(io::Error), 370 371 /// Cannot find a memory range for virtio-mem memory 372 VirtioMemRangeAllocation, 373 374 /// Failed to update guest memory for VFIO PCI device. 375 UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError), 376 377 /// Trying to use a directory for pmem but no size specified 378 PmemWithDirectorySizeMissing, 379 380 /// Trying to use a size that is not multiple of 2MiB 381 PmemSizeNotAligned, 382 383 /// Could not find the node in the device tree. 384 MissingNode, 385 386 /// Resource was already found. 387 ResourceAlreadyExists, 388 389 /// Expected resources for virtio-pmem could not be found. 390 MissingVirtioPmemResources, 391 392 /// Missing PCI b/d/f from the DeviceNode. 393 MissingDeviceNodePciBdf, 394 395 /// No support for device passthrough 396 NoDevicePassthroughSupport, 397 398 /// Failed to resize virtio-balloon 399 VirtioBalloonResize(virtio_devices::balloon::Error), 400 401 /// Missing virtio-balloon, can't proceed as expected. 402 MissingVirtioBalloon, 403 404 /// Missing virtual IOMMU device 405 MissingVirtualIommu, 406 407 /// Failed to do power button notification 408 PowerButtonNotification(io::Error), 409 410 /// Failed to do AArch64 GPIO power button notification 411 #[cfg(target_arch = "aarch64")] 412 AArch64PowerButtonNotification(devices::legacy::GpioDeviceError), 413 414 /// Failed to set O_DIRECT flag to file descriptor 415 SetDirectIo, 416 417 /// Failed to create FixedVhdDiskAsync 418 CreateFixedVhdDiskAsync(io::Error), 419 420 /// Failed to create FixedVhdDiskSync 421 CreateFixedVhdDiskSync(io::Error), 422 423 /// Failed to create QcowDiskSync 424 CreateQcowDiskSync(qcow::Error), 425 426 /// Failed to create FixedVhdxDiskSync 427 CreateFixedVhdxDiskSync(vhdx::VhdxError), 428 429 /// Failed to add DMA mapping handler to virtio-mem device. 430 AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error), 431 432 /// Failed to remove DMA mapping handler from virtio-mem device. 433 RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error), 434 435 /// Failed to create vfio-user client 436 VfioUserCreateClient(vfio_user::Error), 437 438 /// Failed to create VFIO user device 439 VfioUserCreate(VfioUserPciDeviceError), 440 441 /// Failed to map region from VFIO user device into guest 442 VfioUserMapRegion(VfioUserPciDeviceError), 443 444 /// Failed to DMA map VFIO user device. 445 VfioUserDmaMap(VfioUserPciDeviceError), 446 447 /// Failed to DMA unmap VFIO user device. 448 VfioUserDmaUnmap(VfioUserPciDeviceError), 449 450 /// Failed to update memory mappings for VFIO user device 451 UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError), 452 453 /// Cannot duplicate file descriptor 454 DupFd(vmm_sys_util::errno::Error), 455 456 /// Failed to DMA map virtio device. 457 VirtioDmaMap(std::io::Error), 458 459 /// Failed to DMA unmap virtio device. 460 VirtioDmaUnmap(std::io::Error), 461 462 /// Cannot hotplug device behind vIOMMU 463 InvalidIommuHotplug, 464 465 /// Invalid identifier as it is not unique. 466 IdentifierNotUnique(String), 467 468 /// Invalid identifier 469 InvalidIdentifier(String), 470 471 /// Error activating virtio device 472 VirtioActivate(ActivateError), 473 474 /// Failed retrieving device state from snapshot 475 RestoreGetState(MigratableError), 476 477 /// Cannot create a PvPanic device 478 PvPanicCreate(devices::pvpanic::PvPanicError), 479 } 480 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>; 481 482 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10; 483 484 const TIOCSPTLCK: libc::c_int = 0x4004_5431; 485 const TIOCGTPEER: libc::c_int = 0x5441; 486 487 pub fn create_pty() -> io::Result<(File, File, PathBuf)> { 488 // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx 489 // This is done to try and use the devpts filesystem that 490 // could be available for use in the process's namespace first. 491 // Ideally these are all the same file though but different 492 // kernels could have things setup differently. 493 // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt 494 // for further details. 495 496 let custom_flags = libc::O_NONBLOCK; 497 let main = match OpenOptions::new() 498 .read(true) 499 .write(true) 500 .custom_flags(custom_flags) 501 .open("/dev/pts/ptmx") 502 { 503 Ok(f) => f, 504 _ => OpenOptions::new() 505 .read(true) 506 .write(true) 507 .custom_flags(custom_flags) 508 .open("/dev/ptmx")?, 509 }; 510 let mut unlock: libc::c_ulong = 0; 511 // SAFETY: FFI call into libc, trivially safe 512 unsafe { libc::ioctl(main.as_raw_fd(), TIOCSPTLCK as _, &mut unlock) }; 513 514 // SAFETY: FFI call into libc, trivally safe 515 let sub_fd = unsafe { 516 libc::ioctl( 517 main.as_raw_fd(), 518 TIOCGTPEER as _, 519 libc::O_NOCTTY | libc::O_RDWR, 520 ) 521 }; 522 if sub_fd == -1 { 523 return vmm_sys_util::errno::errno_result().map_err(|e| e.into()); 524 } 525 526 let proc_path = PathBuf::from(format!("/proc/self/fd/{sub_fd}")); 527 let path = read_link(proc_path)?; 528 529 // SAFETY: sub_fd is checked to be valid before being wrapped in File 530 Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path)) 531 } 532 533 #[derive(Default)] 534 pub struct Console { 535 console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>, 536 } 537 538 impl Console { 539 pub fn need_resize(&self) -> bool { 540 if let Some(_resizer) = self.console_resizer.as_ref() { 541 return true; 542 } 543 544 false 545 } 546 547 pub fn update_console_size(&self) { 548 if let Some(resizer) = self.console_resizer.as_ref() { 549 resizer.update_console_size() 550 } 551 } 552 } 553 554 pub(crate) struct AddressManager { 555 pub(crate) allocator: Arc<Mutex<SystemAllocator>>, 556 #[cfg(target_arch = "x86_64")] 557 pub(crate) io_bus: Arc<Bus>, 558 pub(crate) mmio_bus: Arc<Bus>, 559 pub(crate) vm: Arc<dyn hypervisor::Vm>, 560 device_tree: Arc<Mutex<DeviceTree>>, 561 pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 562 } 563 564 impl DeviceRelocation for AddressManager { 565 fn move_bar( 566 &self, 567 old_base: u64, 568 new_base: u64, 569 len: u64, 570 pci_dev: &mut dyn PciDevice, 571 region_type: PciBarRegionType, 572 ) -> std::result::Result<(), std::io::Error> { 573 match region_type { 574 PciBarRegionType::IoRegion => { 575 #[cfg(target_arch = "x86_64")] 576 { 577 // Update system allocator 578 self.allocator 579 .lock() 580 .unwrap() 581 .free_io_addresses(GuestAddress(old_base), len as GuestUsize); 582 583 self.allocator 584 .lock() 585 .unwrap() 586 .allocate_io_addresses( 587 Some(GuestAddress(new_base)), 588 len as GuestUsize, 589 None, 590 ) 591 .ok_or_else(|| { 592 io::Error::new(io::ErrorKind::Other, "failed allocating new IO range") 593 })?; 594 595 // Update PIO bus 596 self.io_bus 597 .update_range(old_base, len, new_base, len) 598 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 599 } 600 #[cfg(target_arch = "aarch64")] 601 error!("I/O region is not supported"); 602 } 603 PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => { 604 // Update system allocator 605 if region_type == PciBarRegionType::Memory32BitRegion { 606 self.allocator 607 .lock() 608 .unwrap() 609 .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize); 610 611 self.allocator 612 .lock() 613 .unwrap() 614 .allocate_mmio_hole_addresses( 615 Some(GuestAddress(new_base)), 616 len as GuestUsize, 617 Some(len), 618 ) 619 .ok_or_else(|| { 620 io::Error::new( 621 io::ErrorKind::Other, 622 "failed allocating new 32 bits MMIO range", 623 ) 624 })?; 625 } else { 626 // Find the specific allocator that this BAR was allocated from and use it for new one 627 for allocator in &self.pci_mmio_allocators { 628 let allocator_base = allocator.lock().unwrap().base(); 629 let allocator_end = allocator.lock().unwrap().end(); 630 631 if old_base >= allocator_base.0 && old_base <= allocator_end.0 { 632 allocator 633 .lock() 634 .unwrap() 635 .free(GuestAddress(old_base), len as GuestUsize); 636 637 allocator 638 .lock() 639 .unwrap() 640 .allocate( 641 Some(GuestAddress(new_base)), 642 len as GuestUsize, 643 Some(len), 644 ) 645 .ok_or_else(|| { 646 io::Error::new( 647 io::ErrorKind::Other, 648 "failed allocating new 64 bits MMIO range", 649 ) 650 })?; 651 652 break; 653 } 654 } 655 } 656 657 // Update MMIO bus 658 self.mmio_bus 659 .update_range(old_base, len, new_base, len) 660 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 661 } 662 } 663 664 // Update the device_tree resources associated with the device 665 if let Some(id) = pci_dev.id() { 666 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) { 667 let mut resource_updated = false; 668 for resource in node.resources.iter_mut() { 669 if let Resource::PciBar { base, type_, .. } = resource { 670 if PciBarRegionType::from(*type_) == region_type && *base == old_base { 671 *base = new_base; 672 resource_updated = true; 673 break; 674 } 675 } 676 } 677 678 if !resource_updated { 679 return Err(io::Error::new( 680 io::ErrorKind::Other, 681 format!( 682 "Couldn't find a resource with base 0x{old_base:x} for device {id}" 683 ), 684 )); 685 } 686 } else { 687 return Err(io::Error::new( 688 io::ErrorKind::Other, 689 format!("Couldn't find device {id} from device tree"), 690 )); 691 } 692 } 693 694 let any_dev = pci_dev.as_any(); 695 if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() { 696 let bar_addr = virtio_pci_dev.config_bar_addr(); 697 if bar_addr == new_base { 698 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) { 699 let io_addr = IoEventAddress::Mmio(addr); 700 self.vm.unregister_ioevent(event, &io_addr).map_err(|e| { 701 io::Error::new( 702 io::ErrorKind::Other, 703 format!("failed to unregister ioevent: {e:?}"), 704 ) 705 })?; 706 } 707 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) { 708 let io_addr = IoEventAddress::Mmio(addr); 709 self.vm 710 .register_ioevent(event, &io_addr, None) 711 .map_err(|e| { 712 io::Error::new( 713 io::ErrorKind::Other, 714 format!("failed to register ioevent: {e:?}"), 715 ) 716 })?; 717 } 718 } else { 719 let virtio_dev = virtio_pci_dev.virtio_device(); 720 let mut virtio_dev = virtio_dev.lock().unwrap(); 721 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() { 722 if shm_regions.addr.raw_value() == old_base { 723 let mem_region = self.vm.make_user_memory_region( 724 shm_regions.mem_slot, 725 old_base, 726 shm_regions.len, 727 shm_regions.host_addr, 728 false, 729 false, 730 ); 731 732 self.vm.remove_user_memory_region(mem_region).map_err(|e| { 733 io::Error::new( 734 io::ErrorKind::Other, 735 format!("failed to remove user memory region: {e:?}"), 736 ) 737 })?; 738 739 // Create new mapping by inserting new region to KVM. 740 let mem_region = self.vm.make_user_memory_region( 741 shm_regions.mem_slot, 742 new_base, 743 shm_regions.len, 744 shm_regions.host_addr, 745 false, 746 false, 747 ); 748 749 self.vm.create_user_memory_region(mem_region).map_err(|e| { 750 io::Error::new( 751 io::ErrorKind::Other, 752 format!("failed to create user memory regions: {e:?}"), 753 ) 754 })?; 755 756 // Update shared memory regions to reflect the new mapping. 757 shm_regions.addr = GuestAddress(new_base); 758 virtio_dev.set_shm_regions(shm_regions).map_err(|e| { 759 io::Error::new( 760 io::ErrorKind::Other, 761 format!("failed to update shared memory regions: {e:?}"), 762 ) 763 })?; 764 } 765 } 766 } 767 } 768 769 pci_dev.move_bar(old_base, new_base) 770 } 771 } 772 773 #[derive(Serialize, Deserialize)] 774 struct DeviceManagerState { 775 device_tree: DeviceTree, 776 device_id_cnt: Wrapping<usize>, 777 } 778 779 #[derive(Debug)] 780 pub struct PtyPair { 781 pub main: File, 782 pub path: PathBuf, 783 } 784 785 impl Clone for PtyPair { 786 fn clone(&self) -> Self { 787 PtyPair { 788 main: self.main.try_clone().unwrap(), 789 path: self.path.clone(), 790 } 791 } 792 } 793 794 #[derive(Clone)] 795 pub enum PciDeviceHandle { 796 Vfio(Arc<Mutex<VfioPciDevice>>), 797 Virtio(Arc<Mutex<VirtioPciDevice>>), 798 VfioUser(Arc<Mutex<VfioUserPciDevice>>), 799 } 800 801 #[derive(Clone)] 802 struct MetaVirtioDevice { 803 virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 804 iommu: bool, 805 id: String, 806 pci_segment: u16, 807 dma_handler: Option<Arc<dyn ExternalDmaMapping>>, 808 } 809 810 #[derive(Default)] 811 pub struct AcpiPlatformAddresses { 812 pub pm_timer_address: Option<GenericAddress>, 813 pub reset_reg_address: Option<GenericAddress>, 814 pub sleep_control_reg_address: Option<GenericAddress>, 815 pub sleep_status_reg_address: Option<GenericAddress>, 816 } 817 818 pub struct DeviceManager { 819 // The underlying hypervisor 820 hypervisor_type: HypervisorType, 821 822 // Manage address space related to devices 823 address_manager: Arc<AddressManager>, 824 825 // Console abstraction 826 console: Arc<Console>, 827 828 // console PTY 829 console_pty: Option<Arc<Mutex<PtyPair>>>, 830 831 // serial PTY 832 serial_pty: Option<Arc<Mutex<PtyPair>>>, 833 834 // Serial Manager 835 serial_manager: Option<Arc<SerialManager>>, 836 837 // pty foreground status, 838 console_resize_pipe: Option<Arc<File>>, 839 840 // To restore on exit. 841 original_termios_opt: Arc<Mutex<Option<termios>>>, 842 843 // Interrupt controller 844 #[cfg(target_arch = "x86_64")] 845 interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>, 846 #[cfg(target_arch = "aarch64")] 847 interrupt_controller: Option<Arc<Mutex<gic::Gic>>>, 848 849 // Things to be added to the commandline (e.g. aarch64 early console) 850 #[cfg(target_arch = "aarch64")] 851 cmdline_additions: Vec<String>, 852 853 // ACPI GED notification device 854 ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>, 855 856 // VM configuration 857 config: Arc<Mutex<VmConfig>>, 858 859 // Memory Manager 860 memory_manager: Arc<Mutex<MemoryManager>>, 861 862 // CPU Manager 863 cpu_manager: Arc<Mutex<CpuManager>>, 864 865 // The virtio devices on the system 866 virtio_devices: Vec<MetaVirtioDevice>, 867 868 // List of bus devices 869 // Let the DeviceManager keep strong references to the BusDevice devices. 870 // This allows the IO and MMIO buses to be provided with Weak references, 871 // which prevents cyclic dependencies. 872 bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>, 873 874 // Counter to keep track of the consumed device IDs. 875 device_id_cnt: Wrapping<usize>, 876 877 pci_segments: Vec<PciSegment>, 878 879 #[cfg_attr(target_arch = "aarch64", allow(dead_code))] 880 // MSI Interrupt Manager 881 msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, 882 883 #[cfg_attr(feature = "mshv", allow(dead_code))] 884 // Legacy Interrupt Manager 885 legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>, 886 887 // Passthrough device handle 888 passthrough_device: Option<VfioDeviceFd>, 889 890 // VFIO container 891 // Only one container can be created, therefore it is stored as part of the 892 // DeviceManager to be reused. 893 vfio_container: Option<Arc<VfioContainer>>, 894 895 // Paravirtualized IOMMU 896 iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>, 897 iommu_mapping: Option<Arc<IommuMapping>>, 898 899 // PCI information about devices attached to the paravirtualized IOMMU 900 // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF 901 // representing the devices attached to the virtual IOMMU. This is useful 902 // information for filling the ACPI VIOT table. 903 iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>, 904 905 // Tree of devices, representing the dependencies between devices. 906 // Useful for introspection, snapshot and restore. 907 device_tree: Arc<Mutex<DeviceTree>>, 908 909 // Exit event 910 exit_evt: EventFd, 911 reset_evt: EventFd, 912 913 #[cfg(target_arch = "aarch64")] 914 id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>, 915 916 // seccomp action 917 seccomp_action: SeccompAction, 918 919 // List of guest NUMA nodes. 920 numa_nodes: NumaNodes, 921 922 // Possible handle to the virtio-balloon device 923 balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>, 924 925 // Virtio Device activation EventFd to allow the VMM thread to trigger device 926 // activation and thus start the threads from the VMM thread 927 activate_evt: EventFd, 928 929 acpi_address: GuestAddress, 930 931 selected_segment: usize, 932 933 // Possible handle to the virtio-mem device 934 virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>, 935 936 #[cfg(target_arch = "aarch64")] 937 // GPIO device for AArch64 938 gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>, 939 940 // pvpanic device 941 pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>, 942 943 // Flag to force setting the iommu on virtio devices 944 force_iommu: bool, 945 946 // io_uring availability if detected 947 io_uring_supported: Option<bool>, 948 949 // List of unique identifiers provided at boot through the configuration. 950 boot_id_list: BTreeSet<String>, 951 952 // Start time of the VM 953 timestamp: Instant, 954 955 // Pending activations 956 pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>, 957 958 // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers 959 acpi_platform_addresses: AcpiPlatformAddresses, 960 961 snapshot: Option<Snapshot>, 962 } 963 964 impl DeviceManager { 965 #[allow(clippy::too_many_arguments)] 966 pub fn new( 967 #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>, 968 mmio_bus: Arc<Bus>, 969 hypervisor_type: HypervisorType, 970 vm: Arc<dyn hypervisor::Vm>, 971 config: Arc<Mutex<VmConfig>>, 972 memory_manager: Arc<Mutex<MemoryManager>>, 973 cpu_manager: Arc<Mutex<CpuManager>>, 974 exit_evt: EventFd, 975 reset_evt: EventFd, 976 seccomp_action: SeccompAction, 977 numa_nodes: NumaNodes, 978 activate_evt: &EventFd, 979 force_iommu: bool, 980 boot_id_list: BTreeSet<String>, 981 timestamp: Instant, 982 snapshot: Option<Snapshot>, 983 dynamic: bool, 984 ) -> DeviceManagerResult<Arc<Mutex<Self>>> { 985 trace_scoped!("DeviceManager::new"); 986 987 let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() { 988 let state: DeviceManagerState = snapshot.to_state().unwrap(); 989 ( 990 Arc::new(Mutex::new(state.device_tree.clone())), 991 state.device_id_cnt, 992 ) 993 } else { 994 (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0)) 995 }; 996 997 let num_pci_segments = 998 if let Some(platform_config) = config.lock().unwrap().platform.as_ref() { 999 platform_config.num_pci_segments 1000 } else { 1001 1 1002 }; 1003 1004 let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0; 1005 let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0; 1006 1007 // Start each PCI segment range on a 4GiB boundary 1008 let pci_segment_size = (end_of_device_area - start_of_device_area + 1) 1009 / ((4 << 30) * num_pci_segments as u64) 1010 * (4 << 30); 1011 1012 let mut pci_mmio_allocators = vec![]; 1013 for i in 0..num_pci_segments as u64 { 1014 let mmio_start = start_of_device_area + i * pci_segment_size; 1015 let allocator = Arc::new(Mutex::new( 1016 AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(), 1017 )); 1018 pci_mmio_allocators.push(allocator) 1019 } 1020 1021 let address_manager = Arc::new(AddressManager { 1022 allocator: memory_manager.lock().unwrap().allocator(), 1023 #[cfg(target_arch = "x86_64")] 1024 io_bus, 1025 mmio_bus, 1026 vm: vm.clone(), 1027 device_tree: Arc::clone(&device_tree), 1028 pci_mmio_allocators, 1029 }); 1030 1031 // First we create the MSI interrupt manager, the legacy one is created 1032 // later, after the IOAPIC device creation. 1033 // The reason we create the MSI one first is because the IOAPIC needs it, 1034 // and then the legacy interrupt manager needs an IOAPIC. So we're 1035 // handling a linear dependency chain: 1036 // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager. 1037 let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> = 1038 Arc::new(MsiInterruptManager::new( 1039 Arc::clone(&address_manager.allocator), 1040 vm, 1041 )); 1042 1043 let acpi_address = address_manager 1044 .allocator 1045 .lock() 1046 .unwrap() 1047 .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None) 1048 .ok_or(DeviceManagerError::AllocateIoPort)?; 1049 1050 let mut pci_irq_slots = [0; 32]; 1051 PciSegment::reserve_legacy_interrupts_for_pci_devices( 1052 &address_manager, 1053 &mut pci_irq_slots, 1054 )?; 1055 1056 let mut pci_segments = vec![PciSegment::new_default_segment( 1057 &address_manager, 1058 Arc::clone(&address_manager.pci_mmio_allocators[0]), 1059 &pci_irq_slots, 1060 )?]; 1061 1062 for i in 1..num_pci_segments as usize { 1063 pci_segments.push(PciSegment::new( 1064 i as u16, 1065 &address_manager, 1066 Arc::clone(&address_manager.pci_mmio_allocators[i]), 1067 &pci_irq_slots, 1068 )?); 1069 } 1070 1071 if dynamic { 1072 let acpi_address = address_manager 1073 .allocator 1074 .lock() 1075 .unwrap() 1076 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None) 1077 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 1078 1079 address_manager 1080 .mmio_bus 1081 .insert( 1082 cpu_manager.clone(), 1083 acpi_address.0, 1084 CPU_MANAGER_ACPI_SIZE as u64, 1085 ) 1086 .map_err(DeviceManagerError::BusError)?; 1087 1088 cpu_manager.lock().unwrap().set_acpi_address(acpi_address); 1089 } 1090 1091 let device_manager = DeviceManager { 1092 hypervisor_type, 1093 address_manager: Arc::clone(&address_manager), 1094 console: Arc::new(Console::default()), 1095 interrupt_controller: None, 1096 #[cfg(target_arch = "aarch64")] 1097 cmdline_additions: Vec::new(), 1098 ged_notification_device: None, 1099 config, 1100 memory_manager, 1101 cpu_manager, 1102 virtio_devices: Vec::new(), 1103 bus_devices: Vec::new(), 1104 device_id_cnt, 1105 msi_interrupt_manager, 1106 legacy_interrupt_manager: None, 1107 passthrough_device: None, 1108 vfio_container: None, 1109 iommu_device: None, 1110 iommu_mapping: None, 1111 iommu_attached_devices: None, 1112 pci_segments, 1113 device_tree, 1114 exit_evt, 1115 reset_evt, 1116 #[cfg(target_arch = "aarch64")] 1117 id_to_dev_info: HashMap::new(), 1118 seccomp_action, 1119 numa_nodes, 1120 balloon: None, 1121 activate_evt: activate_evt 1122 .try_clone() 1123 .map_err(DeviceManagerError::EventFd)?, 1124 acpi_address, 1125 selected_segment: 0, 1126 serial_pty: None, 1127 serial_manager: None, 1128 console_pty: None, 1129 console_resize_pipe: None, 1130 original_termios_opt: Arc::new(Mutex::new(None)), 1131 virtio_mem_devices: Vec::new(), 1132 #[cfg(target_arch = "aarch64")] 1133 gpio_device: None, 1134 pvpanic_device: None, 1135 force_iommu, 1136 io_uring_supported: None, 1137 boot_id_list, 1138 timestamp, 1139 pending_activations: Arc::new(Mutex::new(Vec::default())), 1140 acpi_platform_addresses: AcpiPlatformAddresses::default(), 1141 snapshot, 1142 }; 1143 1144 let device_manager = Arc::new(Mutex::new(device_manager)); 1145 1146 address_manager 1147 .mmio_bus 1148 .insert( 1149 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>, 1150 acpi_address.0, 1151 DEVICE_MANAGER_ACPI_SIZE as u64, 1152 ) 1153 .map_err(DeviceManagerError::BusError)?; 1154 1155 Ok(device_manager) 1156 } 1157 1158 pub fn serial_pty(&self) -> Option<PtyPair> { 1159 self.serial_pty 1160 .as_ref() 1161 .map(|pty| pty.lock().unwrap().clone()) 1162 } 1163 1164 pub fn console_pty(&self) -> Option<PtyPair> { 1165 self.console_pty 1166 .as_ref() 1167 .map(|pty| pty.lock().unwrap().clone()) 1168 } 1169 1170 pub fn console_resize_pipe(&self) -> Option<Arc<File>> { 1171 self.console_resize_pipe.as_ref().map(Arc::clone) 1172 } 1173 1174 pub fn create_devices( 1175 &mut self, 1176 serial_pty: Option<PtyPair>, 1177 console_pty: Option<PtyPair>, 1178 console_resize_pipe: Option<File>, 1179 original_termios_opt: Arc<Mutex<Option<termios>>>, 1180 ) -> DeviceManagerResult<()> { 1181 trace_scoped!("create_devices"); 1182 1183 let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new(); 1184 1185 let interrupt_controller = self.add_interrupt_controller()?; 1186 1187 self.cpu_manager 1188 .lock() 1189 .unwrap() 1190 .set_interrupt_controller(interrupt_controller.clone()); 1191 1192 // Now we can create the legacy interrupt manager, which needs the freshly 1193 // formed IOAPIC device. 1194 let legacy_interrupt_manager: Arc< 1195 dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>, 1196 > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone( 1197 &interrupt_controller, 1198 ))); 1199 1200 { 1201 if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() { 1202 self.address_manager 1203 .mmio_bus 1204 .insert( 1205 Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>, 1206 acpi_address.0, 1207 MEMORY_MANAGER_ACPI_SIZE as u64, 1208 ) 1209 .map_err(DeviceManagerError::BusError)?; 1210 } 1211 } 1212 1213 #[cfg(target_arch = "x86_64")] 1214 self.add_legacy_devices( 1215 self.reset_evt 1216 .try_clone() 1217 .map_err(DeviceManagerError::EventFd)?, 1218 )?; 1219 1220 #[cfg(target_arch = "aarch64")] 1221 self.add_legacy_devices(&legacy_interrupt_manager)?; 1222 1223 { 1224 self.ged_notification_device = self.add_acpi_devices( 1225 &legacy_interrupt_manager, 1226 self.reset_evt 1227 .try_clone() 1228 .map_err(DeviceManagerError::EventFd)?, 1229 self.exit_evt 1230 .try_clone() 1231 .map_err(DeviceManagerError::EventFd)?, 1232 )?; 1233 } 1234 1235 self.original_termios_opt = original_termios_opt; 1236 1237 self.console = self.add_console_device( 1238 &legacy_interrupt_manager, 1239 &mut virtio_devices, 1240 serial_pty, 1241 console_pty, 1242 console_resize_pipe, 1243 )?; 1244 1245 if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() { 1246 let tpm_dev = self.add_tpm_device(tpm.socket.clone())?; 1247 self.bus_devices 1248 .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>) 1249 } 1250 self.legacy_interrupt_manager = Some(legacy_interrupt_manager); 1251 1252 virtio_devices.append(&mut self.make_virtio_devices()?); 1253 1254 self.add_pci_devices(virtio_devices.clone())?; 1255 1256 self.virtio_devices = virtio_devices; 1257 1258 if self.config.clone().lock().unwrap().pvpanic { 1259 self.pvpanic_device = self.add_pvpanic_device()?; 1260 } 1261 1262 Ok(()) 1263 } 1264 1265 fn state(&self) -> DeviceManagerState { 1266 DeviceManagerState { 1267 device_tree: self.device_tree.lock().unwrap().clone(), 1268 device_id_cnt: self.device_id_cnt, 1269 } 1270 } 1271 1272 fn get_msi_iova_space(&mut self) -> (u64, u64) { 1273 #[cfg(target_arch = "aarch64")] 1274 { 1275 let vcpus = self.config.lock().unwrap().cpus.boot_vcpus; 1276 let vgic_config = gic::Gic::create_default_config(vcpus.into()); 1277 ( 1278 vgic_config.msi_addr, 1279 vgic_config.msi_addr + vgic_config.msi_size - 1, 1280 ) 1281 } 1282 #[cfg(target_arch = "x86_64")] 1283 (0xfee0_0000, 0xfeef_ffff) 1284 } 1285 1286 #[cfg(target_arch = "aarch64")] 1287 /// Gets the information of the devices registered up to some point in time. 1288 pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> { 1289 &self.id_to_dev_info 1290 } 1291 1292 #[allow(unused_variables)] 1293 fn add_pci_devices( 1294 &mut self, 1295 virtio_devices: Vec<MetaVirtioDevice>, 1296 ) -> DeviceManagerResult<()> { 1297 let iommu_id = String::from(IOMMU_DEVICE_NAME); 1298 1299 let iommu_device = if self.config.lock().unwrap().iommu { 1300 let (device, mapping) = virtio_devices::Iommu::new( 1301 iommu_id.clone(), 1302 self.seccomp_action.clone(), 1303 self.exit_evt 1304 .try_clone() 1305 .map_err(DeviceManagerError::EventFd)?, 1306 self.get_msi_iova_space(), 1307 versioned_state_from_id(self.snapshot.as_ref(), iommu_id.as_str()) 1308 .map_err(DeviceManagerError::RestoreGetState)?, 1309 ) 1310 .map_err(DeviceManagerError::CreateVirtioIommu)?; 1311 let device = Arc::new(Mutex::new(device)); 1312 self.iommu_device = Some(Arc::clone(&device)); 1313 self.iommu_mapping = Some(mapping); 1314 1315 // Fill the device tree with a new node. In case of restore, we 1316 // know there is nothing to do, so we can simply override the 1317 // existing entry. 1318 self.device_tree 1319 .lock() 1320 .unwrap() 1321 .insert(iommu_id.clone(), device_node!(iommu_id, device)); 1322 1323 Some(device) 1324 } else { 1325 None 1326 }; 1327 1328 let mut iommu_attached_devices = Vec::new(); 1329 { 1330 for handle in virtio_devices { 1331 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 1332 self.iommu_mapping.clone() 1333 } else { 1334 None 1335 }; 1336 1337 let dev_id = self.add_virtio_pci_device( 1338 handle.virtio_device, 1339 &mapping, 1340 handle.id, 1341 handle.pci_segment, 1342 handle.dma_handler, 1343 )?; 1344 1345 if handle.iommu { 1346 iommu_attached_devices.push(dev_id); 1347 } 1348 } 1349 1350 let mut vfio_iommu_device_ids = self.add_vfio_devices()?; 1351 iommu_attached_devices.append(&mut vfio_iommu_device_ids); 1352 1353 let mut vfio_user_iommu_device_ids = self.add_user_devices()?; 1354 iommu_attached_devices.append(&mut vfio_user_iommu_device_ids); 1355 1356 // Add all devices from forced iommu segments 1357 if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() { 1358 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { 1359 for segment in iommu_segments { 1360 for device in 0..32 { 1361 let bdf = PciBdf::new(*segment, 0, device, 0); 1362 if !iommu_attached_devices.contains(&bdf) { 1363 iommu_attached_devices.push(bdf); 1364 } 1365 } 1366 } 1367 } 1368 } 1369 1370 if let Some(iommu_device) = iommu_device { 1371 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?; 1372 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices)); 1373 } 1374 } 1375 1376 for segment in &self.pci_segments { 1377 #[cfg(target_arch = "x86_64")] 1378 if let Some(pci_config_io) = segment.pci_config_io.as_ref() { 1379 self.bus_devices 1380 .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>); 1381 } 1382 1383 self.bus_devices 1384 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>); 1385 } 1386 1387 Ok(()) 1388 } 1389 1390 #[cfg(target_arch = "aarch64")] 1391 fn add_interrupt_controller( 1392 &mut self, 1393 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1394 let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new( 1395 gic::Gic::new( 1396 self.config.lock().unwrap().cpus.boot_vcpus, 1397 Arc::clone(&self.msi_interrupt_manager), 1398 self.address_manager.vm.clone(), 1399 ) 1400 .map_err(DeviceManagerError::CreateInterruptController)?, 1401 )); 1402 1403 self.interrupt_controller = Some(interrupt_controller.clone()); 1404 1405 // Restore the vGic if this is in the process of restoration 1406 let id = String::from(gic::GIC_SNAPSHOT_ID); 1407 if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) { 1408 // PMU support is optional. Nothing should be impacted if the PMU initialization failed. 1409 if self 1410 .cpu_manager 1411 .lock() 1412 .unwrap() 1413 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16) 1414 .is_err() 1415 { 1416 info!("Failed to initialize PMU"); 1417 } 1418 1419 let vgic_state = vgic_snapshot 1420 .to_state() 1421 .map_err(DeviceManagerError::RestoreGetState)?; 1422 let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states(); 1423 interrupt_controller 1424 .lock() 1425 .unwrap() 1426 .restore_vgic(vgic_state, &saved_vcpu_states) 1427 .unwrap(); 1428 } 1429 1430 self.device_tree 1431 .lock() 1432 .unwrap() 1433 .insert(id.clone(), device_node!(id, interrupt_controller)); 1434 1435 Ok(interrupt_controller) 1436 } 1437 1438 #[cfg(target_arch = "aarch64")] 1439 pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> { 1440 self.interrupt_controller.as_ref() 1441 } 1442 1443 #[cfg(target_arch = "x86_64")] 1444 fn add_interrupt_controller( 1445 &mut self, 1446 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1447 let id = String::from(IOAPIC_DEVICE_NAME); 1448 1449 // Create IOAPIC 1450 let interrupt_controller = Arc::new(Mutex::new( 1451 ioapic::Ioapic::new( 1452 id.clone(), 1453 APIC_START, 1454 Arc::clone(&self.msi_interrupt_manager), 1455 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1456 .map_err(DeviceManagerError::RestoreGetState)?, 1457 ) 1458 .map_err(DeviceManagerError::CreateInterruptController)?, 1459 )); 1460 1461 self.interrupt_controller = Some(interrupt_controller.clone()); 1462 1463 self.address_manager 1464 .mmio_bus 1465 .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE) 1466 .map_err(DeviceManagerError::BusError)?; 1467 1468 self.bus_devices 1469 .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>); 1470 1471 // Fill the device tree with a new node. In case of restore, we 1472 // know there is nothing to do, so we can simply override the 1473 // existing entry. 1474 self.device_tree 1475 .lock() 1476 .unwrap() 1477 .insert(id.clone(), device_node!(id, interrupt_controller)); 1478 1479 Ok(interrupt_controller) 1480 } 1481 1482 fn add_acpi_devices( 1483 &mut self, 1484 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1485 reset_evt: EventFd, 1486 exit_evt: EventFd, 1487 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> { 1488 let vcpus_kill_signalled = self 1489 .cpu_manager 1490 .lock() 1491 .unwrap() 1492 .vcpus_kill_signalled() 1493 .clone(); 1494 let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new( 1495 exit_evt, 1496 reset_evt, 1497 vcpus_kill_signalled, 1498 ))); 1499 1500 self.bus_devices 1501 .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>); 1502 1503 #[cfg(target_arch = "x86_64")] 1504 { 1505 let shutdown_pio_address: u16 = 0x600; 1506 1507 self.address_manager 1508 .allocator 1509 .lock() 1510 .unwrap() 1511 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None) 1512 .ok_or(DeviceManagerError::AllocateIoPort)?; 1513 1514 self.address_manager 1515 .io_bus 1516 .insert(shutdown_device, shutdown_pio_address.into(), 0x4) 1517 .map_err(DeviceManagerError::BusError)?; 1518 1519 self.acpi_platform_addresses.sleep_control_reg_address = 1520 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1521 self.acpi_platform_addresses.sleep_status_reg_address = 1522 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1523 self.acpi_platform_addresses.reset_reg_address = 1524 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1525 } 1526 1527 let ged_irq = self 1528 .address_manager 1529 .allocator 1530 .lock() 1531 .unwrap() 1532 .allocate_irq() 1533 .unwrap(); 1534 let interrupt_group = interrupt_manager 1535 .create_group(LegacyIrqGroupConfig { 1536 irq: ged_irq as InterruptIndex, 1537 }) 1538 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1539 let ged_address = self 1540 .address_manager 1541 .allocator 1542 .lock() 1543 .unwrap() 1544 .allocate_platform_mmio_addresses( 1545 None, 1546 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1547 None, 1548 ) 1549 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 1550 let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new( 1551 interrupt_group, 1552 ged_irq, 1553 ged_address, 1554 ))); 1555 self.address_manager 1556 .mmio_bus 1557 .insert( 1558 ged_device.clone(), 1559 ged_address.0, 1560 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1561 ) 1562 .map_err(DeviceManagerError::BusError)?; 1563 self.bus_devices 1564 .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>); 1565 1566 let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new())); 1567 1568 self.bus_devices 1569 .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>); 1570 1571 #[cfg(target_arch = "x86_64")] 1572 { 1573 let pm_timer_pio_address: u16 = 0x608; 1574 1575 self.address_manager 1576 .allocator 1577 .lock() 1578 .unwrap() 1579 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None) 1580 .ok_or(DeviceManagerError::AllocateIoPort)?; 1581 1582 self.address_manager 1583 .io_bus 1584 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4) 1585 .map_err(DeviceManagerError::BusError)?; 1586 1587 self.acpi_platform_addresses.pm_timer_address = 1588 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address)); 1589 } 1590 1591 Ok(Some(ged_device)) 1592 } 1593 1594 #[cfg(target_arch = "x86_64")] 1595 fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> { 1596 let vcpus_kill_signalled = self 1597 .cpu_manager 1598 .lock() 1599 .unwrap() 1600 .vcpus_kill_signalled() 1601 .clone(); 1602 // Add a shutdown device (i8042) 1603 let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new( 1604 reset_evt.try_clone().unwrap(), 1605 vcpus_kill_signalled.clone(), 1606 ))); 1607 1608 self.bus_devices 1609 .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>); 1610 1611 self.address_manager 1612 .io_bus 1613 .insert(i8042, 0x61, 0x4) 1614 .map_err(DeviceManagerError::BusError)?; 1615 { 1616 // Add a CMOS emulated device 1617 let mem_size = self 1618 .memory_manager 1619 .lock() 1620 .unwrap() 1621 .guest_memory() 1622 .memory() 1623 .last_addr() 1624 .0 1625 + 1; 1626 let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size); 1627 let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0); 1628 1629 let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new( 1630 mem_below_4g, 1631 mem_above_4g, 1632 reset_evt, 1633 Some(vcpus_kill_signalled), 1634 ))); 1635 1636 self.bus_devices 1637 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>); 1638 1639 self.address_manager 1640 .io_bus 1641 .insert(cmos, 0x70, 0x2) 1642 .map_err(DeviceManagerError::BusError)?; 1643 1644 let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new())); 1645 1646 self.bus_devices 1647 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>); 1648 1649 self.address_manager 1650 .io_bus 1651 .insert(fwdebug, 0x402, 0x1) 1652 .map_err(DeviceManagerError::BusError)?; 1653 } 1654 1655 // 0x80 debug port 1656 let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp))); 1657 self.bus_devices 1658 .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>); 1659 self.address_manager 1660 .io_bus 1661 .insert(debug_port, 0x80, 0x1) 1662 .map_err(DeviceManagerError::BusError)?; 1663 1664 Ok(()) 1665 } 1666 1667 #[cfg(target_arch = "aarch64")] 1668 fn add_legacy_devices( 1669 &mut self, 1670 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1671 ) -> DeviceManagerResult<()> { 1672 // Add a RTC device 1673 let rtc_irq = self 1674 .address_manager 1675 .allocator 1676 .lock() 1677 .unwrap() 1678 .allocate_irq() 1679 .unwrap(); 1680 1681 let interrupt_group = interrupt_manager 1682 .create_group(LegacyIrqGroupConfig { 1683 irq: rtc_irq as InterruptIndex, 1684 }) 1685 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1686 1687 let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group))); 1688 1689 self.bus_devices 1690 .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>); 1691 1692 let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START; 1693 1694 self.address_manager 1695 .mmio_bus 1696 .insert(rtc_device, addr.0, MMIO_LEN) 1697 .map_err(DeviceManagerError::BusError)?; 1698 1699 self.id_to_dev_info.insert( 1700 (DeviceType::Rtc, "rtc".to_string()), 1701 MmioDeviceInfo { 1702 addr: addr.0, 1703 len: MMIO_LEN, 1704 irq: rtc_irq, 1705 }, 1706 ); 1707 1708 // Add a GPIO device 1709 let id = String::from(GPIO_DEVICE_NAME); 1710 let gpio_irq = self 1711 .address_manager 1712 .allocator 1713 .lock() 1714 .unwrap() 1715 .allocate_irq() 1716 .unwrap(); 1717 1718 let interrupt_group = interrupt_manager 1719 .create_group(LegacyIrqGroupConfig { 1720 irq: gpio_irq as InterruptIndex, 1721 }) 1722 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1723 1724 let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new( 1725 id.clone(), 1726 interrupt_group, 1727 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1728 .map_err(DeviceManagerError::RestoreGetState)?, 1729 ))); 1730 1731 self.bus_devices 1732 .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>); 1733 1734 let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START; 1735 1736 self.address_manager 1737 .mmio_bus 1738 .insert(gpio_device.clone(), addr.0, MMIO_LEN) 1739 .map_err(DeviceManagerError::BusError)?; 1740 1741 self.gpio_device = Some(gpio_device.clone()); 1742 1743 self.id_to_dev_info.insert( 1744 (DeviceType::Gpio, "gpio".to_string()), 1745 MmioDeviceInfo { 1746 addr: addr.0, 1747 len: MMIO_LEN, 1748 irq: gpio_irq, 1749 }, 1750 ); 1751 1752 self.device_tree 1753 .lock() 1754 .unwrap() 1755 .insert(id.clone(), device_node!(id, gpio_device)); 1756 1757 Ok(()) 1758 } 1759 1760 #[cfg(target_arch = "x86_64")] 1761 fn add_serial_device( 1762 &mut self, 1763 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1764 serial_writer: Option<Box<dyn io::Write + Send>>, 1765 ) -> DeviceManagerResult<Arc<Mutex<Serial>>> { 1766 // Serial is tied to IRQ #4 1767 let serial_irq = 4; 1768 1769 let id = String::from(SERIAL_DEVICE_NAME); 1770 1771 let interrupt_group = interrupt_manager 1772 .create_group(LegacyIrqGroupConfig { 1773 irq: serial_irq as InterruptIndex, 1774 }) 1775 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1776 1777 let serial = Arc::new(Mutex::new(Serial::new( 1778 id.clone(), 1779 interrupt_group, 1780 serial_writer, 1781 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1782 .map_err(DeviceManagerError::RestoreGetState)?, 1783 ))); 1784 1785 self.bus_devices 1786 .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>); 1787 1788 self.address_manager 1789 .allocator 1790 .lock() 1791 .unwrap() 1792 .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None) 1793 .ok_or(DeviceManagerError::AllocateIoPort)?; 1794 1795 self.address_manager 1796 .io_bus 1797 .insert(serial.clone(), 0x3f8, 0x8) 1798 .map_err(DeviceManagerError::BusError)?; 1799 1800 // Fill the device tree with a new node. In case of restore, we 1801 // know there is nothing to do, so we can simply override the 1802 // existing entry. 1803 self.device_tree 1804 .lock() 1805 .unwrap() 1806 .insert(id.clone(), device_node!(id, serial)); 1807 1808 Ok(serial) 1809 } 1810 1811 #[cfg(target_arch = "aarch64")] 1812 fn add_serial_device( 1813 &mut self, 1814 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1815 serial_writer: Option<Box<dyn io::Write + Send>>, 1816 ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> { 1817 let id = String::from(SERIAL_DEVICE_NAME); 1818 1819 let serial_irq = self 1820 .address_manager 1821 .allocator 1822 .lock() 1823 .unwrap() 1824 .allocate_irq() 1825 .unwrap(); 1826 1827 let interrupt_group = interrupt_manager 1828 .create_group(LegacyIrqGroupConfig { 1829 irq: serial_irq as InterruptIndex, 1830 }) 1831 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1832 1833 let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new( 1834 id.clone(), 1835 interrupt_group, 1836 serial_writer, 1837 self.timestamp, 1838 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1839 .map_err(DeviceManagerError::RestoreGetState)?, 1840 ))); 1841 1842 self.bus_devices 1843 .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>); 1844 1845 let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START; 1846 1847 self.address_manager 1848 .mmio_bus 1849 .insert(serial.clone(), addr.0, MMIO_LEN) 1850 .map_err(DeviceManagerError::BusError)?; 1851 1852 self.id_to_dev_info.insert( 1853 (DeviceType::Serial, DeviceType::Serial.to_string()), 1854 MmioDeviceInfo { 1855 addr: addr.0, 1856 len: MMIO_LEN, 1857 irq: serial_irq, 1858 }, 1859 ); 1860 1861 self.cmdline_additions 1862 .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0)); 1863 1864 // Fill the device tree with a new node. In case of restore, we 1865 // know there is nothing to do, so we can simply override the 1866 // existing entry. 1867 self.device_tree 1868 .lock() 1869 .unwrap() 1870 .insert(id.clone(), device_node!(id, serial)); 1871 1872 Ok(serial) 1873 } 1874 1875 fn modify_mode<F: FnOnce(&mut termios)>( 1876 &mut self, 1877 fd: RawFd, 1878 f: F, 1879 ) -> vmm_sys_util::errno::Result<()> { 1880 // SAFETY: safe because we check the return value of isatty. 1881 if unsafe { isatty(fd) } != 1 { 1882 return Ok(()); 1883 } 1884 1885 // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr 1886 // and we check the return result. 1887 let mut termios: termios = unsafe { zeroed() }; 1888 // SAFETY: see above 1889 let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) }; 1890 if ret < 0 { 1891 return vmm_sys_util::errno::errno_result(); 1892 } 1893 let mut original_termios_opt = self.original_termios_opt.lock().unwrap(); 1894 if original_termios_opt.is_none() { 1895 *original_termios_opt = Some(termios); 1896 } 1897 f(&mut termios); 1898 // SAFETY: Safe because the syscall will only read the extent of termios and we check 1899 // the return result. 1900 let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) }; 1901 if ret < 0 { 1902 return vmm_sys_util::errno::errno_result(); 1903 } 1904 1905 Ok(()) 1906 } 1907 1908 fn set_raw_mode(&mut self, f: &mut dyn AsRawFd) -> vmm_sys_util::errno::Result<()> { 1909 // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode. 1910 self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) }) 1911 } 1912 1913 fn listen_for_sigwinch_on_tty(&mut self, pty_sub: File) -> std::io::Result<()> { 1914 let seccomp_filter = get_seccomp_filter( 1915 &self.seccomp_action, 1916 Thread::PtyForeground, 1917 self.hypervisor_type, 1918 ) 1919 .unwrap(); 1920 1921 self.console_resize_pipe = 1922 Some(Arc::new(start_sigwinch_listener(seccomp_filter, pty_sub)?)); 1923 1924 Ok(()) 1925 } 1926 1927 fn add_virtio_console_device( 1928 &mut self, 1929 virtio_devices: &mut Vec<MetaVirtioDevice>, 1930 console_pty: Option<PtyPair>, 1931 resize_pipe: Option<File>, 1932 ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> { 1933 let console_config = self.config.lock().unwrap().console.clone(); 1934 let endpoint = match console_config.mode { 1935 ConsoleOutputMode::File => { 1936 let file = File::create(console_config.file.as_ref().unwrap()) 1937 .map_err(DeviceManagerError::ConsoleOutputFileOpen)?; 1938 Endpoint::File(file) 1939 } 1940 ConsoleOutputMode::Pty => { 1941 if let Some(pty) = console_pty { 1942 self.config.lock().unwrap().console.file = Some(pty.path.clone()); 1943 let file = pty.main.try_clone().unwrap(); 1944 self.console_pty = Some(Arc::new(Mutex::new(pty))); 1945 self.console_resize_pipe = resize_pipe.map(Arc::new); 1946 Endpoint::PtyPair(file.try_clone().unwrap(), file) 1947 } else { 1948 let (main, mut sub, path) = 1949 create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?; 1950 self.set_raw_mode(&mut sub) 1951 .map_err(DeviceManagerError::SetPtyRaw)?; 1952 self.config.lock().unwrap().console.file = Some(path.clone()); 1953 let file = main.try_clone().unwrap(); 1954 assert!(resize_pipe.is_none()); 1955 self.listen_for_sigwinch_on_tty(sub).unwrap(); 1956 self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path }))); 1957 Endpoint::PtyPair(file.try_clone().unwrap(), file) 1958 } 1959 } 1960 ConsoleOutputMode::Tty => { 1961 // Duplicating the file descriptors like this is needed as otherwise 1962 // they will be closed on a reboot and the numbers reused 1963 1964 // SAFETY: FFI call to dup. Trivially safe. 1965 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) }; 1966 if stdout == -1 { 1967 return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd); 1968 } 1969 // SAFETY: stdout is valid and owned solely by us. 1970 let mut stdout = unsafe { File::from_raw_fd(stdout) }; 1971 1972 // Make sure stdout is in raw mode, if it's a terminal. 1973 let _ = self.set_raw_mode(&mut stdout); 1974 1975 // SAFETY: FFI call. Trivially safe. 1976 if unsafe { libc::isatty(libc::STDOUT_FILENO) } == 1 { 1977 self.listen_for_sigwinch_on_tty(stdout.try_clone().unwrap()) 1978 .unwrap(); 1979 } 1980 1981 // If an interactive TTY then we can accept input 1982 // SAFETY: FFI call. Trivially safe. 1983 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } { 1984 // SAFETY: FFI call to dup. Trivially safe. 1985 let stdin = unsafe { libc::dup(libc::STDIN_FILENO) }; 1986 if stdin == -1 { 1987 return vmm_sys_util::errno::errno_result() 1988 .map_err(DeviceManagerError::DupFd); 1989 } 1990 // SAFETY: stdin is valid and owned solely by us. 1991 let stdin = unsafe { File::from_raw_fd(stdin) }; 1992 1993 Endpoint::FilePair(stdout, stdin) 1994 } else { 1995 Endpoint::File(stdout) 1996 } 1997 } 1998 ConsoleOutputMode::Null => Endpoint::Null, 1999 ConsoleOutputMode::Off => return Ok(None), 2000 }; 2001 let id = String::from(CONSOLE_DEVICE_NAME); 2002 2003 let (virtio_console_device, console_resizer) = virtio_devices::Console::new( 2004 id.clone(), 2005 endpoint, 2006 self.console_resize_pipe 2007 .as_ref() 2008 .map(|p| p.try_clone().unwrap()), 2009 self.force_iommu | console_config.iommu, 2010 self.seccomp_action.clone(), 2011 self.exit_evt 2012 .try_clone() 2013 .map_err(DeviceManagerError::EventFd)?, 2014 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 2015 .map_err(DeviceManagerError::RestoreGetState)?, 2016 ) 2017 .map_err(DeviceManagerError::CreateVirtioConsole)?; 2018 let virtio_console_device = Arc::new(Mutex::new(virtio_console_device)); 2019 virtio_devices.push(MetaVirtioDevice { 2020 virtio_device: Arc::clone(&virtio_console_device) 2021 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2022 iommu: console_config.iommu, 2023 id: id.clone(), 2024 pci_segment: 0, 2025 dma_handler: None, 2026 }); 2027 2028 // Fill the device tree with a new node. In case of restore, we 2029 // know there is nothing to do, so we can simply override the 2030 // existing entry. 2031 self.device_tree 2032 .lock() 2033 .unwrap() 2034 .insert(id.clone(), device_node!(id, virtio_console_device)); 2035 2036 // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY 2037 Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) { 2038 Some(console_resizer) 2039 } else { 2040 None 2041 }) 2042 } 2043 2044 fn add_console_device( 2045 &mut self, 2046 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 2047 virtio_devices: &mut Vec<MetaVirtioDevice>, 2048 serial_pty: Option<PtyPair>, 2049 console_pty: Option<PtyPair>, 2050 console_resize_pipe: Option<File>, 2051 ) -> DeviceManagerResult<Arc<Console>> { 2052 let serial_config = self.config.lock().unwrap().serial.clone(); 2053 let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode { 2054 ConsoleOutputMode::File => Some(Box::new( 2055 File::create(serial_config.file.as_ref().unwrap()) 2056 .map_err(DeviceManagerError::SerialOutputFileOpen)?, 2057 )), 2058 ConsoleOutputMode::Pty => { 2059 if let Some(pty) = serial_pty { 2060 self.config.lock().unwrap().serial.file = Some(pty.path.clone()); 2061 self.serial_pty = Some(Arc::new(Mutex::new(pty))); 2062 } else { 2063 let (main, mut sub, path) = 2064 create_pty().map_err(DeviceManagerError::SerialPtyOpen)?; 2065 self.set_raw_mode(&mut sub) 2066 .map_err(DeviceManagerError::SetPtyRaw)?; 2067 self.config.lock().unwrap().serial.file = Some(path.clone()); 2068 self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path }))); 2069 } 2070 None 2071 } 2072 ConsoleOutputMode::Tty => { 2073 let mut out = stdout(); 2074 let _ = self.set_raw_mode(&mut out); 2075 Some(Box::new(out)) 2076 } 2077 ConsoleOutputMode::Off | ConsoleOutputMode::Null => None, 2078 }; 2079 if serial_config.mode != ConsoleOutputMode::Off { 2080 let serial = self.add_serial_device(interrupt_manager, serial_writer)?; 2081 self.serial_manager = match serial_config.mode { 2082 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty => { 2083 let serial_manager = 2084 SerialManager::new(serial, self.serial_pty.clone(), serial_config.mode) 2085 .map_err(DeviceManagerError::CreateSerialManager)?; 2086 if let Some(mut serial_manager) = serial_manager { 2087 serial_manager 2088 .start_thread( 2089 self.exit_evt 2090 .try_clone() 2091 .map_err(DeviceManagerError::EventFd)?, 2092 ) 2093 .map_err(DeviceManagerError::SpawnSerialManager)?; 2094 Some(Arc::new(serial_manager)) 2095 } else { 2096 None 2097 } 2098 } 2099 _ => None, 2100 }; 2101 } 2102 2103 let console_resizer = 2104 self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?; 2105 2106 Ok(Arc::new(Console { console_resizer })) 2107 } 2108 2109 fn add_tpm_device( 2110 &mut self, 2111 tpm_path: PathBuf, 2112 ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> { 2113 // Create TPM Device 2114 let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| { 2115 DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e)) 2116 })?; 2117 let tpm = Arc::new(Mutex::new(tpm)); 2118 2119 // Add TPM Device to mmio 2120 self.address_manager 2121 .mmio_bus 2122 .insert( 2123 tpm.clone(), 2124 arch::layout::TPM_START.0, 2125 arch::layout::TPM_SIZE, 2126 ) 2127 .map_err(DeviceManagerError::BusError)?; 2128 2129 Ok(tpm) 2130 } 2131 2132 fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2133 let mut devices: Vec<MetaVirtioDevice> = Vec::new(); 2134 2135 // Create "standard" virtio devices (net/block/rng) 2136 devices.append(&mut self.make_virtio_block_devices()?); 2137 devices.append(&mut self.make_virtio_net_devices()?); 2138 devices.append(&mut self.make_virtio_rng_devices()?); 2139 2140 // Add virtio-fs if required 2141 devices.append(&mut self.make_virtio_fs_devices()?); 2142 2143 // Add virtio-pmem if required 2144 devices.append(&mut self.make_virtio_pmem_devices()?); 2145 2146 // Add virtio-vsock if required 2147 devices.append(&mut self.make_virtio_vsock_devices()?); 2148 2149 devices.append(&mut self.make_virtio_mem_devices()?); 2150 2151 // Add virtio-balloon if required 2152 devices.append(&mut self.make_virtio_balloon_devices()?); 2153 2154 // Add virtio-watchdog device 2155 devices.append(&mut self.make_virtio_watchdog_devices()?); 2156 2157 // Add vDPA devices if required 2158 devices.append(&mut self.make_vdpa_devices()?); 2159 2160 Ok(devices) 2161 } 2162 2163 // Cache whether io_uring is supported to avoid probing for very block device 2164 fn io_uring_is_supported(&mut self) -> bool { 2165 if let Some(supported) = self.io_uring_supported { 2166 return supported; 2167 } 2168 2169 let supported = block_io_uring_is_supported(); 2170 self.io_uring_supported = Some(supported); 2171 supported 2172 } 2173 2174 fn make_virtio_block_device( 2175 &mut self, 2176 disk_cfg: &mut DiskConfig, 2177 ) -> DeviceManagerResult<MetaVirtioDevice> { 2178 let id = if let Some(id) = &disk_cfg.id { 2179 id.clone() 2180 } else { 2181 let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?; 2182 disk_cfg.id = Some(id.clone()); 2183 id 2184 }; 2185 2186 info!("Creating virtio-block device: {:?}", disk_cfg); 2187 2188 let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); 2189 2190 let (virtio_device, migratable_device) = if disk_cfg.vhost_user { 2191 let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone(); 2192 let vu_cfg = VhostUserConfig { 2193 socket, 2194 num_queues: disk_cfg.num_queues, 2195 queue_size: disk_cfg.queue_size, 2196 }; 2197 let vhost_user_block = Arc::new(Mutex::new( 2198 match virtio_devices::vhost_user::Blk::new( 2199 id.clone(), 2200 vu_cfg, 2201 self.seccomp_action.clone(), 2202 self.exit_evt 2203 .try_clone() 2204 .map_err(DeviceManagerError::EventFd)?, 2205 self.force_iommu, 2206 snapshot 2207 .map(|s| s.to_versioned_state()) 2208 .transpose() 2209 .map_err(DeviceManagerError::RestoreGetState)?, 2210 ) { 2211 Ok(vub_device) => vub_device, 2212 Err(e) => { 2213 return Err(DeviceManagerError::CreateVhostUserBlk(e)); 2214 } 2215 }, 2216 )); 2217 2218 ( 2219 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2220 vhost_user_block as Arc<Mutex<dyn Migratable>>, 2221 ) 2222 } else { 2223 let mut options = OpenOptions::new(); 2224 options.read(true); 2225 options.write(!disk_cfg.readonly); 2226 if disk_cfg.direct { 2227 options.custom_flags(libc::O_DIRECT); 2228 } 2229 // Open block device path 2230 let mut file: File = options 2231 .open( 2232 disk_cfg 2233 .path 2234 .as_ref() 2235 .ok_or(DeviceManagerError::NoDiskPath)? 2236 .clone(), 2237 ) 2238 .map_err(DeviceManagerError::Disk)?; 2239 let image_type = 2240 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?; 2241 2242 let image = match image_type { 2243 ImageType::FixedVhd => { 2244 // Use asynchronous backend relying on io_uring if the 2245 // syscalls are supported. 2246 if cfg!(feature = "io_uring") 2247 && !disk_cfg.disable_io_uring 2248 && self.io_uring_is_supported() 2249 { 2250 info!("Using asynchronous fixed VHD disk file (io_uring)"); 2251 2252 #[cfg(not(feature = "io_uring"))] 2253 unreachable!("Checked in if statement above"); 2254 #[cfg(feature = "io_uring")] 2255 { 2256 Box::new( 2257 FixedVhdDiskAsync::new(file) 2258 .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?, 2259 ) as Box<dyn DiskFile> 2260 } 2261 } else { 2262 info!("Using synchronous fixed VHD disk file"); 2263 Box::new( 2264 FixedVhdDiskSync::new(file) 2265 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?, 2266 ) as Box<dyn DiskFile> 2267 } 2268 } 2269 ImageType::Raw => { 2270 // Use asynchronous backend relying on io_uring if the 2271 // syscalls are supported. 2272 if cfg!(feature = "io_uring") 2273 && !disk_cfg.disable_io_uring 2274 && self.io_uring_is_supported() 2275 { 2276 info!("Using asynchronous RAW disk file (io_uring)"); 2277 2278 #[cfg(not(feature = "io_uring"))] 2279 unreachable!("Checked in if statement above"); 2280 #[cfg(feature = "io_uring")] 2281 { 2282 Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile> 2283 } 2284 } else { 2285 info!("Using synchronous RAW disk file"); 2286 Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile> 2287 } 2288 } 2289 ImageType::Qcow2 => { 2290 info!("Using synchronous QCOW disk file"); 2291 Box::new( 2292 QcowDiskSync::new(file, disk_cfg.direct) 2293 .map_err(DeviceManagerError::CreateQcowDiskSync)?, 2294 ) as Box<dyn DiskFile> 2295 } 2296 ImageType::Vhdx => { 2297 info!("Using synchronous VHDX disk file"); 2298 Box::new( 2299 VhdxDiskSync::new(file) 2300 .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?, 2301 ) as Box<dyn DiskFile> 2302 } 2303 }; 2304 2305 let virtio_block = Arc::new(Mutex::new( 2306 virtio_devices::Block::new( 2307 id.clone(), 2308 image, 2309 disk_cfg 2310 .path 2311 .as_ref() 2312 .ok_or(DeviceManagerError::NoDiskPath)? 2313 .clone(), 2314 disk_cfg.readonly, 2315 self.force_iommu | disk_cfg.iommu, 2316 disk_cfg.num_queues, 2317 disk_cfg.queue_size, 2318 self.seccomp_action.clone(), 2319 disk_cfg.rate_limiter_config, 2320 self.exit_evt 2321 .try_clone() 2322 .map_err(DeviceManagerError::EventFd)?, 2323 snapshot 2324 .map(|s| s.to_versioned_state()) 2325 .transpose() 2326 .map_err(DeviceManagerError::RestoreGetState)?, 2327 ) 2328 .map_err(DeviceManagerError::CreateVirtioBlock)?, 2329 )); 2330 2331 ( 2332 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2333 virtio_block as Arc<Mutex<dyn Migratable>>, 2334 ) 2335 }; 2336 2337 // Fill the device tree with a new node. In case of restore, we 2338 // know there is nothing to do, so we can simply override the 2339 // existing entry. 2340 self.device_tree 2341 .lock() 2342 .unwrap() 2343 .insert(id.clone(), device_node!(id, migratable_device)); 2344 2345 Ok(MetaVirtioDevice { 2346 virtio_device, 2347 iommu: disk_cfg.iommu, 2348 id, 2349 pci_segment: disk_cfg.pci_segment, 2350 dma_handler: None, 2351 }) 2352 } 2353 2354 fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2355 let mut devices = Vec::new(); 2356 2357 let mut block_devices = self.config.lock().unwrap().disks.clone(); 2358 if let Some(disk_list_cfg) = &mut block_devices { 2359 for disk_cfg in disk_list_cfg.iter_mut() { 2360 devices.push(self.make_virtio_block_device(disk_cfg)?); 2361 } 2362 } 2363 self.config.lock().unwrap().disks = block_devices; 2364 2365 Ok(devices) 2366 } 2367 2368 fn make_virtio_net_device( 2369 &mut self, 2370 net_cfg: &mut NetConfig, 2371 ) -> DeviceManagerResult<MetaVirtioDevice> { 2372 let id = if let Some(id) = &net_cfg.id { 2373 id.clone() 2374 } else { 2375 let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?; 2376 net_cfg.id = Some(id.clone()); 2377 id 2378 }; 2379 info!("Creating virtio-net device: {:?}", net_cfg); 2380 2381 let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); 2382 2383 let (virtio_device, migratable_device) = if net_cfg.vhost_user { 2384 let socket = net_cfg.vhost_socket.as_ref().unwrap().clone(); 2385 let vu_cfg = VhostUserConfig { 2386 socket, 2387 num_queues: net_cfg.num_queues, 2388 queue_size: net_cfg.queue_size, 2389 }; 2390 let server = match net_cfg.vhost_mode { 2391 VhostMode::Client => false, 2392 VhostMode::Server => true, 2393 }; 2394 let vhost_user_net = Arc::new(Mutex::new( 2395 match virtio_devices::vhost_user::Net::new( 2396 id.clone(), 2397 net_cfg.mac, 2398 net_cfg.mtu, 2399 vu_cfg, 2400 server, 2401 self.seccomp_action.clone(), 2402 self.exit_evt 2403 .try_clone() 2404 .map_err(DeviceManagerError::EventFd)?, 2405 self.force_iommu, 2406 snapshot 2407 .map(|s| s.to_versioned_state()) 2408 .transpose() 2409 .map_err(DeviceManagerError::RestoreGetState)?, 2410 net_cfg.offload_tso, 2411 net_cfg.offload_ufo, 2412 net_cfg.offload_csum, 2413 ) { 2414 Ok(vun_device) => vun_device, 2415 Err(e) => { 2416 return Err(DeviceManagerError::CreateVhostUserNet(e)); 2417 } 2418 }, 2419 )); 2420 2421 ( 2422 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2423 vhost_user_net as Arc<Mutex<dyn Migratable>>, 2424 ) 2425 } else { 2426 let state = snapshot 2427 .map(|s| s.to_versioned_state()) 2428 .transpose() 2429 .map_err(DeviceManagerError::RestoreGetState)?; 2430 2431 let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap { 2432 Arc::new(Mutex::new( 2433 virtio_devices::Net::new( 2434 id.clone(), 2435 Some(tap_if_name), 2436 None, 2437 None, 2438 Some(net_cfg.mac), 2439 &mut net_cfg.host_mac, 2440 net_cfg.mtu, 2441 self.force_iommu | net_cfg.iommu, 2442 net_cfg.num_queues, 2443 net_cfg.queue_size, 2444 self.seccomp_action.clone(), 2445 net_cfg.rate_limiter_config, 2446 self.exit_evt 2447 .try_clone() 2448 .map_err(DeviceManagerError::EventFd)?, 2449 state, 2450 net_cfg.offload_tso, 2451 net_cfg.offload_ufo, 2452 net_cfg.offload_csum, 2453 ) 2454 .map_err(DeviceManagerError::CreateVirtioNet)?, 2455 )) 2456 } else if let Some(fds) = &net_cfg.fds { 2457 let net = virtio_devices::Net::from_tap_fds( 2458 id.clone(), 2459 fds, 2460 Some(net_cfg.mac), 2461 net_cfg.mtu, 2462 self.force_iommu | net_cfg.iommu, 2463 net_cfg.queue_size, 2464 self.seccomp_action.clone(), 2465 net_cfg.rate_limiter_config, 2466 self.exit_evt 2467 .try_clone() 2468 .map_err(DeviceManagerError::EventFd)?, 2469 state, 2470 net_cfg.offload_tso, 2471 net_cfg.offload_ufo, 2472 net_cfg.offload_csum, 2473 ) 2474 .map_err(DeviceManagerError::CreateVirtioNet)?; 2475 2476 // SAFETY: 'fds' are valid because TAP devices are created successfully 2477 unsafe { 2478 self.config.lock().unwrap().add_preserved_fds(fds.clone()); 2479 } 2480 2481 Arc::new(Mutex::new(net)) 2482 } else { 2483 Arc::new(Mutex::new( 2484 virtio_devices::Net::new( 2485 id.clone(), 2486 None, 2487 Some(net_cfg.ip), 2488 Some(net_cfg.mask), 2489 Some(net_cfg.mac), 2490 &mut net_cfg.host_mac, 2491 net_cfg.mtu, 2492 self.force_iommu | net_cfg.iommu, 2493 net_cfg.num_queues, 2494 net_cfg.queue_size, 2495 self.seccomp_action.clone(), 2496 net_cfg.rate_limiter_config, 2497 self.exit_evt 2498 .try_clone() 2499 .map_err(DeviceManagerError::EventFd)?, 2500 state, 2501 net_cfg.offload_tso, 2502 net_cfg.offload_ufo, 2503 net_cfg.offload_csum, 2504 ) 2505 .map_err(DeviceManagerError::CreateVirtioNet)?, 2506 )) 2507 }; 2508 2509 ( 2510 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2511 virtio_net as Arc<Mutex<dyn Migratable>>, 2512 ) 2513 }; 2514 2515 // Fill the device tree with a new node. In case of restore, we 2516 // know there is nothing to do, so we can simply override the 2517 // existing entry. 2518 self.device_tree 2519 .lock() 2520 .unwrap() 2521 .insert(id.clone(), device_node!(id, migratable_device)); 2522 2523 Ok(MetaVirtioDevice { 2524 virtio_device, 2525 iommu: net_cfg.iommu, 2526 id, 2527 pci_segment: net_cfg.pci_segment, 2528 dma_handler: None, 2529 }) 2530 } 2531 2532 /// Add virto-net and vhost-user-net devices 2533 fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2534 let mut devices = Vec::new(); 2535 let mut net_devices = self.config.lock().unwrap().net.clone(); 2536 if let Some(net_list_cfg) = &mut net_devices { 2537 for net_cfg in net_list_cfg.iter_mut() { 2538 devices.push(self.make_virtio_net_device(net_cfg)?); 2539 } 2540 } 2541 self.config.lock().unwrap().net = net_devices; 2542 2543 Ok(devices) 2544 } 2545 2546 fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2547 let mut devices = Vec::new(); 2548 2549 // Add virtio-rng if required 2550 let rng_config = self.config.lock().unwrap().rng.clone(); 2551 if let Some(rng_path) = rng_config.src.to_str() { 2552 info!("Creating virtio-rng device: {:?}", rng_config); 2553 let id = String::from(RNG_DEVICE_NAME); 2554 2555 let virtio_rng_device = Arc::new(Mutex::new( 2556 virtio_devices::Rng::new( 2557 id.clone(), 2558 rng_path, 2559 self.force_iommu | rng_config.iommu, 2560 self.seccomp_action.clone(), 2561 self.exit_evt 2562 .try_clone() 2563 .map_err(DeviceManagerError::EventFd)?, 2564 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 2565 .map_err(DeviceManagerError::RestoreGetState)?, 2566 ) 2567 .map_err(DeviceManagerError::CreateVirtioRng)?, 2568 )); 2569 devices.push(MetaVirtioDevice { 2570 virtio_device: Arc::clone(&virtio_rng_device) 2571 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2572 iommu: rng_config.iommu, 2573 id: id.clone(), 2574 pci_segment: 0, 2575 dma_handler: None, 2576 }); 2577 2578 // Fill the device tree with a new node. In case of restore, we 2579 // know there is nothing to do, so we can simply override the 2580 // existing entry. 2581 self.device_tree 2582 .lock() 2583 .unwrap() 2584 .insert(id.clone(), device_node!(id, virtio_rng_device)); 2585 } 2586 2587 Ok(devices) 2588 } 2589 2590 fn make_virtio_fs_device( 2591 &mut self, 2592 fs_cfg: &mut FsConfig, 2593 ) -> DeviceManagerResult<MetaVirtioDevice> { 2594 let id = if let Some(id) = &fs_cfg.id { 2595 id.clone() 2596 } else { 2597 let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?; 2598 fs_cfg.id = Some(id.clone()); 2599 id 2600 }; 2601 2602 info!("Creating virtio-fs device: {:?}", fs_cfg); 2603 2604 let mut node = device_node!(id); 2605 2606 if let Some(fs_socket) = fs_cfg.socket.to_str() { 2607 let virtio_fs_device = Arc::new(Mutex::new( 2608 virtio_devices::vhost_user::Fs::new( 2609 id.clone(), 2610 fs_socket, 2611 &fs_cfg.tag, 2612 fs_cfg.num_queues, 2613 fs_cfg.queue_size, 2614 None, 2615 self.seccomp_action.clone(), 2616 self.exit_evt 2617 .try_clone() 2618 .map_err(DeviceManagerError::EventFd)?, 2619 self.force_iommu, 2620 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 2621 .map_err(DeviceManagerError::RestoreGetState)?, 2622 ) 2623 .map_err(DeviceManagerError::CreateVirtioFs)?, 2624 )); 2625 2626 // Update the device tree with the migratable device. 2627 node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>); 2628 self.device_tree.lock().unwrap().insert(id.clone(), node); 2629 2630 Ok(MetaVirtioDevice { 2631 virtio_device: Arc::clone(&virtio_fs_device) 2632 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2633 iommu: false, 2634 id, 2635 pci_segment: fs_cfg.pci_segment, 2636 dma_handler: None, 2637 }) 2638 } else { 2639 Err(DeviceManagerError::NoVirtioFsSock) 2640 } 2641 } 2642 2643 fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2644 let mut devices = Vec::new(); 2645 2646 let mut fs_devices = self.config.lock().unwrap().fs.clone(); 2647 if let Some(fs_list_cfg) = &mut fs_devices { 2648 for fs_cfg in fs_list_cfg.iter_mut() { 2649 devices.push(self.make_virtio_fs_device(fs_cfg)?); 2650 } 2651 } 2652 self.config.lock().unwrap().fs = fs_devices; 2653 2654 Ok(devices) 2655 } 2656 2657 fn make_virtio_pmem_device( 2658 &mut self, 2659 pmem_cfg: &mut PmemConfig, 2660 ) -> DeviceManagerResult<MetaVirtioDevice> { 2661 let id = if let Some(id) = &pmem_cfg.id { 2662 id.clone() 2663 } else { 2664 let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?; 2665 pmem_cfg.id = Some(id.clone()); 2666 id 2667 }; 2668 2669 info!("Creating virtio-pmem device: {:?}", pmem_cfg); 2670 2671 let mut node = device_node!(id); 2672 2673 // Look for the id in the device tree. If it can be found, that means 2674 // the device is being restored, otherwise it's created from scratch. 2675 let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) { 2676 info!("Restoring virtio-pmem {} resources", id); 2677 2678 let mut region_range: Option<(u64, u64)> = None; 2679 for resource in node.resources.iter() { 2680 match resource { 2681 Resource::MmioAddressRange { base, size } => { 2682 if region_range.is_some() { 2683 return Err(DeviceManagerError::ResourceAlreadyExists); 2684 } 2685 2686 region_range = Some((*base, *size)); 2687 } 2688 _ => { 2689 error!("Unexpected resource {:?} for {}", resource, id); 2690 } 2691 } 2692 } 2693 2694 if region_range.is_none() { 2695 return Err(DeviceManagerError::MissingVirtioPmemResources); 2696 } 2697 2698 region_range 2699 } else { 2700 None 2701 }; 2702 2703 let (custom_flags, set_len) = if pmem_cfg.file.is_dir() { 2704 if pmem_cfg.size.is_none() { 2705 return Err(DeviceManagerError::PmemWithDirectorySizeMissing); 2706 } 2707 (O_TMPFILE, true) 2708 } else { 2709 (0, false) 2710 }; 2711 2712 let mut file = OpenOptions::new() 2713 .read(true) 2714 .write(!pmem_cfg.discard_writes) 2715 .custom_flags(custom_flags) 2716 .open(&pmem_cfg.file) 2717 .map_err(DeviceManagerError::PmemFileOpen)?; 2718 2719 let size = if let Some(size) = pmem_cfg.size { 2720 if set_len { 2721 file.set_len(size) 2722 .map_err(DeviceManagerError::PmemFileSetLen)?; 2723 } 2724 size 2725 } else { 2726 file.seek(SeekFrom::End(0)) 2727 .map_err(DeviceManagerError::PmemFileSetLen)? 2728 }; 2729 2730 if size % 0x20_0000 != 0 { 2731 return Err(DeviceManagerError::PmemSizeNotAligned); 2732 } 2733 2734 let (region_base, region_size) = if let Some((base, size)) = region_range { 2735 // The memory needs to be 2MiB aligned in order to support 2736 // hugepages. 2737 self.pci_segments[pmem_cfg.pci_segment as usize] 2738 .allocator 2739 .lock() 2740 .unwrap() 2741 .allocate( 2742 Some(GuestAddress(base)), 2743 size as GuestUsize, 2744 Some(0x0020_0000), 2745 ) 2746 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2747 2748 (base, size) 2749 } else { 2750 // The memory needs to be 2MiB aligned in order to support 2751 // hugepages. 2752 let base = self.pci_segments[pmem_cfg.pci_segment as usize] 2753 .allocator 2754 .lock() 2755 .unwrap() 2756 .allocate(None, size as GuestUsize, Some(0x0020_0000)) 2757 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2758 2759 (base.raw_value(), size) 2760 }; 2761 2762 let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?; 2763 let mmap_region = MmapRegion::build( 2764 Some(FileOffset::new(cloned_file, 0)), 2765 region_size as usize, 2766 PROT_READ | PROT_WRITE, 2767 MAP_NORESERVE 2768 | if pmem_cfg.discard_writes { 2769 MAP_PRIVATE 2770 } else { 2771 MAP_SHARED 2772 }, 2773 ) 2774 .map_err(DeviceManagerError::NewMmapRegion)?; 2775 let host_addr: u64 = mmap_region.as_ptr() as u64; 2776 2777 let mem_slot = self 2778 .memory_manager 2779 .lock() 2780 .unwrap() 2781 .create_userspace_mapping(region_base, region_size, host_addr, false, false, false) 2782 .map_err(DeviceManagerError::MemoryManager)?; 2783 2784 let mapping = virtio_devices::UserspaceMapping { 2785 host_addr, 2786 mem_slot, 2787 addr: GuestAddress(region_base), 2788 len: region_size, 2789 mergeable: false, 2790 }; 2791 2792 let virtio_pmem_device = Arc::new(Mutex::new( 2793 virtio_devices::Pmem::new( 2794 id.clone(), 2795 file, 2796 GuestAddress(region_base), 2797 mapping, 2798 mmap_region, 2799 self.force_iommu | pmem_cfg.iommu, 2800 self.seccomp_action.clone(), 2801 self.exit_evt 2802 .try_clone() 2803 .map_err(DeviceManagerError::EventFd)?, 2804 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 2805 .map_err(DeviceManagerError::RestoreGetState)?, 2806 ) 2807 .map_err(DeviceManagerError::CreateVirtioPmem)?, 2808 )); 2809 2810 // Update the device tree with correct resource information and with 2811 // the migratable device. 2812 node.resources.push(Resource::MmioAddressRange { 2813 base: region_base, 2814 size: region_size, 2815 }); 2816 node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>); 2817 self.device_tree.lock().unwrap().insert(id.clone(), node); 2818 2819 Ok(MetaVirtioDevice { 2820 virtio_device: Arc::clone(&virtio_pmem_device) 2821 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2822 iommu: pmem_cfg.iommu, 2823 id, 2824 pci_segment: pmem_cfg.pci_segment, 2825 dma_handler: None, 2826 }) 2827 } 2828 2829 fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2830 let mut devices = Vec::new(); 2831 // Add virtio-pmem if required 2832 let mut pmem_devices = self.config.lock().unwrap().pmem.clone(); 2833 if let Some(pmem_list_cfg) = &mut pmem_devices { 2834 for pmem_cfg in pmem_list_cfg.iter_mut() { 2835 devices.push(self.make_virtio_pmem_device(pmem_cfg)?); 2836 } 2837 } 2838 self.config.lock().unwrap().pmem = pmem_devices; 2839 2840 Ok(devices) 2841 } 2842 2843 fn make_virtio_vsock_device( 2844 &mut self, 2845 vsock_cfg: &mut VsockConfig, 2846 ) -> DeviceManagerResult<MetaVirtioDevice> { 2847 let id = if let Some(id) = &vsock_cfg.id { 2848 id.clone() 2849 } else { 2850 let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?; 2851 vsock_cfg.id = Some(id.clone()); 2852 id 2853 }; 2854 2855 info!("Creating virtio-vsock device: {:?}", vsock_cfg); 2856 2857 let socket_path = vsock_cfg 2858 .socket 2859 .to_str() 2860 .ok_or(DeviceManagerError::CreateVsockConvertPath)?; 2861 let backend = 2862 virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string()) 2863 .map_err(DeviceManagerError::CreateVsockBackend)?; 2864 2865 let vsock_device = Arc::new(Mutex::new( 2866 virtio_devices::Vsock::new( 2867 id.clone(), 2868 vsock_cfg.cid, 2869 vsock_cfg.socket.clone(), 2870 backend, 2871 self.force_iommu | vsock_cfg.iommu, 2872 self.seccomp_action.clone(), 2873 self.exit_evt 2874 .try_clone() 2875 .map_err(DeviceManagerError::EventFd)?, 2876 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 2877 .map_err(DeviceManagerError::RestoreGetState)?, 2878 ) 2879 .map_err(DeviceManagerError::CreateVirtioVsock)?, 2880 )); 2881 2882 // Fill the device tree with a new node. In case of restore, we 2883 // know there is nothing to do, so we can simply override the 2884 // existing entry. 2885 self.device_tree 2886 .lock() 2887 .unwrap() 2888 .insert(id.clone(), device_node!(id, vsock_device)); 2889 2890 Ok(MetaVirtioDevice { 2891 virtio_device: Arc::clone(&vsock_device) 2892 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2893 iommu: vsock_cfg.iommu, 2894 id, 2895 pci_segment: vsock_cfg.pci_segment, 2896 dma_handler: None, 2897 }) 2898 } 2899 2900 fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2901 let mut devices = Vec::new(); 2902 2903 let mut vsock = self.config.lock().unwrap().vsock.clone(); 2904 if let Some(ref mut vsock_cfg) = &mut vsock { 2905 devices.push(self.make_virtio_vsock_device(vsock_cfg)?); 2906 } 2907 self.config.lock().unwrap().vsock = vsock; 2908 2909 Ok(devices) 2910 } 2911 2912 fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2913 let mut devices = Vec::new(); 2914 2915 let mm = self.memory_manager.clone(); 2916 let mut mm = mm.lock().unwrap(); 2917 for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() { 2918 if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() { 2919 info!("Creating virtio-mem device: id = {}", memory_zone_id); 2920 2921 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id) 2922 .map(|i| i as u16); 2923 2924 let virtio_mem_device = Arc::new(Mutex::new( 2925 virtio_devices::Mem::new( 2926 memory_zone_id.clone(), 2927 virtio_mem_zone.region(), 2928 self.seccomp_action.clone(), 2929 node_id, 2930 virtio_mem_zone.hotplugged_size(), 2931 virtio_mem_zone.hugepages(), 2932 self.exit_evt 2933 .try_clone() 2934 .map_err(DeviceManagerError::EventFd)?, 2935 virtio_mem_zone.blocks_state().clone(), 2936 versioned_state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str()) 2937 .map_err(DeviceManagerError::RestoreGetState)?, 2938 ) 2939 .map_err(DeviceManagerError::CreateVirtioMem)?, 2940 )); 2941 2942 // Update the virtio-mem zone so that it has a handle onto the 2943 // virtio-mem device, which will be used for triggering a resize 2944 // if needed. 2945 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device)); 2946 2947 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device)); 2948 2949 devices.push(MetaVirtioDevice { 2950 virtio_device: Arc::clone(&virtio_mem_device) 2951 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2952 iommu: false, 2953 id: memory_zone_id.clone(), 2954 pci_segment: 0, 2955 dma_handler: None, 2956 }); 2957 2958 // Fill the device tree with a new node. In case of restore, we 2959 // know there is nothing to do, so we can simply override the 2960 // existing entry. 2961 self.device_tree.lock().unwrap().insert( 2962 memory_zone_id.clone(), 2963 device_node!(memory_zone_id, virtio_mem_device), 2964 ); 2965 } 2966 } 2967 2968 Ok(devices) 2969 } 2970 2971 fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2972 let mut devices = Vec::new(); 2973 2974 if let Some(balloon_config) = &self.config.lock().unwrap().balloon { 2975 let id = String::from(BALLOON_DEVICE_NAME); 2976 info!("Creating virtio-balloon device: id = {}", id); 2977 2978 let virtio_balloon_device = Arc::new(Mutex::new( 2979 virtio_devices::Balloon::new( 2980 id.clone(), 2981 balloon_config.size, 2982 balloon_config.deflate_on_oom, 2983 balloon_config.free_page_reporting, 2984 self.seccomp_action.clone(), 2985 self.exit_evt 2986 .try_clone() 2987 .map_err(DeviceManagerError::EventFd)?, 2988 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 2989 .map_err(DeviceManagerError::RestoreGetState)?, 2990 ) 2991 .map_err(DeviceManagerError::CreateVirtioBalloon)?, 2992 )); 2993 2994 self.balloon = Some(virtio_balloon_device.clone()); 2995 2996 devices.push(MetaVirtioDevice { 2997 virtio_device: Arc::clone(&virtio_balloon_device) 2998 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2999 iommu: false, 3000 id: id.clone(), 3001 pci_segment: 0, 3002 dma_handler: None, 3003 }); 3004 3005 self.device_tree 3006 .lock() 3007 .unwrap() 3008 .insert(id.clone(), device_node!(id, virtio_balloon_device)); 3009 } 3010 3011 Ok(devices) 3012 } 3013 3014 fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3015 let mut devices = Vec::new(); 3016 3017 if !self.config.lock().unwrap().watchdog { 3018 return Ok(devices); 3019 } 3020 3021 let id = String::from(WATCHDOG_DEVICE_NAME); 3022 info!("Creating virtio-watchdog device: id = {}", id); 3023 3024 let virtio_watchdog_device = Arc::new(Mutex::new( 3025 virtio_devices::Watchdog::new( 3026 id.clone(), 3027 self.reset_evt.try_clone().unwrap(), 3028 self.seccomp_action.clone(), 3029 self.exit_evt 3030 .try_clone() 3031 .map_err(DeviceManagerError::EventFd)?, 3032 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3033 .map_err(DeviceManagerError::RestoreGetState)?, 3034 ) 3035 .map_err(DeviceManagerError::CreateVirtioWatchdog)?, 3036 )); 3037 devices.push(MetaVirtioDevice { 3038 virtio_device: Arc::clone(&virtio_watchdog_device) 3039 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3040 iommu: false, 3041 id: id.clone(), 3042 pci_segment: 0, 3043 dma_handler: None, 3044 }); 3045 3046 self.device_tree 3047 .lock() 3048 .unwrap() 3049 .insert(id.clone(), device_node!(id, virtio_watchdog_device)); 3050 3051 Ok(devices) 3052 } 3053 3054 fn make_vdpa_device( 3055 &mut self, 3056 vdpa_cfg: &mut VdpaConfig, 3057 ) -> DeviceManagerResult<MetaVirtioDevice> { 3058 let id = if let Some(id) = &vdpa_cfg.id { 3059 id.clone() 3060 } else { 3061 let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?; 3062 vdpa_cfg.id = Some(id.clone()); 3063 id 3064 }; 3065 3066 info!("Creating vDPA device: {:?}", vdpa_cfg); 3067 3068 let device_path = vdpa_cfg 3069 .path 3070 .to_str() 3071 .ok_or(DeviceManagerError::CreateVdpaConvertPath)?; 3072 3073 let vdpa_device = Arc::new(Mutex::new( 3074 virtio_devices::Vdpa::new( 3075 id.clone(), 3076 device_path, 3077 self.memory_manager.lock().unwrap().guest_memory(), 3078 vdpa_cfg.num_queues as u16, 3079 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3080 .map_err(DeviceManagerError::RestoreGetState)?, 3081 ) 3082 .map_err(DeviceManagerError::CreateVdpa)?, 3083 )); 3084 3085 // Create the DMA handler that is required by the vDPA device 3086 let vdpa_mapping = Arc::new(VdpaDmaMapping::new( 3087 Arc::clone(&vdpa_device), 3088 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3089 )); 3090 3091 self.device_tree 3092 .lock() 3093 .unwrap() 3094 .insert(id.clone(), device_node!(id, vdpa_device)); 3095 3096 Ok(MetaVirtioDevice { 3097 virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3098 iommu: vdpa_cfg.iommu, 3099 id, 3100 pci_segment: vdpa_cfg.pci_segment, 3101 dma_handler: Some(vdpa_mapping), 3102 }) 3103 } 3104 3105 fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3106 let mut devices = Vec::new(); 3107 // Add vdpa if required 3108 let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone(); 3109 if let Some(vdpa_list_cfg) = &mut vdpa_devices { 3110 for vdpa_cfg in vdpa_list_cfg.iter_mut() { 3111 devices.push(self.make_vdpa_device(vdpa_cfg)?); 3112 } 3113 } 3114 self.config.lock().unwrap().vdpa = vdpa_devices; 3115 3116 Ok(devices) 3117 } 3118 3119 fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> { 3120 let start_id = self.device_id_cnt; 3121 loop { 3122 // Generate the temporary name. 3123 let name = format!("{}{}", prefix, self.device_id_cnt); 3124 // Increment the counter. 3125 self.device_id_cnt += Wrapping(1); 3126 // Check if the name is already in use. 3127 if !self.boot_id_list.contains(&name) 3128 && !self.device_tree.lock().unwrap().contains_key(&name) 3129 { 3130 return Ok(name); 3131 } 3132 3133 if self.device_id_cnt == start_id { 3134 // We went through a full loop and there's nothing else we can 3135 // do. 3136 break; 3137 } 3138 } 3139 Err(DeviceManagerError::NoAvailableDeviceName) 3140 } 3141 3142 fn add_passthrough_device( 3143 &mut self, 3144 device_cfg: &mut DeviceConfig, 3145 ) -> DeviceManagerResult<(PciBdf, String)> { 3146 // If the passthrough device has not been created yet, it is created 3147 // here and stored in the DeviceManager structure for future needs. 3148 if self.passthrough_device.is_none() { 3149 self.passthrough_device = Some( 3150 self.address_manager 3151 .vm 3152 .create_passthrough_device() 3153 .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?, 3154 ); 3155 } 3156 3157 self.add_vfio_device(device_cfg) 3158 } 3159 3160 fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> { 3161 let passthrough_device = self 3162 .passthrough_device 3163 .as_ref() 3164 .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?; 3165 3166 let dup = passthrough_device 3167 .try_clone() 3168 .map_err(DeviceManagerError::VfioCreate)?; 3169 3170 Ok(Arc::new( 3171 VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?, 3172 )) 3173 } 3174 3175 fn add_vfio_device( 3176 &mut self, 3177 device_cfg: &mut DeviceConfig, 3178 ) -> DeviceManagerResult<(PciBdf, String)> { 3179 let vfio_name = if let Some(id) = &device_cfg.id { 3180 id.clone() 3181 } else { 3182 let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?; 3183 device_cfg.id = Some(id.clone()); 3184 id 3185 }; 3186 3187 let (pci_segment_id, pci_device_bdf, resources) = 3188 self.pci_resources(&vfio_name, device_cfg.pci_segment)?; 3189 3190 let mut needs_dma_mapping = false; 3191 3192 // Here we create a new VFIO container for two reasons. Either this is 3193 // the first VFIO device, meaning we need a new VFIO container, which 3194 // will be shared with other VFIO devices. Or the new VFIO device is 3195 // attached to a vIOMMU, meaning we must create a dedicated VFIO 3196 // container. In the vIOMMU use case, we can't let all devices under 3197 // the same VFIO container since we couldn't map/unmap memory for each 3198 // device. That's simply because the map/unmap operations happen at the 3199 // VFIO container level. 3200 let vfio_container = if device_cfg.iommu { 3201 let vfio_container = self.create_vfio_container()?; 3202 3203 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3204 Arc::clone(&vfio_container), 3205 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3206 )); 3207 3208 if let Some(iommu) = &self.iommu_device { 3209 iommu 3210 .lock() 3211 .unwrap() 3212 .add_external_mapping(pci_device_bdf.into(), vfio_mapping); 3213 } else { 3214 return Err(DeviceManagerError::MissingVirtualIommu); 3215 } 3216 3217 vfio_container 3218 } else if let Some(vfio_container) = &self.vfio_container { 3219 Arc::clone(vfio_container) 3220 } else { 3221 let vfio_container = self.create_vfio_container()?; 3222 needs_dma_mapping = true; 3223 self.vfio_container = Some(Arc::clone(&vfio_container)); 3224 3225 vfio_container 3226 }; 3227 3228 let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container)) 3229 .map_err(DeviceManagerError::VfioCreate)?; 3230 3231 if needs_dma_mapping { 3232 // Register DMA mapping in IOMMU. 3233 // Do not register virtio-mem regions, as they are handled directly by 3234 // virtio-mem device itself. 3235 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3236 for region in zone.regions() { 3237 vfio_container 3238 .vfio_dma_map( 3239 region.start_addr().raw_value(), 3240 region.len(), 3241 region.as_ptr() as u64, 3242 ) 3243 .map_err(DeviceManagerError::VfioDmaMap)?; 3244 } 3245 } 3246 3247 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3248 Arc::clone(&vfio_container), 3249 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3250 )); 3251 3252 for virtio_mem_device in self.virtio_mem_devices.iter() { 3253 virtio_mem_device 3254 .lock() 3255 .unwrap() 3256 .add_dma_mapping_handler( 3257 VirtioMemMappingSource::Container, 3258 vfio_mapping.clone(), 3259 ) 3260 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3261 } 3262 } 3263 3264 let legacy_interrupt_group = 3265 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3266 Some( 3267 legacy_interrupt_manager 3268 .create_group(LegacyIrqGroupConfig { 3269 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3270 [pci_device_bdf.device() as usize] 3271 as InterruptIndex, 3272 }) 3273 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3274 ) 3275 } else { 3276 None 3277 }; 3278 3279 let memory_manager = self.memory_manager.clone(); 3280 3281 let vfio_pci_device = VfioPciDevice::new( 3282 vfio_name.clone(), 3283 &self.address_manager.vm, 3284 vfio_device, 3285 vfio_container, 3286 self.msi_interrupt_manager.clone(), 3287 legacy_interrupt_group, 3288 device_cfg.iommu, 3289 pci_device_bdf, 3290 Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()), 3291 vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()), 3292 ) 3293 .map_err(DeviceManagerError::VfioPciCreate)?; 3294 3295 let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device)); 3296 3297 let new_resources = self.add_pci_device( 3298 vfio_pci_device.clone(), 3299 vfio_pci_device.clone(), 3300 pci_segment_id, 3301 pci_device_bdf, 3302 resources, 3303 )?; 3304 3305 vfio_pci_device 3306 .lock() 3307 .unwrap() 3308 .map_mmio_regions() 3309 .map_err(DeviceManagerError::VfioMapRegion)?; 3310 3311 let mut node = device_node!(vfio_name, vfio_pci_device); 3312 3313 // Update the device tree with correct resource information. 3314 node.resources = new_resources; 3315 node.pci_bdf = Some(pci_device_bdf); 3316 node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device)); 3317 3318 self.device_tree 3319 .lock() 3320 .unwrap() 3321 .insert(vfio_name.clone(), node); 3322 3323 Ok((pci_device_bdf, vfio_name)) 3324 } 3325 3326 fn add_pci_device( 3327 &mut self, 3328 bus_device: Arc<Mutex<dyn BusDevice>>, 3329 pci_device: Arc<Mutex<dyn PciDevice>>, 3330 segment_id: u16, 3331 bdf: PciBdf, 3332 resources: Option<Vec<Resource>>, 3333 ) -> DeviceManagerResult<Vec<Resource>> { 3334 let bars = pci_device 3335 .lock() 3336 .unwrap() 3337 .allocate_bars( 3338 &self.address_manager.allocator, 3339 &mut self.pci_segments[segment_id as usize] 3340 .allocator 3341 .lock() 3342 .unwrap(), 3343 resources, 3344 ) 3345 .map_err(DeviceManagerError::AllocateBars)?; 3346 3347 let mut pci_bus = self.pci_segments[segment_id as usize] 3348 .pci_bus 3349 .lock() 3350 .unwrap(); 3351 3352 pci_bus 3353 .add_device(bdf.device() as u32, pci_device) 3354 .map_err(DeviceManagerError::AddPciDevice)?; 3355 3356 self.bus_devices.push(Arc::clone(&bus_device)); 3357 3358 pci_bus 3359 .register_mapping( 3360 bus_device, 3361 #[cfg(target_arch = "x86_64")] 3362 self.address_manager.io_bus.as_ref(), 3363 self.address_manager.mmio_bus.as_ref(), 3364 bars.clone(), 3365 ) 3366 .map_err(DeviceManagerError::AddPciDevice)?; 3367 3368 let mut new_resources = Vec::new(); 3369 for bar in bars { 3370 new_resources.push(Resource::PciBar { 3371 index: bar.idx(), 3372 base: bar.addr(), 3373 size: bar.size(), 3374 type_: bar.region_type().into(), 3375 prefetchable: bar.prefetchable().into(), 3376 }); 3377 } 3378 3379 Ok(new_resources) 3380 } 3381 3382 fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3383 let mut iommu_attached_device_ids = Vec::new(); 3384 let mut devices = self.config.lock().unwrap().devices.clone(); 3385 3386 if let Some(device_list_cfg) = &mut devices { 3387 for device_cfg in device_list_cfg.iter_mut() { 3388 let (device_id, _) = self.add_passthrough_device(device_cfg)?; 3389 if device_cfg.iommu && self.iommu_device.is_some() { 3390 iommu_attached_device_ids.push(device_id); 3391 } 3392 } 3393 } 3394 3395 // Update the list of devices 3396 self.config.lock().unwrap().devices = devices; 3397 3398 Ok(iommu_attached_device_ids) 3399 } 3400 3401 fn add_vfio_user_device( 3402 &mut self, 3403 device_cfg: &mut UserDeviceConfig, 3404 ) -> DeviceManagerResult<(PciBdf, String)> { 3405 let vfio_user_name = if let Some(id) = &device_cfg.id { 3406 id.clone() 3407 } else { 3408 let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?; 3409 device_cfg.id = Some(id.clone()); 3410 id 3411 }; 3412 3413 let (pci_segment_id, pci_device_bdf, resources) = 3414 self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?; 3415 3416 let legacy_interrupt_group = 3417 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3418 Some( 3419 legacy_interrupt_manager 3420 .create_group(LegacyIrqGroupConfig { 3421 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3422 [pci_device_bdf.device() as usize] 3423 as InterruptIndex, 3424 }) 3425 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3426 ) 3427 } else { 3428 None 3429 }; 3430 3431 let client = Arc::new(Mutex::new( 3432 vfio_user::Client::new(&device_cfg.socket) 3433 .map_err(DeviceManagerError::VfioUserCreateClient)?, 3434 )); 3435 3436 let memory_manager = self.memory_manager.clone(); 3437 3438 let mut vfio_user_pci_device = VfioUserPciDevice::new( 3439 vfio_user_name.clone(), 3440 &self.address_manager.vm, 3441 client.clone(), 3442 self.msi_interrupt_manager.clone(), 3443 legacy_interrupt_group, 3444 pci_device_bdf, 3445 Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()), 3446 vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()), 3447 ) 3448 .map_err(DeviceManagerError::VfioUserCreate)?; 3449 3450 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3451 let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory))); 3452 for virtio_mem_device in self.virtio_mem_devices.iter() { 3453 virtio_mem_device 3454 .lock() 3455 .unwrap() 3456 .add_dma_mapping_handler( 3457 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3458 vfio_user_mapping.clone(), 3459 ) 3460 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3461 } 3462 3463 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3464 for region in zone.regions() { 3465 vfio_user_pci_device 3466 .dma_map(region) 3467 .map_err(DeviceManagerError::VfioUserDmaMap)?; 3468 } 3469 } 3470 3471 let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device)); 3472 3473 let new_resources = self.add_pci_device( 3474 vfio_user_pci_device.clone(), 3475 vfio_user_pci_device.clone(), 3476 pci_segment_id, 3477 pci_device_bdf, 3478 resources, 3479 )?; 3480 3481 // Note it is required to call 'add_pci_device()' in advance to have the list of 3482 // mmio regions provisioned correctly 3483 vfio_user_pci_device 3484 .lock() 3485 .unwrap() 3486 .map_mmio_regions() 3487 .map_err(DeviceManagerError::VfioUserMapRegion)?; 3488 3489 let mut node = device_node!(vfio_user_name, vfio_user_pci_device); 3490 3491 // Update the device tree with correct resource information. 3492 node.resources = new_resources; 3493 node.pci_bdf = Some(pci_device_bdf); 3494 node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device)); 3495 3496 self.device_tree 3497 .lock() 3498 .unwrap() 3499 .insert(vfio_user_name.clone(), node); 3500 3501 Ok((pci_device_bdf, vfio_user_name)) 3502 } 3503 3504 fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3505 let mut user_devices = self.config.lock().unwrap().user_devices.clone(); 3506 3507 if let Some(device_list_cfg) = &mut user_devices { 3508 for device_cfg in device_list_cfg.iter_mut() { 3509 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?; 3510 } 3511 } 3512 3513 // Update the list of devices 3514 self.config.lock().unwrap().user_devices = user_devices; 3515 3516 Ok(vec![]) 3517 } 3518 3519 fn add_virtio_pci_device( 3520 &mut self, 3521 virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3522 iommu_mapping: &Option<Arc<IommuMapping>>, 3523 virtio_device_id: String, 3524 pci_segment_id: u16, 3525 dma_handler: Option<Arc<dyn ExternalDmaMapping>>, 3526 ) -> DeviceManagerResult<PciBdf> { 3527 let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}"); 3528 3529 // Add the new virtio-pci node to the device tree. 3530 let mut node = device_node!(id); 3531 node.children = vec![virtio_device_id.clone()]; 3532 3533 let (pci_segment_id, pci_device_bdf, resources) = 3534 self.pci_resources(&id, pci_segment_id)?; 3535 3536 // Update the existing virtio node by setting the parent. 3537 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) { 3538 node.parent = Some(id.clone()); 3539 } else { 3540 return Err(DeviceManagerError::MissingNode); 3541 } 3542 3543 // Allows support for one MSI-X vector per queue. It also adds 1 3544 // as we need to take into account the dedicated vector to notify 3545 // about a virtio config change. 3546 let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16; 3547 3548 // Create the AccessPlatform trait from the implementation IommuMapping. 3549 // This will provide address translation for any virtio device sitting 3550 // behind a vIOMMU. 3551 let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping 3552 { 3553 Some(Arc::new(AccessPlatformMapping::new( 3554 pci_device_bdf.into(), 3555 mapping.clone(), 3556 ))) 3557 } else { 3558 None 3559 }; 3560 3561 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3562 3563 // Map DMA ranges if a DMA handler is available and if the device is 3564 // not attached to a virtual IOMMU. 3565 if let Some(dma_handler) = &dma_handler { 3566 if iommu_mapping.is_some() { 3567 if let Some(iommu) = &self.iommu_device { 3568 iommu 3569 .lock() 3570 .unwrap() 3571 .add_external_mapping(pci_device_bdf.into(), dma_handler.clone()); 3572 } else { 3573 return Err(DeviceManagerError::MissingVirtualIommu); 3574 } 3575 } else { 3576 // Let every virtio-mem device handle the DMA map/unmap through the 3577 // DMA handler provided. 3578 for virtio_mem_device in self.virtio_mem_devices.iter() { 3579 virtio_mem_device 3580 .lock() 3581 .unwrap() 3582 .add_dma_mapping_handler( 3583 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3584 dma_handler.clone(), 3585 ) 3586 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3587 } 3588 3589 // Do not register virtio-mem regions, as they are handled directly by 3590 // virtio-mem devices. 3591 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3592 for region in zone.regions() { 3593 let gpa = region.start_addr().0; 3594 let size = region.len(); 3595 dma_handler 3596 .map(gpa, gpa, size) 3597 .map_err(DeviceManagerError::VirtioDmaMap)?; 3598 } 3599 } 3600 } 3601 } 3602 3603 let device_type = virtio_device.lock().unwrap().device_type(); 3604 let virtio_pci_device = Arc::new(Mutex::new( 3605 VirtioPciDevice::new( 3606 id.clone(), 3607 memory, 3608 virtio_device, 3609 msix_num, 3610 access_platform, 3611 &self.msi_interrupt_manager, 3612 pci_device_bdf.into(), 3613 self.activate_evt 3614 .try_clone() 3615 .map_err(DeviceManagerError::EventFd)?, 3616 // All device types *except* virtio block devices should be allocated a 64-bit bar 3617 // The block devices should be given a 32-bit BAR so that they are easily accessible 3618 // to firmware without requiring excessive identity mapping. 3619 // The exception being if not on the default PCI segment. 3620 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32, 3621 dma_handler, 3622 self.pending_activations.clone(), 3623 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()), 3624 ) 3625 .map_err(DeviceManagerError::VirtioDevice)?, 3626 )); 3627 3628 let new_resources = self.add_pci_device( 3629 virtio_pci_device.clone(), 3630 virtio_pci_device.clone(), 3631 pci_segment_id, 3632 pci_device_bdf, 3633 resources, 3634 )?; 3635 3636 let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr(); 3637 for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) { 3638 let io_addr = IoEventAddress::Mmio(addr); 3639 self.address_manager 3640 .vm 3641 .register_ioevent(event, &io_addr, None) 3642 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?; 3643 } 3644 3645 // Update the device tree with correct resource information. 3646 node.resources = new_resources; 3647 node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>); 3648 node.pci_bdf = Some(pci_device_bdf); 3649 node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device)); 3650 self.device_tree.lock().unwrap().insert(id, node); 3651 3652 Ok(pci_device_bdf) 3653 } 3654 3655 fn add_pvpanic_device( 3656 &mut self, 3657 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> { 3658 let id = String::from(PVPANIC_DEVICE_NAME); 3659 let pci_segment_id = 0x0_u16; 3660 3661 info!("Creating pvpanic device {}", id); 3662 3663 let (pci_segment_id, pci_device_bdf, resources) = 3664 self.pci_resources(&id, pci_segment_id)?; 3665 3666 let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); 3667 3668 let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot) 3669 .map_err(DeviceManagerError::PvPanicCreate)?; 3670 3671 let pvpanic_device = Arc::new(Mutex::new(pvpanic_device)); 3672 3673 let new_resources = self.add_pci_device( 3674 pvpanic_device.clone(), 3675 pvpanic_device.clone(), 3676 pci_segment_id, 3677 pci_device_bdf, 3678 resources, 3679 )?; 3680 3681 let mut node = device_node!(id, pvpanic_device); 3682 3683 node.resources = new_resources; 3684 node.pci_bdf = Some(pci_device_bdf); 3685 node.pci_device_handle = None; 3686 3687 self.device_tree.lock().unwrap().insert(id, node); 3688 3689 Ok(Some(pvpanic_device)) 3690 } 3691 3692 fn pci_resources( 3693 &self, 3694 id: &str, 3695 pci_segment_id: u16, 3696 ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> { 3697 // Look for the id in the device tree. If it can be found, that means 3698 // the device is being restored, otherwise it's created from scratch. 3699 Ok( 3700 if let Some(node) = self.device_tree.lock().unwrap().get(id) { 3701 info!("Restoring virtio-pci {} resources", id); 3702 let pci_device_bdf: PciBdf = node 3703 .pci_bdf 3704 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3705 let pci_segment_id = pci_device_bdf.segment(); 3706 3707 self.pci_segments[pci_segment_id as usize] 3708 .pci_bus 3709 .lock() 3710 .unwrap() 3711 .get_device_id(pci_device_bdf.device() as usize) 3712 .map_err(DeviceManagerError::GetPciDeviceId)?; 3713 3714 (pci_segment_id, pci_device_bdf, Some(node.resources.clone())) 3715 } else { 3716 let pci_device_bdf = 3717 self.pci_segments[pci_segment_id as usize].next_device_bdf()?; 3718 3719 (pci_segment_id, pci_device_bdf, None) 3720 }, 3721 ) 3722 } 3723 3724 #[cfg(target_arch = "x86_64")] 3725 pub fn io_bus(&self) -> &Arc<Bus> { 3726 &self.address_manager.io_bus 3727 } 3728 3729 pub fn mmio_bus(&self) -> &Arc<Bus> { 3730 &self.address_manager.mmio_bus 3731 } 3732 3733 pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> { 3734 &self.address_manager.allocator 3735 } 3736 3737 pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> { 3738 self.interrupt_controller 3739 .as_ref() 3740 .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>) 3741 } 3742 3743 pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> { 3744 &self.pci_segments 3745 } 3746 3747 pub fn console(&self) -> &Arc<Console> { 3748 &self.console 3749 } 3750 3751 #[cfg(target_arch = "aarch64")] 3752 pub fn cmdline_additions(&self) -> &[String] { 3753 self.cmdline_additions.as_slice() 3754 } 3755 3756 pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> { 3757 for handle in self.virtio_devices.iter() { 3758 handle 3759 .virtio_device 3760 .lock() 3761 .unwrap() 3762 .add_memory_region(new_region) 3763 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?; 3764 3765 if let Some(dma_handler) = &handle.dma_handler { 3766 if !handle.iommu { 3767 let gpa = new_region.start_addr().0; 3768 let size = new_region.len(); 3769 dma_handler 3770 .map(gpa, gpa, size) 3771 .map_err(DeviceManagerError::VirtioDmaMap)?; 3772 } 3773 } 3774 } 3775 3776 // Take care of updating the memory for VFIO PCI devices. 3777 if let Some(vfio_container) = &self.vfio_container { 3778 vfio_container 3779 .vfio_dma_map( 3780 new_region.start_addr().raw_value(), 3781 new_region.len(), 3782 new_region.as_ptr() as u64, 3783 ) 3784 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?; 3785 } 3786 3787 // Take care of updating the memory for vfio-user devices. 3788 { 3789 let device_tree = self.device_tree.lock().unwrap(); 3790 for pci_device_node in device_tree.pci_devices() { 3791 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node 3792 .pci_device_handle 3793 .as_ref() 3794 .ok_or(DeviceManagerError::MissingPciDevice)? 3795 { 3796 vfio_user_pci_device 3797 .lock() 3798 .unwrap() 3799 .dma_map(new_region) 3800 .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?; 3801 } 3802 } 3803 } 3804 3805 Ok(()) 3806 } 3807 3808 pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> { 3809 for mut activator in self.pending_activations.lock().unwrap().drain(..) { 3810 activator 3811 .activate() 3812 .map_err(DeviceManagerError::VirtioActivate)?; 3813 } 3814 Ok(()) 3815 } 3816 3817 pub fn notify_hotplug( 3818 &self, 3819 _notification_type: AcpiNotificationFlags, 3820 ) -> DeviceManagerResult<()> { 3821 return self 3822 .ged_notification_device 3823 .as_ref() 3824 .unwrap() 3825 .lock() 3826 .unwrap() 3827 .notify(_notification_type) 3828 .map_err(DeviceManagerError::HotPlugNotification); 3829 } 3830 3831 pub fn add_device( 3832 &mut self, 3833 device_cfg: &mut DeviceConfig, 3834 ) -> DeviceManagerResult<PciDeviceInfo> { 3835 self.validate_identifier(&device_cfg.id)?; 3836 3837 if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) { 3838 return Err(DeviceManagerError::InvalidIommuHotplug); 3839 } 3840 3841 let (bdf, device_name) = self.add_passthrough_device(device_cfg)?; 3842 3843 // Update the PCIU bitmap 3844 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3845 3846 Ok(PciDeviceInfo { 3847 id: device_name, 3848 bdf, 3849 }) 3850 } 3851 3852 pub fn add_user_device( 3853 &mut self, 3854 device_cfg: &mut UserDeviceConfig, 3855 ) -> DeviceManagerResult<PciDeviceInfo> { 3856 self.validate_identifier(&device_cfg.id)?; 3857 3858 let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?; 3859 3860 // Update the PCIU bitmap 3861 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3862 3863 Ok(PciDeviceInfo { 3864 id: device_name, 3865 bdf, 3866 }) 3867 } 3868 3869 pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> { 3870 // The node can be directly a PCI node in case the 'id' refers to a 3871 // VFIO device or a virtio-pci one. 3872 // In case the 'id' refers to a virtio device, we must find the PCI 3873 // node by looking at the parent. 3874 let device_tree = self.device_tree.lock().unwrap(); 3875 let node = device_tree 3876 .get(&id) 3877 .ok_or(DeviceManagerError::UnknownDeviceId(id))?; 3878 3879 let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() { 3880 node 3881 } else { 3882 let parent = node 3883 .parent 3884 .as_ref() 3885 .ok_or(DeviceManagerError::MissingNode)?; 3886 device_tree 3887 .get(parent) 3888 .ok_or(DeviceManagerError::MissingNode)? 3889 }; 3890 3891 let pci_device_bdf: PciBdf = pci_device_node 3892 .pci_bdf 3893 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3894 let pci_segment_id = pci_device_bdf.segment(); 3895 3896 let pci_device_handle = pci_device_node 3897 .pci_device_handle 3898 .as_ref() 3899 .ok_or(DeviceManagerError::MissingPciDevice)?; 3900 #[allow(irrefutable_let_patterns)] 3901 if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle { 3902 let device_type = VirtioDeviceType::from( 3903 virtio_pci_device 3904 .lock() 3905 .unwrap() 3906 .virtio_device() 3907 .lock() 3908 .unwrap() 3909 .device_type(), 3910 ); 3911 match device_type { 3912 VirtioDeviceType::Net 3913 | VirtioDeviceType::Block 3914 | VirtioDeviceType::Pmem 3915 | VirtioDeviceType::Fs 3916 | VirtioDeviceType::Vsock => {} 3917 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)), 3918 } 3919 } 3920 3921 // Update the PCID bitmap 3922 self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device(); 3923 3924 Ok(()) 3925 } 3926 3927 pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> { 3928 info!( 3929 "Ejecting device_id = {} on segment_id={}", 3930 device_id, pci_segment_id 3931 ); 3932 3933 // Convert the device ID into the corresponding b/d/f. 3934 let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0); 3935 3936 // Give the PCI device ID back to the PCI bus. 3937 self.pci_segments[pci_segment_id as usize] 3938 .pci_bus 3939 .lock() 3940 .unwrap() 3941 .put_device_id(device_id as usize) 3942 .map_err(DeviceManagerError::PutPciDeviceId)?; 3943 3944 // Remove the device from the device tree along with its children. 3945 let mut device_tree = self.device_tree.lock().unwrap(); 3946 let pci_device_node = device_tree 3947 .remove_node_by_pci_bdf(pci_device_bdf) 3948 .ok_or(DeviceManagerError::MissingPciDevice)?; 3949 3950 // For VFIO and vfio-user the PCI device id is the id. 3951 // For virtio we overwrite it later as we want the id of the 3952 // underlying device. 3953 let mut id = pci_device_node.id; 3954 let pci_device_handle = pci_device_node 3955 .pci_device_handle 3956 .ok_or(DeviceManagerError::MissingPciDevice)?; 3957 if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) { 3958 // The virtio-pci device has a single child 3959 if !pci_device_node.children.is_empty() { 3960 assert_eq!(pci_device_node.children.len(), 1); 3961 let child_id = &pci_device_node.children[0]; 3962 id = child_id.clone(); 3963 } 3964 } 3965 for child in pci_device_node.children.iter() { 3966 device_tree.remove(child); 3967 } 3968 3969 let mut iommu_attached = false; 3970 if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices { 3971 if iommu_attached_devices.contains(&pci_device_bdf) { 3972 iommu_attached = true; 3973 } 3974 } 3975 3976 let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { 3977 // No need to remove any virtio-mem mapping here as the container outlives all devices 3978 PciDeviceHandle::Vfio(vfio_pci_device) => ( 3979 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>, 3980 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>, 3981 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 3982 false, 3983 ), 3984 PciDeviceHandle::Virtio(virtio_pci_device) => { 3985 let dev = virtio_pci_device.lock().unwrap(); 3986 let bar_addr = dev.config_bar_addr(); 3987 for (event, addr) in dev.ioeventfds(bar_addr) { 3988 let io_addr = IoEventAddress::Mmio(addr); 3989 self.address_manager 3990 .vm 3991 .unregister_ioevent(event, &io_addr) 3992 .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; 3993 } 3994 3995 if let Some(dma_handler) = dev.dma_handler() { 3996 if !iommu_attached { 3997 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3998 for region in zone.regions() { 3999 let iova = region.start_addr().0; 4000 let size = region.len(); 4001 dma_handler 4002 .unmap(iova, size) 4003 .map_err(DeviceManagerError::VirtioDmaUnmap)?; 4004 } 4005 } 4006 } 4007 } 4008 4009 ( 4010 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4011 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>, 4012 Some(dev.virtio_device()), 4013 dev.dma_handler().is_some() && !iommu_attached, 4014 ) 4015 } 4016 PciDeviceHandle::VfioUser(vfio_user_pci_device) => { 4017 let mut dev = vfio_user_pci_device.lock().unwrap(); 4018 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4019 for region in zone.regions() { 4020 dev.dma_unmap(region) 4021 .map_err(DeviceManagerError::VfioUserDmaUnmap)?; 4022 } 4023 } 4024 4025 ( 4026 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>, 4027 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>, 4028 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 4029 true, 4030 ) 4031 } 4032 }; 4033 4034 if remove_dma_handler { 4035 for virtio_mem_device in self.virtio_mem_devices.iter() { 4036 virtio_mem_device 4037 .lock() 4038 .unwrap() 4039 .remove_dma_mapping_handler(VirtioMemMappingSource::Device( 4040 pci_device_bdf.into(), 4041 )) 4042 .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?; 4043 } 4044 } 4045 4046 // Free the allocated BARs 4047 pci_device 4048 .lock() 4049 .unwrap() 4050 .free_bars( 4051 &mut self.address_manager.allocator.lock().unwrap(), 4052 &mut self.pci_segments[pci_segment_id as usize] 4053 .allocator 4054 .lock() 4055 .unwrap(), 4056 ) 4057 .map_err(DeviceManagerError::FreePciBars)?; 4058 4059 // Remove the device from the PCI bus 4060 self.pci_segments[pci_segment_id as usize] 4061 .pci_bus 4062 .lock() 4063 .unwrap() 4064 .remove_by_device(&pci_device) 4065 .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?; 4066 4067 #[cfg(target_arch = "x86_64")] 4068 // Remove the device from the IO bus 4069 self.io_bus() 4070 .remove_by_device(&bus_device) 4071 .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?; 4072 4073 // Remove the device from the MMIO bus 4074 self.mmio_bus() 4075 .remove_by_device(&bus_device) 4076 .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?; 4077 4078 // Remove the device from the list of BusDevice held by the 4079 // DeviceManager. 4080 self.bus_devices 4081 .retain(|dev| !Arc::ptr_eq(dev, &bus_device)); 4082 4083 // Shutdown and remove the underlying virtio-device if present 4084 if let Some(virtio_device) = virtio_device { 4085 for mapping in virtio_device.lock().unwrap().userspace_mappings() { 4086 self.memory_manager 4087 .lock() 4088 .unwrap() 4089 .remove_userspace_mapping( 4090 mapping.addr.raw_value(), 4091 mapping.len, 4092 mapping.host_addr, 4093 mapping.mergeable, 4094 mapping.mem_slot, 4095 ) 4096 .map_err(DeviceManagerError::MemoryManager)?; 4097 } 4098 4099 virtio_device.lock().unwrap().shutdown(); 4100 4101 self.virtio_devices 4102 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device)); 4103 } 4104 4105 event!( 4106 "vm", 4107 "device-removed", 4108 "id", 4109 &id, 4110 "bdf", 4111 pci_device_bdf.to_string() 4112 ); 4113 4114 // At this point, the device has been removed from all the list and 4115 // buses where it was stored. At the end of this function, after 4116 // any_device, bus_device and pci_device are released, the actual 4117 // device will be dropped. 4118 Ok(()) 4119 } 4120 4121 fn hotplug_virtio_pci_device( 4122 &mut self, 4123 handle: MetaVirtioDevice, 4124 ) -> DeviceManagerResult<PciDeviceInfo> { 4125 // Add the virtio device to the device manager list. This is important 4126 // as the list is used to notify virtio devices about memory updates 4127 // for instance. 4128 self.virtio_devices.push(handle.clone()); 4129 4130 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 4131 self.iommu_mapping.clone() 4132 } else { 4133 None 4134 }; 4135 4136 let bdf = self.add_virtio_pci_device( 4137 handle.virtio_device, 4138 &mapping, 4139 handle.id.clone(), 4140 handle.pci_segment, 4141 handle.dma_handler, 4142 )?; 4143 4144 // Update the PCIU bitmap 4145 self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 4146 4147 Ok(PciDeviceInfo { id: handle.id, bdf }) 4148 } 4149 4150 fn is_iommu_segment(&self, pci_segment_id: u16) -> bool { 4151 self.config 4152 .lock() 4153 .as_ref() 4154 .unwrap() 4155 .platform 4156 .as_ref() 4157 .map(|pc| { 4158 pc.iommu_segments 4159 .as_ref() 4160 .map(|v| v.contains(&pci_segment_id)) 4161 .unwrap_or_default() 4162 }) 4163 .unwrap_or_default() 4164 } 4165 4166 pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> { 4167 self.validate_identifier(&disk_cfg.id)?; 4168 4169 if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) { 4170 return Err(DeviceManagerError::InvalidIommuHotplug); 4171 } 4172 4173 let device = self.make_virtio_block_device(disk_cfg)?; 4174 self.hotplug_virtio_pci_device(device) 4175 } 4176 4177 pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> { 4178 self.validate_identifier(&fs_cfg.id)?; 4179 4180 let device = self.make_virtio_fs_device(fs_cfg)?; 4181 self.hotplug_virtio_pci_device(device) 4182 } 4183 4184 pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> { 4185 self.validate_identifier(&pmem_cfg.id)?; 4186 4187 if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) { 4188 return Err(DeviceManagerError::InvalidIommuHotplug); 4189 } 4190 4191 let device = self.make_virtio_pmem_device(pmem_cfg)?; 4192 self.hotplug_virtio_pci_device(device) 4193 } 4194 4195 pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> { 4196 self.validate_identifier(&net_cfg.id)?; 4197 4198 if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) { 4199 return Err(DeviceManagerError::InvalidIommuHotplug); 4200 } 4201 4202 let device = self.make_virtio_net_device(net_cfg)?; 4203 self.hotplug_virtio_pci_device(device) 4204 } 4205 4206 pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> { 4207 self.validate_identifier(&vdpa_cfg.id)?; 4208 4209 if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) { 4210 return Err(DeviceManagerError::InvalidIommuHotplug); 4211 } 4212 4213 let device = self.make_vdpa_device(vdpa_cfg)?; 4214 self.hotplug_virtio_pci_device(device) 4215 } 4216 4217 pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> { 4218 self.validate_identifier(&vsock_cfg.id)?; 4219 4220 if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) { 4221 return Err(DeviceManagerError::InvalidIommuHotplug); 4222 } 4223 4224 let device = self.make_virtio_vsock_device(vsock_cfg)?; 4225 self.hotplug_virtio_pci_device(device) 4226 } 4227 4228 pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> { 4229 let mut counters = HashMap::new(); 4230 4231 for handle in &self.virtio_devices { 4232 let virtio_device = handle.virtio_device.lock().unwrap(); 4233 if let Some(device_counters) = virtio_device.counters() { 4234 counters.insert(handle.id.clone(), device_counters.clone()); 4235 } 4236 } 4237 4238 counters 4239 } 4240 4241 pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> { 4242 if let Some(balloon) = &self.balloon { 4243 return balloon 4244 .lock() 4245 .unwrap() 4246 .resize(size) 4247 .map_err(DeviceManagerError::VirtioBalloonResize); 4248 } 4249 4250 warn!("No balloon setup: Can't resize the balloon"); 4251 Err(DeviceManagerError::MissingVirtioBalloon) 4252 } 4253 4254 pub fn balloon_size(&self) -> u64 { 4255 if let Some(balloon) = &self.balloon { 4256 return balloon.lock().unwrap().get_actual(); 4257 } 4258 4259 0 4260 } 4261 4262 pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> { 4263 self.device_tree.clone() 4264 } 4265 4266 #[cfg(target_arch = "x86_64")] 4267 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 4268 self.ged_notification_device 4269 .as_ref() 4270 .unwrap() 4271 .lock() 4272 .unwrap() 4273 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4274 .map_err(DeviceManagerError::PowerButtonNotification) 4275 } 4276 4277 #[cfg(target_arch = "aarch64")] 4278 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 4279 // There are two use cases: 4280 // 1. Users will use direct kernel boot with device tree. 4281 // 2. Users will use ACPI+UEFI boot. 4282 4283 // Trigger a GPIO pin 3 event to satisify use case 1. 4284 self.gpio_device 4285 .as_ref() 4286 .unwrap() 4287 .lock() 4288 .unwrap() 4289 .trigger_key(3) 4290 .map_err(DeviceManagerError::AArch64PowerButtonNotification)?; 4291 // Trigger a GED power button event to satisify use case 2. 4292 return self 4293 .ged_notification_device 4294 .as_ref() 4295 .unwrap() 4296 .lock() 4297 .unwrap() 4298 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4299 .map_err(DeviceManagerError::PowerButtonNotification); 4300 } 4301 4302 pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> { 4303 &self.iommu_attached_devices 4304 } 4305 4306 fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> { 4307 if let Some(id) = id { 4308 if id.starts_with("__") { 4309 return Err(DeviceManagerError::InvalidIdentifier(id.clone())); 4310 } 4311 4312 if self.device_tree.lock().unwrap().contains_key(id) { 4313 return Err(DeviceManagerError::IdentifierNotUnique(id.clone())); 4314 } 4315 } 4316 4317 Ok(()) 4318 } 4319 4320 pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses { 4321 &self.acpi_platform_addresses 4322 } 4323 } 4324 4325 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> { 4326 for (numa_node_id, numa_node) in numa_nodes.iter() { 4327 if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) { 4328 return Some(*numa_node_id); 4329 } 4330 } 4331 4332 None 4333 } 4334 4335 struct TpmDevice {} 4336 4337 impl Aml for TpmDevice { 4338 fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) { 4339 aml::Device::new( 4340 "TPM2".into(), 4341 vec![ 4342 &aml::Name::new("_HID".into(), &"MSFT0101"), 4343 &aml::Name::new("_STA".into(), &(0xF_usize)), 4344 &aml::Name::new( 4345 "_CRS".into(), 4346 &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new( 4347 true, 4348 layout::TPM_START.0 as u32, 4349 layout::TPM_SIZE as u32, 4350 )]), 4351 ), 4352 ], 4353 ) 4354 .to_aml_bytes(sink) 4355 } 4356 } 4357 4358 impl Aml for DeviceManager { 4359 fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) { 4360 #[cfg(target_arch = "aarch64")] 4361 use arch::aarch64::DeviceInfoForFdt; 4362 4363 let mut pci_scan_methods = Vec::new(); 4364 for i in 0..self.pci_segments.len() { 4365 pci_scan_methods.push(aml::MethodCall::new( 4366 format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(), 4367 vec![], 4368 )); 4369 } 4370 let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new(); 4371 for method in &pci_scan_methods { 4372 pci_scan_inner.push(method) 4373 } 4374 4375 // PCI hotplug controller 4376 aml::Device::new( 4377 "_SB_.PHPR".into(), 4378 vec![ 4379 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")), 4380 &aml::Name::new("_STA".into(), &0x0bu8), 4381 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"), 4382 &aml::Mutex::new("BLCK".into(), 0), 4383 &aml::Name::new( 4384 "_CRS".into(), 4385 &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory( 4386 aml::AddressSpaceCachable::NotCacheable, 4387 true, 4388 self.acpi_address.0, 4389 self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1, 4390 None, 4391 )]), 4392 ), 4393 // OpRegion and Fields map MMIO range into individual field values 4394 &aml::OpRegion::new( 4395 "PCST".into(), 4396 aml::OpRegionSpace::SystemMemory, 4397 &(self.acpi_address.0 as usize), 4398 &DEVICE_MANAGER_ACPI_SIZE, 4399 ), 4400 &aml::Field::new( 4401 "PCST".into(), 4402 aml::FieldAccessType::DWord, 4403 aml::FieldLockRule::NoLock, 4404 aml::FieldUpdateRule::WriteAsZeroes, 4405 vec![ 4406 aml::FieldEntry::Named(*b"PCIU", 32), 4407 aml::FieldEntry::Named(*b"PCID", 32), 4408 aml::FieldEntry::Named(*b"B0EJ", 32), 4409 aml::FieldEntry::Named(*b"PSEG", 32), 4410 ], 4411 ), 4412 &aml::Method::new( 4413 "PCEJ".into(), 4414 2, 4415 true, 4416 vec![ 4417 // Take lock defined above 4418 &aml::Acquire::new("BLCK".into(), 0xffff), 4419 // Choose the current segment 4420 &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)), 4421 // Write PCI bus number (in first argument) to I/O port via field 4422 &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)), 4423 // Release lock 4424 &aml::Release::new("BLCK".into()), 4425 // Return 0 4426 &aml::Return::new(&aml::ZERO), 4427 ], 4428 ), 4429 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner), 4430 ], 4431 ) 4432 .to_aml_bytes(sink); 4433 4434 for segment in &self.pci_segments { 4435 segment.to_aml_bytes(sink); 4436 } 4437 4438 let mut mbrd_memory = Vec::new(); 4439 4440 for segment in &self.pci_segments { 4441 mbrd_memory.push(aml::Memory32Fixed::new( 4442 true, 4443 segment.mmio_config_address as u32, 4444 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32, 4445 )) 4446 } 4447 4448 let mut mbrd_memory_refs = Vec::new(); 4449 for mbrd_memory_ref in &mbrd_memory { 4450 mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml); 4451 } 4452 4453 aml::Device::new( 4454 "_SB_.MBRD".into(), 4455 vec![ 4456 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")), 4457 &aml::Name::new("_UID".into(), &aml::ZERO), 4458 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)), 4459 ], 4460 ) 4461 .to_aml_bytes(sink); 4462 4463 // Serial device 4464 #[cfg(target_arch = "x86_64")] 4465 let serial_irq = 4; 4466 #[cfg(target_arch = "aarch64")] 4467 let serial_irq = 4468 if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off { 4469 self.get_device_info() 4470 .clone() 4471 .get(&(DeviceType::Serial, DeviceType::Serial.to_string())) 4472 .unwrap() 4473 .irq() 4474 } else { 4475 // If serial is turned off, add a fake device with invalid irq. 4476 31 4477 }; 4478 if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off { 4479 aml::Device::new( 4480 "_SB_.COM1".into(), 4481 vec![ 4482 &aml::Name::new( 4483 "_HID".into(), 4484 #[cfg(target_arch = "x86_64")] 4485 &aml::EISAName::new("PNP0501"), 4486 #[cfg(target_arch = "aarch64")] 4487 &"ARMH0011", 4488 ), 4489 &aml::Name::new("_UID".into(), &aml::ZERO), 4490 &aml::Name::new("_DDN".into(), &"COM1"), 4491 &aml::Name::new( 4492 "_CRS".into(), 4493 &aml::ResourceTemplate::new(vec![ 4494 &aml::Interrupt::new(true, true, false, false, serial_irq), 4495 #[cfg(target_arch = "x86_64")] 4496 &aml::IO::new(0x3f8, 0x3f8, 0, 0x8), 4497 #[cfg(target_arch = "aarch64")] 4498 &aml::Memory32Fixed::new( 4499 true, 4500 arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32, 4501 MMIO_LEN as u32, 4502 ), 4503 ]), 4504 ), 4505 ], 4506 ) 4507 .to_aml_bytes(sink); 4508 } 4509 4510 aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink); 4511 4512 aml::Device::new( 4513 "_SB_.PWRB".into(), 4514 vec![ 4515 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")), 4516 &aml::Name::new("_UID".into(), &aml::ZERO), 4517 ], 4518 ) 4519 .to_aml_bytes(sink); 4520 4521 if self.config.lock().unwrap().tpm.is_some() { 4522 // Add tpm device 4523 TpmDevice {}.to_aml_bytes(sink); 4524 } 4525 4526 self.ged_notification_device 4527 .as_ref() 4528 .unwrap() 4529 .lock() 4530 .unwrap() 4531 .to_aml_bytes(sink) 4532 } 4533 } 4534 4535 impl Pausable for DeviceManager { 4536 fn pause(&mut self) -> result::Result<(), MigratableError> { 4537 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4538 if let Some(migratable) = &device_node.migratable { 4539 migratable.lock().unwrap().pause()?; 4540 } 4541 } 4542 // On AArch64, the pause of device manager needs to trigger 4543 // a "pause" of GIC, which will flush the GIC pending tables 4544 // and ITS tables to guest RAM. 4545 #[cfg(target_arch = "aarch64")] 4546 { 4547 self.get_interrupt_controller() 4548 .unwrap() 4549 .lock() 4550 .unwrap() 4551 .pause()?; 4552 }; 4553 4554 Ok(()) 4555 } 4556 4557 fn resume(&mut self) -> result::Result<(), MigratableError> { 4558 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4559 if let Some(migratable) = &device_node.migratable { 4560 migratable.lock().unwrap().resume()?; 4561 } 4562 } 4563 4564 Ok(()) 4565 } 4566 } 4567 4568 impl Snapshottable for DeviceManager { 4569 fn id(&self) -> String { 4570 DEVICE_MANAGER_SNAPSHOT_ID.to_string() 4571 } 4572 4573 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 4574 let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?); 4575 4576 // We aggregate all devices snapshots. 4577 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4578 if let Some(migratable) = &device_node.migratable { 4579 let mut migratable = migratable.lock().unwrap(); 4580 snapshot.add_snapshot(migratable.id(), migratable.snapshot()?); 4581 } 4582 } 4583 4584 Ok(snapshot) 4585 } 4586 } 4587 4588 impl Transportable for DeviceManager {} 4589 4590 impl Migratable for DeviceManager { 4591 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 4592 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4593 if let Some(migratable) = &device_node.migratable { 4594 migratable.lock().unwrap().start_dirty_log()?; 4595 } 4596 } 4597 Ok(()) 4598 } 4599 4600 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 4601 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4602 if let Some(migratable) = &device_node.migratable { 4603 migratable.lock().unwrap().stop_dirty_log()?; 4604 } 4605 } 4606 Ok(()) 4607 } 4608 4609 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 4610 let mut tables = Vec::new(); 4611 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4612 if let Some(migratable) = &device_node.migratable { 4613 tables.push(migratable.lock().unwrap().dirty_log()?); 4614 } 4615 } 4616 Ok(MemoryRangeTable::new_from_tables(tables)) 4617 } 4618 4619 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 4620 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4621 if let Some(migratable) = &device_node.migratable { 4622 migratable.lock().unwrap().start_migration()?; 4623 } 4624 } 4625 Ok(()) 4626 } 4627 4628 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 4629 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4630 if let Some(migratable) = &device_node.migratable { 4631 migratable.lock().unwrap().complete_migration()?; 4632 } 4633 } 4634 Ok(()) 4635 } 4636 } 4637 4638 const PCIU_FIELD_OFFSET: u64 = 0; 4639 const PCID_FIELD_OFFSET: u64 = 4; 4640 const B0EJ_FIELD_OFFSET: u64 = 8; 4641 const PSEG_FIELD_OFFSET: u64 = 12; 4642 const PCIU_FIELD_SIZE: usize = 4; 4643 const PCID_FIELD_SIZE: usize = 4; 4644 const B0EJ_FIELD_SIZE: usize = 4; 4645 const PSEG_FIELD_SIZE: usize = 4; 4646 4647 impl BusDevice for DeviceManager { 4648 fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { 4649 match offset { 4650 PCIU_FIELD_OFFSET => { 4651 assert!(data.len() == PCIU_FIELD_SIZE); 4652 data.copy_from_slice( 4653 &self.pci_segments[self.selected_segment] 4654 .pci_devices_up 4655 .to_le_bytes(), 4656 ); 4657 // Clear the PCIU bitmap 4658 self.pci_segments[self.selected_segment].pci_devices_up = 0; 4659 } 4660 PCID_FIELD_OFFSET => { 4661 assert!(data.len() == PCID_FIELD_SIZE); 4662 data.copy_from_slice( 4663 &self.pci_segments[self.selected_segment] 4664 .pci_devices_down 4665 .to_le_bytes(), 4666 ); 4667 // Clear the PCID bitmap 4668 self.pci_segments[self.selected_segment].pci_devices_down = 0; 4669 } 4670 B0EJ_FIELD_OFFSET => { 4671 assert!(data.len() == B0EJ_FIELD_SIZE); 4672 // Always return an empty bitmap since the eject is always 4673 // taken care of right away during a write access. 4674 data.fill(0); 4675 } 4676 PSEG_FIELD_OFFSET => { 4677 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4678 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes()); 4679 } 4680 _ => error!( 4681 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4682 base, offset 4683 ), 4684 } 4685 4686 debug!( 4687 "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}", 4688 base, offset, data 4689 ) 4690 } 4691 4692 fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> { 4693 match offset { 4694 B0EJ_FIELD_OFFSET => { 4695 assert!(data.len() == B0EJ_FIELD_SIZE); 4696 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4697 data_array.copy_from_slice(data); 4698 let mut slot_bitmap = u32::from_le_bytes(data_array); 4699 4700 while slot_bitmap > 0 { 4701 let slot_id = slot_bitmap.trailing_zeros(); 4702 if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) { 4703 error!("Failed ejecting device {}: {:?}", slot_id, e); 4704 } 4705 slot_bitmap &= !(1 << slot_id); 4706 } 4707 } 4708 PSEG_FIELD_OFFSET => { 4709 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4710 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4711 data_array.copy_from_slice(data); 4712 let selected_segment = u32::from_le_bytes(data_array) as usize; 4713 if selected_segment >= self.pci_segments.len() { 4714 error!( 4715 "Segment selection out of range: {} >= {}", 4716 selected_segment, 4717 self.pci_segments.len() 4718 ); 4719 return None; 4720 } 4721 self.selected_segment = selected_segment; 4722 } 4723 _ => error!( 4724 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4725 base, offset 4726 ), 4727 } 4728 4729 debug!( 4730 "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}", 4731 base, offset, data 4732 ); 4733 4734 None 4735 } 4736 } 4737 4738 impl Drop for DeviceManager { 4739 fn drop(&mut self) { 4740 for handle in self.virtio_devices.drain(..) { 4741 handle.virtio_device.lock().unwrap().shutdown(); 4742 } 4743 4744 if let Some(termios) = *self.original_termios_opt.lock().unwrap() { 4745 // SAFETY: FFI call 4746 let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) }; 4747 } 4748 } 4749 } 4750