1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use crate::config::{ 13 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 14 VdpaConfig, VhostMode, VmConfig, VsockConfig, 15 }; 16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; 17 use crate::device_tree::{DeviceNode, DeviceTree}; 18 use crate::interrupt::LegacyUserspaceInterruptManager; 19 use crate::interrupt::MsiInterruptManager; 20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 21 use crate::pci_segment::PciSegment; 22 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 23 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 24 use crate::sigwinch_listener::start_sigwinch_listener; 25 use crate::GuestRegionMmap; 26 use crate::PciDeviceInfo; 27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; 28 use acpi_tables::sdt::GenericAddress; 29 use acpi_tables::{aml, aml::Aml}; 30 use anyhow::anyhow; 31 use arch::layout; 32 #[cfg(target_arch = "x86_64")] 33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 34 use arch::NumaNodes; 35 #[cfg(target_arch = "aarch64")] 36 use arch::{DeviceType, MmioDeviceInfo}; 37 use block_util::{ 38 async_io::DiskFile, block_io_uring_is_supported, detect_image_type, 39 fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync, 40 raw_async::RawFileDisk, raw_sync::RawFileDiskSync, vhdx_sync::VhdxDiskSync, ImageType, 41 }; 42 #[cfg(target_arch = "aarch64")] 43 use devices::gic; 44 #[cfg(target_arch = "x86_64")] 45 use devices::ioapic; 46 #[cfg(target_arch = "aarch64")] 47 
use devices::legacy::Pl011; 48 #[cfg(target_arch = "x86_64")] 49 use devices::legacy::Serial; 50 use devices::{ 51 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 52 }; 53 use hypervisor::{HypervisorType, IoEventAddress}; 54 use libc::{ 55 cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, 56 O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, 57 }; 58 use pci::{ 59 DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping, 60 VfioUserPciDevice, VfioUserPciDeviceError, 61 }; 62 use seccompiler::SeccompAction; 63 use serde::{Deserialize, Serialize}; 64 use std::collections::{BTreeSet, HashMap}; 65 use std::fs::{read_link, File, OpenOptions}; 66 use std::io::{self, stdout, Seek, SeekFrom}; 67 use std::mem::zeroed; 68 use std::num::Wrapping; 69 use std::os::unix::fs::OpenOptionsExt; 70 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 71 use std::path::PathBuf; 72 use std::result; 73 use std::sync::{Arc, Mutex}; 74 use std::time::Instant; 75 use tracer::trace_scoped; 76 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 77 use virtio_devices::transport::VirtioTransport; 78 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 79 use virtio_devices::vhost_user::VhostUserConfig; 80 use virtio_devices::{ 81 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 82 }; 83 use virtio_devices::{Endpoint, IommuMapping}; 84 use vm_allocator::{AddressAllocator, SystemAllocator}; 85 use vm_device::dma_mapping::vfio::VfioDmaMapping; 86 use vm_device::dma_mapping::ExternalDmaMapping; 87 use vm_device::interrupt::{ 88 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 89 }; 90 use vm_device::{Bus, BusDevice, Resource}; 91 use vm_memory::guest_memory::FileOffset; 92 use vm_memory::GuestMemoryRegion; 93 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion}; 94 #[cfg(target_arch = "x86_64")] 95 use 
vm_memory::{GuestAddressSpace, GuestMemory}; 96 use vm_migration::{ 97 protocol::MemoryRangeTable, snapshot_from_id, versioned_state_from_id, Migratable, 98 MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, Transportable, 99 }; 100 use vm_virtio::AccessPlatform; 101 use vm_virtio::VirtioDeviceType; 102 use vmm_sys_util::eventfd::EventFd; 103 104 #[cfg(target_arch = "aarch64")] 105 const MMIO_LEN: u64 = 0x1000; 106 107 // Singleton devices / devices the user cannot name 108 #[cfg(target_arch = "x86_64")] 109 const IOAPIC_DEVICE_NAME: &str = "__ioapic"; 110 const SERIAL_DEVICE_NAME: &str = "__serial"; 111 #[cfg(target_arch = "aarch64")] 112 const GPIO_DEVICE_NAME: &str = "__gpio"; 113 const RNG_DEVICE_NAME: &str = "__rng"; 114 const IOMMU_DEVICE_NAME: &str = "__iommu"; 115 const BALLOON_DEVICE_NAME: &str = "__balloon"; 116 const CONSOLE_DEVICE_NAME: &str = "__console"; 117 118 // Devices that the user may name and for which we generate 119 // identifiers if the user doesn't give one 120 const DISK_DEVICE_NAME_PREFIX: &str = "_disk"; 121 const FS_DEVICE_NAME_PREFIX: &str = "_fs"; 122 const NET_DEVICE_NAME_PREFIX: &str = "_net"; 123 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem"; 124 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa"; 125 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock"; 126 const WATCHDOG_DEVICE_NAME: &str = "__watchdog"; 127 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio"; 128 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user"; 129 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci"; 130 131 /// Errors associated with device manager 132 #[derive(Debug)] 133 pub enum DeviceManagerError { 134 /// Cannot create EventFd. 
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    // --- virtio / vhost-user device creation errors ---

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Cannot create tpm device
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    // --- disk image / console / tap setup errors ---

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    // --- interrupt / PCI resource errors ---

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    // --- persistent memory (virtio-pmem) errors ---

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    // --- serial / console output and pty errors ---

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    // --- VFIO passthrough errors ---

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    // --- bus / address-space errors ---

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    // --- device lookup / removal errors ---

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    // --- device-tree / resource bookkeeping errors ---

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    // --- async / sync disk backend creation errors ---

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    // --- vfio-user errors ---

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
457 VirtioDmaUnmap(std::io::Error), 458 459 /// Cannot hotplug device behind vIOMMU 460 InvalidIommuHotplug, 461 462 /// Invalid identifier as it is not unique. 463 IdentifierNotUnique(String), 464 465 /// Invalid identifier 466 InvalidIdentifier(String), 467 468 /// Error activating virtio device 469 VirtioActivate(ActivateError), 470 471 /// Failed retrieving device state from snapshot 472 RestoreGetState(MigratableError), 473 } 474 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>; 475 476 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10; 477 478 const TIOCSPTLCK: libc::c_int = 0x4004_5431; 479 const TIOCGTPEER: libc::c_int = 0x5441; 480 481 pub fn create_pty() -> io::Result<(File, File, PathBuf)> { 482 // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx 483 // This is done to try and use the devpts filesystem that 484 // could be available for use in the process's namespace first. 485 // Ideally these are all the same file though but different 486 // kernels could have things setup differently. 487 // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt 488 // for further details. 
489 490 let custom_flags = libc::O_NONBLOCK; 491 let main = match OpenOptions::new() 492 .read(true) 493 .write(true) 494 .custom_flags(custom_flags) 495 .open("/dev/pts/ptmx") 496 { 497 Ok(f) => f, 498 _ => OpenOptions::new() 499 .read(true) 500 .write(true) 501 .custom_flags(custom_flags) 502 .open("/dev/ptmx")?, 503 }; 504 let mut unlock: libc::c_ulong = 0; 505 // SAFETY: FFI call into libc, trivially safe 506 unsafe { libc::ioctl(main.as_raw_fd(), TIOCSPTLCK as _, &mut unlock) }; 507 508 // SAFETY: FFI call into libc, trivally safe 509 let sub_fd = unsafe { 510 libc::ioctl( 511 main.as_raw_fd(), 512 TIOCGTPEER as _, 513 libc::O_NOCTTY | libc::O_RDWR, 514 ) 515 }; 516 if sub_fd == -1 { 517 return vmm_sys_util::errno::errno_result().map_err(|e| e.into()); 518 } 519 520 let proc_path = PathBuf::from(format!("/proc/self/fd/{sub_fd}")); 521 let path = read_link(proc_path)?; 522 523 // SAFETY: sub_fd is checked to be valid before being wrapped in File 524 Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path)) 525 } 526 527 #[derive(Default)] 528 pub struct Console { 529 console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>, 530 } 531 532 impl Console { 533 pub fn update_console_size(&self) { 534 if let Some(resizer) = self.console_resizer.as_ref() { 535 resizer.update_console_size() 536 } 537 } 538 } 539 540 pub(crate) struct AddressManager { 541 pub(crate) allocator: Arc<Mutex<SystemAllocator>>, 542 #[cfg(target_arch = "x86_64")] 543 pub(crate) io_bus: Arc<Bus>, 544 pub(crate) mmio_bus: Arc<Bus>, 545 pub(crate) vm: Arc<dyn hypervisor::Vm>, 546 device_tree: Arc<Mutex<DeviceTree>>, 547 pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 548 } 549 550 impl DeviceRelocation for AddressManager { 551 fn move_bar( 552 &self, 553 old_base: u64, 554 new_base: u64, 555 len: u64, 556 pci_dev: &mut dyn PciDevice, 557 region_type: PciBarRegionType, 558 ) -> std::result::Result<(), std::io::Error> { 559 match region_type { 560 PciBarRegionType::IoRegion => { 561 
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator: release the old PIO range, then
                    // claim the new one at the exact requested base.
                    // NOTE(review): the result of free_io_addresses() is
                    // ignored; a failed free would leak the old range —
                    // confirm this is intentional best-effort behavior.
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                // Update system allocator
                if region_type == PciBarRegionType::Memory32BitRegion {
                    // 32-bit BARs live in the MMIO hole below 4GiB, managed by
                    // the system allocator: free old range, claim new one.
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_mmio_hole_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            Some(len),
                        )
                        .ok_or_else(|| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                "failed allocating new 32 bits MMIO range",
                            )
                        })?;
                } else {
                    // Find the specific allocator that this BAR was allocated from and use it for new one
                    // (64-bit BARs come from the per-PCI-segment allocators,
                    // identified by checking which range contains old_base).
                    for allocator in &self.pci_mmio_allocators {
                        let allocator_base = allocator.lock().unwrap().base();
                        let allocator_end = allocator.lock().unwrap().end();

                        if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                            allocator
                                .lock()
                                .unwrap()
                                .free(GuestAddress(old_base), len as GuestUsize);

                            allocator
                                .lock()
                                .unwrap()
                                .allocate(
                                    Some(GuestAddress(new_base)),
                                    len as GuestUsize,
                                    Some(len),
                                )
                                .ok_or_else(|| {
                                    io::Error::new(
                                        io::ErrorKind::Other,
                                        "failed allocating new 64 bits MMIO range",
                                    )
                                })?;

                            // Only one allocator can own old_base; stop here.
                            break;
                        }
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    // Patch the recorded PciBar resource whose type and base
                    // match the BAR being moved.
                    if let Resource::PciBar { base, type_, .. } = resource {
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {id} from device tree"),
                ));
            }
        }

        // virtio-pci devices need extra work beyond the generic BAR move.
        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The moved BAR is the config BAR: re-register every queue
                // notification ioeventfd at its new address.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {e:?}"),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {e:?}"),
                            )
                        })?;
                }
            } else {
                // Otherwise the moved BAR may back the device's shared memory
                // regions (e.g. virtio-fs DAX window): remap them below.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if
                    shm_regions.addr.raw_value() == old_base {
                        // Remove the old userspace mapping from the hypervisor
                        // before inserting the one at the new base.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {e:?}"),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {e:?}"),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {e:?}"),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the PCI device itself record its new BAR base.
        pci_dev.move_bar(old_base, new_base)
    }
}

/// Snapshot-serializable portion of the DeviceManager state.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

/// Master side of a pty together with the slave's filesystem path.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    // Clones duplicate the underlying fd so both copies refer to the same pty.
    // NOTE(review): try_clone().unwrap() panics if fd duplication fails
    // (e.g. fd limit reached) — confirm this is acceptable for all callers.
    fn clone(&self) -> Self {
        PtyPair {
            main: self.main.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

/// Handle to a PCI device, distinguishing the backend managing it.
#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}

/// A virtio device plus the placement/IOMMU metadata needed to expose it
/// on a PCI segment.
#[derive(Clone)]
struct MetaVirtioDevice {
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // Whether the device is attached to the virtual IOMMU.
    iommu: bool,
    id: String,
    pci_segment: u16,
    dma_handler:
    Option<Arc<dyn ExternalDmaMapping>>,
}

/// Addresses of ACPI platform registers discovered while building the
/// platform devices; consumed when generating ACPI tables.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

pub struct DeviceManager {
    // The underlying hypervisor
    hypervisor_type: HypervisorType,

    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // console PTY
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // serial PTY
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // Pipe used to signal pty foreground status changes (console resize)
    console_resize_pipe: Option<Arc<File>>,

    // Interrupt controller (IOAPIC on x86_64, GIC on aarch64)
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit and reset events used to signal the VMM control loop
    exit_evt: EventFd,
    reset_evt: EventFd,

    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    // MMIO address of the DeviceManager's own ACPI device
    acpi_address: GuestAddress,

    // PCI segment currently targeted for hotplug operations
    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g.
    // ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    snapshot: Option<Snapshot>,
}

impl DeviceManager {
    /// Build a DeviceManager: restore (or create) the device tree, carve the
    /// device area into per-PCI-segment MMIO allocators, create the MSI
    /// interrupt manager and PCI segments, and register the manager's own
    /// ACPI MMIO region on the bus.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
        mmio_bus: Arc<Bus>,
        hypervisor_type: HypervisorType,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        cpu_manager: Arc<Mutex<CpuManager>>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
        snapshot: Option<Snapshot>,
        dynamic: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        // When restoring, the device tree and ID counter come from the
        // snapshot; otherwise start empty.
        let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
            let state: DeviceManagerState = snapshot.to_state().unwrap();
            (
                Arc::new(Mutex::new(state.device_tree.clone())),
                state.device_id_cnt,
            )
        } else {
            (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
        };

        let num_pci_segments =
            if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
                platform_config.num_pci_segments
            } else {
                1
            };

        let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
        let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;

        // Start each PCI segment range on a 4GiB boundary
        let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
            / ((4 << 30) * num_pci_segments as u64)
            * (4 << 30);

        // One MMIO allocator per segment, each owning an equal slice of the
        // device area.
        let mut pci_mmio_allocators = vec![];
        for i in 0..num_pci_segments as u64 {
            let mmio_start = start_of_device_area + i * pci_segment_size;
            let allocator = Arc::new(Mutex::new(
                AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
            ));
            pci_mmio_allocators.push(allocator)
        }

        let address_manager = Arc::new(AddressManager {
            allocator: memory_manager.lock().unwrap().allocator(),
            #[cfg(target_arch = "x86_64")]
            io_bus,
            mmio_bus,
            vm: vm.clone(),
            device_tree: Arc::clone(&device_tree),
            pci_mmio_allocators,
        });

        // First we create the MSI interrupt manager, the legacy one is created
        // later, after the IOAPIC device creation.
        // The reason we create the MSI one first is because the IOAPIC needs it,
        // and then the legacy interrupt manager needs an IOAPIC. So we're
        // handling a linear dependency chain:
        // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
        let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
            Arc::new(MsiInterruptManager::new(
                Arc::clone(&address_manager.allocator),
                vm,
            ));

        // NOTE(review): an MMIO allocation failure is reported as
        // AllocateIoPort here; AllocateMmioAddress would be more accurate —
        // confirm no caller matches on this variant before changing it.
        let acpi_address = address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        // Pre-reserve one legacy IRQ slot per possible PCI device on a bus.
        let mut pci_irq_slots = [0; 32];
        PciSegment::reserve_legacy_interrupts_for_pci_devices(
            &address_manager,
            &mut pci_irq_slots,
        )?;

        // Segment 0 is the default segment; the rest are numbered from 1.
        let mut pci_segments = vec![PciSegment::new_default_segment(
            &address_manager,
            Arc::clone(&address_manager.pci_mmio_allocators[0]),
            &pci_irq_slots,
        )?];

        for i in 1..num_pci_segments as usize {
            pci_segments.push(PciSegment::new(
                i as u16,
                &address_manager,
                Arc::clone(&address_manager.pci_mmio_allocators[i]),
                &pci_irq_slots,
            )?);
        }

        // With dynamic CPU management, the CpuManager also gets its own ACPI
        // MMIO window on the bus.
        if dynamic {
            let acpi_address = address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
                .ok_or(DeviceManagerError::AllocateMmioAddress)?;

            address_manager
                .mmio_bus
                .insert(
                    cpu_manager.clone(),
                    acpi_address.0,
                    CPU_MANAGER_ACPI_SIZE as u64,
                )
                .map_err(DeviceManagerError::BusError)?;

            cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
        }

        let device_manager = DeviceManager {
            hypervisor_type,
            address_manager: Arc::clone(&address_manager),
            console: Arc::new(Console::default()),
            interrupt_controller: None,
            #[cfg(target_arch = "aarch64")]
            cmdline_additions: Vec::new(),
            ged_notification_device: None,
            config,
            memory_manager,
            cpu_manager,
            virtio_devices: Vec::new(),
            bus_devices: Vec::new(),
            device_id_cnt,
            msi_interrupt_manager,
            legacy_interrupt_manager: None,
            passthrough_device: None,
            vfio_container: None,
            iommu_device: None,
            iommu_mapping: None,
            iommu_attached_devices: None,
            pci_segments,
            device_tree,
            exit_evt,
            reset_evt,
            #[cfg(target_arch = "aarch64")]
            id_to_dev_info: HashMap::new(),
            seccomp_action,
            numa_nodes,
            balloon: None,
            activate_evt: activate_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            acpi_address,
            selected_segment: 0,
            serial_pty: None,
            serial_manager: None,
            console_pty: None,
            console_resize_pipe: None,
            virtio_mem_devices: Vec::new(),
            #[cfg(target_arch = "aarch64")]
            gpio_device: None,
            force_iommu,
            io_uring_supported: None,
            boot_id_list,
            timestamp,
            pending_activations: Arc::new(Mutex::new(Vec::default())),
            acpi_platform_addresses: AcpiPlatformAddresses::default(),
            snapshot,
        };

        let device_manager = Arc::new(Mutex::new(device_manager));

        // Expose the DeviceManager itself as an ACPI MMIO device for
        // hotplug notifications.
        address_manager
            .mmio_bus
            .insert(
                Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
                acpi_address.0,
                DEVICE_MANAGER_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(device_manager)
    }

    pub
fn serial_pty(&self) -> Option<PtyPair> {
    // Hand out an owned copy of the serial PTY pair so callers do not keep
    // the internal mutex held. (`pub` for this fn is on the preceding line.)
    self.serial_pty
        .as_ref()
        .map(|pty| pty.lock().unwrap().clone())
}

/// Returns a clone of the virtio-console PTY pair, if the console runs in PTY mode.
pub fn console_pty(&self) -> Option<PtyPair> {
    self.console_pty
        .as_ref()
        .map(|pty| pty.lock().unwrap().clone())
}

/// Returns the pipe used to forward console resize (SIGWINCH) notifications,
/// if one has been set up.
pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
    self.console_resize_pipe.as_ref().map(Arc::clone)
}

/// Creates all guest devices managed by this `DeviceManager`.
///
/// Ordering matters here: the interrupt controller must exist before the
/// legacy interrupt manager, which in turn is needed by the legacy/ACPI/
/// console devices. Virtio devices are created last and then placed on the
/// PCI bus.
///
/// `serial_pty`/`console_pty`/`console_resize_pipe` are pre-existing PTY
/// endpoints to reuse (e.g. across a reboot); `None` means create fresh ones
/// on demand.
pub fn create_devices(
    &mut self,
    serial_pty: Option<PtyPair>,
    console_pty: Option<PtyPair>,
    console_resize_pipe: Option<File>,
) -> DeviceManagerResult<()> {
    trace_scoped!("create_devices");

    let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();

    let interrupt_controller = self.add_interrupt_controller()?;

    // The CPU manager raises/acks interrupts through the controller.
    self.cpu_manager
        .lock()
        .unwrap()
        .set_interrupt_controller(interrupt_controller.clone());

    // Now we can create the legacy interrupt manager, which needs the freshly
    // formed IOAPIC device.
    let legacy_interrupt_manager: Arc<
        dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
    > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
        &interrupt_controller,
    )));

    {
        // Expose the memory manager's ACPI region on the MMIO bus, if it has one.
        if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
            self.address_manager
                .mmio_bus
                .insert(
                    Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
                    acpi_address.0,
                    MEMORY_MANAGER_ACPI_SIZE as u64,
                )
                .map_err(DeviceManagerError::BusError)?;
        }
    }

    // Per-arch legacy devices: x86_64 needs a reset event (i8042/CMOS),
    // aarch64 needs the legacy interrupt manager (RTC/GPIO).
    #[cfg(target_arch = "x86_64")]
    self.add_legacy_devices(
        self.reset_evt
            .try_clone()
            .map_err(DeviceManagerError::EventFd)?,
    )?;

    #[cfg(target_arch = "aarch64")]
    self.add_legacy_devices(&legacy_interrupt_manager)?;

    {
        self.ged_notification_device = self.add_acpi_devices(
            &legacy_interrupt_manager,
            self.reset_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )?;
    }

    self.console = self.add_console_device(
        &legacy_interrupt_manager,
        &mut virtio_devices,
        serial_pty,
        console_pty,
        console_resize_pipe,
    )?;

    if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
        let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
        self.bus_devices
            .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>)
    }
    self.legacy_interrupt_manager = Some(legacy_interrupt_manager);

    virtio_devices.append(&mut self.make_virtio_devices()?);

    self.add_pci_devices(virtio_devices.clone())?;

    self.virtio_devices = virtio_devices;

    Ok(())
}

/// Captures the snapshot-relevant state: the device tree plus the counter
/// used to generate unique device ids.
fn state(&self) -> DeviceManagerState {
    DeviceManagerState {
        device_tree: self.device_tree.lock().unwrap().clone(),
        device_id_cnt: self.device_id_cnt,
    }
}

/// Returns the inclusive (start, end) guest-address range usable for MSI
/// doorbells: derived from the vGIC ITS config on aarch64, the fixed APIC
/// window on x86_64.
fn get_msi_iova_space(&mut self) -> (u64, u64) {
    #[cfg(target_arch = "aarch64")]
    {
        let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
        let vgic_config = gic::Gic::create_default_config(vcpus.into());
        (
            vgic_config.msi_addr,
            vgic_config.msi_addr + vgic_config.msi_size - 1,
        )
    }
    #[cfg(target_arch = "x86_64")]
    (0xfee0_0000, 0xfeef_ffff)
}

#[cfg(target_arch = "aarch64")]
/// Gets the information of the devices registered up to some point in time.
pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
    &self.id_to_dev_info
}

/// Places all virtio devices (and the optional virtio-iommu) on their PCI
/// segments, wires up VFIO / vfio-user devices, and records which PCI
/// devices sit behind the IOMMU.
#[allow(unused_variables)]
fn add_pci_devices(
    &mut self,
    virtio_devices: Vec<MetaVirtioDevice>,
) -> DeviceManagerResult<()> {
    let iommu_id = String::from(IOMMU_DEVICE_NAME);

    let iommu_device = if self.config.lock().unwrap().iommu {
        let (device, mapping) = virtio_devices::Iommu::new(
            iommu_id.clone(),
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            self.get_msi_iova_space(),
            versioned_state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateVirtioIommu)?;
        let device = Arc::new(Mutex::new(device));
        self.iommu_device = Some(Arc::clone(&device));
        self.iommu_mapping = Some(mapping);

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(iommu_id.clone(), device_node!(iommu_id, device));

        Some(device)
    } else {
        None
    };

    let mut iommu_attached_devices = Vec::new();
    {
        for handle in virtio_devices {
            // Only devices explicitly requesting IOMMU get the mapping.
            let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
                self.iommu_mapping.clone()
            } else {
                None
            };

            let dev_id = self.add_virtio_pci_device(
                handle.virtio_device,
                &mapping,
                handle.id,
                handle.pci_segment,
                handle.dma_handler,
            )?;

            if handle.iommu {
                iommu_attached_devices.push(dev_id);
            }
        }

        let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
        iommu_attached_devices.append(&mut vfio_iommu_device_ids);

        let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
        iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);

        // Add all devices from forced iommu segments
        if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
            if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
                for segment in iommu_segments {
                    // 32 device slots per PCI bus; attach every slot on the
                    // forced segment, function 0 only.
                    for device in 0..32 {
                        let bdf = PciBdf::new(*segment, 0, device, 0);
                        if !iommu_attached_devices.contains(&bdf) {
                            iommu_attached_devices.push(bdf);
                        }
                    }
                }
            }
        }

        if let Some(iommu_device) = iommu_device {
            // The virtio-iommu device itself always lives on segment 0 and is
            // never behind the IOMMU.
            let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
            self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
        }
    }

    for segment in &self.pci_segments {
        #[cfg(target_arch = "x86_64")]
        if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
            self.bus_devices
                .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
        }

        self.bus_devices
            .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
    }

    Ok(())
}
#[cfg(target_arch = "aarch64")] 1362 fn add_interrupt_controller( 1363 &mut self, 1364 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1365 let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new( 1366 gic::Gic::new( 1367 self.config.lock().unwrap().cpus.boot_vcpus, 1368 Arc::clone(&self.msi_interrupt_manager), 1369 self.address_manager.vm.clone(), 1370 ) 1371 .map_err(DeviceManagerError::CreateInterruptController)?, 1372 )); 1373 1374 self.interrupt_controller = Some(interrupt_controller.clone()); 1375 1376 // Restore the vGic if this is in the process of restoration 1377 let id = String::from(gic::GIC_SNAPSHOT_ID); 1378 if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) { 1379 // PMU support is optional. Nothing should be impacted if the PMU initialization failed. 1380 if self 1381 .cpu_manager 1382 .lock() 1383 .unwrap() 1384 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16) 1385 .is_err() 1386 { 1387 info!("Failed to initialize PMU"); 1388 } 1389 1390 let vgic_state = vgic_snapshot 1391 .to_state() 1392 .map_err(DeviceManagerError::RestoreGetState)?; 1393 let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states(); 1394 interrupt_controller 1395 .lock() 1396 .unwrap() 1397 .restore_vgic(vgic_state, &saved_vcpu_states) 1398 .unwrap(); 1399 } 1400 1401 self.device_tree 1402 .lock() 1403 .unwrap() 1404 .insert(id.clone(), device_node!(id, interrupt_controller)); 1405 1406 Ok(interrupt_controller) 1407 } 1408 1409 #[cfg(target_arch = "aarch64")] 1410 pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> { 1411 self.interrupt_controller.as_ref() 1412 } 1413 1414 #[cfg(target_arch = "x86_64")] 1415 fn add_interrupt_controller( 1416 &mut self, 1417 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1418 let id = String::from(IOAPIC_DEVICE_NAME); 1419 1420 // Create IOAPIC 1421 let interrupt_controller = Arc::new(Mutex::new( 1422 ioapic::Ioapic::new( 1423 
id.clone(), 1424 APIC_START, 1425 Arc::clone(&self.msi_interrupt_manager), 1426 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1427 .map_err(DeviceManagerError::RestoreGetState)?, 1428 ) 1429 .map_err(DeviceManagerError::CreateInterruptController)?, 1430 )); 1431 1432 self.interrupt_controller = Some(interrupt_controller.clone()); 1433 1434 self.address_manager 1435 .mmio_bus 1436 .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE) 1437 .map_err(DeviceManagerError::BusError)?; 1438 1439 self.bus_devices 1440 .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>); 1441 1442 // Fill the device tree with a new node. In case of restore, we 1443 // know there is nothing to do, so we can simply override the 1444 // existing entry. 1445 self.device_tree 1446 .lock() 1447 .unwrap() 1448 .insert(id.clone(), device_node!(id, interrupt_controller)); 1449 1450 Ok(interrupt_controller) 1451 } 1452 1453 fn add_acpi_devices( 1454 &mut self, 1455 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1456 reset_evt: EventFd, 1457 exit_evt: EventFd, 1458 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> { 1459 let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new( 1460 exit_evt, reset_evt, 1461 ))); 1462 1463 self.bus_devices 1464 .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>); 1465 1466 #[cfg(target_arch = "x86_64")] 1467 { 1468 let shutdown_pio_address: u16 = 0x600; 1469 1470 self.address_manager 1471 .allocator 1472 .lock() 1473 .unwrap() 1474 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None) 1475 .ok_or(DeviceManagerError::AllocateIoPort)?; 1476 1477 self.address_manager 1478 .io_bus 1479 .insert(shutdown_device, shutdown_pio_address.into(), 0x4) 1480 .map_err(DeviceManagerError::BusError)?; 1481 1482 self.acpi_platform_addresses.sleep_control_reg_address = 1483 
Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1484 self.acpi_platform_addresses.sleep_status_reg_address = 1485 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1486 self.acpi_platform_addresses.reset_reg_address = 1487 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1488 } 1489 1490 let ged_irq = self 1491 .address_manager 1492 .allocator 1493 .lock() 1494 .unwrap() 1495 .allocate_irq() 1496 .unwrap(); 1497 let interrupt_group = interrupt_manager 1498 .create_group(LegacyIrqGroupConfig { 1499 irq: ged_irq as InterruptIndex, 1500 }) 1501 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1502 let ged_address = self 1503 .address_manager 1504 .allocator 1505 .lock() 1506 .unwrap() 1507 .allocate_platform_mmio_addresses( 1508 None, 1509 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1510 None, 1511 ) 1512 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 1513 let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new( 1514 interrupt_group, 1515 ged_irq, 1516 ged_address, 1517 ))); 1518 self.address_manager 1519 .mmio_bus 1520 .insert( 1521 ged_device.clone(), 1522 ged_address.0, 1523 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1524 ) 1525 .map_err(DeviceManagerError::BusError)?; 1526 self.bus_devices 1527 .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>); 1528 1529 let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new())); 1530 1531 self.bus_devices 1532 .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>); 1533 1534 #[cfg(target_arch = "x86_64")] 1535 { 1536 let pm_timer_pio_address: u16 = 0x608; 1537 1538 self.address_manager 1539 .allocator 1540 .lock() 1541 .unwrap() 1542 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None) 1543 .ok_or(DeviceManagerError::AllocateIoPort)?; 1544 1545 self.address_manager 1546 .io_bus 1547 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4) 1548 .map_err(DeviceManagerError::BusError)?; 1549 
1550 self.acpi_platform_addresses.pm_timer_address = 1551 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address)); 1552 } 1553 1554 Ok(Some(ged_device)) 1555 } 1556 1557 #[cfg(target_arch = "x86_64")] 1558 fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> { 1559 // Add a shutdown device (i8042) 1560 let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new( 1561 reset_evt.try_clone().unwrap(), 1562 ))); 1563 1564 self.bus_devices 1565 .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>); 1566 1567 self.address_manager 1568 .io_bus 1569 .insert(i8042, 0x61, 0x4) 1570 .map_err(DeviceManagerError::BusError)?; 1571 { 1572 // Add a CMOS emulated device 1573 let mem_size = self 1574 .memory_manager 1575 .lock() 1576 .unwrap() 1577 .guest_memory() 1578 .memory() 1579 .last_addr() 1580 .0 1581 + 1; 1582 let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size); 1583 let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0); 1584 1585 let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new( 1586 mem_below_4g, 1587 mem_above_4g, 1588 reset_evt, 1589 ))); 1590 1591 self.bus_devices 1592 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>); 1593 1594 self.address_manager 1595 .io_bus 1596 .insert(cmos, 0x70, 0x2) 1597 .map_err(DeviceManagerError::BusError)?; 1598 1599 let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new())); 1600 1601 self.bus_devices 1602 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>); 1603 1604 self.address_manager 1605 .io_bus 1606 .insert(fwdebug, 0x402, 0x1) 1607 .map_err(DeviceManagerError::BusError)?; 1608 } 1609 1610 // 0x80 debug port 1611 let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp))); 1612 self.bus_devices 1613 .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>); 1614 self.address_manager 1615 .io_bus 1616 .insert(debug_port, 0x80, 0x1) 1617 
.map_err(DeviceManagerError::BusError)?; 1618 1619 Ok(()) 1620 } 1621 1622 #[cfg(target_arch = "aarch64")] 1623 fn add_legacy_devices( 1624 &mut self, 1625 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1626 ) -> DeviceManagerResult<()> { 1627 // Add a RTC device 1628 let rtc_irq = self 1629 .address_manager 1630 .allocator 1631 .lock() 1632 .unwrap() 1633 .allocate_irq() 1634 .unwrap(); 1635 1636 let interrupt_group = interrupt_manager 1637 .create_group(LegacyIrqGroupConfig { 1638 irq: rtc_irq as InterruptIndex, 1639 }) 1640 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1641 1642 let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group))); 1643 1644 self.bus_devices 1645 .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>); 1646 1647 let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START; 1648 1649 self.address_manager 1650 .mmio_bus 1651 .insert(rtc_device, addr.0, MMIO_LEN) 1652 .map_err(DeviceManagerError::BusError)?; 1653 1654 self.id_to_dev_info.insert( 1655 (DeviceType::Rtc, "rtc".to_string()), 1656 MmioDeviceInfo { 1657 addr: addr.0, 1658 len: MMIO_LEN, 1659 irq: rtc_irq, 1660 }, 1661 ); 1662 1663 // Add a GPIO device 1664 let id = String::from(GPIO_DEVICE_NAME); 1665 let gpio_irq = self 1666 .address_manager 1667 .allocator 1668 .lock() 1669 .unwrap() 1670 .allocate_irq() 1671 .unwrap(); 1672 1673 let interrupt_group = interrupt_manager 1674 .create_group(LegacyIrqGroupConfig { 1675 irq: gpio_irq as InterruptIndex, 1676 }) 1677 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1678 1679 let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new( 1680 id.clone(), 1681 interrupt_group, 1682 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1683 .map_err(DeviceManagerError::RestoreGetState)?, 1684 ))); 1685 1686 self.bus_devices 1687 .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>); 1688 1689 let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START; 1690 1691 
self.address_manager 1692 .mmio_bus 1693 .insert(gpio_device.clone(), addr.0, MMIO_LEN) 1694 .map_err(DeviceManagerError::BusError)?; 1695 1696 self.gpio_device = Some(gpio_device.clone()); 1697 1698 self.id_to_dev_info.insert( 1699 (DeviceType::Gpio, "gpio".to_string()), 1700 MmioDeviceInfo { 1701 addr: addr.0, 1702 len: MMIO_LEN, 1703 irq: gpio_irq, 1704 }, 1705 ); 1706 1707 self.device_tree 1708 .lock() 1709 .unwrap() 1710 .insert(id.clone(), device_node!(id, gpio_device)); 1711 1712 Ok(()) 1713 } 1714 1715 #[cfg(target_arch = "x86_64")] 1716 fn add_serial_device( 1717 &mut self, 1718 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1719 serial_writer: Option<Box<dyn io::Write + Send>>, 1720 ) -> DeviceManagerResult<Arc<Mutex<Serial>>> { 1721 // Serial is tied to IRQ #4 1722 let serial_irq = 4; 1723 1724 let id = String::from(SERIAL_DEVICE_NAME); 1725 1726 let interrupt_group = interrupt_manager 1727 .create_group(LegacyIrqGroupConfig { 1728 irq: serial_irq as InterruptIndex, 1729 }) 1730 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1731 1732 let serial = Arc::new(Mutex::new(Serial::new( 1733 id.clone(), 1734 interrupt_group, 1735 serial_writer, 1736 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1737 .map_err(DeviceManagerError::RestoreGetState)?, 1738 ))); 1739 1740 self.bus_devices 1741 .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>); 1742 1743 self.address_manager 1744 .allocator 1745 .lock() 1746 .unwrap() 1747 .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None) 1748 .ok_or(DeviceManagerError::AllocateIoPort)?; 1749 1750 self.address_manager 1751 .io_bus 1752 .insert(serial.clone(), 0x3f8, 0x8) 1753 .map_err(DeviceManagerError::BusError)?; 1754 1755 // Fill the device tree with a new node. In case of restore, we 1756 // know there is nothing to do, so we can simply override the 1757 // existing entry. 
1758 self.device_tree 1759 .lock() 1760 .unwrap() 1761 .insert(id.clone(), device_node!(id, serial)); 1762 1763 Ok(serial) 1764 } 1765 1766 #[cfg(target_arch = "aarch64")] 1767 fn add_serial_device( 1768 &mut self, 1769 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1770 serial_writer: Option<Box<dyn io::Write + Send>>, 1771 ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> { 1772 let id = String::from(SERIAL_DEVICE_NAME); 1773 1774 let serial_irq = self 1775 .address_manager 1776 .allocator 1777 .lock() 1778 .unwrap() 1779 .allocate_irq() 1780 .unwrap(); 1781 1782 let interrupt_group = interrupt_manager 1783 .create_group(LegacyIrqGroupConfig { 1784 irq: serial_irq as InterruptIndex, 1785 }) 1786 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1787 1788 let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new( 1789 id.clone(), 1790 interrupt_group, 1791 serial_writer, 1792 self.timestamp, 1793 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1794 .map_err(DeviceManagerError::RestoreGetState)?, 1795 ))); 1796 1797 self.bus_devices 1798 .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>); 1799 1800 let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START; 1801 1802 self.address_manager 1803 .mmio_bus 1804 .insert(serial.clone(), addr.0, MMIO_LEN) 1805 .map_err(DeviceManagerError::BusError)?; 1806 1807 self.id_to_dev_info.insert( 1808 (DeviceType::Serial, DeviceType::Serial.to_string()), 1809 MmioDeviceInfo { 1810 addr: addr.0, 1811 len: MMIO_LEN, 1812 irq: serial_irq, 1813 }, 1814 ); 1815 1816 self.cmdline_additions 1817 .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0)); 1818 1819 // Fill the device tree with a new node. In case of restore, we 1820 // know there is nothing to do, so we can simply override the 1821 // existing entry. 
1822 self.device_tree 1823 .lock() 1824 .unwrap() 1825 .insert(id.clone(), device_node!(id, serial)); 1826 1827 Ok(serial) 1828 } 1829 1830 fn modify_mode<F: FnOnce(&mut termios)>( 1831 &self, 1832 fd: RawFd, 1833 f: F, 1834 ) -> vmm_sys_util::errno::Result<()> { 1835 // SAFETY: safe because we check the return value of isatty. 1836 if unsafe { isatty(fd) } != 1 { 1837 return Ok(()); 1838 } 1839 1840 // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr 1841 // and we check the return result. 1842 let mut termios: termios = unsafe { zeroed() }; 1843 // SAFETY: see above 1844 let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) }; 1845 if ret < 0 { 1846 return vmm_sys_util::errno::errno_result(); 1847 } 1848 f(&mut termios); 1849 // SAFETY: Safe because the syscall will only read the extent of termios and we check 1850 // the return result. 1851 let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) }; 1852 if ret < 0 { 1853 return vmm_sys_util::errno::errno_result(); 1854 } 1855 1856 Ok(()) 1857 } 1858 1859 fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> { 1860 // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode. 
1861 self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) }) 1862 } 1863 1864 fn listen_for_sigwinch_on_tty(&mut self, pty_main: File, pty_sub: File) -> std::io::Result<()> { 1865 let seccomp_filter = get_seccomp_filter( 1866 &self.seccomp_action, 1867 Thread::PtyForeground, 1868 self.hypervisor_type, 1869 ) 1870 .unwrap(); 1871 1872 match start_sigwinch_listener(seccomp_filter, pty_main, pty_sub) { 1873 Ok(pipe) => { 1874 self.console_resize_pipe = Some(Arc::new(pipe)); 1875 } 1876 Err(e) => { 1877 warn!("Ignoring error from setting up SIGWINCH listener: {}", e) 1878 } 1879 } 1880 1881 Ok(()) 1882 } 1883 1884 fn add_virtio_console_device( 1885 &mut self, 1886 virtio_devices: &mut Vec<MetaVirtioDevice>, 1887 console_pty: Option<PtyPair>, 1888 resize_pipe: Option<File>, 1889 ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> { 1890 let console_config = self.config.lock().unwrap().console.clone(); 1891 let endpoint = match console_config.mode { 1892 ConsoleOutputMode::File => { 1893 let file = File::create(console_config.file.as_ref().unwrap()) 1894 .map_err(DeviceManagerError::ConsoleOutputFileOpen)?; 1895 Endpoint::File(file) 1896 } 1897 ConsoleOutputMode::Pty => { 1898 if let Some(pty) = console_pty { 1899 self.config.lock().unwrap().console.file = Some(pty.path.clone()); 1900 let file = pty.main.try_clone().unwrap(); 1901 self.console_pty = Some(Arc::new(Mutex::new(pty))); 1902 self.console_resize_pipe = resize_pipe.map(Arc::new); 1903 Endpoint::PtyPair(file.try_clone().unwrap(), file) 1904 } else { 1905 let (main, mut sub, path) = 1906 create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?; 1907 self.set_raw_mode(&mut sub) 1908 .map_err(DeviceManagerError::SetPtyRaw)?; 1909 self.config.lock().unwrap().console.file = Some(path.clone()); 1910 let file = main.try_clone().unwrap(); 1911 assert!(resize_pipe.is_none()); 1912 self.listen_for_sigwinch_on_tty(main.try_clone().unwrap(), sub) 1913 .unwrap(); 1914 self.console_pty = 
Some(Arc::new(Mutex::new(PtyPair { main, path }))); 1915 Endpoint::PtyPair(file.try_clone().unwrap(), file) 1916 } 1917 } 1918 ConsoleOutputMode::Tty => { 1919 // Duplicating the file descriptors like this is needed as otherwise 1920 // they will be closed on a reboot and the numbers reused 1921 1922 // SAFETY: FFI call to dup. Trivially safe. 1923 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) }; 1924 if stdout == -1 { 1925 return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd); 1926 } 1927 // SAFETY: stdout is valid and owned solely by us. 1928 let stdout = unsafe { File::from_raw_fd(stdout) }; 1929 1930 // If an interactive TTY then we can accept input 1931 // SAFETY: FFI call. Trivially safe. 1932 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } { 1933 // SAFETY: FFI call to dup. Trivially safe. 1934 let stdin = unsafe { libc::dup(libc::STDIN_FILENO) }; 1935 if stdin == -1 { 1936 return vmm_sys_util::errno::errno_result() 1937 .map_err(DeviceManagerError::DupFd); 1938 } 1939 // SAFETY: stdin is valid and owned solely by us. 
1940 let stdin = unsafe { File::from_raw_fd(stdin) }; 1941 1942 Endpoint::FilePair(stdout, stdin) 1943 } else { 1944 Endpoint::File(stdout) 1945 } 1946 } 1947 ConsoleOutputMode::Null => Endpoint::Null, 1948 ConsoleOutputMode::Off => return Ok(None), 1949 }; 1950 let id = String::from(CONSOLE_DEVICE_NAME); 1951 1952 let (virtio_console_device, console_resizer) = virtio_devices::Console::new( 1953 id.clone(), 1954 endpoint, 1955 self.console_resize_pipe 1956 .as_ref() 1957 .map(|p| p.try_clone().unwrap()), 1958 self.force_iommu | console_config.iommu, 1959 self.seccomp_action.clone(), 1960 self.exit_evt 1961 .try_clone() 1962 .map_err(DeviceManagerError::EventFd)?, 1963 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1964 .map_err(DeviceManagerError::RestoreGetState)?, 1965 ) 1966 .map_err(DeviceManagerError::CreateVirtioConsole)?; 1967 let virtio_console_device = Arc::new(Mutex::new(virtio_console_device)); 1968 virtio_devices.push(MetaVirtioDevice { 1969 virtio_device: Arc::clone(&virtio_console_device) 1970 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 1971 iommu: console_config.iommu, 1972 id: id.clone(), 1973 pci_segment: 0, 1974 dma_handler: None, 1975 }); 1976 1977 // Fill the device tree with a new node. In case of restore, we 1978 // know there is nothing to do, so we can simply override the 1979 // existing entry. 
1980 self.device_tree 1981 .lock() 1982 .unwrap() 1983 .insert(id.clone(), device_node!(id, virtio_console_device)); 1984 1985 // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY 1986 Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) { 1987 Some(console_resizer) 1988 } else { 1989 None 1990 }) 1991 } 1992 1993 fn add_console_device( 1994 &mut self, 1995 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1996 virtio_devices: &mut Vec<MetaVirtioDevice>, 1997 serial_pty: Option<PtyPair>, 1998 console_pty: Option<PtyPair>, 1999 console_resize_pipe: Option<File>, 2000 ) -> DeviceManagerResult<Arc<Console>> { 2001 let serial_config = self.config.lock().unwrap().serial.clone(); 2002 let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode { 2003 ConsoleOutputMode::File => Some(Box::new( 2004 File::create(serial_config.file.as_ref().unwrap()) 2005 .map_err(DeviceManagerError::SerialOutputFileOpen)?, 2006 )), 2007 ConsoleOutputMode::Pty => { 2008 if let Some(pty) = serial_pty { 2009 self.config.lock().unwrap().serial.file = Some(pty.path.clone()); 2010 self.serial_pty = Some(Arc::new(Mutex::new(pty))); 2011 } else { 2012 let (main, mut sub, path) = 2013 create_pty().map_err(DeviceManagerError::SerialPtyOpen)?; 2014 self.set_raw_mode(&mut sub) 2015 .map_err(DeviceManagerError::SetPtyRaw)?; 2016 self.config.lock().unwrap().serial.file = Some(path.clone()); 2017 self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path }))); 2018 } 2019 None 2020 } 2021 ConsoleOutputMode::Tty => Some(Box::new(stdout())), 2022 ConsoleOutputMode::Off | ConsoleOutputMode::Null => None, 2023 }; 2024 if serial_config.mode != ConsoleOutputMode::Off { 2025 let serial = self.add_serial_device(interrupt_manager, serial_writer)?; 2026 self.serial_manager = match serial_config.mode { 2027 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty => { 2028 let serial_manager = 2029 SerialManager::new(serial, 
self.serial_pty.clone(), serial_config.mode) 2030 .map_err(DeviceManagerError::CreateSerialManager)?; 2031 if let Some(mut serial_manager) = serial_manager { 2032 serial_manager 2033 .start_thread( 2034 self.exit_evt 2035 .try_clone() 2036 .map_err(DeviceManagerError::EventFd)?, 2037 ) 2038 .map_err(DeviceManagerError::SpawnSerialManager)?; 2039 Some(Arc::new(serial_manager)) 2040 } else { 2041 None 2042 } 2043 } 2044 _ => None, 2045 }; 2046 } 2047 2048 let console_resizer = 2049 self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?; 2050 2051 Ok(Arc::new(Console { console_resizer })) 2052 } 2053 2054 fn add_tpm_device( 2055 &mut self, 2056 tpm_path: PathBuf, 2057 ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> { 2058 // Create TPM Device 2059 let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| { 2060 DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e)) 2061 })?; 2062 let tpm = Arc::new(Mutex::new(tpm)); 2063 2064 // Add TPM Device to mmio 2065 self.address_manager 2066 .mmio_bus 2067 .insert( 2068 tpm.clone(), 2069 arch::layout::TPM_START.0, 2070 arch::layout::TPM_SIZE, 2071 ) 2072 .map_err(DeviceManagerError::BusError)?; 2073 2074 Ok(tpm) 2075 } 2076 2077 fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2078 let mut devices: Vec<MetaVirtioDevice> = Vec::new(); 2079 2080 // Create "standard" virtio devices (net/block/rng) 2081 devices.append(&mut self.make_virtio_block_devices()?); 2082 devices.append(&mut self.make_virtio_net_devices()?); 2083 devices.append(&mut self.make_virtio_rng_devices()?); 2084 2085 // Add virtio-fs if required 2086 devices.append(&mut self.make_virtio_fs_devices()?); 2087 2088 // Add virtio-pmem if required 2089 devices.append(&mut self.make_virtio_pmem_devices()?); 2090 2091 // Add virtio-vsock if required 2092 devices.append(&mut self.make_virtio_vsock_devices()?); 2093 2094 devices.append(&mut 
self.make_virtio_mem_devices()?); 2095 2096 // Add virtio-balloon if required 2097 devices.append(&mut self.make_virtio_balloon_devices()?); 2098 2099 // Add virtio-watchdog device 2100 devices.append(&mut self.make_virtio_watchdog_devices()?); 2101 2102 // Add vDPA devices if required 2103 devices.append(&mut self.make_vdpa_devices()?); 2104 2105 Ok(devices) 2106 } 2107 2108 // Cache whether io_uring is supported to avoid probing for very block device 2109 fn io_uring_is_supported(&mut self) -> bool { 2110 if let Some(supported) = self.io_uring_supported { 2111 return supported; 2112 } 2113 2114 let supported = block_io_uring_is_supported(); 2115 self.io_uring_supported = Some(supported); 2116 supported 2117 } 2118 2119 fn make_virtio_block_device( 2120 &mut self, 2121 disk_cfg: &mut DiskConfig, 2122 ) -> DeviceManagerResult<MetaVirtioDevice> { 2123 let id = if let Some(id) = &disk_cfg.id { 2124 id.clone() 2125 } else { 2126 let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?; 2127 disk_cfg.id = Some(id.clone()); 2128 id 2129 }; 2130 2131 info!("Creating virtio-block device: {:?}", disk_cfg); 2132 2133 let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); 2134 2135 let (virtio_device, migratable_device) = if disk_cfg.vhost_user { 2136 let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone(); 2137 let vu_cfg = VhostUserConfig { 2138 socket, 2139 num_queues: disk_cfg.num_queues, 2140 queue_size: disk_cfg.queue_size, 2141 }; 2142 let vhost_user_block = Arc::new(Mutex::new( 2143 match virtio_devices::vhost_user::Blk::new( 2144 id.clone(), 2145 vu_cfg, 2146 self.seccomp_action.clone(), 2147 self.exit_evt 2148 .try_clone() 2149 .map_err(DeviceManagerError::EventFd)?, 2150 self.force_iommu, 2151 snapshot 2152 .map(|s| s.to_versioned_state()) 2153 .transpose() 2154 .map_err(DeviceManagerError::RestoreGetState)?, 2155 ) { 2156 Ok(vub_device) => vub_device, 2157 Err(e) => { 2158 return Err(DeviceManagerError::CreateVhostUserBlk(e)); 2159 } 2160 }, 
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            // Pick the disk backend implementation based on the image format.
            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if !disk_cfg.disable_io_uring && self.io_uring_is_supported() {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");
                        Box::new(
                            FixedVhdDiskAsync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                        ) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if !disk_cfg.disable_io_uring && self.io_uring_is_supported() {
                        info!("Using asynchronous RAW disk file (io_uring)");
                        Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            let virtio_block = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    self.seccomp_action.clone(),
                    disk_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }

    // Create every virtio-block device listed in the VM configuration.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        // Write the configs back: ids may have been generated above.
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }

    // Create a single virtio-net device from its configuration: a vhost-user
    // backend, or an in-process backend (named TAP, pre-opened fds, or
    // ip/mask-created TAP).
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate one and write it back.
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        // Snapshot for this id (present only when restoring).
        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    net_cfg.mtu,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                ) {
                    Ok(vun_device) => vun_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserNet(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_net as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let state = snapshot
                .map(|s| s.to_versioned_state())
                .transpose()
                .map_err(DeviceManagerError::RestoreGetState)?;

            // Three in-process backends, by priority: an existing named TAP
            // interface, a set of pre-opened TAP fds, or a TAP created from
            // the configured ip/mask.
            let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        Some(tap_if_name),
                        None,
                        None,
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else if let Some(fds) = &net_cfg.fds {
                Arc::new(Mutex::new(
                    virtio_devices::Net::from_tap_fds(
                        id.clone(),
                        fds,
                        Some(net_cfg.mac),
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        None,
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            };

            (
                Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_net as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: net_cfg.iommu,
            id,
            pci_segment: net_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add virtio-net and vhost-user-net devices
    fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        let mut net_devices = self.config.lock().unwrap().net.clone();
        if let Some(net_list_cfg) = &mut net_devices {
            for net_cfg in net_list_cfg.iter_mut() {
                devices.push(self.make_virtio_net_device(net_cfg)?);
            }
        }
        // Write the configs back: ids may have been generated above.
        self.config.lock().unwrap().net = net_devices;

        Ok(devices)
    }

    // Create the virtio-rng device when the configured entropy source path is
    // valid UTF-8; returns an empty list otherwise.
    fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        if let Some(rng_path) = rng_config.src.to_str() {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_rng_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: rng_config.iommu,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_rng_device));
        }

        Ok(devices)
    }

    // Create a single virtio-fs (vhost-user-fs) device from its
    // configuration; fails if the backend socket path is not valid UTF-8.
    fn make_virtio_fs_device(
        &mut self,
        fs_cfg: &mut FsConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate one and write it back.
        let id = if let Some(id) = &fs_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
            fs_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-fs device: {:?}", fs_cfg);

        let mut node = device_node!(id);

        if let Some(fs_socket) = fs_cfg.socket.to_str() {
            let virtio_fs_device = Arc::new(Mutex::new(
                virtio_devices::vhost_user::Fs::new(
                    id.clone(),
                    fs_socket,
                    &fs_cfg.tag,
                    fs_cfg.num_queues,
                    fs_cfg.queue_size,
                    None,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioFs)?,
            ));

            // Update the device tree with the migratable device.
            node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_fs_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id,
                pci_segment: fs_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            Err(DeviceManagerError::NoVirtioFsSock)
        }
    }

    // Create every virtio-fs device listed in the VM configuration.
    fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut fs_devices = self.config.lock().unwrap().fs.clone();
        if let Some(fs_list_cfg) = &mut fs_devices {
            for fs_cfg in fs_list_cfg.iter_mut() {
                devices.push(self.make_virtio_fs_device(fs_cfg)?);
            }
        }
        // Write the configs back: ids may have been generated above.
        self.config.lock().unwrap().fs = fs_devices;

        Ok(devices)
    }

    // Create a single virtio-pmem device, mapping the backing file into the
    // guest address space (an anonymous O_TMPFILE file is used when the
    // config points at a directory).
    fn make_virtio_pmem_device(
        &mut self,
        pmem_cfg: &mut PmemConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate one and write it back.
        let id = if let Some(id) = &pmem_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
            pmem_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-pmem device: {:?}", pmem_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        // On restore, recover the single MMIO address range previously
        // assigned to this device from the device tree resources.
        let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
            info!("Restoring virtio-pmem {} resources", id);

            let mut region_range: Option<(u64, u64)> = None;
            for resource in node.resources.iter() {
                match resource {
                    Resource::MmioAddressRange { base, size } => {
                        // Exactly one MMIO range is expected per pmem device.
                        if region_range.is_some() {
                            return Err(DeviceManagerError::ResourceAlreadyExists);
                        }

                        region_range = Some((*base, *size));
                    }
                    _ => {
                        error!("Unexpected resource {:?} for {}", resource, id);
                    }
                }
            }

            if region_range.is_none() {
                return Err(DeviceManagerError::MissingVirtioPmemResources);
            }

            region_range
        } else {
            None
        };

        // A directory target means we create an unnamed temporary file in it
        // (O_TMPFILE) and must size it explicitly from the config.
        let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
            if pmem_cfg.size.is_none() {
                return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
            }
            (O_TMPFILE, true)
        } else {
            (0, false)
        };

        let mut file = OpenOptions::new()
            .read(true)
            .write(!pmem_cfg.discard_writes)
            .custom_flags(custom_flags)
            .open(&pmem_cfg.file)
            .map_err(DeviceManagerError::PmemFileOpen)?;

        // Use the configured size, or fall back to the file's current length.
        let size = if let Some(size) = pmem_cfg.size {
            if set_len {
                file.set_len(size)
                    .map_err(DeviceManagerError::PmemFileSetLen)?;
            }
            size
        } else {
            // NOTE(review): seek errors are reported with the PmemFileSetLen
            // error variant here, even though no set_len is involved.
            file.seek(SeekFrom::End(0))
                .map_err(DeviceManagerError::PmemFileSetLen)?
        };

        // The region must be a multiple of 2MiB (see alignment note below).
        if size % 0x20_0000 != 0 {
            return Err(DeviceManagerError::PmemSizeNotAligned);
        }

        let (region_base, region_size) = if let Some((base, size)) = region_range {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            self.pci_segments[pmem_cfg.pci_segment as usize]
                .allocator
                .lock()
                .unwrap()
                .allocate(
                    Some(GuestAddress(base)),
                    size as GuestUsize,
                    Some(0x0020_0000),
                )
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base, size)
        } else {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            let base = self.pci_segments[pmem_cfg.pci_segment as usize]
                .allocator
                .lock()
                .unwrap()
                .allocate(None, size as GuestUsize, Some(0x0020_0000))
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base.raw_value(), size)
        };

        // Map the backing file; discard_writes makes the mapping private so
        // guest writes never reach the file.
        let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
        let mmap_region = MmapRegion::build(
            Some(FileOffset::new(cloned_file, 0)),
            region_size as usize,
            PROT_READ | PROT_WRITE,
            MAP_NORESERVE
                | if pmem_cfg.discard_writes {
                    MAP_PRIVATE
                } else {
                    MAP_SHARED
                },
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;
        let host_addr: u64 = mmap_region.as_ptr() as u64;

        // Expose the mapping to the guest through the memory manager.
        let mem_slot = self
            .memory_manager
            .lock()
            .unwrap()
            .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
            .map_err(DeviceManagerError::MemoryManager)?;

        let mapping = virtio_devices::UserspaceMapping {
            host_addr,
            mem_slot,
            addr: GuestAddress(region_base),
            len: region_size,
            mergeable: false,
        };

        let virtio_pmem_device = Arc::new(Mutex::new(
            virtio_devices::Pmem::new(
                id.clone(),
                file,
                GuestAddress(region_base),
                mapping,
                mmap_region,
                self.force_iommu | pmem_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioPmem)?,
        ));

        // Update the device tree with correct resource information and with
        // the migratable device.
        node.resources.push(Resource::MmioAddressRange {
            base: region_base,
            size: region_size,
        });
        node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_pmem_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: pmem_cfg.iommu,
            id,
            pci_segment: pmem_cfg.pci_segment,
            dma_handler: None,
        })
    }

    // Create every virtio-pmem device listed in the VM configuration.
    fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add virtio-pmem if required
        let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
        if let Some(pmem_list_cfg) = &mut pmem_devices {
            for pmem_cfg in pmem_list_cfg.iter_mut() {
                devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
            }
        }
        // Write the configs back: ids may have been generated above.
        self.config.lock().unwrap().pmem = pmem_devices;

        Ok(devices)
    }

    // Create a single virtio-vsock device backed by a Unix domain socket.
    fn make_virtio_vsock_device(
        &mut self,
        vsock_cfg: &mut VsockConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate one and write it back.
        let id = if let Some(id) = &vsock_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
            vsock_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-vsock device: {:?}", vsock_cfg);

        let socket_path = vsock_cfg
            .socket
            .to_str()
            .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
        let backend =
            virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                .map_err(DeviceManagerError::CreateVsockBackend)?;

        let vsock_device = Arc::new(Mutex::new(
            virtio_devices::Vsock::new(
                id.clone(),
                vsock_cfg.cid,
                vsock_cfg.socket.clone(),
                backend,
                self.force_iommu | vsock_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioVsock)?,
        ));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vsock_device));

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&vsock_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vsock_cfg.iommu,
            id,
            pci_segment: vsock_cfg.pci_segment,
            dma_handler: None,
        })
    }

    // Create the (at most one) virtio-vsock device from the VM configuration.
    fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut vsock = self.config.lock().unwrap().vsock.clone();
        if let Some(ref mut vsock_cfg) = &mut vsock {
            devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
        }
        // Write the config back: the id may have been generated above.
        self.config.lock().unwrap().vsock = vsock;

        Ok(devices)
    }

    // Create one virtio-mem device for each memory zone that has a virtio-mem
    // (hotpluggable) region.
    fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mm = self.memory_manager.clone();
        let mut mm = mm.lock().unwrap();
        for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
            if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
                info!("Creating virtio-mem device: id = {}", memory_zone_id);

                // Associate the device with its NUMA node, when the zone
                // belongs to one.
                let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
                    .map(|i| i as u16);

                let virtio_mem_device = Arc::new(Mutex::new(
                    virtio_devices::Mem::new(
                        memory_zone_id.clone(),
                        virtio_mem_zone.region(),
                        self.seccomp_action.clone(),
                        node_id,
                        virtio_mem_zone.hotplugged_size(),
                        virtio_mem_zone.hugepages(),
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        virtio_mem_zone.blocks_state().clone(),
                        versioned_state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
                            .map_err(DeviceManagerError::RestoreGetState)?,
                    )
                    .map_err(DeviceManagerError::CreateVirtioMem)?,
                ));

                // Update the virtio-mem zone so that it has a handle onto the
                // virtio-mem device, which will be used for triggering a resize
                // if needed.
                virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));

                self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));

                devices.push(MetaVirtioDevice {
                    virtio_device: Arc::clone(&virtio_mem_device)
                        as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                    iommu: false,
                    id: memory_zone_id.clone(),
                    pci_segment: 0,
                    dma_handler: None,
                });

                // Fill the device tree with a new node. In case of restore, we
                // know there is nothing to do, so we can simply override the
                // existing entry.
                self.device_tree.lock().unwrap().insert(
                    memory_zone_id.clone(),
                    device_node!(memory_zone_id, virtio_mem_device),
                );
            }
        }

        Ok(devices)
    }

    // Create the virtio-balloon device when one is configured.
    fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
            let id = String::from(BALLOON_DEVICE_NAME);
            info!("Creating virtio-balloon device: id = {}", id);

            let virtio_balloon_device = Arc::new(Mutex::new(
                virtio_devices::Balloon::new(
                    id.clone(),
                    balloon_config.size,
                    balloon_config.deflate_on_oom,
                    balloon_config.free_page_reporting,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBalloon)?,
            ));

            // Keep a direct handle to the balloon device for later use.
            self.balloon = Some(virtio_balloon_device.clone());

            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_balloon_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_balloon_device));
        }

        Ok(devices)
    }

    // Create the virtio-watchdog device when enabled in the configuration.
    fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if !self.config.lock().unwrap().watchdog {
            return Ok(devices);
        }

        let id = String::from(WATCHDOG_DEVICE_NAME);
        info!("Creating virtio-watchdog device: id = {}", id);

        let virtio_watchdog_device = Arc::new(Mutex::new(
            virtio_devices::Watchdog::new(
                id.clone(),
                self.reset_evt.try_clone().unwrap(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
        ));
        devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_watchdog_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: false,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_watchdog_device));

        Ok(devices)
    }

    // Create a single vDPA device together with the DMA mapping handler it
    // needs for guest memory accesses.
    fn make_vdpa_device(
        &mut self,
        vdpa_cfg: &mut VdpaConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate one and write it back.
        let id = if let Some(id) = &vdpa_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
            vdpa_cfg.id = Some(id.clone());
            id
        };

        info!("Creating vDPA device: {:?}", vdpa_cfg);

        let device_path = vdpa_cfg
            .path
            .to_str()
            .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;

        let vdpa_device = Arc::new(Mutex::new(
            virtio_devices::Vdpa::new(
                id.clone(),
                device_path,
                self.memory_manager.lock().unwrap().guest_memory(),
                vdpa_cfg.num_queues as u16,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVdpa)?,
        ));

        // Create the DMA handler that is required by the vDPA device
        let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
            Arc::clone(&vdpa_device),
            Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
        ));

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vdpa_device));

        Ok(MetaVirtioDevice {
            virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vdpa_cfg.iommu,
            id,
            pci_segment: vdpa_cfg.pci_segment,
            dma_handler: Some(vdpa_mapping),
        })
    }

    // Create every vDPA device listed in the VM configuration.
    fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add vdpa if required
        let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
        if let Some(vdpa_list_cfg) = &mut vdpa_devices {
            for vdpa_cfg in vdpa_list_cfg.iter_mut() {
                devices.push(self.make_vdpa_device(vdpa_cfg)?);
            }
        }
        // Write the configs back: ids may have been generated above.
        self.config.lock().unwrap().vdpa = vdpa_devices;

        Ok(devices)
    }

    // Generate the next unused device name for the given prefix. The counter
    // wraps; if a full cycle finds no free name, this fails.
    fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
        let start_id = self.device_id_cnt;
        loop {
            // Generate the temporary name.
            let name = format!("{}{}", prefix, self.device_id_cnt);
            // Increment the counter.
            self.device_id_cnt += Wrapping(1);
            // Check if the name is already in use.
            if !self.boot_id_list.contains(&name)
                && !self.device_tree.lock().unwrap().contains_key(&name)
            {
                return Ok(name);
            }

            if self.device_id_cnt == start_id {
                // We went through a full loop and there's nothing else we can
                // do.
                break;
            }
        }
        Err(DeviceManagerError::NoAvailableDeviceName)
    }

    // Pass a host device through to the guest via VFIO, lazily creating the
    // hypervisor passthrough device on first use.
    fn add_passthrough_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // If the passthrough device has not been created yet, it is created
        // here and stored in the DeviceManager structure for future needs.
        if self.passthrough_device.is_none() {
            self.passthrough_device = Some(
                self.address_manager
                    .vm
                    .create_passthrough_device()
                    .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
            );
        }

        self.add_vfio_device(device_cfg)
    }

    // Create a new VFIO container from a duplicate of the hypervisor
    // passthrough device fd; fails if passthrough was never set up.
    fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
        let passthrough_device = self
            .passthrough_device
            .as_ref()
            .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

        let dup = passthrough_device
            .try_clone()
            .map_err(DeviceManagerError::VfioCreate)?;

        Ok(Arc::new(
            VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
        ))
    }

    // Create a VFIO PCI device: pick a container, set up interrupts and DMA
    // mappings, place it on its PCI segment, and record it in the device tree.
    // Returns the device's BDF and its name.
    fn add_vfio_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // Use the configured id, or generate one and write it back.
        let vfio_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't let all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device. That's simply because the map/unmap operations happen at the
        // VFIO container level.
        let vfio_container = if device_cfg.iommu {
            let vfio_container = self.create_vfio_container()?;

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
            ));

            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }

            vfio_container
        } else if let Some(vfio_container) = &self.vfio_container {
            Arc::clone(vfio_container)
        } else {
            let vfio_container = self.create_vfio_container()?;
            needs_dma_mapping = true;
            self.vfio_container = Some(Arc::clone(&vfio_container));

            vfio_container
        };

        let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
            .map_err(DeviceManagerError::VfioCreate)?;

        if needs_dma_mapping {
            // Register DMA mapping in IOMMU.
            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem device itself.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    vfio_container
                        .vfio_dma_map(
                            region.start_addr().raw_value(),
                            region.len(),
                            region.as_ptr() as u64,
                        )
                        .map_err(DeviceManagerError::VfioDmaMap)?;
                }
            }

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
            ));

            // Let each virtio-mem device keep its hotplugged blocks mapped
            // through this container.
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Container,
                        vfio_mapping.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }
        }

        // Wire a legacy (INTx) interrupt from the per-segment IRQ slot table,
        // when a legacy interrupt manager is available.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let memory_manager = self.memory_manager.clone();

        let vfio_pci_device = VfioPciDevice::new(
            vfio_name.clone(),
            &self.address_manager.vm,
            vfio_device,
            vfio_container,
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            device_cfg.iommu,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
        )
        .map_err(DeviceManagerError::VfioPciCreate)?;

        let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

        let new_resources = self.add_pci_device(
            vfio_pci_device.clone(),
            vfio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        vfio_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioMapRegion)?;

        let mut node = device_node!(vfio_name, vfio_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_name.clone(), node);

        Ok((pci_device_bdf, vfio_name))
    }

    // Allocate BARs for a PCI device, add it to its segment's bus, register
    // its I/O and MMIO mappings, and return the assigned BAR resources.
    fn add_pci_device(
        &mut self,
        bus_device: Arc<Mutex<dyn BusDevice>>,
        pci_device: Arc<Mutex<dyn PciDevice>>,
        segment_id: u16,
        bdf: PciBdf,
        resources: Option<Vec<Resource>>,
    ) -> DeviceManagerResult<Vec<Resource>> {
        // Allocate BARs, reusing 'resources' when restoring from a snapshot.
        let bars = pci_device
            .lock()
            .unwrap()
            .allocate_bars(
                &self.address_manager.allocator,
                &mut self.pci_segments[segment_id as usize]
                    .allocator
                    .lock()
                    .unwrap(),
                resources,
            )
            .map_err(DeviceManagerError::AllocateBars)?;

        let mut pci_bus = self.pci_segments[segment_id as usize]
            .pci_bus
            .lock()
            .unwrap();

        pci_bus
            .add_device(bdf.device() as u32, pci_device)
            .map_err(DeviceManagerError::AddPciDevice)?;

        self.bus_devices.push(Arc::clone(&bus_device));

        pci_bus
            .register_mapping(
                bus_device,
                #[cfg(target_arch = "x86_64")]
                self.address_manager.io_bus.as_ref(),
                self.address_manager.mmio_bus.as_ref(),
                bars.clone(),
            )
            .map_err(DeviceManagerError::AddPciDevice)?;

        // Convert the allocated BARs into device-tree resources.
        let mut new_resources = Vec::new();
        for bar in bars {
            new_resources.push(Resource::PciBar {
                index: bar.idx(),
                base: bar.addr(),
                size: bar.size(),
                type_: bar.region_type().into(),
                prefetchable: bar.prefetchable().into(),
            });
        }

        Ok(new_resources)
    }

    // Add every VFIO passthrough device listed in the VM configuration,
    // returning the BDFs of devices attached to the virtual IOMMU.
    fn
add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        // Creates every VFIO passthrough device listed in the VM config and
        // returns the BDFs of those that ended up attached to the vIOMMU.
        let mut iommu_attached_device_ids = Vec::new();
        let mut devices = self.config.lock().unwrap().devices.clone();

        if let Some(device_list_cfg) = &mut devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (device_id, _) = self.add_passthrough_device(device_cfg)?;
                if device_cfg.iommu && self.iommu_device.is_some() {
                    iommu_attached_device_ids.push(device_id);
                }
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().devices = devices;

        Ok(iommu_attached_device_ids)
    }

    /// Creates a vfio-user (out-of-process VFIO) PCI device from its config,
    /// wires up its interrupts and DMA mappings, and registers it on the PCI
    /// bus and in the device tree. Returns the assigned BDF and device id.
    fn add_vfio_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // Use the user-supplied id, or generate one and write it back to the
        // config so it is persisted.
        let vfio_user_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;

        // INTx group for the slot, if a legacy interrupt manager exists.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let client = Arc::new(Mutex::new(
            vfio_user::Client::new(&device_cfg.socket)
                .map_err(DeviceManagerError::VfioUserCreateClient)?,
        ));

        let memory_manager = self.memory_manager.clone();

        let mut vfio_user_pci_device = VfioUserPciDevice::new(
            vfio_user_name.clone(),
            &self.address_manager.vm,
            client.clone(),
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
        )
        .map_err(DeviceManagerError::VfioUserCreate)?;

        // Let hotplugged virtio-mem regions be forwarded to this device's DMA
        // mapping as well.
        let memory = self.memory_manager.lock().unwrap().guest_memory();
        let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Device(pci_device_bdf.into()),
                    vfio_user_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }

        // DMA-map every currently present guest memory region for the device.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_user_pci_device
                    .dma_map(region)
                    .map_err(DeviceManagerError::VfioUserDmaMap)?;
            }
        }

        let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

        let new_resources = self.add_pci_device(
            vfio_user_pci_device.clone(),
            vfio_user_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Note it is required to call 'add_pci_device()' in advance to have the list of
        // mmio regions provisioned correctly
        vfio_user_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioUserMapRegion)?;

        let mut node = device_node!(vfio_user_name, vfio_user_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_user_name.clone(), node);

        Ok((pci_device_bdf, vfio_user_name))
    }

    /// Creates every vfio-user device listed in the VM config. The returned
    /// Vec is always empty (vfio-user devices are never vIOMMU-attached here);
    /// the signature mirrors add_vfio_devices() for symmetry.
    fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut user_devices = self.config.lock().unwrap().user_devices.clone();

        if let Some(device_list_cfg) = &mut user_devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().user_devices = user_devices;

        Ok(vec![])
    }

    /// Wraps a virtio device into a virtio-pci transport device and places it
    /// on the requested PCI segment. Returns the BDF assigned to the new
    /// virtio-pci device.
    fn add_virtio_pci_device(
        &mut self,
        virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu_mapping: &Option<Arc<IommuMapping>>,
        virtio_device_id: String,
        pci_segment_id: u16,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> DeviceManagerResult<PciBdf> {
        let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");

        // Add the new virtio-pci node to the device tree.
        let mut node = device_node!(id);
        node.children = vec![virtio_device_id.clone()];

        // May shadow pci_segment_id: on restore the segment is taken from the
        // BDF recorded in the device tree.
        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        // Update the existing virtio node by setting the parent.
        if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
            node.parent = Some(id.clone());
        } else {
            return Err(DeviceManagerError::MissingNode);
        }

        // Allows support for one MSI-X vector per queue. It also adds 1
        // as we need to take into account the dedicated vector to notify
        // about a virtio config change.
        let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;

        // Create the AccessPlatform trait from the implementation IommuMapping.
        // This will provide address translation for any virtio device sitting
        // behind a vIOMMU.
        let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
        {
            Some(Arc::new(AccessPlatformMapping::new(
                pci_device_bdf.into(),
                mapping.clone(),
            )))
        } else {
            None
        };

        let memory = self.memory_manager.lock().unwrap().guest_memory();

        // Map DMA ranges if a DMA handler is available and if the device is
        // not attached to a virtual IOMMU.
        if let Some(dma_handler) = &dma_handler {
            if iommu_mapping.is_some() {
                // Behind the vIOMMU: let the IOMMU drive the mappings.
                if let Some(iommu) = &self.iommu_device {
                    iommu
                        .lock()
                        .unwrap()
                        .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
                } else {
                    return Err(DeviceManagerError::MissingVirtualIommu);
                }
            } else {
                // Let every virtio-mem device handle the DMA map/unmap through the
                // DMA handler provided.
                for virtio_mem_device in self.virtio_mem_devices.iter() {
                    virtio_mem_device
                        .lock()
                        .unwrap()
                        .add_dma_mapping_handler(
                            VirtioMemMappingSource::Device(pci_device_bdf.into()),
                            dma_handler.clone(),
                        )
                        .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
                }

                // Do not register virtio-mem regions, as they are handled directly by
                // virtio-mem devices.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        let gpa = region.start_addr().0;
                        let size = region.len();
                        // Identity mapping: IOVA == GPA when no vIOMMU is involved.
                        dma_handler
                            .map(gpa, gpa, size)
                            .map_err(DeviceManagerError::VirtioDmaMap)?;
                    }
                }
            }
        }

        let device_type = virtio_device.lock().unwrap().device_type();
        let virtio_pci_device = Arc::new(Mutex::new(
            VirtioPciDevice::new(
                id.clone(),
                memory,
                virtio_device,
                msix_num,
                access_platform,
                &self.msi_interrupt_manager,
                pci_device_bdf.into(),
                self.activate_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                // All device types *except* virtio block devices should be allocated a 64-bit bar
                // The block devices should be given a 32-bit BAR so that they are easily accessible
                // to firmware without requiring excessive identity mapping.
                // The exception being if not on the default PCI segment.
                pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
                dma_handler,
                self.pending_activations.clone(),
                vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
            )
            .map_err(DeviceManagerError::VirtioDevice)?,
        ));

        let new_resources = self.add_pci_device(
            virtio_pci_device.clone(),
            virtio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Register one ioeventfd per queue notification address so guest
        // kicks bypass the VMM exit path.
        let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
        for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
            let io_addr = IoEventAddress::Mmio(addr);
            self.address_manager
                .vm
                .register_ioevent(event, &io_addr, None)
                .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
        }

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
        self.device_tree.lock().unwrap().insert(id, node);

        Ok(pci_device_bdf)
    }

    /// Resolves (segment, BDF, previously-allocated resources) for a device id.
    /// On restore the values come from the device tree snapshot; otherwise a
    /// fresh BDF is allocated on the requested segment.
    fn pci_resources(
        &self,
        id: &str,
        pci_segment_id: u16,
    ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        Ok(
            if let Some(node) = self.device_tree.lock().unwrap().get(id) {
                info!("Restoring virtio-pci {} resources", id);
                let pci_device_bdf: PciBdf = node
                    .pci_bdf
                    .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
                let pci_segment_id = pci_device_bdf.segment();

                // Reserve the device slot on the bus so it cannot be handed
                // out to another device.
                self.pci_segments[pci_segment_id as usize]
                    .pci_bus
                    .lock()
                    .unwrap()
                    .get_device_id(pci_device_bdf.device() as usize)
                    .map_err(DeviceManagerError::GetPciDeviceId)?;

                (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
            } else {
                let pci_device_bdf =
                    self.pci_segments[pci_segment_id as usize].next_device_bdf()?;

                (pci_segment_id, pci_device_bdf, None)
            },
        )
    }

    /// PIO bus accessor (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }

    /// MMIO bus accessor.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }

    /// System-wide address allocator accessor.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }

    /// Interrupt controller accessor, upcast to the trait object.
    pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
        self.interrupt_controller
            .as_ref()
            .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
    }

    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }

    /// Console accessor.
    pub fn console(&self) -> &Arc<Console> {
        &self.console
    }

    /// Extra kernel command-line fragments accumulated while creating devices
    /// (aarch64 direct-kernel boot only).
    #[cfg(target_arch = "aarch64")]
    pub fn cmdline_additions(&self) -> &[String] {
        self.cmdline_additions.as_slice()
    }

    /// Propagates a newly hotplugged guest memory region to every consumer
    /// that keeps its own view of guest memory: virtio devices, their DMA
    /// handlers, the shared VFIO container, and each vfio-user device.
    pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
        for handle in self.virtio_devices.iter() {
            handle
                .virtio_device
                .lock()
                .unwrap()
                .add_memory_region(new_region)
                .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;

            // Only identity-map for devices NOT behind the vIOMMU; devices
            // behind it get their mappings from the IOMMU instead.
            if let Some(dma_handler) = &handle.dma_handler {
                if !handle.iommu {
                    let gpa = new_region.start_addr().0;
                    let size = new_region.len();
                    dma_handler
                        .map(gpa, gpa, size)
                        .map_err(DeviceManagerError::VirtioDmaMap)?;
                }
            }
        }

        // Take care of updating the memory for VFIO PCI devices.
        if let Some(vfio_container) = &self.vfio_container {
            vfio_container
                .vfio_dma_map(
                    new_region.start_addr().raw_value(),
                    new_region.len(),
                    new_region.as_ptr() as u64,
                )
                .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
        }

        // Take care of updating the memory for vfio-user devices.
        {
            let device_tree = self.device_tree.lock().unwrap();
            for pci_device_node in device_tree.pci_devices() {
                if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
                    .pci_device_handle
                    .as_ref()
                    .ok_or(DeviceManagerError::MissingPciDevice)?
                {
                    vfio_user_pci_device
                        .lock()
                        .unwrap()
                        .dma_map(new_region)
                        .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
                }
            }
        }

        Ok(())
    }

    /// Drains and runs every pending virtio device activation.
    pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
        for mut activator in self.pending_activations.lock().unwrap().drain(..)
{ 3683 activator 3684 .activate() 3685 .map_err(DeviceManagerError::VirtioActivate)?; 3686 } 3687 Ok(()) 3688 } 3689 3690 pub fn notify_hotplug( 3691 &self, 3692 _notification_type: AcpiNotificationFlags, 3693 ) -> DeviceManagerResult<()> { 3694 return self 3695 .ged_notification_device 3696 .as_ref() 3697 .unwrap() 3698 .lock() 3699 .unwrap() 3700 .notify(_notification_type) 3701 .map_err(DeviceManagerError::HotPlugNotification); 3702 } 3703 3704 pub fn add_device( 3705 &mut self, 3706 device_cfg: &mut DeviceConfig, 3707 ) -> DeviceManagerResult<PciDeviceInfo> { 3708 self.validate_identifier(&device_cfg.id)?; 3709 3710 if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) { 3711 return Err(DeviceManagerError::InvalidIommuHotplug); 3712 } 3713 3714 let (bdf, device_name) = self.add_passthrough_device(device_cfg)?; 3715 3716 // Update the PCIU bitmap 3717 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3718 3719 Ok(PciDeviceInfo { 3720 id: device_name, 3721 bdf, 3722 }) 3723 } 3724 3725 pub fn add_user_device( 3726 &mut self, 3727 device_cfg: &mut UserDeviceConfig, 3728 ) -> DeviceManagerResult<PciDeviceInfo> { 3729 self.validate_identifier(&device_cfg.id)?; 3730 3731 let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?; 3732 3733 // Update the PCIU bitmap 3734 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3735 3736 Ok(PciDeviceInfo { 3737 id: device_name, 3738 bdf, 3739 }) 3740 } 3741 3742 pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> { 3743 // The node can be directly a PCI node in case the 'id' refers to a 3744 // VFIO device or a virtio-pci one. 3745 // In case the 'id' refers to a virtio device, we must find the PCI 3746 // node by looking at the parent. 
        let device_tree = self.device_tree.lock().unwrap();
        let node = device_tree
            .get(&id)
            .ok_or(DeviceManagerError::UnknownDeviceId(id))?;

        let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
            node
        } else {
            // Virtio device id: hop to the parent virtio-pci node.
            let parent = node
                .parent
                .as_ref()
                .ok_or(DeviceManagerError::MissingNode)?;
            device_tree
                .get(parent)
                .ok_or(DeviceManagerError::MissingNode)?
        };

        let pci_device_bdf: PciBdf = pci_device_node
            .pci_bdf
            .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
        let pci_segment_id = pci_device_bdf.segment();

        let pci_device_handle = pci_device_node
            .pci_device_handle
            .as_ref()
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        // Only some virtio device types support unplug; reject the rest early.
        #[allow(irrefutable_let_patterns)]
        if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
            let device_type = VirtioDeviceType::from(
                virtio_pci_device
                    .lock()
                    .unwrap()
                    .virtio_device()
                    .lock()
                    .unwrap()
                    .device_type(),
            );
            match device_type {
                VirtioDeviceType::Net
                | VirtioDeviceType::Block
                | VirtioDeviceType::Pmem
                | VirtioDeviceType::Fs
                | VirtioDeviceType::Vsock => {}
                _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
            }
        }

        // Update the PCID bitmap
        self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();

        Ok(())
    }

    /// Tears down a device the guest has released: unmaps DMA, frees BARs,
    /// detaches it from every bus and list, and drops it from the device tree.
    /// Called in response to a guest _EJ0 write after remove_device().
    pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
        info!(
            "Ejecting device_id = {} on segment_id={}",
            device_id, pci_segment_id
        );

        // Convert the device ID into the corresponding b/d/f.
        let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);

        // Give the PCI device ID back to the PCI bus.
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .put_device_id(device_id as usize)
            .map_err(DeviceManagerError::PutPciDeviceId)?;

        // Remove the device from the device tree along with its children.
        let mut device_tree = self.device_tree.lock().unwrap();
        let pci_device_node = device_tree
            .remove_node_by_pci_bdf(pci_device_bdf)
            .ok_or(DeviceManagerError::MissingPciDevice)?;

        // For VFIO and vfio-user the PCI device id is the id.
        // For virtio we overwrite it later as we want the id of the
        // underlying device.
        let mut id = pci_device_node.id;
        let pci_device_handle = pci_device_node
            .pci_device_handle
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
            // The virtio-pci device has a single child
            if !pci_device_node.children.is_empty() {
                assert_eq!(pci_device_node.children.len(), 1);
                let child_id = &pci_device_node.children[0];
                id = child_id.clone();
            }
        }
        for child in pci_device_node.children.iter() {
            device_tree.remove(child);
        }

        let mut iommu_attached = false;
        if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
            if iommu_attached_devices.contains(&pci_device_bdf) {
                iommu_attached = true;
            }
        }

        // Per handle kind: unwind DMA/ioevent state and produce the trait
        // objects needed for the generic teardown below. The bool says whether
        // a per-device DMA mapping handler must be removed from virtio-mem.
        let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
            // No need to remove any virtio-mem mapping here as the container outlives all devices
            PciDeviceHandle::Vfio(vfio_pci_device) => (
                Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                false,
            ),
            PciDeviceHandle::Virtio(virtio_pci_device) => {
                let dev = virtio_pci_device.lock().unwrap();
                let bar_addr = dev.config_bar_addr();
                // Unregister the per-queue ioeventfds created at plug time.
                for (event, addr) in dev.ioeventfds(bar_addr) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.address_manager
                        .vm
                        .unregister_ioevent(event, &io_addr)
                        .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
                }

                if let Some(dma_handler) = dev.dma_handler() {
                    if !iommu_attached {
                        // Undo the identity mappings set up at plug time.
                        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                            for region in zone.regions() {
                                let iova = region.start_addr().0;
                                let size = region.len();
                                dma_handler
                                    .unmap(iova, size)
                                    .map_err(DeviceManagerError::VirtioDmaUnmap)?;
                            }
                        }
                    }
                }

                (
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    Some(dev.virtio_device()),
                    dev.dma_handler().is_some() && !iommu_attached,
                )
            }
            PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
                let mut dev = vfio_user_pci_device.lock().unwrap();
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        dev.dma_unmap(region)
                            .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
                    }
                }

                (
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    true,
                )
            }
        };

        if remove_dma_handler {
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
                        pci_device_bdf.into(),
                    ))
                    .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
            }
        }

        // Free the allocated BARs
        pci_device
            .lock()
            .unwrap()
            .free_bars(
                &mut self.address_manager.allocator.lock().unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .allocator
                    .lock()
                    .unwrap(),
            )
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the list and
        // buses where it was stored. At the end of this function, after
        // any_device, bus_device and pci_device are released, the actual
        // device will be dropped.
3991 Ok(()) 3992 } 3993 3994 fn hotplug_virtio_pci_device( 3995 &mut self, 3996 handle: MetaVirtioDevice, 3997 ) -> DeviceManagerResult<PciDeviceInfo> { 3998 // Add the virtio device to the device manager list. This is important 3999 // as the list is used to notify virtio devices about memory updates 4000 // for instance. 4001 self.virtio_devices.push(handle.clone()); 4002 4003 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 4004 self.iommu_mapping.clone() 4005 } else { 4006 None 4007 }; 4008 4009 let bdf = self.add_virtio_pci_device( 4010 handle.virtio_device, 4011 &mapping, 4012 handle.id.clone(), 4013 handle.pci_segment, 4014 handle.dma_handler, 4015 )?; 4016 4017 // Update the PCIU bitmap 4018 self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 4019 4020 Ok(PciDeviceInfo { id: handle.id, bdf }) 4021 } 4022 4023 fn is_iommu_segment(&self, pci_segment_id: u16) -> bool { 4024 self.config 4025 .lock() 4026 .as_ref() 4027 .unwrap() 4028 .platform 4029 .as_ref() 4030 .map(|pc| { 4031 pc.iommu_segments 4032 .as_ref() 4033 .map(|v| v.contains(&pci_segment_id)) 4034 .unwrap_or_default() 4035 }) 4036 .unwrap_or_default() 4037 } 4038 4039 pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> { 4040 self.validate_identifier(&disk_cfg.id)?; 4041 4042 if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) { 4043 return Err(DeviceManagerError::InvalidIommuHotplug); 4044 } 4045 4046 let device = self.make_virtio_block_device(disk_cfg)?; 4047 self.hotplug_virtio_pci_device(device) 4048 } 4049 4050 pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> { 4051 self.validate_identifier(&fs_cfg.id)?; 4052 4053 let device = self.make_virtio_fs_device(fs_cfg)?; 4054 self.hotplug_virtio_pci_device(device) 4055 } 4056 4057 pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> { 4058 self.validate_identifier(&pmem_cfg.id)?; 
4059 4060 if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) { 4061 return Err(DeviceManagerError::InvalidIommuHotplug); 4062 } 4063 4064 let device = self.make_virtio_pmem_device(pmem_cfg)?; 4065 self.hotplug_virtio_pci_device(device) 4066 } 4067 4068 pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> { 4069 self.validate_identifier(&net_cfg.id)?; 4070 4071 if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) { 4072 return Err(DeviceManagerError::InvalidIommuHotplug); 4073 } 4074 4075 let device = self.make_virtio_net_device(net_cfg)?; 4076 self.hotplug_virtio_pci_device(device) 4077 } 4078 4079 pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> { 4080 self.validate_identifier(&vdpa_cfg.id)?; 4081 4082 if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) { 4083 return Err(DeviceManagerError::InvalidIommuHotplug); 4084 } 4085 4086 let device = self.make_vdpa_device(vdpa_cfg)?; 4087 self.hotplug_virtio_pci_device(device) 4088 } 4089 4090 pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> { 4091 self.validate_identifier(&vsock_cfg.id)?; 4092 4093 if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) { 4094 return Err(DeviceManagerError::InvalidIommuHotplug); 4095 } 4096 4097 let device = self.make_virtio_vsock_device(vsock_cfg)?; 4098 self.hotplug_virtio_pci_device(device) 4099 } 4100 4101 pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> { 4102 let mut counters = HashMap::new(); 4103 4104 for handle in &self.virtio_devices { 4105 let virtio_device = handle.virtio_device.lock().unwrap(); 4106 if let Some(device_counters) = virtio_device.counters() { 4107 counters.insert(handle.id.clone(), device_counters.clone()); 4108 } 4109 } 4110 4111 counters 4112 } 4113 4114 pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> { 4115 if let Some(balloon) = 
&self.balloon { 4116 return balloon 4117 .lock() 4118 .unwrap() 4119 .resize(size) 4120 .map_err(DeviceManagerError::VirtioBalloonResize); 4121 } 4122 4123 warn!("No balloon setup: Can't resize the balloon"); 4124 Err(DeviceManagerError::MissingVirtioBalloon) 4125 } 4126 4127 pub fn balloon_size(&self) -> u64 { 4128 if let Some(balloon) = &self.balloon { 4129 return balloon.lock().unwrap().get_actual(); 4130 } 4131 4132 0 4133 } 4134 4135 pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> { 4136 self.device_tree.clone() 4137 } 4138 4139 #[cfg(target_arch = "x86_64")] 4140 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 4141 self.ged_notification_device 4142 .as_ref() 4143 .unwrap() 4144 .lock() 4145 .unwrap() 4146 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4147 .map_err(DeviceManagerError::PowerButtonNotification) 4148 } 4149 4150 #[cfg(target_arch = "aarch64")] 4151 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 4152 // There are two use cases: 4153 // 1. Users will use direct kernel boot with device tree. 4154 // 2. Users will use ACPI+UEFI boot. 4155 4156 // Trigger a GPIO pin 3 event to satisify use case 1. 4157 self.gpio_device 4158 .as_ref() 4159 .unwrap() 4160 .lock() 4161 .unwrap() 4162 .trigger_key(3) 4163 .map_err(DeviceManagerError::AArch64PowerButtonNotification)?; 4164 // Trigger a GED power button event to satisify use case 2. 
4165 return self 4166 .ged_notification_device 4167 .as_ref() 4168 .unwrap() 4169 .lock() 4170 .unwrap() 4171 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4172 .map_err(DeviceManagerError::PowerButtonNotification); 4173 } 4174 4175 pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> { 4176 &self.iommu_attached_devices 4177 } 4178 4179 fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> { 4180 if let Some(id) = id { 4181 if id.starts_with("__") { 4182 return Err(DeviceManagerError::InvalidIdentifier(id.clone())); 4183 } 4184 4185 if self.device_tree.lock().unwrap().contains_key(id) { 4186 return Err(DeviceManagerError::IdentifierNotUnique(id.clone())); 4187 } 4188 } 4189 4190 Ok(()) 4191 } 4192 4193 pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses { 4194 &self.acpi_platform_addresses 4195 } 4196 } 4197 4198 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> { 4199 for (numa_node_id, numa_node) in numa_nodes.iter() { 4200 if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) { 4201 return Some(*numa_node_id); 4202 } 4203 } 4204 4205 None 4206 } 4207 4208 struct TpmDevice {} 4209 4210 impl Aml for TpmDevice { 4211 fn to_aml_bytes(&self) -> Vec<u8> { 4212 aml::Device::new( 4213 "TPM2".into(), 4214 vec![ 4215 &aml::Name::new("_HID".into(), &"MSFT0101"), 4216 &aml::Name::new("_STA".into(), &(0xF_usize)), 4217 &aml::Name::new( 4218 "_CRS".into(), 4219 &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new( 4220 true, 4221 layout::TPM_START.0 as u32, 4222 layout::TPM_SIZE as u32, 4223 )]), 4224 ), 4225 ], 4226 ) 4227 .to_aml_bytes() 4228 } 4229 } 4230 4231 impl Aml for DeviceManager { 4232 fn append_aml_bytes(&self, bytes: &mut Vec<u8>) { 4233 #[cfg(target_arch = "aarch64")] 4234 use arch::aarch64::DeviceInfoForFdt; 4235 4236 let mut pci_scan_methods = Vec::new(); 4237 for i in 0..self.pci_segments.len() { 4238 
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PCI{i:X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCachable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    self.acpi_address.0 as usize,
                    DEVICE_MANAGER_ACPI_SIZE,
                ),
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write PCI bus number (in first argument) to I/O port via field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .append_aml_bytes(bytes);

        for segment in &self.pci_segments {
            segment.append_aml_bytes(bytes);
        }

        // Motherboard resources: reserve each segment's MMIO config window.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .append_aml_bytes(bytes);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EisaName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::Io::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .append_aml_bytes(bytes);
        }

        // S5 (soft-off) sleep state object.
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).append_aml_bytes(bytes);

        // ACPI power button device.
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .append_aml_bytes(bytes);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            let tpm_acpi = TpmDevice {};
            let tpm_dsdt_data = tpm_acpi.to_aml_bytes();
            bytes.extend_from_slice(tpm_dsdt_data.as_slice());
        }

        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .append_aml_bytes(bytes);
    }
}

impl Pausable for DeviceManager {
    // Pauses every migratable device registered in the device tree.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC
pending tables 4417 // and ITS tables to guest RAM. 4418 #[cfg(target_arch = "aarch64")] 4419 { 4420 self.get_interrupt_controller() 4421 .unwrap() 4422 .lock() 4423 .unwrap() 4424 .pause()?; 4425 }; 4426 4427 Ok(()) 4428 } 4429 4430 fn resume(&mut self) -> result::Result<(), MigratableError> { 4431 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4432 if let Some(migratable) = &device_node.migratable { 4433 migratable.lock().unwrap().resume()?; 4434 } 4435 } 4436 4437 Ok(()) 4438 } 4439 } 4440 4441 impl Snapshottable for DeviceManager { 4442 fn id(&self) -> String { 4443 DEVICE_MANAGER_SNAPSHOT_ID.to_string() 4444 } 4445 4446 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 4447 let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?); 4448 4449 // We aggregate all devices snapshots. 4450 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4451 if let Some(migratable) = &device_node.migratable { 4452 let mut migratable = migratable.lock().unwrap(); 4453 snapshot.add_snapshot(migratable.id(), migratable.snapshot()?); 4454 } 4455 } 4456 4457 Ok(snapshot) 4458 } 4459 } 4460 4461 impl Transportable for DeviceManager {} 4462 4463 impl Migratable for DeviceManager { 4464 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 4465 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4466 if let Some(migratable) = &device_node.migratable { 4467 migratable.lock().unwrap().start_dirty_log()?; 4468 } 4469 } 4470 Ok(()) 4471 } 4472 4473 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 4474 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4475 if let Some(migratable) = &device_node.migratable { 4476 migratable.lock().unwrap().stop_dirty_log()?; 4477 } 4478 } 4479 Ok(()) 4480 } 4481 4482 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 4483 let mut tables = Vec::new(); 4484 for (_, 
device_node) in self.device_tree.lock().unwrap().iter() { 4485 if let Some(migratable) = &device_node.migratable { 4486 tables.push(migratable.lock().unwrap().dirty_log()?); 4487 } 4488 } 4489 Ok(MemoryRangeTable::new_from_tables(tables)) 4490 } 4491 4492 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 4493 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4494 if let Some(migratable) = &device_node.migratable { 4495 migratable.lock().unwrap().start_migration()?; 4496 } 4497 } 4498 Ok(()) 4499 } 4500 4501 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 4502 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4503 if let Some(migratable) = &device_node.migratable { 4504 migratable.lock().unwrap().complete_migration()?; 4505 } 4506 } 4507 Ok(()) 4508 } 4509 } 4510 4511 const PCIU_FIELD_OFFSET: u64 = 0; 4512 const PCID_FIELD_OFFSET: u64 = 4; 4513 const B0EJ_FIELD_OFFSET: u64 = 8; 4514 const PSEG_FIELD_OFFSET: u64 = 12; 4515 const PCIU_FIELD_SIZE: usize = 4; 4516 const PCID_FIELD_SIZE: usize = 4; 4517 const B0EJ_FIELD_SIZE: usize = 4; 4518 const PSEG_FIELD_SIZE: usize = 4; 4519 4520 impl BusDevice for DeviceManager { 4521 fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { 4522 match offset { 4523 PCIU_FIELD_OFFSET => { 4524 assert!(data.len() == PCIU_FIELD_SIZE); 4525 data.copy_from_slice( 4526 &self.pci_segments[self.selected_segment] 4527 .pci_devices_up 4528 .to_le_bytes(), 4529 ); 4530 // Clear the PCIU bitmap 4531 self.pci_segments[self.selected_segment].pci_devices_up = 0; 4532 } 4533 PCID_FIELD_OFFSET => { 4534 assert!(data.len() == PCID_FIELD_SIZE); 4535 data.copy_from_slice( 4536 &self.pci_segments[self.selected_segment] 4537 .pci_devices_down 4538 .to_le_bytes(), 4539 ); 4540 // Clear the PCID bitmap 4541 self.pci_segments[self.selected_segment].pci_devices_down = 0; 4542 } 4543 B0EJ_FIELD_OFFSET => { 4544 assert!(data.len() == B0EJ_FIELD_SIZE); 4545 // Always 
return an empty bitmap since the eject is always 4546 // taken care of right away during a write access. 4547 data.fill(0); 4548 } 4549 PSEG_FIELD_OFFSET => { 4550 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4551 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes()); 4552 } 4553 _ => error!( 4554 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4555 base, offset 4556 ), 4557 } 4558 4559 debug!( 4560 "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}", 4561 base, offset, data 4562 ) 4563 } 4564 4565 fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> { 4566 match offset { 4567 B0EJ_FIELD_OFFSET => { 4568 assert!(data.len() == B0EJ_FIELD_SIZE); 4569 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4570 data_array.copy_from_slice(data); 4571 let mut slot_bitmap = u32::from_le_bytes(data_array); 4572 4573 while slot_bitmap > 0 { 4574 let slot_id = slot_bitmap.trailing_zeros(); 4575 if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) { 4576 error!("Failed ejecting device {}: {:?}", slot_id, e); 4577 } 4578 slot_bitmap &= !(1 << slot_id); 4579 } 4580 } 4581 PSEG_FIELD_OFFSET => { 4582 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4583 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4584 data_array.copy_from_slice(data); 4585 let selected_segment = u32::from_le_bytes(data_array) as usize; 4586 if selected_segment >= self.pci_segments.len() { 4587 error!( 4588 "Segment selection out of range: {} >= {}", 4589 selected_segment, 4590 self.pci_segments.len() 4591 ); 4592 return None; 4593 } 4594 self.selected_segment = selected_segment; 4595 } 4596 _ => error!( 4597 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4598 base, offset 4599 ), 4600 } 4601 4602 debug!( 4603 "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}", 4604 base, offset, data 4605 ); 4606 4607 None 4608 } 4609 } 4610 4611 impl Drop for DeviceManager { 4612 fn drop(&mut self) { 4613 for handle in 
self.virtio_devices.drain(..) { 4614 handle.virtio_device.lock().unwrap().shutdown(); 4615 } 4616 } 4617 } 4618