1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use crate::config::{ 13 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 14 VdpaConfig, VhostMode, VmConfig, VsockConfig, 15 }; 16 use crate::device_tree::{DeviceNode, DeviceTree}; 17 use crate::interrupt::LegacyUserspaceInterruptManager; 18 use crate::interrupt::MsiInterruptManager; 19 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 20 use crate::pci_segment::PciSegment; 21 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 22 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 23 use crate::sigwinch_listener::start_sigwinch_listener; 24 #[cfg(target_arch = "aarch64")] 25 use crate::GuestMemoryMmap; 26 use crate::GuestRegionMmap; 27 use crate::PciDeviceInfo; 28 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; 29 use acpi_tables::sdt::GenericAddress; 30 use acpi_tables::{aml, aml::Aml}; 31 use anyhow::anyhow; 32 use arch::layout; 33 #[cfg(target_arch = "x86_64")] 34 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 35 use arch::NumaNodes; 36 #[cfg(target_arch = "aarch64")] 37 use arch::{DeviceType, MmioDeviceInfo}; 38 use block_util::{ 39 async_io::DiskFile, block_io_uring_is_supported, detect_image_type, 40 fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync, 41 raw_async::RawFileDisk, raw_sync::RawFileDiskSync, vhdx_sync::VhdxDiskSync, ImageType, 42 }; 43 #[cfg(target_arch = "aarch64")] 44 use devices::gic; 45 #[cfg(target_arch = "x86_64")] 46 use devices::ioapic; 47 #[cfg(target_arch = 
"aarch64")] 48 use devices::legacy::Pl011; 49 #[cfg(target_arch = "x86_64")] 50 use devices::legacy::Serial; 51 use devices::{ 52 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 53 }; 54 use hypervisor::{HypervisorType, HypervisorVmError, IoEventAddress}; 55 use libc::{ 56 cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, 57 O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, 58 }; 59 #[cfg(target_arch = "x86_64")] 60 use pci::PciConfigIo; 61 use pci::{ 62 DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping, 63 VfioUserPciDevice, VfioUserPciDeviceError, 64 }; 65 use seccompiler::SeccompAction; 66 use serde::{Deserialize, Serialize}; 67 use std::collections::{BTreeSet, HashMap}; 68 use std::convert::TryInto; 69 use std::fs::{read_link, File, OpenOptions}; 70 use std::io::{self, stdout, Seek, SeekFrom}; 71 use std::mem::zeroed; 72 use std::num::Wrapping; 73 use std::os::unix::fs::OpenOptionsExt; 74 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 75 use std::path::PathBuf; 76 use std::result; 77 use std::sync::{Arc, Mutex}; 78 use std::time::Instant; 79 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 80 use virtio_devices::transport::VirtioTransport; 81 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 82 use virtio_devices::vhost_user::VhostUserConfig; 83 use virtio_devices::{ 84 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 85 }; 86 use virtio_devices::{Endpoint, IommuMapping}; 87 use vm_allocator::{AddressAllocator, SystemAllocator}; 88 use vm_device::dma_mapping::vfio::VfioDmaMapping; 89 use vm_device::dma_mapping::ExternalDmaMapping; 90 use vm_device::interrupt::{ 91 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 92 }; 93 use vm_device::{Bus, BusDevice, Resource}; 94 use vm_memory::guest_memory::FileOffset; 95 #[cfg(target_arch = "aarch64")] 96 use 
vm_memory::GuestMemoryAtomic;
use vm_memory::GuestMemoryRegion;
use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
#[cfg(target_arch = "x86_64")]
use vm_memory::{GuestAddressSpace, GuestMemory};
use vm_migration::{
    protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
    SnapshotDataSection, Snapshottable, Transportable,
};
use vm_virtio::AccessPlatform;
use vm_virtio::VirtioDeviceType;
use vmm_sys_util::eventfd::EventFd;

// Fixed length used for each MMIO device region on AArch64.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";

// Devices that the user may name and for which we generate
// identifiers if the user doesn't give one
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";

/// Errors associated with device manager
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(vmm_sys_util::errno::Error),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot generate a ResizeSender from the Resize object.
    CreateResizeSender(virtio_devices::mem::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,

    /// Failed to create UEFI flash
    CreateUefiFlash(HypervisorVmError),

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier
    InvalidIdentifier(String),

    /// Error activating virtio device
    VirtioActivate(ActivateError),
}

/// Shorthand result type for all DeviceManager operations.
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// Size (in bytes) of the MMIO window through which the DeviceManager
// itself is exposed on the MMIO bus (registered in `DeviceManager::new`).
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

// Linux pty ioctls used by create_pty() below:
// TIOCSPTLCK unlocks the pty sub (secondary) side; TIOCGTPEER retrieves a
// file descriptor for the pty peer without going through the filesystem.
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
const TIOCGTPEER: libc::c_int = 0x5441;

/// Create a new pseudo-terminal pair.
///
/// Returns the main (controlling) `File`, the sub (peer) `File` and the
/// filesystem path of the sub end (resolved through /proc/self/fd).
/// When `non_blocking` is true the main end is opened with O_NONBLOCK.
pub fn create_pty(non_blocking: bool) -> io::Result<(File, File, PathBuf)> {
    // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
    // This is done to try and use the devpts filesystem that
    // could be available for use in the process's namespace first.
    // Ideally these are all the same file though but different
    // kernels could have things setup differently.
    // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
    // for further details.
let custom_flags = libc::O_NOCTTY | if non_blocking { libc::O_NONBLOCK } else { 0 };
    let main = match OpenOptions::new()
        .read(true)
        .write(true)
        .custom_flags(custom_flags)
        .open("/dev/pts/ptmx")
    {
        Ok(f) => f,
        // Fall back to /dev/ptmx if the devpts mount is not usable.
        _ => OpenOptions::new()
            .read(true)
            .write(true)
            .custom_flags(custom_flags)
            .open("/dev/ptmx")?,
    };
    // Unlock the sub side (TIOCSPTLCK with a zero argument).
    let mut unlock: libc::c_ulong = 0;
    // SAFETY: FFI call into libc, trivially safe
    unsafe {
        libc::ioctl(
            main.as_raw_fd(),
            TIOCSPTLCK.try_into().unwrap(),
            &mut unlock,
        )
    };

    // SAFETY: FFI call into libc, trivially safe
    let sub_fd = unsafe {
        libc::ioctl(
            main.as_raw_fd(),
            TIOCGTPEER.try_into().unwrap(),
            libc::O_NOCTTY | libc::O_RDWR,
        )
    };
    if sub_fd == -1 {
        return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
    }

    // Resolve the sub end's path via its /proc/self/fd symlink.
    let proc_path = PathBuf::from(format!("/proc/self/fd/{}", sub_fd));
    let path = read_link(proc_path)?;

    // SAFETY: sub_fd is checked to be valid before being wrapped in File
    Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
}

/// Console abstraction: holds an optional handle to the virtio console
/// resizer so the VMM can propagate terminal size changes to the guest.
#[derive(Default)]
pub struct Console {
    console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
}

impl Console {
    /// Forward a console size update to the resizer, if one is present.
    pub fn update_console_size(&self) {
        if let Some(resizer) = self.console_resizer.as_ref() {
            resizer.update_console_size()
        }
    }
}

/// Owns the address-space pieces devices need: the system allocator, the
/// PIO (x86_64 only) and MMIO buses, the VM handle, the device tree and
/// the per-PCI-segment MMIO allocators.
pub(crate) struct AddressManager {
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    #[cfg(target_arch = "x86_64")]
    pub(crate) io_bus: Arc<Bus>,
    pub(crate) mmio_bus: Arc<Bus>,
    vm: Arc<dyn hypervisor::Vm>,
    device_tree: Arc<Mutex<DeviceTree>>,
    // One MMIO allocator per PCI segment.
    pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}

impl DeviceRelocation for AddressManager {
    /// Relocate a PCI BAR from `old_base` to `new_base`: free/re-allocate the
    /// address range, update the corresponding bus, patch the device tree
    /// resource, fix up virtio ioeventfds or shared-memory mappings, then
    /// tell the device itself to move the BAR.
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                // Update system allocator
                if region_type == PciBarRegionType::Memory32BitRegion {
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_mmio_hole_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            Some(len),
                        )
                        .ok_or_else(|| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                "failed allocating new 32 bits MMIO range",
                            )
                        })?;
                } else {
                    // Find the specific allocator that this BAR was allocated from and use it for new one
                    for allocator in &self.pci_mmio_allocators {
                        let allocator_base = allocator.lock().unwrap().base();
                        let allocator_end = allocator.lock().unwrap().end();

                        if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                            allocator
                                .lock()
                                .unwrap()
                                .free(GuestAddress(old_base), len as GuestUsize);

                            allocator
                                .lock()
                                .unwrap()
                                .allocate(
                                    Some(GuestAddress(new_base)),
                                    len as GuestUsize,
                                    Some(len),
                                )
                                .ok_or_else(|| {
                                    io::Error::new(
                                        io::ErrorKind::Other,
                                        "failed allocating new 64 bits MMIO range",
                                    )
                                })?;

                            break;
                        }
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{:x} for device {}",
                            old_base, id
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {} from device tree", id),
                ));
            }
        }

        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The moved BAR is the config BAR: re-register the ioeventfds
                // at their new guest addresses.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {:?}", e),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {:?}", e),
                            )
                        })?;
                }
            } else {
                // Otherwise the BAR may back the device's shared memory
                // regions; remap the user memory region at the new base.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {:?}", e),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {:?}", e),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {:?}", e),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the device update its own BAR bookkeeping.
        pci_dev.move_bar(old_base, new_base)
    }
}

/// Serializable subset of DeviceManager state (device tree and the device
/// ID counter) — used for snapshot/restore.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

/// A pty's two ends plus the filesystem path of the sub end.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub sub: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    // NOTE(review): try_clone failures are treated as fatal (unwrap) —
    // cloning only duplicates the fds, not the underlying pty.
    fn clone(&self) -> Self {
        PtyPair {
            main: self.main.try_clone().unwrap(),
            sub: self.sub.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

/// Handle to any of the PCI device flavors the DeviceManager can host.
#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}

/// A virtio device plus the metadata needed to place it on a PCI segment.
#[derive(Clone)]
struct MetaVirtioDevice {
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // Whether the device sits behind the virtual IOMMU.
    iommu: bool,
    id: String,
    pci_segment: u16,
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}

/// Register addresses of the ACPI platform devices, used when building
/// the ACPI tables.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

pub struct DeviceManager {
    // The underlying hypervisor
    hypervisor_type: HypervisorType,

    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // console PTY
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // serial PTY
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // Pipe used to signal console resize events (pty foreground status)
console_resize_pipe: Option<Arc<File>>,

    // Interrupt controller
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    exit_evt: EventFd,
    reset_evt: EventFd,

    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    // MMIO address at which the DeviceManager's own ACPI device is exposed.
    acpi_address: GuestAddress,

    // PCI segment currently selected for hotplug operations.
    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    #[cfg(target_arch = "aarch64")]
    // Flash device for UEFI on AArch64
    uefi_flash: Option<GuestMemoryAtomic<GuestMemoryMmap>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // Helps identify if the VM is currently being restored
    restoring: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,
}

impl DeviceManager {
    /// Build the DeviceManager: carve the device area into per-PCI-segment
    /// MMIO allocators, create the address manager, MSI interrupt manager and
    /// PCI segments, then register the manager itself on the MMIO bus at
    /// `acpi_address`.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        hypervisor_type: HypervisorType,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        exit_evt: &EventFd,
        reset_evt: &EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        restoring: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        let device_tree = Arc::new(Mutex::new(DeviceTree::new()));

        // Default to a single PCI segment unless the platform config says otherwise.
        let num_pci_segments =
            if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
                platform_config.num_pci_segments
            } else {
                1
            };

        let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
        let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;

        // Start each PCI segment range on a 4GiB boundary
        // NOTE(review): integer division — if the device area is smaller than
        // 4GiB * num_pci_segments this evaluates to 0 and the AddressAllocator
        // construction below would fail; presumably the memory manager
        // guarantees a large enough area — confirm.
        let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
            / ((4 << 30) * num_pci_segments as u64)
            * (4 << 30);

        let mut pci_mmio_allocators = vec![];
        for i in 0..num_pci_segments as u64 {
            let mmio_start = start_of_device_area + i * pci_segment_size;
            let allocator = Arc::new(Mutex::new(
                AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
            ));
            pci_mmio_allocators.push(allocator)
        }

        let address_manager = Arc::new(AddressManager {
            allocator:
memory_manager.lock().unwrap().allocator(),
            #[cfg(target_arch = "x86_64")]
            io_bus: Arc::new(Bus::new()),
            mmio_bus: Arc::new(Bus::new()),
            vm: vm.clone(),
            device_tree: Arc::clone(&device_tree),
            pci_mmio_allocators,
        });

        // First we create the MSI interrupt manager, the legacy one is created
        // later, after the IOAPIC device creation.
        // The reason we create the MSI one first is because the IOAPIC needs it,
        // and then the legacy interrupt manager needs an IOAPIC. So we're
        // handling a linear dependency chain:
        // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
        let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
            Arc::new(MsiInterruptManager::new(
                Arc::clone(&address_manager.allocator),
                vm,
            ));

        // Reserve the MMIO window through which this DeviceManager is exposed.
        // NOTE(review): the error variant is AllocateIoPort although this
        // allocates platform MMIO — AllocateMmioAddress would read better.
        let acpi_address = address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        // 32 legacy IRQ slots, one per PCI device slot; filled in by the
        // PciSegment helper below.
        let mut pci_irq_slots = [0; 32];
        PciSegment::reserve_legacy_interrupts_for_pci_devices(
            &address_manager,
            &mut pci_irq_slots,
        )?;

        // Segment 0 is the default segment; further segments are numbered 1..n.
        let mut pci_segments = vec![PciSegment::new_default_segment(
            &address_manager,
            Arc::clone(&address_manager.pci_mmio_allocators[0]),
            &pci_irq_slots,
        )?];

        for i in 1..num_pci_segments as usize {
            pci_segments.push(PciSegment::new(
                i as u16,
                &address_manager,
                Arc::clone(&address_manager.pci_mmio_allocators[i]),
                &pci_irq_slots,
            )?);
        }

        let device_manager = DeviceManager {
            hypervisor_type,
            address_manager: Arc::clone(&address_manager),
            console: Arc::new(Console::default()),
            interrupt_controller: None,
            #[cfg(target_arch = "aarch64")]
            cmdline_additions: Vec::new(),
            ged_notification_device: None,
            config,
            memory_manager,
            virtio_devices: Vec::new(),
            bus_devices: Vec::new(),
            device_id_cnt: Wrapping(0),
            msi_interrupt_manager,
            legacy_interrupt_manager: None,
            passthrough_device: None,
            vfio_container: None,
            iommu_device: None,
            iommu_mapping: None,
            iommu_attached_devices: None,
            pci_segments,
            device_tree,
            exit_evt: exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
            reset_evt: reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
            #[cfg(target_arch = "aarch64")]
            id_to_dev_info: HashMap::new(),
            seccomp_action,
            numa_nodes,
            balloon: None,
            activate_evt: activate_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            acpi_address,
            selected_segment: 0,
            serial_pty: None,
            serial_manager: None,
            console_pty: None,
            console_resize_pipe: None,
            virtio_mem_devices: Vec::new(),
            #[cfg(target_arch = "aarch64")]
            gpio_device: None,
            #[cfg(target_arch = "aarch64")]
            uefi_flash: None,
            force_iommu,
            restoring,
            io_uring_supported: None,
            boot_id_list,
            timestamp,
            pending_activations: Arc::new(Mutex::new(Vec::default())),
            acpi_platform_addresses: AcpiPlatformAddresses::default(),
        };

        let device_manager = Arc::new(Mutex::new(device_manager));

        // Expose the DeviceManager itself on the MMIO bus so the guest can
        // reach its ACPI device at acpi_address.
        address_manager
            .mmio_bus
            .insert(
                Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
                acpi_address.0,
                DEVICE_MANAGER_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(device_manager)
    }

    /// Clone of the serial PTY pair, if one has been created.
    pub fn serial_pty(&self) -> Option<PtyPair> {
        self.serial_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    /// Clone of the console PTY pair, if one has been created.
    pub fn console_pty(&self) -> Option<PtyPair> {
        self.console_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    /// Handle to the console resize pipe, if one has been created.
    pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
        self.console_resize_pipe.as_ref().map(Arc::clone)
    }
1134 1135 pub fn create_devices( 1136 &mut self, 1137 serial_pty: Option<PtyPair>, 1138 console_pty: Option<PtyPair>, 1139 console_resize_pipe: Option<File>, 1140 ) -> DeviceManagerResult<()> { 1141 let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new(); 1142 1143 let interrupt_controller = self.add_interrupt_controller()?; 1144 1145 // Now we can create the legacy interrupt manager, which needs the freshly 1146 // formed IOAPIC device. 1147 let legacy_interrupt_manager: Arc< 1148 dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>, 1149 > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone( 1150 &interrupt_controller, 1151 ))); 1152 1153 { 1154 if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() { 1155 self.address_manager 1156 .mmio_bus 1157 .insert( 1158 Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>, 1159 acpi_address.0, 1160 MEMORY_MANAGER_ACPI_SIZE as u64, 1161 ) 1162 .map_err(DeviceManagerError::BusError)?; 1163 } 1164 } 1165 1166 #[cfg(target_arch = "x86_64")] 1167 self.add_legacy_devices( 1168 self.reset_evt 1169 .try_clone() 1170 .map_err(DeviceManagerError::EventFd)?, 1171 )?; 1172 1173 #[cfg(target_arch = "aarch64")] 1174 self.add_legacy_devices(&legacy_interrupt_manager)?; 1175 1176 { 1177 self.ged_notification_device = self.add_acpi_devices( 1178 &legacy_interrupt_manager, 1179 self.reset_evt 1180 .try_clone() 1181 .map_err(DeviceManagerError::EventFd)?, 1182 self.exit_evt 1183 .try_clone() 1184 .map_err(DeviceManagerError::EventFd)?, 1185 )?; 1186 } 1187 1188 self.console = self.add_console_device( 1189 &legacy_interrupt_manager, 1190 &mut virtio_devices, 1191 serial_pty, 1192 console_pty, 1193 console_resize_pipe, 1194 )?; 1195 1196 self.legacy_interrupt_manager = Some(legacy_interrupt_manager); 1197 1198 virtio_devices.append(&mut self.make_virtio_devices()?); 1199 1200 self.add_pci_devices(virtio_devices.clone())?; 1201 1202 self.virtio_devices = virtio_devices; 1203 1204 Ok(()) 1205 } 1206 
    /// Captures the snapshot state: the device tree plus the device-id
    /// counter used to generate unique device names.
    fn state(&self) -> DeviceManagerState {
        DeviceManagerState {
            device_tree: self.device_tree.lock().unwrap().clone(),
            device_id_cnt: self.device_id_cnt,
        }
    }

    /// Restores the state captured by `state()`.
    fn set_state(&mut self, state: &DeviceManagerState) {
        *self.device_tree.lock().unwrap() = state.device_tree.clone();
        self.device_id_cnt = state.device_id_cnt;
    }

    /// Returns the inclusive (start, end) guest-physical range usable for
    /// MSI IOVAs. On x86_64 this is the fixed APIC MSI doorbell window; on
    /// AArch64 it is the GIC ITS window, computed backwards from the GICv3
    /// distributor base past the per-vCPU redistributors.
    fn get_msi_iova_space(&mut self) -> (u64, u64) {
        #[cfg(target_arch = "aarch64")]
        {
            let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
            let msi_start = arch::layout::GIC_V3_DIST_START.raw_value()
                - arch::layout::GIC_V3_REDIST_SIZE * (vcpus as u64)
                - arch::layout::GIC_V3_ITS_SIZE;
            let msi_end = msi_start + arch::layout::GIC_V3_ITS_SIZE - 1;
            (msi_start, msi_end)
        }
        #[cfg(target_arch = "x86_64")]
        (0xfee0_0000, 0xfeef_ffff)
    }

    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }

    /// Plugs every device onto the PCI bus(es): the supplied virtio devices,
    /// then VFIO and vfio-user passthrough devices, and — last, so it can
    /// cover all of them — the virtio-iommu device itself on segment 0.
    /// Also records which BDFs must appear as iommu-attached in the topology.
    #[allow(unused_variables)]
    fn add_pci_devices(
        &mut self,
        virtio_devices: Vec<MetaVirtioDevice>,
    ) -> DeviceManagerResult<()> {
        let iommu_id = String::from(IOMMU_DEVICE_NAME);

        // Only create a virtio-iommu device when requested by the config.
        let iommu_device = if self.config.lock().unwrap().iommu {
            let (device, mapping) = virtio_devices::Iommu::new(
                iommu_id.clone(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.get_msi_iova_space(),
            )
            .map_err(DeviceManagerError::CreateVirtioIommu)?;
            let device = Arc::new(Mutex::new(device));
            self.iommu_device = Some(Arc::clone(&device));
            self.iommu_mapping = Some(mapping);

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(iommu_id.clone(), device_node!(iommu_id, device));

            Some(device)
        } else {
            None
        };

        let mut iommu_attached_devices = Vec::new();
        {
            for handle in virtio_devices {
                // Devices flagged for iommu get the virtio-iommu mapping so
                // their DMA goes through translation.
                let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
                    self.iommu_mapping.clone()
                } else {
                    None
                };

                let dev_id = self.add_virtio_pci_device(
                    handle.virtio_device,
                    &mapping,
                    handle.id,
                    handle.pci_segment,
                    handle.dma_handler,
                )?;

                if handle.iommu {
                    iommu_attached_devices.push(dev_id);
                }
            }

            let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
            iommu_attached_devices.append(&mut vfio_iommu_device_ids);

            let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
            iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);

            // Add all devices from forced iommu segments: every possible
            // device slot (0..32) on those segments is marked attached.
            if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
                if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
                    for segment in iommu_segments {
                        for device in 0..32 {
                            let bdf = PciBdf::new(*segment, 0, device, 0);
                            if !iommu_attached_devices.contains(&bdf) {
                                iommu_attached_devices.push(bdf);
                            }
                        }
                    }
                }
            }

            // The iommu device is plugged last (on segment 0, never behind
            // itself) once the full attachment list is known.
            if let Some(iommu_device) = iommu_device {
                let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
                self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
            }
        }

        for segment in &self.pci_segments {
            // Track each segment's config-space accessors as bus devices.
            #[cfg(target_arch = "x86_64")]
            if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
                self.bus_devices
                    .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
            }

            self.bus_devices
                .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
        }

        Ok(())
    }
    #[cfg(target_arch = "aarch64")]
    /// Creates the AArch64 interrupt controller: a `Gic` object sized for the
    /// boot vCPU count, wired to the MSI interrupt manager.
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
            gic::Gic::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        // Unlike x86_64, the "interrupt_controller" here for AArch64 is only
        // a `Gic` object that implements the `InterruptController` to provide
        // interrupt delivery service. This is not the real GIC device so that
        // we do not need to insert it to the device tree.

        Ok(interrupt_controller)
    }

    #[cfg(target_arch = "aarch64")]
    /// Returns the GIC wrapper, if `add_interrupt_controller` has run.
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }

    #[cfg(target_arch = "x86_64")]
    /// Creates the userspace IOAPIC, registers it on the MMIO bus at
    /// IOAPIC_START and records it in the device tree.
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let id = String::from(IOAPIC_DEVICE_NAME);

        // Create IOAPIC
        let interrupt_controller = Arc::new(Mutex::new(
            ioapic::Ioapic::new(
                id.clone(),
                APIC_START,
                Arc::clone(&self.msi_interrupt_manager),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        self.address_manager
            .mmio_bus
            .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
            .map_err(DeviceManagerError::BusError)?;

        self.bus_devices
            .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// Creates the ACPI platform devices: a shutdown/reset device (x86_64:
    /// I/O ports at 0x3c0, shared by the sleep control/status and reset
    /// registers recorded in `acpi_platform_addresses`), the GED notification
    /// device on its own MMIO range and IRQ, and the ACPI PM timer (x86_64:
    /// port 0xb008). Returns the GED device so callers can trigger
    /// notifications.
    fn add_acpi_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        reset_evt: EventFd,
        exit_evt: EventFd,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
        let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
            exit_evt, reset_evt,
        )));

        self.bus_devices
            .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(0x3c0)), 0x8, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(shutdown_device, 0x3c0, 0x4)
                .map_err(DeviceManagerError::BusError)?;
            // Sleep control/status and reset registers all live on the same
            // port block.
            self.acpi_platform_addresses.sleep_control_reg_address =
                Some(GenericAddress::io_port_address::<u8>(0x3c0));
            self.acpi_platform_addresses.sleep_status_reg_address =
                Some(GenericAddress::io_port_address::<u8>(0x3c0));
            self.acpi_platform_addresses.reset_reg_address =
                Some(GenericAddress::io_port_address::<u8>(0x3c0));
        }

        let ged_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();
        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: ged_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;
        let ged_address = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(
                None,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
                None,
            )
            .ok_or(DeviceManagerError::AllocateMmioAddress)?;
        let ged_device =
            Arc::new(Mutex::new(devices::AcpiGedDevice::new(
            interrupt_group,
            ged_irq,
            ged_address,
        )));
        self.address_manager
            .mmio_bus
            .insert(
                ged_device.clone(),
                ged_address.0,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;
        self.bus_devices
            .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);

        let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));

        self.bus_devices
            .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            // ACPI PM timer lives at the conventional port 0xb008.
            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(0xb008)), 0x4, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(pm_timer_device, 0xb008, 0x4)
                .map_err(DeviceManagerError::BusError)?;

            self.acpi_platform_addresses.pm_timer_address =
                Some(GenericAddress::io_port_address::<u32>(0xb008));
        }

        Ok(Some(ged_device))
    }

    #[cfg(target_arch = "x86_64")]
    /// Registers the x86 legacy devices on the port-I/O bus: i8042 reset
    /// device (0x61), CMOS/RTC with memory size registers (0x70), an optional
    /// firmware debug device (0x402, `fwdebug` feature) and the 0x80 debug
    /// port used for boot timing.
    fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
        // Add a shutdown device (i8042)
        let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
            reset_evt.try_clone().unwrap(),
        )));

        self.bus_devices
            .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .io_bus
            .insert(i8042, 0x61, 0x4)
            .map_err(DeviceManagerError::BusError)?;
        {
            // Add a CMOS emulated device
            let mem_size = self
                .memory_manager
                .lock()
                .unwrap()
                .guest_memory()
                .memory()
                .last_addr()
                .0
                + 1;
            // Split guest RAM into below-4G and above-4G amounts as the CMOS
            // registers report them.
            let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
            let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);

            let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
                mem_below_4g,
                mem_above_4g,
                reset_evt,
            )));

            self.bus_devices
                .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(cmos, 0x70, 0x2)
                .map_err(DeviceManagerError::BusError)?;
        }
        #[cfg(feature = "fwdebug")]
        {
            let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));

            self.bus_devices
                .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(fwdebug, 0x402, 0x1)
                .map_err(DeviceManagerError::BusError)?;
        }

        // 0x80 debug port
        let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
        self.bus_devices
            .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
        self.address_manager
            .io_bus
            .insert(debug_port, 0x80, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        Ok(())
    }

    #[cfg(target_arch = "aarch64")]
    /// Registers the AArch64 legacy devices on the MMIO bus: an RTC and a
    /// GPIO controller (each with an allocated IRQ, recorded in
    /// `id_to_dev_info` for FDT generation) plus the UEFI flash region.
    fn add_legacy_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
    ) -> DeviceManagerResult<()> {
        // Add a RTC device
        let rtc_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: rtc_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));

        self.bus_devices
            .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(rtc_device, addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Rtc, "rtc".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: rtc_irq,
            },
        );

        // Add a GPIO device
        let id = String::from(GPIO_DEVICE_NAME);
        let gpio_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: gpio_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
            id.clone(),
            interrupt_group,
        )));

        self.bus_devices
            .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(gpio_device.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.gpio_device = Some(gpio_device.clone());

        self.id_to_dev_info.insert(
            (DeviceType::Gpio, "gpio".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: gpio_irq,
            },
        );

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, gpio_device));

        // On AArch64, the UEFI binary requires a flash device at address 0.
        // 4 MiB memory is mapped to simulate the flash.
        // Back the UEFI flash with an anonymous host mmap and register it as
        // guest memory at UEFI_START.
        let uefi_mem_slot = self.memory_manager.lock().unwrap().allocate_memory_slot();
        let uefi_region = GuestRegionMmap::new(
            MmapRegion::new(arch::layout::UEFI_SIZE as usize).unwrap(),
            arch::layout::UEFI_START,
        )
        .unwrap();
        let uefi_mem_region = self
            .memory_manager
            .lock()
            .unwrap()
            .vm
            .make_user_memory_region(
                uefi_mem_slot,
                uefi_region.start_addr().raw_value(),
                uefi_region.len() as u64,
                uefi_region.as_ptr() as u64,
                false,
                false,
            );
        self.memory_manager
            .lock()
            .unwrap()
            .vm
            .create_user_memory_region(uefi_mem_region)
            .map_err(DeviceManagerError::CreateUefiFlash)?;

        // Keep a handle so the UEFI image can later be written into the
        // flash region.
        let uefi_flash =
            GuestMemoryAtomic::new(GuestMemoryMmap::from_regions(vec![uefi_region]).unwrap());
        self.uefi_flash = Some(uefi_flash);

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    /// Creates the 16550-style serial device at the conventional COM1
    /// location (ports 0x3f8-0x3ff, IRQ 4) and records it in the device tree.
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
        // Serial is tied to IRQ #4
        let serial_irq = 4;

        let id = String::from(SERIAL_DEVICE_NAME);

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(Serial::new(
            id.clone(),
            interrupt_group,
            serial_writer,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(serial.clone(), 0x3f8, 0x8)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    #[cfg(target_arch = "aarch64")]
    /// Creates the PL011 serial device on the MMIO bus with a dynamically
    /// allocated IRQ, records it for FDT generation and appends the matching
    /// `earlycon` kernel command-line fragment.
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
        let id = String::from(SERIAL_DEVICE_NAME);

        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            self.timestamp,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: serial_irq,
            },
        );

        self.cmdline_additions
            .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// Applies `f` to the terminal attributes of `fd` via
    /// tcgetattr/tcsetattr. A no-op (Ok) when `fd` is not a tty.
    fn modify_mode<F: FnOnce(&mut termios)>(
        &self,
        fd: RawFd,
        f: F,
    ) -> vmm_sys_util::errno::Result<()> {
        // SAFETY: safe because we check the return value of isatty.
        if unsafe { isatty(fd) } != 1 {
            return Ok(());
        }

        // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr
        // and we check the return result.
        let mut termios: termios = unsafe { zeroed() };
        let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }
        f(&mut termios);
        // SAFETY: Safe because the syscall will only read the extent of termios and we check
        // the return result.
        let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }

        Ok(())
    }

    /// Puts the terminal backing `f` into raw mode (no-op for non-ttys).
    fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> {
        // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
        self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
    }

    /// Best-effort: spawns the sandboxed SIGWINCH listener process for `pty`
    /// and keeps its pipe in `console_resize_pipe`. Failures are logged and
    /// ignored.
    fn listen_for_sigwinch_on_tty(&mut self, pty: &File) -> std::io::Result<()> {
        let seccomp_filter = get_seccomp_filter(
            &self.seccomp_action,
            Thread::PtyForeground,
            self.hypervisor_type,
        )
        .unwrap();

        match start_sigwinch_listener(seccomp_filter, pty) {
            Ok(pipe) => {
                self.console_resize_pipe = Some(Arc::new(pipe));
            }
            Err(e) => {
                warn!("Ignoring error from setting up SIGWINCH listener: {}", e)
            }
        }

        Ok(())
    }

    /// Creates the virtio-console device, choosing its endpoint from the
    /// configured output mode (file, pty — reusing a restored pair or
    /// creating one —, tty via duplicated stdio fds, null, or off). Returns
    /// the resizer only for the Tty mode.
    fn add_virtio_console_device(
        &mut self,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_pty: Option<PtyPair>,
        resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
        let console_config = self.config.lock().unwrap().console.clone();
        let endpoint = match console_config.mode {
            ConsoleOutputMode::File => {
                let file = File::create(console_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
                Endpoint::File(file)
            }
            ConsoleOutputMode::Pty => {
                if let Some(pty) = console_pty {
                    // Reuse the pty handed over (e.g. across restore).
                    self.config.lock().unwrap().console.file = Some(pty.path.clone());
                    let file = pty.main.try_clone().unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(pty)));
                    self.console_resize_pipe = resize_pipe.map(Arc::new);
                    Endpoint::FilePair(file.try_clone().unwrap(), file)
                } else {
                    let (main, mut sub, path) =
                        create_pty(false).map_err(DeviceManagerError::ConsolePtyOpen)?;
                    self.set_raw_mode(&mut sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().console.file = Some(path.clone());
                    let file = main.try_clone().unwrap();
                    assert!(resize_pipe.is_none());
                    self.listen_for_sigwinch_on_tty(&sub).unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
                    Endpoint::FilePair(file.try_clone().unwrap(), file)
                }
            }
            ConsoleOutputMode::Tty => {
                // Duplicating the file descriptors like this is needed as otherwise
                // they will be closed on a reboot and the numbers reused

                // SAFETY: FFI call to dup. Trivially safe.
                let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
                if stdout == -1 {
                    return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
                }
                // SAFETY: stdout is valid and owned solely by us.
                let stdout = unsafe { File::from_raw_fd(stdout) };

                // If an interactive TTY then we can accept input
                // SAFETY: FFI call. Trivially safe.
                if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
                    // SAFETY: FFI call to dup. Trivially safe.
                    let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
                    if stdin == -1 {
                        return vmm_sys_util::errno::errno_result()
                            .map_err(DeviceManagerError::DupFd);
                    }
                    // SAFETY: stdin is valid and owned solely by us.
                    let stdin = unsafe { File::from_raw_fd(stdin) };

                    Endpoint::FilePair(stdout, stdin)
                } else {
                    Endpoint::File(stdout)
                }
            }
            ConsoleOutputMode::Null => Endpoint::Null,
            ConsoleOutputMode::Off => return Ok(None),
        };
        let id = String::from(CONSOLE_DEVICE_NAME);

        let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
            id.clone(),
            endpoint,
            self.console_resize_pipe
                .as_ref()
                .map(|p| p.try_clone().unwrap()),
            self.force_iommu | console_config.iommu,
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )
        .map_err(DeviceManagerError::CreateVirtioConsole)?;
        let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
        virtio_devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_console_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: console_config.iommu,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_console_device));

        // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
        Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
            Some(console_resizer)
        } else {
            None
        })
    }

    /// Creates the serial device (per `serial` config mode) together with its
    /// `SerialManager` input thread when needed, then the virtio-console.
    /// Returns the `Console` wrapper holding the optional resizer.
    fn add_console_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        console_resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
            ConsoleOutputMode::File => Some(Box::new(
                File::create(serial_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::SerialOutputFileOpen)?,
            )),
            ConsoleOutputMode::Pty => {
                if let Some(pty) = serial_pty {
                    // Reuse the pty handed over (e.g. across restore).
                    self.config.lock().unwrap().serial.file = Some(pty.path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(pty)));
                } else {
                    let (main, mut sub, path) =
                        create_pty(true).map_err(DeviceManagerError::SerialPtyOpen)?;
                    self.set_raw_mode(&mut sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().serial.file = Some(path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
                }
                // In Pty mode output goes through the SerialManager, not a
                // writer.
                None
            }
            ConsoleOutputMode::Tty => Some(Box::new(stdout())),
            ConsoleOutputMode::Off | ConsoleOutputMode::Null => None,
        };
        if serial_config.mode != ConsoleOutputMode::Off {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            self.serial_manager = match serial_config.mode {
                ConsoleOutputMode::Pty | ConsoleOutputMode::Tty => {
                    let serial_manager =
SerialManager::new(serial, self.serial_pty.clone(), serial_config.mode) 1995 .map_err(DeviceManagerError::CreateSerialManager)?; 1996 if let Some(mut serial_manager) = serial_manager { 1997 serial_manager 1998 .start_thread( 1999 self.exit_evt 2000 .try_clone() 2001 .map_err(DeviceManagerError::EventFd)?, 2002 ) 2003 .map_err(DeviceManagerError::SpawnSerialManager)?; 2004 Some(Arc::new(serial_manager)) 2005 } else { 2006 None 2007 } 2008 } 2009 _ => None, 2010 }; 2011 } 2012 2013 let console_resizer = 2014 self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?; 2015 2016 Ok(Arc::new(Console { console_resizer })) 2017 } 2018 2019 fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2020 let mut devices: Vec<MetaVirtioDevice> = Vec::new(); 2021 2022 // Create "standard" virtio devices (net/block/rng) 2023 devices.append(&mut self.make_virtio_block_devices()?); 2024 devices.append(&mut self.make_virtio_net_devices()?); 2025 devices.append(&mut self.make_virtio_rng_devices()?); 2026 2027 // Add virtio-fs if required 2028 devices.append(&mut self.make_virtio_fs_devices()?); 2029 2030 // Add virtio-pmem if required 2031 devices.append(&mut self.make_virtio_pmem_devices()?); 2032 2033 // Add virtio-vsock if required 2034 devices.append(&mut self.make_virtio_vsock_devices()?); 2035 2036 devices.append(&mut self.make_virtio_mem_devices()?); 2037 2038 // Add virtio-balloon if required 2039 devices.append(&mut self.make_virtio_balloon_devices()?); 2040 2041 // Add virtio-watchdog device 2042 devices.append(&mut self.make_virtio_watchdog_devices()?); 2043 2044 // Add vDPA devices if required 2045 devices.append(&mut self.make_vdpa_devices()?); 2046 2047 Ok(devices) 2048 } 2049 2050 // Cache whether io_uring is supported to avoid probing for very block device 2051 fn io_uring_is_supported(&mut self) -> bool { 2052 if let Some(supported) = self.io_uring_supported { 2053 return supported; 2054 } 2055 2056 let supported = 
            block_io_uring_is_supported();
        // Remember the probe result so subsequent disks reuse it.
        self.io_uring_supported = Some(supported);
        supported
    }

    /// Create a single virtio-block device from `disk_cfg`.
    ///
    /// Assigns a device id if the config has none, then builds either a
    /// vhost-user-blk backend (when `disk_cfg.vhost_user` is set) or an
    /// in-process `virtio_devices::Block` whose disk backend is picked from
    /// the detected image type (fixed VHD / raw / QCOW2 / VHDX), preferring
    /// io_uring-based async I/O when supported and not disabled.
    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            // No id supplied: generate one and write it back into the config.
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
            // NOTE(review): vhost_socket is unwrap()ed here — assumes config
            // validation guarantees it is Some when vhost_user is set; confirm.
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let vhost_user_block = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(
                    id.clone(),
                    vu_cfg,
                    self.restoring,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            // Open read-only files read-only; O_DIRECT bypasses the host page
            // cache when requested.
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            // Pick a disk backend implementation based on the image format.
            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if self.io_uring_is_supported() && !disk_cfg.disable_io_uring {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");
                        Box::new(
                            FixedVhdDiskAsync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                        ) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if self.io_uring_is_supported() && !disk_cfg.disable_io_uring {
                        info!("Using asynchronous RAW disk file (io_uring)");
                        Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            let virtio_block = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    self.seccomp_action.clone(),
                    disk_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Create one virtio-block device per configured disk.
    ///
    /// The disk list is cloned out of the config, mutated (ids may be
    /// assigned by `make_virtio_block_device`), then written back so the
    /// stored config reflects the generated ids.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }

    /// Create a single virtio-net device from `net_cfg`.
    ///
    /// Chooses a vhost-user-net backend when `net_cfg.vhost_user` is set;
    /// otherwise builds an in-process `virtio_devices::Net` backed by a named
    /// TAP interface, pre-opened TAP fds, or a TAP created from ip/mask.
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            // No id supplied: generate one and write it back into the config.
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            // NOTE(review): vhost_socket is unwrap()ed — assumes config
            // validation guarantees it is Some when vhost_user is set; confirm.
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            // vhost_mode decides whether we act as the vhost-user server or
            // connect as a client to an external backend.
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.restoring,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                ) {
                    Ok(vun_device) => vun_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserNet(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_net as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            // In-process backend: pick the TAP source in priority order —
            // named interface, pre-opened fds, then ip/mask creation.
            let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        Some(tap_if_name),
                        None,
                        None,
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else if let Some(fds) = &net_cfg.fds {
                Arc::new(Mutex::new(
                    virtio_devices::Net::from_tap_fds(
                        id.clone(),
                        fds,
                        Some(net_cfg.mac),
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        None,
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            };

            (
                Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_net as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: net_cfg.iommu,
            id,
            pci_segment: net_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add virtio-net and vhost-user-net devices
    ///
    /// Clones the net list from the config, creates one device per entry
    /// (assigning ids as needed), and writes the list back.
    fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        let mut net_devices = self.config.lock().unwrap().net.clone();
        if let Some(net_list_cfg) = &mut net_devices {
            for net_cfg in net_list_cfg.iter_mut() {
                devices.push(self.make_virtio_net_device(net_cfg)?);
            }
        }
        self.config.lock().unwrap().net = net_devices;

        Ok(devices)
    }

    /// Create the (single, fixed-id) virtio-rng device from the rng config.
    fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        // NOTE(review): a non-UTF-8 `src` path makes to_str() return None and
        // the device is silently skipped — confirm that is intended.
        if let Some(rng_path) = rng_config.src.to_str() {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_rng_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: rng_config.iommu,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_rng_device));
        }

        Ok(devices)
    }

    /// Create a single vhost-user-fs (virtio-fs) device from `fs_cfg`.
    ///
    /// Fails with `NoVirtioFsSock` when the configured socket path is not
    /// valid UTF-8 (and therefore cannot be passed to the backend).
    fn make_virtio_fs_device(
        &mut self,
        fs_cfg: &mut FsConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &fs_cfg.id {
            id.clone()
        } else {
            // No id supplied: generate one and write it back into the config.
            let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
            fs_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-fs device: {:?}", fs_cfg);

        let mut node = device_node!(id);

        if let Some(fs_socket) = fs_cfg.socket.to_str() {
            let virtio_fs_device = Arc::new(Mutex::new(
                virtio_devices::vhost_user::Fs::new(
                    id.clone(),
                    fs_socket,
                    &fs_cfg.tag,
                    fs_cfg.num_queues,
                    fs_cfg.queue_size,
                    None,
                    self.seccomp_action.clone(),
                    self.restoring,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                )
                .map_err(DeviceManagerError::CreateVirtioFs)?,
            ));

            // Update the device tree with the migratable device.
            node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_fs_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id,
                pci_segment: fs_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            Err(DeviceManagerError::NoVirtioFsSock)
        }
    }

    /// Create one virtio-fs device per configured shared filesystem, writing
    /// any generated ids back into the stored config.
    fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut fs_devices = self.config.lock().unwrap().fs.clone();
        if let Some(fs_list_cfg) = &mut fs_devices {
            for fs_cfg in fs_list_cfg.iter_mut() {
                devices.push(self.make_virtio_fs_device(fs_cfg)?);
            }
        }
        self.config.lock().unwrap().fs = fs_devices;

        Ok(devices)
    }

    /// Create a single virtio-pmem device from `pmem_cfg`.
    ///
    /// On restore, the guest-physical region is recovered from the device
    /// tree resources; otherwise a fresh 2MiB-aligned range is allocated from
    /// the PCI segment allocator. The backing file is mmap'ed and registered
    /// as a userspace mapping with the memory manager.
    fn make_virtio_pmem_device(
        &mut self,
        pmem_cfg: &mut PmemConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &pmem_cfg.id {
            id.clone()
        } else {
            // No id supplied: generate one and write it back into the config.
            let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
            pmem_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-pmem device: {:?}", pmem_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
            info!("Restoring virtio-pmem {} resources", id);

            // Exactly one MmioAddressRange resource is expected; more than
            // one is an error, none at all is a missing-resources error.
            let mut region_range: Option<(u64, u64)> = None;
            for resource in node.resources.iter() {
                match resource {
                    Resource::MmioAddressRange { base, size } => {
                        if region_range.is_some() {
                            return Err(DeviceManagerError::ResourceAlreadyExists);
                        }

                        region_range = Some((*base, *size));
                    }
                    _ => {
                        error!("Unexpected resource {:?} for {}", resource, id);
                    }
                }
            }

            if region_range.is_none() {
                return Err(DeviceManagerError::MissingVirtioPmemResources);
            }

            region_range
        } else {
            None
        };

        // A directory target means we back the device with an anonymous
        // O_TMPFILE file inside it, which requires an explicit size.
        let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
            if pmem_cfg.size.is_none() {
                return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
            }
            (O_TMPFILE, true)
        } else {
            (0, false)
        };

        let mut file = OpenOptions::new()
            .read(true)
            .write(!pmem_cfg.discard_writes)
            .custom_flags(custom_flags)
            .open(&pmem_cfg.file)
            .map_err(DeviceManagerError::PmemFileOpen)?;

        let size = if let Some(size) = pmem_cfg.size {
            if set_len {
                file.set_len(size)
                    .map_err(DeviceManagerError::PmemFileSetLen)?;
            }
            size
        } else {
            // No explicit size: use the current file length.
            // NOTE(review): a seek failure is reported as PmemFileSetLen —
            // arguably a misleading error variant; confirm before changing.
            file.seek(SeekFrom::End(0))
                .map_err(DeviceManagerError::PmemFileSetLen)?
        };

        // The region must be a multiple of 2MiB.
        if size % 0x20_0000 != 0 {
            return Err(DeviceManagerError::PmemSizeNotAligned);
        }

        let (region_base, region_size) = if let Some((base, size)) = region_range {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            self.pci_segments[pmem_cfg.pci_segment as usize]
                .allocator
                .lock()
                .unwrap()
                .allocate(
                    Some(GuestAddress(base)),
                    size as GuestUsize,
                    Some(0x0020_0000),
                )
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base, size)
        } else {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            let base = self.pci_segments[pmem_cfg.pci_segment as usize]
                .allocator
                .lock()
                .unwrap()
                .allocate(None, size as GuestUsize, Some(0x0020_0000))
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base.raw_value(), size)
        };

        let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
        // discard_writes maps the file privately (guest writes never reach
        // the file); otherwise the mapping is shared.
        let mmap_region = MmapRegion::build(
            Some(FileOffset::new(cloned_file, 0)),
            region_size as usize,
            PROT_READ | PROT_WRITE,
            MAP_NORESERVE
                | if pmem_cfg.discard_writes {
                    MAP_PRIVATE
                } else {
                    MAP_SHARED
                },
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;
        let host_addr: u64 = mmap_region.as_ptr() as u64;

        let mem_slot = self
            .memory_manager
            .lock()
            .unwrap()
            .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
            .map_err(DeviceManagerError::MemoryManager)?;

        let mapping = virtio_devices::UserspaceMapping {
            host_addr,
            mem_slot,
            addr: GuestAddress(region_base),
            len: region_size,
            mergeable: false,
        };

        let virtio_pmem_device = Arc::new(Mutex::new(
            virtio_devices::Pmem::new(
                id.clone(),
                file,
                GuestAddress(region_base),
                mapping,
                mmap_region,
                self.force_iommu | pmem_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )
            .map_err(DeviceManagerError::CreateVirtioPmem)?,
        ));

        // Update the device tree with correct resource information and with
        // the migratable device.
        node.resources.push(Resource::MmioAddressRange {
            base: region_base,
            size: region_size,
        });
        node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_pmem_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: pmem_cfg.iommu,
            id,
            pci_segment: pmem_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Create one virtio-pmem device per configured entry, writing any
    /// generated ids back into the stored config.
    fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add virtio-pmem if required
        let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
        if let Some(pmem_list_cfg) = &mut pmem_devices {
            for pmem_cfg in pmem_list_cfg.iter_mut() {
                devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
            }
        }
        self.config.lock().unwrap().pmem = pmem_devices;

        Ok(devices)
    }

    /// Create a single virtio-vsock device from `vsock_cfg`, backed by a
    /// Unix-socket vsock backend.
    fn make_virtio_vsock_device(
        &mut self,
        vsock_cfg: &mut VsockConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vsock_cfg.id {
            id.clone()
        } else {
            // No id supplied: generate one and write it back into the config.
            let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
            vsock_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-vsock device: {:?}", vsock_cfg);

        let socket_path = vsock_cfg
            .socket
            .to_str()
            .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
        let backend =
            virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                .map_err(DeviceManagerError::CreateVsockBackend)?;

        let vsock_device = Arc::new(Mutex::new(
            virtio_devices::Vsock::new(
                id.clone(),
                vsock_cfg.cid,
                vsock_cfg.socket.clone(),
                backend,
                self.force_iommu | vsock_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )
            .map_err(DeviceManagerError::CreateVirtioVsock)?,
        ));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vsock_device));

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&vsock_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vsock_cfg.iommu,
            id,
            pci_segment: vsock_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Create the (at most one) configured virtio-vsock device, writing any
    /// generated id back into the stored config.
    fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut vsock = self.config.lock().unwrap().vsock.clone();
        if let Some(ref mut vsock_cfg) = &mut vsock {
            devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
        }
        self.config.lock().unwrap().vsock = vsock;

        Ok(devices)
    }

    /// Create one virtio-mem device for every memory zone that has a
    /// virtio-mem sub-zone, wiring each device to its zone's resize handler
    /// and (optionally) to the NUMA node the zone belongs to.
    fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mm = self.memory_manager.clone();
        let mm = mm.lock().unwrap();
        for (memory_zone_id, memory_zone) in mm.memory_zones().iter() {
            if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() {
                info!("Creating virtio-mem device: id = {}", memory_zone_id);

                // Map the memory zone to its NUMA node id, if any.
                let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
                    .map(|i| i as u16);

                let virtio_mem_device = Arc::new(Mutex::new(
                    virtio_devices::Mem::new(
                        memory_zone_id.clone(),
                        virtio_mem_zone.region(),
                        virtio_mem_zone
                            .resize_handler()
                            .new_resize_sender()
                            .map_err(DeviceManagerError::CreateResizeSender)?,
                        self.seccomp_action.clone(),
                        node_id,
                        virtio_mem_zone.hotplugged_size(),
                        virtio_mem_zone.hugepages(),
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        virtio_mem_zone.blocks_state().clone(),
                    )
                    .map_err(DeviceManagerError::CreateVirtioMem)?,
                ));

                // Keep a handle so DMA mapping handlers can be attached later
                // (e.g. when VFIO devices are added).
                self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));

                devices.push(MetaVirtioDevice {
                    virtio_device: Arc::clone(&virtio_mem_device)
                        as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                    iommu: false,
                    id: memory_zone_id.clone(),
                    pci_segment: 0,
                    dma_handler: None,
                });

                // Fill the device tree with a new node. In case of restore, we
                // know there is nothing to do, so we can simply override the
                // existing entry.
                self.device_tree.lock().unwrap().insert(
                    memory_zone_id.clone(),
                    device_node!(memory_zone_id, virtio_mem_device),
                );
            }
        }

        Ok(devices)
    }

    /// Create the (single, fixed-id) virtio-balloon device when the config
    /// requests one, keeping a handle in `self.balloon` for later resizing.
    fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
            let id = String::from(BALLOON_DEVICE_NAME);
            info!("Creating virtio-balloon device: id = {}", id);

            let virtio_balloon_device = Arc::new(Mutex::new(
                virtio_devices::Balloon::new(
                    id.clone(),
                    balloon_config.size,
                    balloon_config.deflate_on_oom,
                    balloon_config.free_page_reporting,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBalloon)?,
            ));

            self.balloon = Some(virtio_balloon_device.clone());

            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_balloon_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id,
                    virtio_balloon_device));
        }

        Ok(devices)
    }

    /// Create the (single, fixed-id) virtio-watchdog device when enabled in
    /// the config; the watchdog fires the VM reset event on expiry.
    fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if !self.config.lock().unwrap().watchdog {
            return Ok(devices);
        }

        let id = String::from(WATCHDOG_DEVICE_NAME);
        info!("Creating virtio-watchdog device: id = {}", id);

        let virtio_watchdog_device = Arc::new(Mutex::new(
            virtio_devices::Watchdog::new(
                id.clone(),
                // NOTE(review): reset_evt clone failure panics via unwrap() —
                // unlike exit_evt just below, which is mapped to EventFd.
                self.reset_evt.try_clone().unwrap(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )
            .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
        ));
        devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_watchdog_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: false,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_watchdog_device));

        Ok(devices)
    }

    /// Create a single vDPA device from `vdpa_cfg`.
    ///
    /// Unlike the other virtio devices, a vDPA device carries a dedicated DMA
    /// handler (`VdpaDmaMapping`) in its `MetaVirtioDevice`.
    fn make_vdpa_device(
        &mut self,
        vdpa_cfg: &mut VdpaConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vdpa_cfg.id {
            id.clone()
        } else {
            // No id supplied: generate one and write it back into the config.
            let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
            vdpa_cfg.id = Some(id.clone());
            id
        };

        info!("Creating vDPA device: {:?}", vdpa_cfg);

        let device_path = vdpa_cfg
            .path
            .to_str()
            .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;

        let vdpa_device = Arc::new(Mutex::new(
            virtio_devices::Vdpa::new(
                id.clone(),
                device_path,
                self.memory_manager.lock().unwrap().guest_memory(),
                vdpa_cfg.num_queues as u16,
            )
            .map_err(DeviceManagerError::CreateVdpa)?,
        ));

        // Create the DMA handler that is required by the vDPA device
        let vdpa_mapping =
            Arc::new(VdpaDmaMapping::new(
            Arc::clone(&vdpa_device),
            Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
        ));

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id));

        Ok(MetaVirtioDevice {
            virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vdpa_cfg.iommu,
            id,
            pci_segment: vdpa_cfg.pci_segment,
            dma_handler: Some(vdpa_mapping),
        })
    }

    /// Create one vDPA device per configured entry, writing any generated
    /// ids back into the stored config.
    fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add vdpa if required
        let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
        if let Some(vdpa_list_cfg) = &mut vdpa_devices {
            for vdpa_cfg in vdpa_list_cfg.iter_mut() {
                devices.push(self.make_vdpa_device(vdpa_cfg)?);
            }
        }
        self.config.lock().unwrap().vdpa = vdpa_devices;

        Ok(devices)
    }

    /// Return the next unused device name of the form `<prefix><counter>`.
    ///
    /// The counter wraps (`Wrapping`); if a full wrap-around happens without
    /// finding a free name, `NoAvailableDeviceName` is returned. Names
    /// already present in the boot id list or in the device tree are skipped.
    fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
        let start_id = self.device_id_cnt;
        loop {
            // Generate the temporary name.
            let name = format!("{}{}", prefix, self.device_id_cnt);
            // Increment the counter.
            self.device_id_cnt += Wrapping(1);
            // Check if the name is already in use.
            if !self.boot_id_list.contains(&name)
                && !self.device_tree.lock().unwrap().contains_key(&name)
            {
                return Ok(name);
            }

            if self.device_id_cnt == start_id {
                // We went through a full loop and there's nothing else we can
                // do.
                break;
            }
        }
        Err(DeviceManagerError::NoAvailableDeviceName)
    }

    /// Add a VFIO passthrough device, lazily creating the hypervisor-level
    /// passthrough device on first use.
    fn add_passthrough_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // If the passthrough device has not been created yet, it is created
        // here and stored in the DeviceManager structure for future needs.
        if self.passthrough_device.is_none() {
            self.passthrough_device = Some(
                self.address_manager
                    .vm
                    .create_passthrough_device()
                    .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
            );
        }

        self.add_vfio_device(device_cfg)
    }

    /// Build a new VFIO container from a duplicated handle of the
    /// hypervisor passthrough device (which must already exist).
    fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
        let passthrough_device = self
            .passthrough_device
            .as_ref()
            .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

        // Duplicate the fd so the container owns its own handle.
        let dup = passthrough_device
            .try_clone()
            .map_err(DeviceManagerError::VfioCreate)?;

        Ok(Arc::new(
            VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
        ))
    }

    /// Create a VFIO PCI device from `device_cfg`, place it on its PCI
    /// segment, set up DMA mappings, and record it in the device tree.
    ///
    /// Returns the assigned BDF and the device name.
    fn add_vfio_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            // No id supplied: generate one and write it back into the config.
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't let all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device. That's simply because the map/unmap operations happen at the
        // VFIO container level.
        let vfio_container = if device_cfg.iommu {
            // vIOMMU-attached device: dedicated container with its DMA
            // mapping registered on the virtual IOMMU for this BDF.
            let vfio_container = self.create_vfio_container()?;

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
            ));

            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }

            vfio_container
        } else if let Some(vfio_container) = &self.vfio_container {
            // Reuse the shared container created by an earlier VFIO device.
            Arc::clone(vfio_container)
        } else {
            // First non-vIOMMU VFIO device: create and cache the shared
            // container; full guest memory must be DMA-mapped below.
            let vfio_container = self.create_vfio_container()?;
            needs_dma_mapping = true;
            self.vfio_container = Some(Arc::clone(&vfio_container));

            vfio_container
        };

        let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
            .map_err(DeviceManagerError::VfioCreate)?;

        if needs_dma_mapping {
            // Register DMA mapping in IOMMU.
            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem device itself.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    vfio_container
                        .vfio_dma_map(
                            region.start_addr().raw_value(),
                            region.len() as u64,
                            region.as_ptr() as u64,
                        )
                        .map_err(DeviceManagerError::VfioDmaMap)?;
                }
            }

            // Let each virtio-mem device keep the container's mappings up to
            // date as memory is plugged/unplugged.
            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
            ));

            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Container,
                        vfio_mapping.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }
        }

        // INTx support: the IRQ is derived from the per-segment slot table,
        // indexed by the device number of the BDF.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let memory_manager = self.memory_manager.clone();

        let vfio_pci_device = VfioPciDevice::new(
            vfio_name.clone(),
            &self.address_manager.vm,
            vfio_device,
            vfio_container,
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            device_cfg.iommu,
            pci_device_bdf,
            self.restoring,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
        )
        .map_err(DeviceManagerError::VfioPciCreate)?;

        let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

        let new_resources = self.add_pci_device(
            vfio_pci_device.clone(),
            vfio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // When restoring a VM, the restore codepath will take care of mapping
        // the MMIO regions based on the information from the snapshot.
        if !self.restoring {
            vfio_pci_device
                .lock()
                .unwrap()
                .map_mmio_regions()
                .map_err(DeviceManagerError::VfioMapRegion)?;
        }

        let mut node = device_node!(vfio_name, vfio_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_name.clone(), node);

        Ok((pci_device_bdf, vfio_name))
    }

    /// Allocate BARs for `pci_device`, attach it to the PCI bus of
    /// `segment_id` at `bdf`, and register its bus mappings.
    ///
    /// `resources` carries previously-saved BAR placements (restore path);
    /// when `None`, fresh ranges are allocated. Returns the resulting BAR
    /// resources so the caller can store them in the device tree.
    fn add_pci_device(
        &mut self,
        bus_device: Arc<Mutex<dyn BusDevice>>,
        pci_device: Arc<Mutex<dyn PciDevice>>,
        segment_id: u16,
        bdf: PciBdf,
        resources: Option<Vec<Resource>>,
    ) -> DeviceManagerResult<Vec<Resource>> {
        let bars = pci_device
            .lock()
            .unwrap()
            .allocate_bars(
                &self.address_manager.allocator,
                &mut self.pci_segments[segment_id as usize]
                    .allocator
                    .lock()
                    .unwrap(),
                resources,
            )
            .map_err(DeviceManagerError::AllocateBars)?;

        let mut pci_bus = self.pci_segments[segment_id as usize]
            .pci_bus
            .lock()
            .unwrap();

        pci_bus
            .add_device(bdf.device() as u32, pci_device)
            .map_err(DeviceManagerError::AddPciDevice)?;

        self.bus_devices.push(Arc::clone(&bus_device));

        pci_bus
            .register_mapping(
                bus_device,
                #[cfg(target_arch = "x86_64")]
                self.address_manager.io_bus.as_ref(),
                self.address_manager.mmio_bus.as_ref(),
                bars.clone(),
            )
            .map_err(DeviceManagerError::AddPciDevice)?;

        // Convert the allocated BARs into device-tree resources.
        let mut new_resources = Vec::new();
        for bar in bars {
            new_resources.push(Resource::PciBar {
                index: bar.idx(),
                base: bar.addr(),
                size: bar.size(),
                type_:
                    bar.region_type().into(),
                prefetchable: bar.prefetchable().into(),
            });
        }

        Ok(new_resources)
    }

    /// Add every configured VFIO passthrough device; return the BDFs of
    /// those that are attached to the virtual IOMMU.
    fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut iommu_attached_device_ids = Vec::new();
        let mut devices = self.config.lock().unwrap().devices.clone();

        if let Some(device_list_cfg) = &mut devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (device_id, _) = self.add_passthrough_device(device_cfg)?;
                if device_cfg.iommu && self.iommu_device.is_some() {
                    iommu_attached_device_ids.push(device_id);
                }
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().devices = devices;

        Ok(iommu_attached_device_ids)
    }

    /// Create a vfio-user PCI device from `device_cfg` (userspace device
    /// served over a socket), place it on its PCI segment, set up DMA
    /// mappings, and record it in the device tree.
    fn add_vfio_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_user_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            // No id supplied: generate one and write it back into the config.
            let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;

        // INTx support, same slot-table scheme as add_vfio_device.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let client = Arc::new(Mutex::new(
            vfio_user::Client::new(&device_cfg.socket)
                .map_err(DeviceManagerError::VfioUserCreateClient)?,
        ));

        let memory_manager = self.memory_manager.clone();

        let mut vfio_user_pci_device = VfioUserPciDevice::new(
            vfio_user_name.clone(),
3254 &self.address_manager.vm, 3255 client.clone(), 3256 self.msi_interrupt_manager.clone(), 3257 legacy_interrupt_group, 3258 pci_device_bdf, 3259 self.restoring, 3260 Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()), 3261 ) 3262 .map_err(DeviceManagerError::VfioUserCreate)?; 3263 3264 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3265 let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory))); 3266 for virtio_mem_device in self.virtio_mem_devices.iter() { 3267 virtio_mem_device 3268 .lock() 3269 .unwrap() 3270 .add_dma_mapping_handler( 3271 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3272 vfio_user_mapping.clone(), 3273 ) 3274 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3275 } 3276 3277 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3278 for region in zone.regions() { 3279 vfio_user_pci_device 3280 .dma_map(region) 3281 .map_err(DeviceManagerError::VfioUserDmaMap)?; 3282 } 3283 } 3284 3285 let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device)); 3286 3287 let new_resources = self.add_pci_device( 3288 vfio_user_pci_device.clone(), 3289 vfio_user_pci_device.clone(), 3290 pci_segment_id, 3291 pci_device_bdf, 3292 resources, 3293 )?; 3294 3295 // When restoring a VM, the restore codepath will take care of mapping 3296 // the MMIO regions based on the information from the snapshot. 3297 if !self.restoring { 3298 // Note it is required to call 'add_pci_device()' in advance to have the list of 3299 // mmio regions provisioned correctly 3300 vfio_user_pci_device 3301 .lock() 3302 .unwrap() 3303 .map_mmio_regions() 3304 .map_err(DeviceManagerError::VfioUserMapRegion)?; 3305 } 3306 3307 let mut node = device_node!(vfio_user_name, vfio_user_pci_device); 3308 3309 // Update the device tree with correct resource information. 
3310 node.resources = new_resources; 3311 node.pci_bdf = Some(pci_device_bdf); 3312 node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device)); 3313 3314 self.device_tree 3315 .lock() 3316 .unwrap() 3317 .insert(vfio_user_name.clone(), node); 3318 3319 Ok((pci_device_bdf, vfio_user_name)) 3320 } 3321 3322 fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3323 let mut user_devices = self.config.lock().unwrap().user_devices.clone(); 3324 3325 if let Some(device_list_cfg) = &mut user_devices { 3326 for device_cfg in device_list_cfg.iter_mut() { 3327 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?; 3328 } 3329 } 3330 3331 // Update the list of devices 3332 self.config.lock().unwrap().user_devices = user_devices; 3333 3334 Ok(vec![]) 3335 } 3336 3337 fn add_virtio_pci_device( 3338 &mut self, 3339 virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3340 iommu_mapping: &Option<Arc<IommuMapping>>, 3341 virtio_device_id: String, 3342 pci_segment_id: u16, 3343 dma_handler: Option<Arc<dyn ExternalDmaMapping>>, 3344 ) -> DeviceManagerResult<PciBdf> { 3345 let id = format!("{}-{}", VIRTIO_PCI_DEVICE_NAME_PREFIX, virtio_device_id); 3346 3347 // Add the new virtio-pci node to the device tree. 3348 let mut node = device_node!(id); 3349 node.children = vec![virtio_device_id.clone()]; 3350 3351 let (pci_segment_id, pci_device_bdf, resources) = 3352 self.pci_resources(&id, pci_segment_id)?; 3353 3354 // Update the existing virtio node by setting the parent. 3355 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) { 3356 node.parent = Some(id.clone()); 3357 } else { 3358 return Err(DeviceManagerError::MissingNode); 3359 } 3360 3361 // Allows support for one MSI-X vector per queue. It also adds 1 3362 // as we need to take into account the dedicated vector to notify 3363 // about a virtio config change. 
3364 let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16; 3365 3366 // Create the AccessPlatform trait from the implementation IommuMapping. 3367 // This will provide address translation for any virtio device sitting 3368 // behind a vIOMMU. 3369 let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping 3370 { 3371 Some(Arc::new(AccessPlatformMapping::new( 3372 pci_device_bdf.into(), 3373 mapping.clone(), 3374 ))) 3375 } else { 3376 None 3377 }; 3378 3379 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3380 3381 // Map DMA ranges if a DMA handler is available and if the device is 3382 // not attached to a virtual IOMMU. 3383 if let Some(dma_handler) = &dma_handler { 3384 if iommu_mapping.is_some() { 3385 if let Some(iommu) = &self.iommu_device { 3386 iommu 3387 .lock() 3388 .unwrap() 3389 .add_external_mapping(pci_device_bdf.into(), dma_handler.clone()); 3390 } else { 3391 return Err(DeviceManagerError::MissingVirtualIommu); 3392 } 3393 } else { 3394 // Let every virtio-mem device handle the DMA map/unmap through the 3395 // DMA handler provided. 3396 for virtio_mem_device in self.virtio_mem_devices.iter() { 3397 virtio_mem_device 3398 .lock() 3399 .unwrap() 3400 .add_dma_mapping_handler( 3401 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3402 dma_handler.clone(), 3403 ) 3404 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3405 } 3406 3407 // Do not register virtio-mem regions, as they are handled directly by 3408 // virtio-mem devices. 
3409 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3410 for region in zone.regions() { 3411 let gpa = region.start_addr().0; 3412 let size = region.len(); 3413 dma_handler 3414 .map(gpa, gpa, size) 3415 .map_err(DeviceManagerError::VirtioDmaMap)?; 3416 } 3417 } 3418 } 3419 } 3420 3421 let device_type = virtio_device.lock().unwrap().device_type(); 3422 let virtio_pci_device = Arc::new(Mutex::new( 3423 VirtioPciDevice::new( 3424 id.clone(), 3425 memory, 3426 virtio_device, 3427 msix_num, 3428 access_platform, 3429 &self.msi_interrupt_manager, 3430 pci_device_bdf.into(), 3431 self.activate_evt 3432 .try_clone() 3433 .map_err(DeviceManagerError::EventFd)?, 3434 // All device types *except* virtio block devices should be allocated a 64-bit bar 3435 // The block devices should be given a 32-bit BAR so that they are easily accessible 3436 // to firmware without requiring excessive identity mapping. 3437 // The exception being if not on the default PCI segment. 3438 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32, 3439 dma_handler, 3440 self.pending_activations.clone(), 3441 ) 3442 .map_err(DeviceManagerError::VirtioDevice)?, 3443 )); 3444 3445 let new_resources = self.add_pci_device( 3446 virtio_pci_device.clone(), 3447 virtio_pci_device.clone(), 3448 pci_segment_id, 3449 pci_device_bdf, 3450 resources, 3451 )?; 3452 3453 let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr(); 3454 for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) { 3455 let io_addr = IoEventAddress::Mmio(addr); 3456 self.address_manager 3457 .vm 3458 .register_ioevent(event, &io_addr, None) 3459 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?; 3460 } 3461 3462 // Update the device tree with correct resource information. 
node.resources = new_resources;
        node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
        self.device_tree.lock().unwrap().insert(id, node);

        Ok(pci_device_bdf)
    }

    /// Resolve the PCI segment id, BDF and (possibly) saved resources for the
    /// device identified by `id`.
    ///
    /// If `id` is already present in the device tree, the device is being
    /// restored: its previous BDF is reused (and its device slot re-reserved
    /// on the bus) and its saved resources are returned. Otherwise the next
    /// free BDF on the requested segment is allocated and no resources are
    /// returned.
    fn pci_resources(
        &self,
        id: &str,
        pci_segment_id: u16,
    ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        Ok(
            if let Some(node) = self.device_tree.lock().unwrap().get(id) {
                // NOTE(review): this message says "virtio-pci" but the
                // function is also used for VFIO and vfio-user devices.
                info!("Restoring virtio-pci {} resources", id);
                let pci_device_bdf: PciBdf = node
                    .pci_bdf
                    .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
                // On restore the segment comes from the saved BDF, not from
                // the caller, so the device keeps its original placement.
                let pci_segment_id = pci_device_bdf.segment();

                // Re-reserve the device slot on the bus.
                self.pci_segments[pci_segment_id as usize]
                    .pci_bus
                    .lock()
                    .unwrap()
                    .get_device_id(pci_device_bdf.device() as usize)
                    .map_err(DeviceManagerError::GetPciDeviceId)?;

                (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
            } else {
                let pci_device_bdf =
                    self.pci_segments[pci_segment_id as usize].next_device_bdf()?;

                (pci_segment_id, pci_device_bdf, None)
            },
        )
    }

    /// PIO bus (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }

    /// MMIO bus shared with the rest of the VMM.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }

    /// System-wide resource allocator.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }

    /// Interrupt controller, if one was created.
    pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
        self.interrupt_controller
            .as_ref()
            .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
    }

    #[cfg(target_arch = "x86_64")]
    // Used to provide a
fast path for handling PIO exits 3525 pub fn pci_config_io(&self) -> Arc<Mutex<PciConfigIo>> { 3526 Arc::clone(self.pci_segments[0].pci_config_io.as_ref().unwrap()) 3527 } 3528 3529 pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> { 3530 &self.pci_segments 3531 } 3532 3533 pub fn console(&self) -> &Arc<Console> { 3534 &self.console 3535 } 3536 3537 #[cfg(target_arch = "aarch64")] 3538 pub fn cmdline_additions(&self) -> &[String] { 3539 self.cmdline_additions.as_slice() 3540 } 3541 3542 pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> { 3543 for handle in self.virtio_devices.iter() { 3544 handle 3545 .virtio_device 3546 .lock() 3547 .unwrap() 3548 .add_memory_region(new_region) 3549 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?; 3550 3551 if let Some(dma_handler) = &handle.dma_handler { 3552 if !handle.iommu { 3553 let gpa = new_region.start_addr().0; 3554 let size = new_region.len(); 3555 dma_handler 3556 .map(gpa, gpa, size) 3557 .map_err(DeviceManagerError::VirtioDmaMap)?; 3558 } 3559 } 3560 } 3561 3562 // Take care of updating the memory for VFIO PCI devices. 3563 if let Some(vfio_container) = &self.vfio_container { 3564 vfio_container 3565 .vfio_dma_map( 3566 new_region.start_addr().raw_value(), 3567 new_region.len() as u64, 3568 new_region.as_ptr() as u64, 3569 ) 3570 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?; 3571 } 3572 3573 // Take care of updating the memory for vfio-user devices. 3574 { 3575 let device_tree = self.device_tree.lock().unwrap(); 3576 for pci_device_node in device_tree.pci_devices() { 3577 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node 3578 .pci_device_handle 3579 .as_ref() 3580 .ok_or(DeviceManagerError::MissingPciDevice)? 
3581 { 3582 vfio_user_pci_device 3583 .lock() 3584 .unwrap() 3585 .dma_map(new_region) 3586 .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?; 3587 } 3588 } 3589 } 3590 3591 Ok(()) 3592 } 3593 3594 pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> { 3595 for mut activator in self.pending_activations.lock().unwrap().drain(..) { 3596 activator 3597 .activate() 3598 .map_err(DeviceManagerError::VirtioActivate)?; 3599 } 3600 Ok(()) 3601 } 3602 3603 pub fn notify_hotplug( 3604 &self, 3605 _notification_type: AcpiNotificationFlags, 3606 ) -> DeviceManagerResult<()> { 3607 return self 3608 .ged_notification_device 3609 .as_ref() 3610 .unwrap() 3611 .lock() 3612 .unwrap() 3613 .notify(_notification_type) 3614 .map_err(DeviceManagerError::HotPlugNotification); 3615 } 3616 3617 pub fn add_device( 3618 &mut self, 3619 device_cfg: &mut DeviceConfig, 3620 ) -> DeviceManagerResult<PciDeviceInfo> { 3621 self.validate_identifier(&device_cfg.id)?; 3622 3623 if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) { 3624 return Err(DeviceManagerError::InvalidIommuHotplug); 3625 } 3626 3627 let (bdf, device_name) = self.add_passthrough_device(device_cfg)?; 3628 3629 // Update the PCIU bitmap 3630 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3631 3632 Ok(PciDeviceInfo { 3633 id: device_name, 3634 bdf, 3635 }) 3636 } 3637 3638 pub fn add_user_device( 3639 &mut self, 3640 device_cfg: &mut UserDeviceConfig, 3641 ) -> DeviceManagerResult<PciDeviceInfo> { 3642 self.validate_identifier(&device_cfg.id)?; 3643 3644 let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?; 3645 3646 // Update the PCIU bitmap 3647 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3648 3649 Ok(PciDeviceInfo { 3650 id: device_name, 3651 bdf, 3652 }) 3653 } 3654 3655 pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> { 3656 // The node can be directly a PCI node in 
case the 'id' refers to a 3657 // VFIO device or a virtio-pci one. 3658 // In case the 'id' refers to a virtio device, we must find the PCI 3659 // node by looking at the parent. 3660 let device_tree = self.device_tree.lock().unwrap(); 3661 let node = device_tree 3662 .get(&id) 3663 .ok_or(DeviceManagerError::UnknownDeviceId(id))?; 3664 3665 let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() { 3666 node 3667 } else { 3668 let parent = node 3669 .parent 3670 .as_ref() 3671 .ok_or(DeviceManagerError::MissingNode)?; 3672 device_tree 3673 .get(parent) 3674 .ok_or(DeviceManagerError::MissingNode)? 3675 }; 3676 3677 let pci_device_bdf: PciBdf = pci_device_node 3678 .pci_bdf 3679 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3680 let pci_segment_id = pci_device_bdf.segment(); 3681 3682 let pci_device_handle = pci_device_node 3683 .pci_device_handle 3684 .as_ref() 3685 .ok_or(DeviceManagerError::MissingPciDevice)?; 3686 #[allow(irrefutable_let_patterns)] 3687 if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle { 3688 let device_type = VirtioDeviceType::from( 3689 virtio_pci_device 3690 .lock() 3691 .unwrap() 3692 .virtio_device() 3693 .lock() 3694 .unwrap() 3695 .device_type(), 3696 ); 3697 match device_type { 3698 VirtioDeviceType::Net 3699 | VirtioDeviceType::Block 3700 | VirtioDeviceType::Pmem 3701 | VirtioDeviceType::Fs 3702 | VirtioDeviceType::Vsock => {} 3703 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)), 3704 } 3705 } 3706 3707 // Update the PCID bitmap 3708 self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device(); 3709 3710 Ok(()) 3711 } 3712 3713 pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> { 3714 info!( 3715 "Ejecting device_id = {} on segment_id={}", 3716 device_id, pci_segment_id 3717 ); 3718 3719 // Convert the device ID into the corresponding b/d/f. 
3720 let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0); 3721 3722 // Give the PCI device ID back to the PCI bus. 3723 self.pci_segments[pci_segment_id as usize] 3724 .pci_bus 3725 .lock() 3726 .unwrap() 3727 .put_device_id(device_id as usize) 3728 .map_err(DeviceManagerError::PutPciDeviceId)?; 3729 3730 // Remove the device from the device tree along with its children. 3731 let mut device_tree = self.device_tree.lock().unwrap(); 3732 let pci_device_node = device_tree 3733 .remove_node_by_pci_bdf(pci_device_bdf) 3734 .ok_or(DeviceManagerError::MissingPciDevice)?; 3735 3736 // For VFIO and vfio-user the PCI device id is the id. 3737 // For virtio we overwrite it later as we want the id of the 3738 // underlying device. 3739 let mut id = pci_device_node.id; 3740 let pci_device_handle = pci_device_node 3741 .pci_device_handle 3742 .ok_or(DeviceManagerError::MissingPciDevice)?; 3743 if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) { 3744 // The virtio-pci device has a single child 3745 if !pci_device_node.children.is_empty() { 3746 assert_eq!(pci_device_node.children.len(), 1); 3747 let child_id = &pci_device_node.children[0]; 3748 id = child_id.clone(); 3749 } 3750 } 3751 for child in pci_device_node.children.iter() { 3752 device_tree.remove(child); 3753 } 3754 3755 let mut iommu_attached = false; 3756 if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices { 3757 if iommu_attached_devices.contains(&pci_device_bdf) { 3758 iommu_attached = true; 3759 } 3760 } 3761 3762 let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { 3763 // No need to remove any virtio-mem mapping here as the container outlives all devices 3764 PciDeviceHandle::Vfio(vfio_pci_device) => ( 3765 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>, 3766 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>, 3767 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 3768 false, 3769 ), 3770 
PciDeviceHandle::Virtio(virtio_pci_device) => { 3771 let dev = virtio_pci_device.lock().unwrap(); 3772 let bar_addr = dev.config_bar_addr(); 3773 for (event, addr) in dev.ioeventfds(bar_addr) { 3774 let io_addr = IoEventAddress::Mmio(addr); 3775 self.address_manager 3776 .vm 3777 .unregister_ioevent(event, &io_addr) 3778 .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; 3779 } 3780 3781 if let Some(dma_handler) = dev.dma_handler() { 3782 if !iommu_attached { 3783 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3784 for region in zone.regions() { 3785 let iova = region.start_addr().0; 3786 let size = region.len(); 3787 dma_handler 3788 .unmap(iova, size) 3789 .map_err(DeviceManagerError::VirtioDmaUnmap)?; 3790 } 3791 } 3792 } 3793 } 3794 3795 ( 3796 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>, 3797 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>, 3798 Some(dev.virtio_device()), 3799 dev.dma_handler().is_some() && !iommu_attached, 3800 ) 3801 } 3802 PciDeviceHandle::VfioUser(vfio_user_pci_device) => { 3803 let mut dev = vfio_user_pci_device.lock().unwrap(); 3804 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3805 for region in zone.regions() { 3806 dev.dma_unmap(region) 3807 .map_err(DeviceManagerError::VfioUserDmaUnmap)?; 3808 } 3809 } 3810 3811 ( 3812 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>, 3813 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>, 3814 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 3815 true, 3816 ) 3817 } 3818 }; 3819 3820 if remove_dma_handler { 3821 for virtio_mem_device in self.virtio_mem_devices.iter() { 3822 virtio_mem_device 3823 .lock() 3824 .unwrap() 3825 .remove_dma_mapping_handler(VirtioMemMappingSource::Device( 3826 pci_device_bdf.into(), 3827 )) 3828 .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?; 3829 } 3830 } 3831 3832 // Free the allocated BARs 3833 pci_device 3834 
.lock() 3835 .unwrap() 3836 .free_bars( 3837 &mut self.address_manager.allocator.lock().unwrap(), 3838 &mut self.pci_segments[pci_segment_id as usize] 3839 .allocator 3840 .lock() 3841 .unwrap(), 3842 ) 3843 .map_err(DeviceManagerError::FreePciBars)?; 3844 3845 // Remove the device from the PCI bus 3846 self.pci_segments[pci_segment_id as usize] 3847 .pci_bus 3848 .lock() 3849 .unwrap() 3850 .remove_by_device(&pci_device) 3851 .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?; 3852 3853 #[cfg(target_arch = "x86_64")] 3854 // Remove the device from the IO bus 3855 self.io_bus() 3856 .remove_by_device(&bus_device) 3857 .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?; 3858 3859 // Remove the device from the MMIO bus 3860 self.mmio_bus() 3861 .remove_by_device(&bus_device) 3862 .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?; 3863 3864 // Remove the device from the list of BusDevice held by the 3865 // DeviceManager. 3866 self.bus_devices 3867 .retain(|dev| !Arc::ptr_eq(dev, &bus_device)); 3868 3869 // Shutdown and remove the underlying virtio-device if present 3870 if let Some(virtio_device) = virtio_device { 3871 for mapping in virtio_device.lock().unwrap().userspace_mappings() { 3872 self.memory_manager 3873 .lock() 3874 .unwrap() 3875 .remove_userspace_mapping( 3876 mapping.addr.raw_value(), 3877 mapping.len, 3878 mapping.host_addr, 3879 mapping.mergeable, 3880 mapping.mem_slot, 3881 ) 3882 .map_err(DeviceManagerError::MemoryManager)?; 3883 } 3884 3885 virtio_device.lock().unwrap().shutdown(); 3886 3887 self.virtio_devices 3888 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device)); 3889 } 3890 3891 event!( 3892 "vm", 3893 "device-removed", 3894 "id", 3895 &id, 3896 "bdf", 3897 pci_device_bdf.to_string() 3898 ); 3899 3900 // At this point, the device has been removed from all the list and 3901 // buses where it was stored. 
At the end of this function, after 3902 // any_device, bus_device and pci_device are released, the actual 3903 // device will be dropped. 3904 Ok(()) 3905 } 3906 3907 fn hotplug_virtio_pci_device( 3908 &mut self, 3909 handle: MetaVirtioDevice, 3910 ) -> DeviceManagerResult<PciDeviceInfo> { 3911 // Add the virtio device to the device manager list. This is important 3912 // as the list is used to notify virtio devices about memory updates 3913 // for instance. 3914 self.virtio_devices.push(handle.clone()); 3915 3916 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 3917 self.iommu_mapping.clone() 3918 } else { 3919 None 3920 }; 3921 3922 let bdf = self.add_virtio_pci_device( 3923 handle.virtio_device, 3924 &mapping, 3925 handle.id.clone(), 3926 handle.pci_segment, 3927 handle.dma_handler, 3928 )?; 3929 3930 // Update the PCIU bitmap 3931 self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3932 3933 Ok(PciDeviceInfo { id: handle.id, bdf }) 3934 } 3935 3936 fn is_iommu_segment(&self, pci_segment_id: u16) -> bool { 3937 self.config 3938 .lock() 3939 .as_ref() 3940 .unwrap() 3941 .platform 3942 .as_ref() 3943 .map(|pc| { 3944 pc.iommu_segments 3945 .as_ref() 3946 .map(|v| v.contains(&pci_segment_id)) 3947 .unwrap_or_default() 3948 }) 3949 .unwrap_or_default() 3950 } 3951 3952 pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> { 3953 self.validate_identifier(&disk_cfg.id)?; 3954 3955 if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) { 3956 return Err(DeviceManagerError::InvalidIommuHotplug); 3957 } 3958 3959 let device = self.make_virtio_block_device(disk_cfg)?; 3960 self.hotplug_virtio_pci_device(device) 3961 } 3962 3963 pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> { 3964 self.validate_identifier(&fs_cfg.id)?; 3965 3966 let device = self.make_virtio_fs_device(fs_cfg)?; 3967 self.hotplug_virtio_pci_device(device) 3968 } 3969 3970 pub 
fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
        // Hotplug a virtio-pmem device (the `pub` keyword for this definition
        // sits in the previous chunk).
        self.validate_identifier(&pmem_cfg.id)?;

        // IOMMU attachment is only valid on an IOMMU-dedicated segment.
        if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_pmem_device(pmem_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplug a virtio-net device described by `net_cfg`.
    pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&net_cfg.id)?;

        if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_net_device(net_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplug a vDPA device described by `vdpa_cfg`.
    pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vdpa_cfg.id)?;

        if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_vdpa_device(vdpa_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplug a virtio-vsock device described by `vsock_cfg`.
    pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vsock_cfg.id)?;

        if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_vsock_device(vsock_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Collect per-device counters from every virtio device, keyed by device
    /// id. Devices that expose no counters are skipped.
    pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
        let mut counters = HashMap::new();

        for handle in &self.virtio_devices {
            let virtio_device = handle.virtio_device.lock().unwrap();
            if let Some(device_counters) = virtio_device.counters() {
                counters.insert(handle.id.clone(), device_counters.clone());
            }
        }
4023 4024 counters 4025 } 4026 4027 pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> { 4028 if let Some(balloon) = &self.balloon { 4029 return balloon 4030 .lock() 4031 .unwrap() 4032 .resize(size) 4033 .map_err(DeviceManagerError::VirtioBalloonResize); 4034 } 4035 4036 warn!("No balloon setup: Can't resize the balloon"); 4037 Err(DeviceManagerError::MissingVirtioBalloon) 4038 } 4039 4040 pub fn balloon_size(&self) -> u64 { 4041 if let Some(balloon) = &self.balloon { 4042 return balloon.lock().unwrap().get_actual(); 4043 } 4044 4045 0 4046 } 4047 4048 pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> { 4049 self.device_tree.clone() 4050 } 4051 4052 pub fn restore_devices( 4053 &mut self, 4054 snapshot: Snapshot, 4055 ) -> std::result::Result<(), MigratableError> { 4056 // Finally, restore all devices associated with the DeviceManager. 4057 // It's important to restore devices in the right order, that's why 4058 // the device tree is the right way to ensure we restore a child before 4059 // its parent node. 4060 for node in self 4061 .device_tree 4062 .lock() 4063 .unwrap() 4064 .breadth_first_traversal() 4065 .rev() 4066 { 4067 // Restore the node 4068 if let Some(migratable) = &node.migratable { 4069 info!("Restoring {} from DeviceManager", node.id); 4070 if let Some(snapshot) = snapshot.snapshots.get(&node.id) { 4071 migratable.lock().unwrap().pause()?; 4072 migratable.lock().unwrap().restore(*snapshot.clone())?; 4073 } else { 4074 return Err(MigratableError::Restore(anyhow!( 4075 "Missing device {}", 4076 node.id 4077 ))); 4078 } 4079 } 4080 } 4081 4082 // The devices have been fully restored, we can now update the 4083 // restoring state of the DeviceManager. 
4084 self.restoring = false; 4085 4086 Ok(()) 4087 } 4088 4089 #[cfg(target_arch = "x86_64")] 4090 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 4091 self.ged_notification_device 4092 .as_ref() 4093 .unwrap() 4094 .lock() 4095 .unwrap() 4096 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4097 .map_err(DeviceManagerError::PowerButtonNotification) 4098 } 4099 4100 #[cfg(target_arch = "aarch64")] 4101 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 4102 // There are two use cases: 4103 // 1. Users will use direct kernel boot with device tree. 4104 // 2. Users will use ACPI+UEFI boot. 4105 4106 // Trigger a GPIO pin 3 event to satisify use case 1. 4107 self.gpio_device 4108 .as_ref() 4109 .unwrap() 4110 .lock() 4111 .unwrap() 4112 .trigger_key(3) 4113 .map_err(DeviceManagerError::AArch64PowerButtonNotification)?; 4114 // Trigger a GED power button event to satisify use case 2. 4115 return self 4116 .ged_notification_device 4117 .as_ref() 4118 .unwrap() 4119 .lock() 4120 .unwrap() 4121 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4122 .map_err(DeviceManagerError::PowerButtonNotification); 4123 } 4124 4125 pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> { 4126 &self.iommu_attached_devices 4127 } 4128 4129 #[cfg(target_arch = "aarch64")] 4130 pub fn uefi_flash(&self) -> GuestMemoryAtomic<GuestMemoryMmap> { 4131 self.uefi_flash.as_ref().unwrap().clone() 4132 } 4133 4134 fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> { 4135 if let Some(id) = id { 4136 if id.starts_with("__") { 4137 return Err(DeviceManagerError::InvalidIdentifier(id.clone())); 4138 } 4139 4140 if self.device_tree.lock().unwrap().contains_key(id) { 4141 return Err(DeviceManagerError::IdentifierNotUnique(id.clone())); 4142 } 4143 } 4144 4145 Ok(()) 4146 } 4147 4148 pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses { 4149 &self.acpi_platform_addresses 4150 } 4151 } 4152 4153 fn 
numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
    // Linear scan over all NUMA nodes, returning the id of the first node
    // whose memory-zone list contains `memory_zone_id`.
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
            return Some(*numa_node_id);
        }
    }

    // No NUMA node references this memory zone.
    None
}

impl Aml for DeviceManager {
    // Emit the DeviceManager's portion of the ACPI DSDT: the PCI hotplug
    // controller (PHPR), each PCI segment, the motherboard resources (MBRD),
    // the serial port (COM1, when enabled), the S5 sleep state, the power
    // button (PWRB) and the GED notification device.
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // Build one "\_SB_.PCIx.PCNT" method call per PCI segment; together
        // they form the body of the PSCN (scan) method emitted below.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PCI{:X}.PCNT", i).as_str().into(),
                vec![],
            ));
        }
        // Collect trait-object references; aml::Method::new takes &dyn Aml.
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
                // _STA = 0x0B: device present, enabled and functioning, but
                // not shown in UI (ACPI _STA bit encoding).
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                // BLCK serializes guest accesses to the hotplug registers.
                &aml::Mutex::new("BLCK".into(), 0),
                // _CRS claims the MMIO window backing the hotplug registers,
                // starting at the ACPI address assigned to the DeviceManager.
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCachable::NotCacheable,
                        true,
                        self.acpi_address.0 as u64,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    self.acpi_address.0 as usize,
                    DEVICE_MANAGER_ACPI_SIZE,
                ),
                // Field layout must match the *_FIELD_OFFSET constants used by
                // the BusDevice implementation: PCIU @0, PCID @4, B0EJ @8,
                // PSEG @12, each 32 bits wide.
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                // PCEJ(device, segment): eject a device by writing its slot
                // bit into B0EJ after selecting the segment through PSEG.
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Set the bit for the device/slot number (first
                        // argument) in B0EJ; the VMM performs the eject on
                        // this write (see DeviceManager's BusDevice::write).
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                // PSCN: rescan every PCI segment by invoking its PCNT method.
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .append_aml_bytes(bytes);

        // Each PCI segment contributes its own device/bus AML.
        for segment in &self.pci_segments {
            segment.append_aml_bytes(bytes);
        }

        let mut mbrd_memory = Vec::new();

        // One fixed memory resource per segment covering its MMIO config space.
        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        // Motherboard resource device (PNP0C02) claiming the PCI MMIO config
        // ranges so the guest OS does not reassign them.
        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .append_aml_bytes(bytes);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        // On AArch64 the serial IRQ comes from the FDT device info collected
        // at boot, keyed by (DeviceType::Serial, name).
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        // x86_64 exposes a PC-style 16550 (PNP0501); AArch64
                        // exposes an ARM PL011 (ARMH0011).
                        #[cfg(target_arch = "x86_64")]
                        &aml::EisaName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            // x86_64 uses the legacy COM1 I/O port range;
                            // AArch64 uses the mapped-IO window instead.
                            #[cfg(target_arch = "x86_64")]
                            &aml::Io::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .append_aml_bytes(bytes);
        }

        // S5 (poweroff) sleep state package.
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).append_aml_bytes(bytes);

        // ACPI power button device (PNP0C0C).
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .append_aml_bytes(bytes);

        // GED device for ACPI notifications; expected to be present by the
        // time AML is generated, hence the unwrap.
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .append_aml_bytes(bytes);
    }
}

impl Pausable for DeviceManager {
    // Pause every migratable device registered in the device tree.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    // Resume every migratable device registered in the device tree.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    // Aggregate the snapshots of all migratable devices, then append the
    // DeviceManager's own state as a data section.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::new(DEVICE_MANAGER_SNAPSHOT_ID);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let device_snapshot = migratable.lock().unwrap().snapshot()?;
                snapshot.add_snapshot(device_snapshot);
            }
        }

        // Then we store the DeviceManager state.
        snapshot.add_data_section(SnapshotDataSection::new_from_state(
            DEVICE_MANAGER_SNAPSHOT_ID,
            &self.state(),
        )?);

        Ok(snapshot)
    }

    // Restore DeviceManager state first, then recreate the devices; ordering
    // matters because device creation reads the restored state.
    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        // Let's first restore the DeviceManager.

        self.set_state(&snapshot.to_state(DEVICE_MANAGER_SNAPSHOT_ID)?);

        // Now that DeviceManager is updated with the right states, it's time
        // to create the devices based on the configuration.
        self.create_devices(None, None, None)
            .map_err(|e| MigratableError::Restore(anyhow!("Could not create devices {:?}", e)))?;

        Ok(())
    }
}

impl Transportable for DeviceManager {}

// Migration hooks fan out to every migratable device in the device tree;
// dirty-log ranges from all devices are merged into one table.
impl Migratable for DeviceManager {
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_migration()?;
            }
        }
        Ok(())
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}

// Layout of the PCI hotplug register block. These offsets/sizes must match
// the PCIU/PCID/B0EJ/PSEG entries of the "PCST" aml::Field emitted in
// DeviceManager's Aml implementation (four consecutive 32-bit fields).
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;

// MMIO backend for the PCI hotplug registers exposed through the PHPR ACPI
// device. All accesses target the segment previously selected via PSEG.
impl BusDevice for DeviceManager {
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            // PCIU: read-to-clear bitmap of slots with newly plugged devices.
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_up
                        .to_le_bytes(),
                );
                // Clear the PCIU bitmap
                self.pci_segments[self.selected_segment].pci_devices_up = 0;
            }
            // PCID: read-to-clear bitmap of slots with removed devices.
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_down
                        .to_le_bytes(),
                );
                // Clear the PCID bitmap
                self.pci_segments[self.selected_segment].pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty bitmap since the eject is always
                // taken care of right away during a write access.
                data.fill(0);
            }
            // PSEG: currently selected PCI segment.
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        )
    }

    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
        match offset {
            // B0EJ: each set bit requests an eject of the corresponding slot
            // on the selected segment; ejects are performed synchronously.
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let mut slot_bitmap = u32::from_le_bytes(data_array);

                // Process slots lowest-first, clearing each bit as we go.
                while slot_bitmap > 0 {
                    let slot_id = slot_bitmap.trailing_zeros();
                    if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
                        error!("Failed ejecting device {}: {:?}", slot_id, e);
                    }
                    slot_bitmap &= !(1 << slot_id);
                }
            }
            // PSEG: select the segment targeted by subsequent accesses;
            // out-of-range selections are rejected with an error log.
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let selected_segment = u32::from_le_bytes(data_array) as usize;
                if selected_segment >= self.pci_segments.len() {
                    error!(
                        "Segment selection out of range: {} >= {}",
                        selected_segment,
                        self.pci_segments.len()
                    );
                    return None;
                }
                self.selected_segment = selected_segment;
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        );

        None
    }
}

impl Drop for DeviceManager {
    // Shut down every virtio device owned by the manager when it is dropped.
    fn drop(&mut self) {
        for handle in self.virtio_devices.drain(..) {
            handle.virtio_device.lock().unwrap().shutdown();
        }
    }
}