1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use crate::config::{ 13 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 14 VdpaConfig, VhostMode, VmConfig, VsockConfig, 15 }; 16 use crate::device_tree::{DeviceNode, DeviceTree}; 17 use crate::interrupt::LegacyUserspaceInterruptManager; 18 use crate::interrupt::MsiInterruptManager; 19 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 20 use crate::pci_segment::PciSegment; 21 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 22 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 23 use crate::sigwinch_listener::start_sigwinch_listener; 24 use crate::GuestRegionMmap; 25 use crate::PciDeviceInfo; 26 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; 27 use acpi_tables::sdt::GenericAddress; 28 use acpi_tables::{aml, aml::Aml}; 29 use anyhow::anyhow; 30 use arch::layout; 31 #[cfg(target_arch = "x86_64")] 32 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 33 use arch::NumaNodes; 34 #[cfg(target_arch = "aarch64")] 35 use arch::{DeviceType, MmioDeviceInfo}; 36 use block_util::{ 37 async_io::DiskFile, block_io_uring_is_supported, detect_image_type, 38 fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync, 39 raw_async::RawFileDisk, raw_sync::RawFileDiskSync, vhdx_sync::VhdxDiskSync, ImageType, 40 }; 41 #[cfg(target_arch = "aarch64")] 42 use devices::gic; 43 #[cfg(target_arch = "x86_64")] 44 use devices::ioapic; 45 #[cfg(target_arch = "aarch64")] 46 use devices::legacy::Pl011; 47 #[cfg(target_arch = 
"x86_64")] 48 use devices::legacy::Serial; 49 use devices::{ 50 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 51 }; 52 use hypervisor::{HypervisorType, IoEventAddress}; 53 use libc::{ 54 cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, 55 O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, 56 }; 57 #[cfg(target_arch = "x86_64")] 58 use pci::PciConfigIo; 59 use pci::{ 60 DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping, 61 VfioUserPciDevice, VfioUserPciDeviceError, 62 }; 63 use seccompiler::SeccompAction; 64 use serde::{Deserialize, Serialize}; 65 use std::collections::{BTreeSet, HashMap}; 66 use std::convert::TryInto; 67 use std::fs::{read_link, File, OpenOptions}; 68 use std::io::{self, stdout, Seek, SeekFrom}; 69 use std::mem::zeroed; 70 use std::num::Wrapping; 71 use std::os::unix::fs::OpenOptionsExt; 72 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 73 use std::path::PathBuf; 74 use std::result; 75 use std::sync::{Arc, Mutex}; 76 use std::time::Instant; 77 use tracer::trace_scoped; 78 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 79 use virtio_devices::transport::VirtioTransport; 80 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 81 use virtio_devices::vhost_user::VhostUserConfig; 82 use virtio_devices::{ 83 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 84 }; 85 use virtio_devices::{Endpoint, IommuMapping}; 86 use vm_allocator::{AddressAllocator, SystemAllocator}; 87 use vm_device::dma_mapping::vfio::VfioDmaMapping; 88 use vm_device::dma_mapping::ExternalDmaMapping; 89 use vm_device::interrupt::{ 90 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 91 }; 92 use vm_device::{Bus, BusDevice, Resource}; 93 use vm_memory::guest_memory::FileOffset; 94 use vm_memory::GuestMemoryRegion; 95 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion}; 96 
#[cfg(target_arch = "x86_64")]
use vm_memory::{GuestAddressSpace, GuestMemory};
use vm_migration::{
    protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
    SnapshotDataSection, Snapshottable, Transportable,
};
use vm_virtio::AccessPlatform;
use vm_virtio::VirtioDeviceType;
use vmm_sys_util::eventfd::EventFd;

// Size of each MMIO device slot on AArch64.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";

// Devices that the user may name and for which we generate
// identifiers if the user doesn't give one
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";

/// Errors associated with device manager
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(vmm_sys_util::errno::Error),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier
    InvalidIdentifier(String),

    /// Error activating virtio device
    VirtioActivate(ActivateError),
}

/// Convenience alias for results returned by the device manager.
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// Size of the MMIO window backing the DeviceManager's own ACPI device.
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

// ioctl to lock/unlock the pty slave device.
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
// ioctl to obtain a file descriptor for the pty peer (slave) device.
const TIOCGTPEER: libc::c_int = 0x5441;

/// Create a pseudo-terminal pair, returning the main (master) end, the
/// sub (slave) end and the filesystem path of the sub device.
pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
    // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
    // This is done to try and use the devpts filesystem that
    // could be available for use in the process's namespace first.
    // Ideally these are all the same file though but different
    // kernels could have things setup differently.
    // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
    // for further details.

    let custom_flags = libc::O_NONBLOCK;
    let main = match OpenOptions::new()
        .read(true)
        .write(true)
        .custom_flags(custom_flags)
        .open("/dev/pts/ptmx")
    {
        Ok(f) => f,
        _ => OpenOptions::new()
            .read(true)
            .write(true)
            .custom_flags(custom_flags)
            .open("/dev/ptmx")?,
    };
    // The pty slave is locked by default; unlock it so the peer can be opened.
    let mut unlock: libc::c_ulong = 0;
    // SAFETY: FFI call into libc, trivially safe
    unsafe {
        libc::ioctl(
            main.as_raw_fd(),
            TIOCSPTLCK.try_into().unwrap(),
            &mut unlock,
        )
    };

    // SAFETY: FFI call into libc, trivially safe
    let sub_fd = unsafe {
        libc::ioctl(
            main.as_raw_fd(),
            TIOCGTPEER.try_into().unwrap(),
            libc::O_NOCTTY | libc::O_RDWR,
        )
    };
    if sub_fd == -1 {
        return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
    }

    // Resolve the slave's path through /proc since TIOCGTPEER only gives an fd.
    let proc_path = PathBuf::from(format!("/proc/self/fd/{}", sub_fd));
    let path = read_link(proc_path)?;

    // SAFETY: sub_fd is checked to be valid before being wrapped in File
    Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
}

/// Handle to the (optional) virtio-console resizer, used to propagate
/// terminal size changes into the guest.
#[derive(Default)]
pub struct Console {
    console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
}

impl Console {
    // Forward the current console dimensions to the guest, if a resizer exists.
    pub fn update_console_size(&self) {
        if let Some(resizer) = self.console_resizer.as_ref() {
            resizer.update_console_size()
        }
    }
}

// Groups the allocators and buses needed to place and relocate devices in the
// guest address space.
pub(crate) struct AddressManager {
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    #[cfg(target_arch = "x86_64")]
    pub(crate) io_bus: Arc<Bus>,
    pub(crate) mmio_bus: Arc<Bus>,
    vm: Arc<dyn hypervisor::Vm>,
    device_tree: Arc<Mutex<DeviceTree>>,
    // One MMIO allocator per PCI segment.
    pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}

impl DeviceRelocation for AddressManager {
    // Move a PCI BAR from old_base to new_base: update the relevant allocator,
    // the bus ranges, the device tree resources and any per-device state
    // (ioeventfds, shared memory regions) that depends on the BAR address.
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                // Update system allocator
                if region_type == PciBarRegionType::Memory32BitRegion {
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_mmio_hole_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            Some(len),
                        )
                        .ok_or_else(|| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                "failed allocating new 32 bits MMIO range",
                            )
                        })?;
                } else {
                    // Find the specific allocator that this BAR was allocated from and use it for new one
                    for allocator in &self.pci_mmio_allocators {
                        let allocator_base = allocator.lock().unwrap().base();
                        let allocator_end = allocator.lock().unwrap().end();

                        if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                            allocator
                                .lock()
                                .unwrap()
                                .free(GuestAddress(old_base), len as GuestUsize);

                            allocator
                                .lock()
                                .unwrap()
                                .allocate(
                                    Some(GuestAddress(new_base)),
                                    len as GuestUsize,
                                    Some(len),
                                )
                                .ok_or_else(|| {
                                    io::Error::new(
                                        io::ErrorKind::Other,
                                        "failed allocating new 64 bits MMIO range",
                                    )
                                })?;

                            break;
                        }
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{:x} for device {}",
                            old_base, id
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {} from device tree", id),
                ));
            }
        }

        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The moved BAR is the config BAR: re-register the ioeventfds
                // at the new guest addresses.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {:?}", e),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {:?}", e),
                            )
                        })?;
                }
            } else {
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        // The moved BAR backs shared memory regions: remap the
                        // user memory region at the new guest address.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {:?}", e),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {:?}", e),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {:?}", e),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the PCI device itself record the new BAR address.
        pci_dev.move_bar(old_base, new_base)
    }
}

// Serializable state captured for snapshot/restore.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

/// The main (master) end of a pty together with the path of its sub device.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    fn clone(&self) -> Self {
        PtyPair {
            // NOTE(review): try_clone failure panics here — presumably
            // acceptable because the fd is known-valid; confirm.
            main: self.main.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

// Handle to any of the PCI device flavors managed by the DeviceManager.
#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}

// A virtio device plus the placement/IOMMU metadata needed to attach it.
#[derive(Clone)]
struct MetaVirtioDevice {
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // Whether the device sits behind the virtual IOMMU.
    iommu: bool,
    id: String,
    pci_segment: u16,
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}

/// Addresses of ACPI platform registers, used when building ACPI tables.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

pub struct DeviceManager {
    // The underlying hypervisor
    hypervisor_type: HypervisorType,

    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // console PTY
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // serial PTY
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // pty foreground status,
    console_resize_pipe: Option<Arc<File>>,

    // Interrupt controller
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    exit_evt: EventFd,
    reset_evt: EventFd,

    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    acpi_address: GuestAddress,

    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // Helps identify if the VM is currently being restored
    restoring: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    // Identifiers of devices present at boot time (used to tell hotplugged
    // devices apart from boot-time devices).
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,
}

impl DeviceManager {
    /// Creates the DeviceManager shell: carves up the PCI MMIO device area
    /// into per-segment allocators, builds the MSI interrupt manager and the
    /// PCI segments, and registers the manager itself on the MMIO bus at its
    /// ACPI address. Devices themselves are added later via `create_devices`.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        hypervisor_type: HypervisorType,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        exit_evt: &EventFd,
        reset_evt: &EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        restoring: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        let device_tree = Arc::new(Mutex::new(DeviceTree::new()));

        // Number of PCI segments comes from the platform config; default to a
        // single segment when no platform section is present.
        let num_pci_segments =
            if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
                platform_config.num_pci_segments
            } else {
                1
            };

        let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
        let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;

        // Start each PCI segment range on a 4GiB boundary: divide the device
        // area evenly across segments, rounded down to a 4GiB multiple.
        let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
            / ((4 << 30) * num_pci_segments as u64)
            * (4 << 30);

        // One MMIO address allocator per PCI segment, each covering its own
        // contiguous slice of the device area.
        let mut pci_mmio_allocators = vec![];
        for i in 0..num_pci_segments as u64 {
            let mmio_start = start_of_device_area + i * pci_segment_size;
            let allocator = Arc::new(Mutex::new(
                AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
            ));
            pci_mmio_allocators.push(allocator)
        }

        let address_manager = Arc::new(AddressManager {
            allocator: memory_manager.lock().unwrap().allocator(),
            #[cfg(target_arch = "x86_64")]
            io_bus: Arc::new(Bus::new()),
            mmio_bus: Arc::new(Bus::new()),
            vm: vm.clone(),
            device_tree: Arc::clone(&device_tree),
            pci_mmio_allocators,
        });

        // First we create the MSI interrupt manager, the legacy one is created
        // later, after the IOAPIC device creation.
        // The reason we create the MSI one first is because the IOAPIC needs it,
        // and then the legacy interrupt manager needs an IOAPIC. So we're
        // handling a linear dependency chain:
        // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
        let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
            Arc::new(MsiInterruptManager::new(
                Arc::clone(&address_manager.allocator),
                vm,
            ));

        // MMIO window through which the guest reports ACPI device events back
        // to the DeviceManager (registered on the mmio_bus below).
        // NOTE(review): an MMIO allocation failure maps to AllocateIoPort
        // here — possibly AllocateMmioAddress was intended; confirm upstream.
        let acpi_address = address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        // Pre-reserve one legacy IRQ per PCI device slot (32 slots per bus).
        let mut pci_irq_slots = [0; 32];
        PciSegment::reserve_legacy_interrupts_for_pci_devices(
            &address_manager,
            &mut pci_irq_slots,
        )?;

        // Segment 0 is the default segment; the remaining segments are built
        // with explicit ids, each bound to its own MMIO allocator.
        let mut pci_segments = vec![PciSegment::new_default_segment(
            &address_manager,
            Arc::clone(&address_manager.pci_mmio_allocators[0]),
            &pci_irq_slots,
        )?];

        for i in 1..num_pci_segments as usize {
            pci_segments.push(PciSegment::new(
                i as u16,
                &address_manager,
                Arc::clone(&address_manager.pci_mmio_allocators[i]),
                &pci_irq_slots,
            )?);
        }

        let device_manager = DeviceManager {
            hypervisor_type,
            address_manager: Arc::clone(&address_manager),
            console: Arc::new(Console::default()),
            interrupt_controller: None,
            #[cfg(target_arch = "aarch64")]
            cmdline_additions: Vec::new(),
            ged_notification_device: None,
            config,
            memory_manager,
            virtio_devices: Vec::new(),
            bus_devices: Vec::new(),
            device_id_cnt: Wrapping(0),
            msi_interrupt_manager,
            legacy_interrupt_manager: None,
            passthrough_device: None,
            vfio_container: None,
            iommu_device: None,
            iommu_mapping: None,
            iommu_attached_devices: None,
            pci_segments,
            device_tree,
            exit_evt: exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
            reset_evt: reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
            #[cfg(target_arch = "aarch64")]
            id_to_dev_info: HashMap::new(),
            seccomp_action,
            numa_nodes,
            balloon: None,
            activate_evt: activate_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            acpi_address,
            selected_segment: 0,
            serial_pty: None,
            serial_manager: None,
            console_pty: None,
            console_resize_pipe: None,
            virtio_mem_devices: Vec::new(),
            #[cfg(target_arch = "aarch64")]
            gpio_device: None,
            force_iommu,
            restoring,
            io_uring_supported: None,
            boot_id_list,
            timestamp,
            pending_activations: Arc::new(Mutex::new(Vec::default())),
            acpi_platform_addresses: AcpiPlatformAddresses::default(),
        };

        let device_manager = Arc::new(Mutex::new(device_manager));

        // Expose the DeviceManager itself as an MMIO device at its ACPI
        // address so the guest can trigger device hotplug notifications.
        address_manager
            .mmio_bus
            .insert(
                Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
                acpi_address.0,
                DEVICE_MANAGER_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(device_manager)
    }

    /// Returns a clone of the serial device PTY pair, if one was created.
    pub fn serial_pty(&self) -> Option<PtyPair> {
        self.serial_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    /// Returns a clone of the virtio-console PTY pair, if one was created.
    pub fn console_pty(&self) -> Option<PtyPair> {
        self.console_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    /// Returns the read side of the SIGWINCH resize pipe, if one exists.
    pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
        self.console_resize_pipe.as_ref().map(Arc::clone)
    }

    /// Instantiates all guest devices: interrupt controller, legacy/ACPI
    /// devices, console, then every virtio device, and finally places them on
    /// the PCI bus. PTY pairs/resize pipe may be passed in on restore so the
    /// same endpoints are reused.
    pub fn create_devices(
        &mut self,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        console_resize_pipe: Option<File>,
    ) -> DeviceManagerResult<()> {
        trace_scoped!("create_devices");

        let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();

        let interrupt_controller = self.add_interrupt_controller()?;

        // Now we can create the legacy interrupt manager, which needs the freshly
        // formed IOAPIC device.
        let legacy_interrupt_manager: Arc<
            dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
        > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
            &interrupt_controller,
        )));

        {
            // Expose the MemoryManager at its ACPI address (memory hotplug
            // interface), when it has one.
            if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
                self.address_manager
                    .mmio_bus
                    .insert(
                        Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
                        acpi_address.0,
                        MEMORY_MANAGER_ACPI_SIZE as u64,
                    )
                    .map_err(DeviceManagerError::BusError)?;
            }
        }

        #[cfg(target_arch = "x86_64")]
        self.add_legacy_devices(
            self.reset_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )?;

        #[cfg(target_arch = "aarch64")]
        self.add_legacy_devices(&legacy_interrupt_manager)?;

        {
            self.ged_notification_device = self.add_acpi_devices(
                &legacy_interrupt_manager,
                self.reset_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )?;
        }

        self.console = self.add_console_device(
            &legacy_interrupt_manager,
            &mut virtio_devices,
            serial_pty,
            console_pty,
            console_resize_pipe,
        )?;

        self.legacy_interrupt_manager = Some(legacy_interrupt_manager);

        virtio_devices.append(&mut self.make_virtio_devices()?);

        self.add_pci_devices(virtio_devices.clone())?;

        self.virtio_devices = virtio_devices;

        Ok(())
    }

    /// Snapshots the pieces of DeviceManager state that must survive a
    /// save/restore cycle: the device tree and the device-name counter.
    fn state(&self) -> DeviceManagerState {
        DeviceManagerState {
            device_tree: self.device_tree.lock().unwrap().clone(),
            device_id_cnt: self.device_id_cnt,
        }
    }

    /// Restores state previously captured by `state()`.
    fn set_state(&mut self, state: &DeviceManagerState) {
        *self.device_tree.lock().unwrap() = state.device_tree.clone();
        self.device_id_cnt = state.device_id_cnt;
    }

    /// Returns the inclusive guest-address range used for MSI doorbells:
    /// derived from the vGIC ITS config on aarch64, the fixed APIC window
    /// 0xfee0_0000..=0xfeef_ffff on x86_64.
    fn get_msi_iova_space(&mut self) -> (u64, u64) {
        #[cfg(target_arch = "aarch64")]
        {
            let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
            let vgic_config = gic::Gic::create_default_config(vcpus.into());
            (
                vgic_config.msi_addr,
                vgic_config.msi_addr + vgic_config.msi_size - 1,
            )
        }
        #[cfg(target_arch = "x86_64")]
        (0xfee0_0000, 0xfeef_ffff)
    }

    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }

    /// Places every virtio device (plus VFIO / vfio-user / vDPA devices) on
    /// the PCI bus, optionally creating a virtio-iommu and recording which
    /// devices sit behind it. The iommu device itself is added last so it
    /// knows the full list of attached devices.
    #[allow(unused_variables)]
    fn add_pci_devices(
        &mut self,
        virtio_devices: Vec<MetaVirtioDevice>,
    ) -> DeviceManagerResult<()> {
        let iommu_id = String::from(IOMMU_DEVICE_NAME);

        let iommu_device = if self.config.lock().unwrap().iommu {
            let (device, mapping) = virtio_devices::Iommu::new(
                iommu_id.clone(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.get_msi_iova_space(),
            )
            .map_err(DeviceManagerError::CreateVirtioIommu)?;
            let device = Arc::new(Mutex::new(device));
            self.iommu_device = Some(Arc::clone(&device));
            self.iommu_mapping = Some(mapping);

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(iommu_id.clone(), device_node!(iommu_id, device));

            Some(device)
        } else {
            None
        };

        let mut iommu_attached_devices = Vec::new();
        {
            for handle in virtio_devices {
                // Only hand the iommu mapping to devices that asked to sit
                // behind the virtio-iommu.
                let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
                    self.iommu_mapping.clone()
                } else {
                    None
                };

                let dev_id = self.add_virtio_pci_device(
                    handle.virtio_device,
                    &mapping,
                    handle.id,
                    handle.pci_segment,
                    handle.dma_handler,
                )?;

                if handle.iommu {
                    iommu_attached_devices.push(dev_id);
                }
            }

            let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
            iommu_attached_devices.append(&mut vfio_iommu_device_ids);

            let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
            iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);

            // Add all devices from forced iommu segments
            if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
                if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
                    for segment in iommu_segments {
                        // Every possible device slot (0..32) on the segment is
                        // considered iommu-attached, bus/function fixed at 0.
                        for device in 0..32 {
                            let bdf = PciBdf::new(*segment, 0, device, 0);
                            if !iommu_attached_devices.contains(&bdf) {
                                iommu_attached_devices.push(bdf);
                            }
                        }
                    }
                }
            }

            if let Some(iommu_device) = iommu_device {
                // The iommu device itself always goes on segment 0 and is
                // never behind an iommu.
                let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
                self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
            }
        }

        // Keep the per-segment PCI config spaces alive in bus_devices.
        for segment in &self.pci_segments {
            #[cfg(target_arch = "x86_64")]
            if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
                self.bus_devices
                    .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
            }

            self.bus_devices
                .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
        }

        Ok(())
    }

    #[cfg(target_arch = "aarch64")]
    /// Creates the GIC-backed interrupt controller object (aarch64).
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
            gic::Gic::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        // Unlike x86_64, the "interrupt_controller" here for AArch64 is only
        // a `Gic` object that implements the `InterruptController` to provide
        // interrupt delivery service. This is not the real GIC device so that
        // we do not need to insert it to the device tree.

        Ok(interrupt_controller)
    }

    #[cfg(target_arch = "aarch64")]
    /// Returns the GIC interrupt controller, if it has been created.
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }

    #[cfg(target_arch = "x86_64")]
    /// Creates the userspace IOAPIC, registers it on the MMIO bus and in the
    /// device tree (x86_64).
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let id = String::from(IOAPIC_DEVICE_NAME);

        // Create IOAPIC
        let interrupt_controller = Arc::new(Mutex::new(
            ioapic::Ioapic::new(
                id.clone(),
                APIC_START,
                Arc::clone(&self.msi_interrupt_manager),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        self.address_manager
            .mmio_bus
            .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
            .map_err(DeviceManagerError::BusError)?;

        self.bus_devices
            .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// Creates the ACPI platform devices: the shutdown/reset device, the GED
    /// (Generic Event Device) used for hotplug notifications, and the ACPI PM
    /// timer. Returns the GED device so callers can trigger notifications.
    fn add_acpi_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        reset_evt: EventFd,
        exit_evt: EventFd,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
        let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
            exit_evt, reset_evt,
        )));

        self.bus_devices
            .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            let shutdown_pio_address: u16 = 0x600;

            // TODO: Remove the entry for 0x3c0 once all firmwares will have been
            // updated with the new value.
            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(0x3c0)), 0x8, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            // TODO: Remove the entry for 0x3c0 once all firmwares will have been
            // updated with the new value.
            // NOTE(review): 0x8 ports are reserved above but only 0x4 are
            // inserted on the bus here — confirm whether the extra range is
            // intentional padding.
            self.address_manager
                .io_bus
                .insert(shutdown_device.clone(), 0x3c0, 0x4)
                .map_err(DeviceManagerError::BusError)?;

            self.address_manager
                .io_bus
                .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            // Advertise the same port as ACPI sleep control/status and reset
            // register (hardware-reduced ACPI style).
            self.acpi_platform_addresses.sleep_control_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.sleep_status_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.reset_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
        }

        // GED: allocate a legacy IRQ and an MMIO window for the event device.
        let ged_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();
        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: ged_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;
        let ged_address = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(
                None,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
                None,
            )
            .ok_or(DeviceManagerError::AllocateMmioAddress)?;
        let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
            interrupt_group,
            ged_irq,
            ged_address,
        )));
        self.address_manager
            .mmio_bus
            .insert(
                ged_device.clone(),
                ged_address.0,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;
        self.bus_devices
            .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);

        let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));

        self.bus_devices
            .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            let pm_timer_pio_address: u16 = 0x608;

            // TODO: Remove the entry for 0xb008 once all firmwares will have been
            // updated with the new value.
            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(0xb008)), 0x4, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            // TODO: Remove the entry for 0xb008 once all firmwares will have been
            // updated with the new value.
            self.address_manager
                .io_bus
                .insert(pm_timer_device.clone(), 0xb008, 0x4)
                .map_err(DeviceManagerError::BusError)?;

            self.address_manager
                .io_bus
                .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            self.acpi_platform_addresses.pm_timer_address =
                Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
        }

        Ok(Some(ged_device))
    }

    #[cfg(target_arch = "x86_64")]
    /// Adds the x86 legacy I/O devices: i8042 (reset), CMOS (memory size
    /// reporting), firmware debug port (0x402) and the 0x80 debug port.
    fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
        // Add a shutdown device (i8042)
        let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
            reset_evt.try_clone().unwrap(),
        )));

        self.bus_devices
            .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .io_bus
            .insert(i8042, 0x61, 0x4)
            .map_err(DeviceManagerError::BusError)?;
        {
            // Add a CMOS emulated device
            // Total guest memory = highest RAM address + 1.
            let mem_size = self
                .memory_manager
                .lock()
                .unwrap()
                .guest_memory()
                .memory()
                .last_addr()
                .0
                + 1;
            let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
            let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);

            let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
                mem_below_4g,
                mem_above_4g,
                reset_evt,
            )));

            self.bus_devices
                .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(cmos, 0x70, 0x2)
                .map_err(DeviceManagerError::BusError)?;

            let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));

            self.bus_devices
                .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(fwdebug, 0x402, 0x1)
                .map_err(DeviceManagerError::BusError)?;
        }

        // 0x80 debug port
        let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
        self.bus_devices
            .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
        self.address_manager
            .io_bus
            .insert(debug_port, 0x80, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        Ok(())
    }

    #[cfg(target_arch = "aarch64")]
    /// Adds the aarch64 legacy MMIO devices: PL031 RTC and PL061 GPIO, each
    /// with a freshly allocated IRQ, recorded in `id_to_dev_info` for FDT
    /// generation.
    fn add_legacy_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
    ) -> DeviceManagerResult<()> {
        // Add a RTC device
        let rtc_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: rtc_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));

        self.bus_devices
            .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(rtc_device, addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Rtc, "rtc".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: rtc_irq,
            },
        );

        // Add a GPIO device
        let id = String::from(GPIO_DEVICE_NAME);
        let gpio_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: gpio_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
            id.clone(),
            interrupt_group,
        )));

        self.bus_devices
            .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(gpio_device.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.gpio_device = Some(gpio_device.clone());

        self.id_to_dev_info.insert(
            (DeviceType::Gpio, "gpio".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: gpio_irq,
            },
        );

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, gpio_device));

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    /// Adds the 16550-style serial device at the standard COM1 location
    /// (port 0x3f8, IRQ 4).
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
        // Serial is tied to IRQ #4
        let serial_irq = 4;

        let id = String::from(SERIAL_DEVICE_NAME);

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(Serial::new(
            id.clone(),
            interrupt_group,
            serial_writer,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(serial.clone(), 0x3f8, 0x8)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    #[cfg(target_arch = "aarch64")]
    /// Adds the PL011 UART at the legacy serial MMIO location with an
    /// allocated IRQ, and appends the matching earlycon kernel argument.
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
        let id = String::from(SERIAL_DEVICE_NAME);

        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            self.timestamp,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: serial_irq,
            },
        );

        self.cmdline_additions
            .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// Applies `f` to the terminal attributes of `fd`, if `fd` is a TTY.
    /// Non-TTY fds are silently accepted (no-op).
    fn modify_mode<F: FnOnce(&mut termios)>(
        &self,
        fd: RawFd,
        f: F,
    ) -> vmm_sys_util::errno::Result<()> {
        // SAFETY: safe because we check the return value of isatty.
        if unsafe { isatty(fd) } != 1 {
            return Ok(());
        }

        // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr
        // and we check the return result.
        let mut termios: termios = unsafe { zeroed() };
        let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }
        f(&mut termios);
        // SAFETY: Safe because the syscall will only read the extent of termios and we check
        // the return result.
        let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }

        Ok(())
    }

    /// Puts the terminal backing `f` into raw mode (no echo, no line
    /// buffering); a no-op when `f` is not a TTY.
    fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> {
        // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
        self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
    }

    /// Spawns the sandboxed SIGWINCH listener for the given PTY pair and
    /// stores the resulting resize pipe. Listener setup failures are logged
    /// and ignored (console resize is best-effort).
    fn listen_for_sigwinch_on_tty(&mut self, pty_main: File, pty_sub: File) -> std::io::Result<()> {
        let seccomp_filter = get_seccomp_filter(
            &self.seccomp_action,
            Thread::PtyForeground,
            self.hypervisor_type,
        )
        .unwrap();

        match start_sigwinch_listener(seccomp_filter, pty_main, pty_sub) {
            Ok(pipe) => {
                self.console_resize_pipe = Some(Arc::new(pipe));
            }
            Err(e) => {
                warn!("Ignoring error from setting up SIGWINCH listener: {}", e)
            }
        }

        Ok(())
    }

    /// Creates the virtio-console device wired to the configured endpoint
    /// (file, PTY, TTY, null, or off). Returns the resizer only when the
    /// console is bound to the TTY, where SIGWINCH handling applies.
    fn add_virtio_console_device(
        &mut self,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_pty: Option<PtyPair>,
        resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
        let console_config = self.config.lock().unwrap().console.clone();
        let endpoint = match console_config.mode {
            ConsoleOutputMode::File => {
                let file = File::create(console_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
                Endpoint::File(file)
            }
            ConsoleOutputMode::Pty => {
                // On restore a PTY pair (and resize pipe) is handed in;
                // otherwise create a fresh pair and start the SIGWINCH
                // listener on it.
                if let Some(pty) = console_pty {
                    self.config.lock().unwrap().console.file = Some(pty.path.clone());
                    let file = pty.main.try_clone().unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(pty)));
                    self.console_resize_pipe = resize_pipe.map(Arc::new);
                    Endpoint::PtyPair(file.try_clone().unwrap(), file)
                } else {
                    let (main, mut sub, path) =
                        create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?;
                    self.set_raw_mode(&mut sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().console.file = Some(path.clone());
                    let file = main.try_clone().unwrap();
                    assert!(resize_pipe.is_none());
                    self.listen_for_sigwinch_on_tty(main.try_clone().unwrap(), sub)
                        .unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
                    Endpoint::PtyPair(file.try_clone().unwrap(), file)
                }
            }
            ConsoleOutputMode::Tty => {
                // Duplicating the file descriptors like this is needed as otherwise
                // they will be closed on a reboot and the numbers reused

                // SAFETY: FFI call to dup. Trivially safe.
                let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
                if stdout == -1 {
                    return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
                }
                // SAFETY: stdout is valid and owned solely by us.
                let stdout = unsafe { File::from_raw_fd(stdout) };

                // If an interactive TTY then we can accept input
                // SAFETY: FFI call. Trivially safe.
                if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
                    // SAFETY: FFI call to dup. Trivially safe.
                    let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
                    if stdin == -1 {
                        return vmm_sys_util::errno::errno_result()
                            .map_err(DeviceManagerError::DupFd);
                    }
                    // SAFETY: stdin is valid and owned solely by us.
                    let stdin = unsafe { File::from_raw_fd(stdin) };

                    Endpoint::FilePair(stdout, stdin)
                } else {
                    Endpoint::File(stdout)
                }
            }
            ConsoleOutputMode::Null => Endpoint::Null,
            ConsoleOutputMode::Off => return Ok(None),
        };
        let id = String::from(CONSOLE_DEVICE_NAME);

        // NOTE: `|` on bools is an eager (non-short-circuiting) OR; both
        // operands are plain flags so the behavior matches `||`.
        let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
            id.clone(),
            endpoint,
            self.console_resize_pipe
                .as_ref()
                .map(|p| p.try_clone().unwrap()),
            self.force_iommu | console_config.iommu,
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )
        .map_err(DeviceManagerError::CreateVirtioConsole)?;
        let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
        virtio_devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_console_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: console_config.iommu,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_console_device));

        // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
        Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
            Some(console_resizer)
        } else {
            None
        })
    }

    /// Creates the serial device (per its configured output mode) and the
    /// virtio-console, returning the composite `Console` handle. In Pty/Tty
    /// modes a SerialManager thread is started to pump serial input.
    fn add_console_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        console_resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
            ConsoleOutputMode::File => Some(Box::new(
                File::create(serial_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::SerialOutputFileOpen)?,
            )),
            ConsoleOutputMode::Pty => {
                // Reuse the restored PTY pair if given, otherwise create one;
                // either way output goes through the SerialManager (writer is
                // None here).
                if let Some(pty) = serial_pty {
                    self.config.lock().unwrap().serial.file = Some(pty.path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(pty)));
                } else {
                    let (main, mut sub, path) =
                        create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
                    self.set_raw_mode(&mut sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().serial.file = Some(path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
                }
                None
            }
            ConsoleOutputMode::Tty => Some(Box::new(stdout())),
            ConsoleOutputMode::Off | ConsoleOutputMode::Null => None,
        };
        if serial_config.mode != ConsoleOutputMode::Off {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            self.serial_manager = match serial_config.mode {
                ConsoleOutputMode::Pty | ConsoleOutputMode::Tty => {
                    let serial_manager =
                        SerialManager::new(serial, self.serial_pty.clone(), serial_config.mode)
                            .map_err(DeviceManagerError::CreateSerialManager)?;
                    if let Some(mut serial_manager) = serial_manager {
                        serial_manager
                            .start_thread(
                                self.exit_evt
                                    .try_clone()
                                    .map_err(DeviceManagerError::EventFd)?,
                            )
                            .map_err(DeviceManagerError::SpawnSerialManager)?;
                        Some(Arc::new(serial_manager))
                    } else {
                        None
                    }
                }
                _ => None,
            };
        }

        let console_resizer =
            self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;

        Ok(Arc::new(Console { console_resizer }))
    }

    /// Builds every configured virtio device, in a fixed order, and returns
    /// the full list for PCI placement.
    fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices: Vec<MetaVirtioDevice> = Vec::new();

        // Create "standard" virtio devices (net/block/rng)
        devices.append(&mut self.make_virtio_block_devices()?);
        devices.append(&mut self.make_virtio_net_devices()?);
        devices.append(&mut self.make_virtio_rng_devices()?);

        // Add virtio-fs if required
        devices.append(&mut self.make_virtio_fs_devices()?);

        // Add virtio-pmem if required
        devices.append(&mut self.make_virtio_pmem_devices()?);

        // Add virtio-vsock if required
        devices.append(&mut self.make_virtio_vsock_devices()?);

        devices.append(&mut self.make_virtio_mem_devices()?);

        // Add virtio-balloon if required
        devices.append(&mut self.make_virtio_balloon_devices()?);

        // Add virtio-watchdog device
        devices.append(&mut self.make_virtio_watchdog_devices()?);

        // Add vDPA devices if required
        devices.append(&mut self.make_vdpa_devices()?);

        Ok(devices)
    }

    // Cache whether io_uring is supported to avoid probing for every block device
    fn io_uring_is_supported(&mut self) -> bool {
        if let Some(supported) = self.io_uring_supported {
            return supported;
        }

        let supported = block_io_uring_is_supported();
        self.io_uring_supported = Some(supported);
        supported
    }

    /// Builds one virtio-block device from its config: either a vhost-user
    /// backend, or a local disk file whose image type (raw/qcow2/vhd/vhdx)
    /// selects the sync or io_uring-backed async implementation.
    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Assign an auto-generated id when none is configured, and write it
        // back into the config so it is stable across reboots/snapshots.
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let vhost_user_block = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(
                    id.clone(),
                    vu_cfg,
                    self.restoring,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if self.io_uring_is_supported() && !disk_cfg.disable_io_uring {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");
                        Box::new(
                            FixedVhdDiskAsync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                        ) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if self.io_uring_is_supported() && !disk_cfg.disable_io_uring {
                        info!("Using asynchronous RAW disk file (io_uring)");
                        Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            // NOTE: `|` is an eager boolean OR here (both operands are plain
            // flags), equivalent to `||` in effect.
            let virtio_block = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    self.seccomp_action.clone(),
                    disk_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Builds every configured virtio-block device, writing back any
    /// auto-generated ids into the VM config.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }

    /// Builds one virtio-net device from its config (vhost-user or tap
    /// backed). (Definition continues beyond this view.)
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
2236 num_queues: net_cfg.num_queues, 2237 queue_size: net_cfg.queue_size, 2238 }; 2239 let server = match net_cfg.vhost_mode { 2240 VhostMode::Client => false, 2241 VhostMode::Server => true, 2242 }; 2243 let vhost_user_net = Arc::new(Mutex::new( 2244 match virtio_devices::vhost_user::Net::new( 2245 id.clone(), 2246 net_cfg.mac, 2247 net_cfg.mtu, 2248 vu_cfg, 2249 server, 2250 self.seccomp_action.clone(), 2251 self.restoring, 2252 self.exit_evt 2253 .try_clone() 2254 .map_err(DeviceManagerError::EventFd)?, 2255 self.force_iommu, 2256 ) { 2257 Ok(vun_device) => vun_device, 2258 Err(e) => { 2259 return Err(DeviceManagerError::CreateVhostUserNet(e)); 2260 } 2261 }, 2262 )); 2263 2264 ( 2265 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2266 vhost_user_net as Arc<Mutex<dyn Migratable>>, 2267 ) 2268 } else { 2269 let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap { 2270 Arc::new(Mutex::new( 2271 virtio_devices::Net::new( 2272 id.clone(), 2273 Some(tap_if_name), 2274 None, 2275 None, 2276 Some(net_cfg.mac), 2277 &mut net_cfg.host_mac, 2278 net_cfg.mtu, 2279 self.force_iommu | net_cfg.iommu, 2280 net_cfg.num_queues, 2281 net_cfg.queue_size, 2282 self.seccomp_action.clone(), 2283 net_cfg.rate_limiter_config, 2284 self.exit_evt 2285 .try_clone() 2286 .map_err(DeviceManagerError::EventFd)?, 2287 ) 2288 .map_err(DeviceManagerError::CreateVirtioNet)?, 2289 )) 2290 } else if let Some(fds) = &net_cfg.fds { 2291 Arc::new(Mutex::new( 2292 virtio_devices::Net::from_tap_fds( 2293 id.clone(), 2294 fds, 2295 Some(net_cfg.mac), 2296 net_cfg.mtu, 2297 self.force_iommu | net_cfg.iommu, 2298 net_cfg.queue_size, 2299 self.seccomp_action.clone(), 2300 net_cfg.rate_limiter_config, 2301 self.exit_evt 2302 .try_clone() 2303 .map_err(DeviceManagerError::EventFd)?, 2304 ) 2305 .map_err(DeviceManagerError::CreateVirtioNet)?, 2306 )) 2307 } else { 2308 Arc::new(Mutex::new( 2309 virtio_devices::Net::new( 2310 id.clone(), 2311 None, 2312 Some(net_cfg.ip), 2313 
Some(net_cfg.mask), 2314 Some(net_cfg.mac), 2315 &mut net_cfg.host_mac, 2316 net_cfg.mtu, 2317 self.force_iommu | net_cfg.iommu, 2318 net_cfg.num_queues, 2319 net_cfg.queue_size, 2320 self.seccomp_action.clone(), 2321 net_cfg.rate_limiter_config, 2322 self.exit_evt 2323 .try_clone() 2324 .map_err(DeviceManagerError::EventFd)?, 2325 ) 2326 .map_err(DeviceManagerError::CreateVirtioNet)?, 2327 )) 2328 }; 2329 2330 ( 2331 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2332 virtio_net as Arc<Mutex<dyn Migratable>>, 2333 ) 2334 }; 2335 2336 // Fill the device tree with a new node. In case of restore, we 2337 // know there is nothing to do, so we can simply override the 2338 // existing entry. 2339 self.device_tree 2340 .lock() 2341 .unwrap() 2342 .insert(id.clone(), device_node!(id, migratable_device)); 2343 2344 Ok(MetaVirtioDevice { 2345 virtio_device, 2346 iommu: net_cfg.iommu, 2347 id, 2348 pci_segment: net_cfg.pci_segment, 2349 dma_handler: None, 2350 }) 2351 } 2352 2353 /// Add virto-net and vhost-user-net devices 2354 fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2355 let mut devices = Vec::new(); 2356 let mut net_devices = self.config.lock().unwrap().net.clone(); 2357 if let Some(net_list_cfg) = &mut net_devices { 2358 for net_cfg in net_list_cfg.iter_mut() { 2359 devices.push(self.make_virtio_net_device(net_cfg)?); 2360 } 2361 } 2362 self.config.lock().unwrap().net = net_devices; 2363 2364 Ok(devices) 2365 } 2366 2367 fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2368 let mut devices = Vec::new(); 2369 2370 // Add virtio-rng if required 2371 let rng_config = self.config.lock().unwrap().rng.clone(); 2372 if let Some(rng_path) = rng_config.src.to_str() { 2373 info!("Creating virtio-rng device: {:?}", rng_config); 2374 let id = String::from(RNG_DEVICE_NAME); 2375 2376 let virtio_rng_device = Arc::new(Mutex::new( 2377 virtio_devices::Rng::new( 2378 id.clone(), 
2379 rng_path, 2380 self.force_iommu | rng_config.iommu, 2381 self.seccomp_action.clone(), 2382 self.exit_evt 2383 .try_clone() 2384 .map_err(DeviceManagerError::EventFd)?, 2385 ) 2386 .map_err(DeviceManagerError::CreateVirtioRng)?, 2387 )); 2388 devices.push(MetaVirtioDevice { 2389 virtio_device: Arc::clone(&virtio_rng_device) 2390 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2391 iommu: rng_config.iommu, 2392 id: id.clone(), 2393 pci_segment: 0, 2394 dma_handler: None, 2395 }); 2396 2397 // Fill the device tree with a new node. In case of restore, we 2398 // know there is nothing to do, so we can simply override the 2399 // existing entry. 2400 self.device_tree 2401 .lock() 2402 .unwrap() 2403 .insert(id.clone(), device_node!(id, virtio_rng_device)); 2404 } 2405 2406 Ok(devices) 2407 } 2408 2409 fn make_virtio_fs_device( 2410 &mut self, 2411 fs_cfg: &mut FsConfig, 2412 ) -> DeviceManagerResult<MetaVirtioDevice> { 2413 let id = if let Some(id) = &fs_cfg.id { 2414 id.clone() 2415 } else { 2416 let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?; 2417 fs_cfg.id = Some(id.clone()); 2418 id 2419 }; 2420 2421 info!("Creating virtio-fs device: {:?}", fs_cfg); 2422 2423 let mut node = device_node!(id); 2424 2425 if let Some(fs_socket) = fs_cfg.socket.to_str() { 2426 let virtio_fs_device = Arc::new(Mutex::new( 2427 virtio_devices::vhost_user::Fs::new( 2428 id.clone(), 2429 fs_socket, 2430 &fs_cfg.tag, 2431 fs_cfg.num_queues, 2432 fs_cfg.queue_size, 2433 None, 2434 self.seccomp_action.clone(), 2435 self.restoring, 2436 self.exit_evt 2437 .try_clone() 2438 .map_err(DeviceManagerError::EventFd)?, 2439 self.force_iommu, 2440 ) 2441 .map_err(DeviceManagerError::CreateVirtioFs)?, 2442 )); 2443 2444 // Update the device tree with the migratable device. 
2445 node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>); 2446 self.device_tree.lock().unwrap().insert(id.clone(), node); 2447 2448 Ok(MetaVirtioDevice { 2449 virtio_device: Arc::clone(&virtio_fs_device) 2450 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2451 iommu: false, 2452 id, 2453 pci_segment: fs_cfg.pci_segment, 2454 dma_handler: None, 2455 }) 2456 } else { 2457 Err(DeviceManagerError::NoVirtioFsSock) 2458 } 2459 } 2460 2461 fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2462 let mut devices = Vec::new(); 2463 2464 let mut fs_devices = self.config.lock().unwrap().fs.clone(); 2465 if let Some(fs_list_cfg) = &mut fs_devices { 2466 for fs_cfg in fs_list_cfg.iter_mut() { 2467 devices.push(self.make_virtio_fs_device(fs_cfg)?); 2468 } 2469 } 2470 self.config.lock().unwrap().fs = fs_devices; 2471 2472 Ok(devices) 2473 } 2474 2475 fn make_virtio_pmem_device( 2476 &mut self, 2477 pmem_cfg: &mut PmemConfig, 2478 ) -> DeviceManagerResult<MetaVirtioDevice> { 2479 let id = if let Some(id) = &pmem_cfg.id { 2480 id.clone() 2481 } else { 2482 let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?; 2483 pmem_cfg.id = Some(id.clone()); 2484 id 2485 }; 2486 2487 info!("Creating virtio-pmem device: {:?}", pmem_cfg); 2488 2489 let mut node = device_node!(id); 2490 2491 // Look for the id in the device tree. If it can be found, that means 2492 // the device is being restored, otherwise it's created from scratch. 
2493 let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) { 2494 info!("Restoring virtio-pmem {} resources", id); 2495 2496 let mut region_range: Option<(u64, u64)> = None; 2497 for resource in node.resources.iter() { 2498 match resource { 2499 Resource::MmioAddressRange { base, size } => { 2500 if region_range.is_some() { 2501 return Err(DeviceManagerError::ResourceAlreadyExists); 2502 } 2503 2504 region_range = Some((*base, *size)); 2505 } 2506 _ => { 2507 error!("Unexpected resource {:?} for {}", resource, id); 2508 } 2509 } 2510 } 2511 2512 if region_range.is_none() { 2513 return Err(DeviceManagerError::MissingVirtioPmemResources); 2514 } 2515 2516 region_range 2517 } else { 2518 None 2519 }; 2520 2521 let (custom_flags, set_len) = if pmem_cfg.file.is_dir() { 2522 if pmem_cfg.size.is_none() { 2523 return Err(DeviceManagerError::PmemWithDirectorySizeMissing); 2524 } 2525 (O_TMPFILE, true) 2526 } else { 2527 (0, false) 2528 }; 2529 2530 let mut file = OpenOptions::new() 2531 .read(true) 2532 .write(!pmem_cfg.discard_writes) 2533 .custom_flags(custom_flags) 2534 .open(&pmem_cfg.file) 2535 .map_err(DeviceManagerError::PmemFileOpen)?; 2536 2537 let size = if let Some(size) = pmem_cfg.size { 2538 if set_len { 2539 file.set_len(size) 2540 .map_err(DeviceManagerError::PmemFileSetLen)?; 2541 } 2542 size 2543 } else { 2544 file.seek(SeekFrom::End(0)) 2545 .map_err(DeviceManagerError::PmemFileSetLen)? 2546 }; 2547 2548 if size % 0x20_0000 != 0 { 2549 return Err(DeviceManagerError::PmemSizeNotAligned); 2550 } 2551 2552 let (region_base, region_size) = if let Some((base, size)) = region_range { 2553 // The memory needs to be 2MiB aligned in order to support 2554 // hugepages. 
2555 self.pci_segments[pmem_cfg.pci_segment as usize] 2556 .allocator 2557 .lock() 2558 .unwrap() 2559 .allocate( 2560 Some(GuestAddress(base)), 2561 size as GuestUsize, 2562 Some(0x0020_0000), 2563 ) 2564 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2565 2566 (base, size) 2567 } else { 2568 // The memory needs to be 2MiB aligned in order to support 2569 // hugepages. 2570 let base = self.pci_segments[pmem_cfg.pci_segment as usize] 2571 .allocator 2572 .lock() 2573 .unwrap() 2574 .allocate(None, size as GuestUsize, Some(0x0020_0000)) 2575 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2576 2577 (base.raw_value(), size) 2578 }; 2579 2580 let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?; 2581 let mmap_region = MmapRegion::build( 2582 Some(FileOffset::new(cloned_file, 0)), 2583 region_size as usize, 2584 PROT_READ | PROT_WRITE, 2585 MAP_NORESERVE 2586 | if pmem_cfg.discard_writes { 2587 MAP_PRIVATE 2588 } else { 2589 MAP_SHARED 2590 }, 2591 ) 2592 .map_err(DeviceManagerError::NewMmapRegion)?; 2593 let host_addr: u64 = mmap_region.as_ptr() as u64; 2594 2595 let mem_slot = self 2596 .memory_manager 2597 .lock() 2598 .unwrap() 2599 .create_userspace_mapping(region_base, region_size, host_addr, false, false, false) 2600 .map_err(DeviceManagerError::MemoryManager)?; 2601 2602 let mapping = virtio_devices::UserspaceMapping { 2603 host_addr, 2604 mem_slot, 2605 addr: GuestAddress(region_base), 2606 len: region_size, 2607 mergeable: false, 2608 }; 2609 2610 let virtio_pmem_device = Arc::new(Mutex::new( 2611 virtio_devices::Pmem::new( 2612 id.clone(), 2613 file, 2614 GuestAddress(region_base), 2615 mapping, 2616 mmap_region, 2617 self.force_iommu | pmem_cfg.iommu, 2618 self.seccomp_action.clone(), 2619 self.exit_evt 2620 .try_clone() 2621 .map_err(DeviceManagerError::EventFd)?, 2622 ) 2623 .map_err(DeviceManagerError::CreateVirtioPmem)?, 2624 )); 2625 2626 // Update the device tree with correct resource information and with 2627 // the 
migratable device. 2628 node.resources.push(Resource::MmioAddressRange { 2629 base: region_base, 2630 size: region_size, 2631 }); 2632 node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>); 2633 self.device_tree.lock().unwrap().insert(id.clone(), node); 2634 2635 Ok(MetaVirtioDevice { 2636 virtio_device: Arc::clone(&virtio_pmem_device) 2637 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2638 iommu: pmem_cfg.iommu, 2639 id, 2640 pci_segment: pmem_cfg.pci_segment, 2641 dma_handler: None, 2642 }) 2643 } 2644 2645 fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2646 let mut devices = Vec::new(); 2647 // Add virtio-pmem if required 2648 let mut pmem_devices = self.config.lock().unwrap().pmem.clone(); 2649 if let Some(pmem_list_cfg) = &mut pmem_devices { 2650 for pmem_cfg in pmem_list_cfg.iter_mut() { 2651 devices.push(self.make_virtio_pmem_device(pmem_cfg)?); 2652 } 2653 } 2654 self.config.lock().unwrap().pmem = pmem_devices; 2655 2656 Ok(devices) 2657 } 2658 2659 fn make_virtio_vsock_device( 2660 &mut self, 2661 vsock_cfg: &mut VsockConfig, 2662 ) -> DeviceManagerResult<MetaVirtioDevice> { 2663 let id = if let Some(id) = &vsock_cfg.id { 2664 id.clone() 2665 } else { 2666 let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?; 2667 vsock_cfg.id = Some(id.clone()); 2668 id 2669 }; 2670 2671 info!("Creating virtio-vsock device: {:?}", vsock_cfg); 2672 2673 let socket_path = vsock_cfg 2674 .socket 2675 .to_str() 2676 .ok_or(DeviceManagerError::CreateVsockConvertPath)?; 2677 let backend = 2678 virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string()) 2679 .map_err(DeviceManagerError::CreateVsockBackend)?; 2680 2681 let vsock_device = Arc::new(Mutex::new( 2682 virtio_devices::Vsock::new( 2683 id.clone(), 2684 vsock_cfg.cid, 2685 vsock_cfg.socket.clone(), 2686 backend, 2687 self.force_iommu | vsock_cfg.iommu, 2688 self.seccomp_action.clone(), 2689 self.exit_evt 2690 
.try_clone() 2691 .map_err(DeviceManagerError::EventFd)?, 2692 ) 2693 .map_err(DeviceManagerError::CreateVirtioVsock)?, 2694 )); 2695 2696 // Fill the device tree with a new node. In case of restore, we 2697 // know there is nothing to do, so we can simply override the 2698 // existing entry. 2699 self.device_tree 2700 .lock() 2701 .unwrap() 2702 .insert(id.clone(), device_node!(id, vsock_device)); 2703 2704 Ok(MetaVirtioDevice { 2705 virtio_device: Arc::clone(&vsock_device) 2706 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2707 iommu: vsock_cfg.iommu, 2708 id, 2709 pci_segment: vsock_cfg.pci_segment, 2710 dma_handler: None, 2711 }) 2712 } 2713 2714 fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2715 let mut devices = Vec::new(); 2716 2717 let mut vsock = self.config.lock().unwrap().vsock.clone(); 2718 if let Some(ref mut vsock_cfg) = &mut vsock { 2719 devices.push(self.make_virtio_vsock_device(vsock_cfg)?); 2720 } 2721 self.config.lock().unwrap().vsock = vsock; 2722 2723 Ok(devices) 2724 } 2725 2726 fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2727 let mut devices = Vec::new(); 2728 2729 let mm = self.memory_manager.clone(); 2730 let mut mm = mm.lock().unwrap(); 2731 for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() { 2732 if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() { 2733 info!("Creating virtio-mem device: id = {}", memory_zone_id); 2734 2735 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id) 2736 .map(|i| i as u16); 2737 2738 let virtio_mem_device = Arc::new(Mutex::new( 2739 virtio_devices::Mem::new( 2740 memory_zone_id.clone(), 2741 virtio_mem_zone.region(), 2742 self.seccomp_action.clone(), 2743 node_id, 2744 virtio_mem_zone.hotplugged_size(), 2745 virtio_mem_zone.hugepages(), 2746 self.exit_evt 2747 .try_clone() 2748 .map_err(DeviceManagerError::EventFd)?, 2749 virtio_mem_zone.blocks_state().clone(), 2750 
) 2751 .map_err(DeviceManagerError::CreateVirtioMem)?, 2752 )); 2753 2754 // Update the virtio-mem zone so that it has a handle onto the 2755 // virtio-mem device, which will be used for triggering a resize 2756 // if needed. 2757 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device)); 2758 2759 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device)); 2760 2761 devices.push(MetaVirtioDevice { 2762 virtio_device: Arc::clone(&virtio_mem_device) 2763 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2764 iommu: false, 2765 id: memory_zone_id.clone(), 2766 pci_segment: 0, 2767 dma_handler: None, 2768 }); 2769 2770 // Fill the device tree with a new node. In case of restore, we 2771 // know there is nothing to do, so we can simply override the 2772 // existing entry. 2773 self.device_tree.lock().unwrap().insert( 2774 memory_zone_id.clone(), 2775 device_node!(memory_zone_id, virtio_mem_device), 2776 ); 2777 } 2778 } 2779 2780 Ok(devices) 2781 } 2782 2783 fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2784 let mut devices = Vec::new(); 2785 2786 if let Some(balloon_config) = &self.config.lock().unwrap().balloon { 2787 let id = String::from(BALLOON_DEVICE_NAME); 2788 info!("Creating virtio-balloon device: id = {}", id); 2789 2790 let virtio_balloon_device = Arc::new(Mutex::new( 2791 virtio_devices::Balloon::new( 2792 id.clone(), 2793 balloon_config.size, 2794 balloon_config.deflate_on_oom, 2795 balloon_config.free_page_reporting, 2796 self.seccomp_action.clone(), 2797 self.exit_evt 2798 .try_clone() 2799 .map_err(DeviceManagerError::EventFd)?, 2800 ) 2801 .map_err(DeviceManagerError::CreateVirtioBalloon)?, 2802 )); 2803 2804 self.balloon = Some(virtio_balloon_device.clone()); 2805 2806 devices.push(MetaVirtioDevice { 2807 virtio_device: Arc::clone(&virtio_balloon_device) 2808 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2809 iommu: false, 2810 id: id.clone(), 2811 pci_segment: 0, 2812 dma_handler: None, 2813 }); 
2814 2815 self.device_tree 2816 .lock() 2817 .unwrap() 2818 .insert(id.clone(), device_node!(id, virtio_balloon_device)); 2819 } 2820 2821 Ok(devices) 2822 } 2823 2824 fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2825 let mut devices = Vec::new(); 2826 2827 if !self.config.lock().unwrap().watchdog { 2828 return Ok(devices); 2829 } 2830 2831 let id = String::from(WATCHDOG_DEVICE_NAME); 2832 info!("Creating virtio-watchdog device: id = {}", id); 2833 2834 let virtio_watchdog_device = Arc::new(Mutex::new( 2835 virtio_devices::Watchdog::new( 2836 id.clone(), 2837 self.reset_evt.try_clone().unwrap(), 2838 self.seccomp_action.clone(), 2839 self.exit_evt 2840 .try_clone() 2841 .map_err(DeviceManagerError::EventFd)?, 2842 ) 2843 .map_err(DeviceManagerError::CreateVirtioWatchdog)?, 2844 )); 2845 devices.push(MetaVirtioDevice { 2846 virtio_device: Arc::clone(&virtio_watchdog_device) 2847 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2848 iommu: false, 2849 id: id.clone(), 2850 pci_segment: 0, 2851 dma_handler: None, 2852 }); 2853 2854 self.device_tree 2855 .lock() 2856 .unwrap() 2857 .insert(id.clone(), device_node!(id, virtio_watchdog_device)); 2858 2859 Ok(devices) 2860 } 2861 2862 fn make_vdpa_device( 2863 &mut self, 2864 vdpa_cfg: &mut VdpaConfig, 2865 ) -> DeviceManagerResult<MetaVirtioDevice> { 2866 let id = if let Some(id) = &vdpa_cfg.id { 2867 id.clone() 2868 } else { 2869 let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?; 2870 vdpa_cfg.id = Some(id.clone()); 2871 id 2872 }; 2873 2874 info!("Creating vDPA device: {:?}", vdpa_cfg); 2875 2876 let device_path = vdpa_cfg 2877 .path 2878 .to_str() 2879 .ok_or(DeviceManagerError::CreateVdpaConvertPath)?; 2880 2881 let vdpa_device = Arc::new(Mutex::new( 2882 virtio_devices::Vdpa::new( 2883 id.clone(), 2884 device_path, 2885 self.memory_manager.lock().unwrap().guest_memory(), 2886 vdpa_cfg.num_queues as u16, 2887 ) 2888 .map_err(DeviceManagerError::CreateVdpa)?, 2889 
)); 2890 2891 // Create the DMA handler that is required by the vDPA device 2892 let vdpa_mapping = Arc::new(VdpaDmaMapping::new( 2893 Arc::clone(&vdpa_device), 2894 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 2895 )); 2896 2897 self.device_tree 2898 .lock() 2899 .unwrap() 2900 .insert(id.clone(), device_node!(id)); 2901 2902 Ok(MetaVirtioDevice { 2903 virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2904 iommu: vdpa_cfg.iommu, 2905 id, 2906 pci_segment: vdpa_cfg.pci_segment, 2907 dma_handler: Some(vdpa_mapping), 2908 }) 2909 } 2910 2911 fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2912 let mut devices = Vec::new(); 2913 // Add vdpa if required 2914 let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone(); 2915 if let Some(vdpa_list_cfg) = &mut vdpa_devices { 2916 for vdpa_cfg in vdpa_list_cfg.iter_mut() { 2917 devices.push(self.make_vdpa_device(vdpa_cfg)?); 2918 } 2919 } 2920 self.config.lock().unwrap().vdpa = vdpa_devices; 2921 2922 Ok(devices) 2923 } 2924 2925 fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> { 2926 let start_id = self.device_id_cnt; 2927 loop { 2928 // Generate the temporary name. 2929 let name = format!("{}{}", prefix, self.device_id_cnt); 2930 // Increment the counter. 2931 self.device_id_cnt += Wrapping(1); 2932 // Check if the name is already in use. 2933 if !self.boot_id_list.contains(&name) 2934 && !self.device_tree.lock().unwrap().contains_key(&name) 2935 { 2936 return Ok(name); 2937 } 2938 2939 if self.device_id_cnt == start_id { 2940 // We went through a full loop and there's nothing else we can 2941 // do. 
2942 break; 2943 } 2944 } 2945 Err(DeviceManagerError::NoAvailableDeviceName) 2946 } 2947 2948 fn add_passthrough_device( 2949 &mut self, 2950 device_cfg: &mut DeviceConfig, 2951 ) -> DeviceManagerResult<(PciBdf, String)> { 2952 // If the passthrough device has not been created yet, it is created 2953 // here and stored in the DeviceManager structure for future needs. 2954 if self.passthrough_device.is_none() { 2955 self.passthrough_device = Some( 2956 self.address_manager 2957 .vm 2958 .create_passthrough_device() 2959 .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?, 2960 ); 2961 } 2962 2963 self.add_vfio_device(device_cfg) 2964 } 2965 2966 fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> { 2967 let passthrough_device = self 2968 .passthrough_device 2969 .as_ref() 2970 .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?; 2971 2972 let dup = passthrough_device 2973 .try_clone() 2974 .map_err(DeviceManagerError::VfioCreate)?; 2975 2976 Ok(Arc::new( 2977 VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?, 2978 )) 2979 } 2980 2981 fn add_vfio_device( 2982 &mut self, 2983 device_cfg: &mut DeviceConfig, 2984 ) -> DeviceManagerResult<(PciBdf, String)> { 2985 let vfio_name = if let Some(id) = &device_cfg.id { 2986 id.clone() 2987 } else { 2988 let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?; 2989 device_cfg.id = Some(id.clone()); 2990 id 2991 }; 2992 2993 let (pci_segment_id, pci_device_bdf, resources) = 2994 self.pci_resources(&vfio_name, device_cfg.pci_segment)?; 2995 2996 let mut needs_dma_mapping = false; 2997 2998 // Here we create a new VFIO container for two reasons. Either this is 2999 // the first VFIO device, meaning we need a new VFIO container, which 3000 // will be shared with other VFIO devices. Or the new VFIO device is 3001 // attached to a vIOMMU, meaning we must create a dedicated VFIO 3002 // container. 
In the vIOMMU use case, we can't let all devices under 3003 // the same VFIO container since we couldn't map/unmap memory for each 3004 // device. That's simply because the map/unmap operations happen at the 3005 // VFIO container level. 3006 let vfio_container = if device_cfg.iommu { 3007 let vfio_container = self.create_vfio_container()?; 3008 3009 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3010 Arc::clone(&vfio_container), 3011 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3012 )); 3013 3014 if let Some(iommu) = &self.iommu_device { 3015 iommu 3016 .lock() 3017 .unwrap() 3018 .add_external_mapping(pci_device_bdf.into(), vfio_mapping); 3019 } else { 3020 return Err(DeviceManagerError::MissingVirtualIommu); 3021 } 3022 3023 vfio_container 3024 } else if let Some(vfio_container) = &self.vfio_container { 3025 Arc::clone(vfio_container) 3026 } else { 3027 let vfio_container = self.create_vfio_container()?; 3028 needs_dma_mapping = true; 3029 self.vfio_container = Some(Arc::clone(&vfio_container)); 3030 3031 vfio_container 3032 }; 3033 3034 let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container)) 3035 .map_err(DeviceManagerError::VfioCreate)?; 3036 3037 if needs_dma_mapping { 3038 // Register DMA mapping in IOMMU. 3039 // Do not register virtio-mem regions, as they are handled directly by 3040 // virtio-mem device itself. 
3041 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3042 for region in zone.regions() { 3043 vfio_container 3044 .vfio_dma_map( 3045 region.start_addr().raw_value(), 3046 region.len() as u64, 3047 region.as_ptr() as u64, 3048 ) 3049 .map_err(DeviceManagerError::VfioDmaMap)?; 3050 } 3051 } 3052 3053 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3054 Arc::clone(&vfio_container), 3055 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3056 )); 3057 3058 for virtio_mem_device in self.virtio_mem_devices.iter() { 3059 virtio_mem_device 3060 .lock() 3061 .unwrap() 3062 .add_dma_mapping_handler( 3063 VirtioMemMappingSource::Container, 3064 vfio_mapping.clone(), 3065 ) 3066 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3067 } 3068 } 3069 3070 let legacy_interrupt_group = 3071 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3072 Some( 3073 legacy_interrupt_manager 3074 .create_group(LegacyIrqGroupConfig { 3075 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3076 [pci_device_bdf.device() as usize] 3077 as InterruptIndex, 3078 }) 3079 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3080 ) 3081 } else { 3082 None 3083 }; 3084 3085 let memory_manager = self.memory_manager.clone(); 3086 3087 let vfio_pci_device = VfioPciDevice::new( 3088 vfio_name.clone(), 3089 &self.address_manager.vm, 3090 vfio_device, 3091 vfio_container, 3092 self.msi_interrupt_manager.clone(), 3093 legacy_interrupt_group, 3094 device_cfg.iommu, 3095 pci_device_bdf, 3096 self.restoring, 3097 Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()), 3098 ) 3099 .map_err(DeviceManagerError::VfioPciCreate)?; 3100 3101 let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device)); 3102 3103 let new_resources = self.add_pci_device( 3104 vfio_pci_device.clone(), 3105 vfio_pci_device.clone(), 3106 pci_segment_id, 3107 pci_device_bdf, 3108 resources, 3109 )?; 3110 3111 // When restoring a VM, the 
// restore codepath will take care of mapping
        // the MMIO regions based on the information from the snapshot.
        if !self.restoring {
            vfio_pci_device
                .lock()
                .unwrap()
                .map_mmio_regions()
                .map_err(DeviceManagerError::VfioMapRegion)?;
        }

        let mut node = device_node!(vfio_name, vfio_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_name.clone(), node);

        Ok((pci_device_bdf, vfio_name))
    }

    /// Allocate BARs for `pci_device`, register it on the PCI bus of the
    /// requested segment and map its BARs onto the I/O (x86_64 only) and
    /// MMIO buses.
    ///
    /// Returns a `Resource::PciBar` entry per allocated BAR, used later to
    /// persist the device layout in the device tree.
    fn add_pci_device(
        &mut self,
        bus_device: Arc<Mutex<dyn BusDevice>>,
        pci_device: Arc<Mutex<dyn PciDevice>>,
        segment_id: u16,
        bdf: PciBdf,
        resources: Option<Vec<Resource>>,
    ) -> DeviceManagerResult<Vec<Resource>> {
        // Allocate BARs from both the system allocator and the per-segment
        // allocator; `resources` carries previously assigned ranges on restore.
        let bars = pci_device
            .lock()
            .unwrap()
            .allocate_bars(
                &self.address_manager.allocator,
                &mut self.pci_segments[segment_id as usize]
                    .allocator
                    .lock()
                    .unwrap(),
                resources,
            )
            .map_err(DeviceManagerError::AllocateBars)?;

        let mut pci_bus = self.pci_segments[segment_id as usize]
            .pci_bus
            .lock()
            .unwrap();

        pci_bus
            .add_device(bdf.device() as u32, pci_device)
            .map_err(DeviceManagerError::AddPciDevice)?;

        self.bus_devices.push(Arc::clone(&bus_device));

        pci_bus
            .register_mapping(
                bus_device,
                #[cfg(target_arch = "x86_64")]
                self.address_manager.io_bus.as_ref(),
                self.address_manager.mmio_bus.as_ref(),
                bars.clone(),
            )
            .map_err(DeviceManagerError::AddPciDevice)?;

        let mut new_resources = Vec::new();
        for bar in bars {
            new_resources.push(Resource::PciBar {
                index: bar.idx(),
                base: bar.addr(),
                size: bar.size(),
                type_: bar.region_type().into(),
                prefetchable: bar.prefetchable().into(),
            });
        }

        Ok(new_resources)
    }

    /// Create every VFIO passthrough device listed in the VM configuration
    /// and return the BDFs of those attached to the virtual IOMMU.
    fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut iommu_attached_device_ids = Vec::new();
        let mut devices = self.config.lock().unwrap().devices.clone();

        if let Some(device_list_cfg) = &mut devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (device_id, _) = self.add_passthrough_device(device_cfg)?;
                if device_cfg.iommu && self.iommu_device.is_some() {
                    iommu_attached_device_ids.push(device_id);
                }
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().devices = devices;

        Ok(iommu_attached_device_ids)
    }

    /// Create a single vfio-user device from `device_cfg`, wire up its DMA
    /// mappings and register it on the PCI bus. Returns its BDF and name.
    fn add_vfio_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // Reuse the configured id or generate one (and write it back so the
        // config reflects the generated name).
        let vfio_user_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;

        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let client = Arc::new(Mutex::new(
            vfio_user::Client::new(&device_cfg.socket)
                .map_err(DeviceManagerError::VfioUserCreateClient)?,
        ));

        let memory_manager = self.memory_manager.clone();

        let mut vfio_user_pci_device = VfioUserPciDevice::new(
            vfio_user_name.clone(),
            &self.address_manager.vm,
            client.clone(),
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            pci_device_bdf,
            self.restoring,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
        )
        .map_err(DeviceManagerError::VfioUserCreate)?;

        // Let virtio-mem devices forward their map/unmap events to this
        // device so hotplugged memory stays DMA-visible.
        let memory = self.memory_manager.lock().unwrap().guest_memory();
        let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Device(pci_device_bdf.into()),
                    vfio_user_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }

        // Map all current guest memory regions for DMA.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_user_pci_device
                    .dma_map(region)
                    .map_err(DeviceManagerError::VfioUserDmaMap)?;
            }
        }

        let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

        let new_resources = self.add_pci_device(
            vfio_user_pci_device.clone(),
            vfio_user_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // When restoring a VM, the restore codepath will take care of mapping
        // the MMIO regions based on the information from the snapshot.
        if !self.restoring {
            // Note it is required to call 'add_pci_device()' in advance to have the list of
            // mmio regions provisioned correctly
            vfio_user_pci_device
                .lock()
                .unwrap()
                .map_mmio_regions()
                .map_err(DeviceManagerError::VfioUserMapRegion)?;
        }

        let mut node = device_node!(vfio_user_name, vfio_user_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_user_name.clone(), node);

        Ok((pci_device_bdf, vfio_user_name))
    }

    /// Create every vfio-user device listed in the VM configuration.
    /// Always returns an empty list: vfio-user devices are never placed
    /// behind the virtual IOMMU here.
    fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut user_devices = self.config.lock().unwrap().user_devices.clone();

        if let Some(device_list_cfg) = &mut user_devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().user_devices = user_devices;

        Ok(vec![])
    }

    /// Wrap `virtio_device` into a virtio-pci transport, allocate its PCI
    /// resources and register it. Returns the BDF assigned to the transport.
    fn add_virtio_pci_device(
        &mut self,
        virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu_mapping: &Option<Arc<IommuMapping>>,
        virtio_device_id: String,
        pci_segment_id: u16,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> DeviceManagerResult<PciBdf> {
        let id = format!("{}-{}", VIRTIO_PCI_DEVICE_NAME_PREFIX, virtio_device_id);

        // Add the new virtio-pci node to the device tree.
        let mut node = device_node!(id);
        node.children = vec![virtio_device_id.clone()];

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        // Update the existing virtio node by setting the parent.
        if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
            node.parent = Some(id.clone());
        } else {
            return Err(DeviceManagerError::MissingNode);
        }

        // Allows support for one MSI-X vector per queue. It also adds 1
        // as we need to take into account the dedicated vector to notify
        // about a virtio config change.
3360 let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16; 3361 3362 // Create the AccessPlatform trait from the implementation IommuMapping. 3363 // This will provide address translation for any virtio device sitting 3364 // behind a vIOMMU. 3365 let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping 3366 { 3367 Some(Arc::new(AccessPlatformMapping::new( 3368 pci_device_bdf.into(), 3369 mapping.clone(), 3370 ))) 3371 } else { 3372 None 3373 }; 3374 3375 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3376 3377 // Map DMA ranges if a DMA handler is available and if the device is 3378 // not attached to a virtual IOMMU. 3379 if let Some(dma_handler) = &dma_handler { 3380 if iommu_mapping.is_some() { 3381 if let Some(iommu) = &self.iommu_device { 3382 iommu 3383 .lock() 3384 .unwrap() 3385 .add_external_mapping(pci_device_bdf.into(), dma_handler.clone()); 3386 } else { 3387 return Err(DeviceManagerError::MissingVirtualIommu); 3388 } 3389 } else { 3390 // Let every virtio-mem device handle the DMA map/unmap through the 3391 // DMA handler provided. 3392 for virtio_mem_device in self.virtio_mem_devices.iter() { 3393 virtio_mem_device 3394 .lock() 3395 .unwrap() 3396 .add_dma_mapping_handler( 3397 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3398 dma_handler.clone(), 3399 ) 3400 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3401 } 3402 3403 // Do not register virtio-mem regions, as they are handled directly by 3404 // virtio-mem devices. 
3405 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3406 for region in zone.regions() { 3407 let gpa = region.start_addr().0; 3408 let size = region.len(); 3409 dma_handler 3410 .map(gpa, gpa, size) 3411 .map_err(DeviceManagerError::VirtioDmaMap)?; 3412 } 3413 } 3414 } 3415 } 3416 3417 let device_type = virtio_device.lock().unwrap().device_type(); 3418 let virtio_pci_device = Arc::new(Mutex::new( 3419 VirtioPciDevice::new( 3420 id.clone(), 3421 memory, 3422 virtio_device, 3423 msix_num, 3424 access_platform, 3425 &self.msi_interrupt_manager, 3426 pci_device_bdf.into(), 3427 self.activate_evt 3428 .try_clone() 3429 .map_err(DeviceManagerError::EventFd)?, 3430 // All device types *except* virtio block devices should be allocated a 64-bit bar 3431 // The block devices should be given a 32-bit BAR so that they are easily accessible 3432 // to firmware without requiring excessive identity mapping. 3433 // The exception being if not on the default PCI segment. 3434 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32, 3435 dma_handler, 3436 self.pending_activations.clone(), 3437 ) 3438 .map_err(DeviceManagerError::VirtioDevice)?, 3439 )); 3440 3441 let new_resources = self.add_pci_device( 3442 virtio_pci_device.clone(), 3443 virtio_pci_device.clone(), 3444 pci_segment_id, 3445 pci_device_bdf, 3446 resources, 3447 )?; 3448 3449 let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr(); 3450 for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) { 3451 let io_addr = IoEventAddress::Mmio(addr); 3452 self.address_manager 3453 .vm 3454 .register_ioevent(event, &io_addr, None) 3455 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?; 3456 } 3457 3458 // Update the device tree with correct resource information. 
3459 node.resources = new_resources; 3460 node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>); 3461 node.pci_bdf = Some(pci_device_bdf); 3462 node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device)); 3463 self.device_tree.lock().unwrap().insert(id, node); 3464 3465 Ok(pci_device_bdf) 3466 } 3467 3468 fn pci_resources( 3469 &self, 3470 id: &str, 3471 pci_segment_id: u16, 3472 ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> { 3473 // Look for the id in the device tree. If it can be found, that means 3474 // the device is being restored, otherwise it's created from scratch. 3475 Ok( 3476 if let Some(node) = self.device_tree.lock().unwrap().get(id) { 3477 info!("Restoring virtio-pci {} resources", id); 3478 let pci_device_bdf: PciBdf = node 3479 .pci_bdf 3480 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3481 let pci_segment_id = pci_device_bdf.segment(); 3482 3483 self.pci_segments[pci_segment_id as usize] 3484 .pci_bus 3485 .lock() 3486 .unwrap() 3487 .get_device_id(pci_device_bdf.device() as usize) 3488 .map_err(DeviceManagerError::GetPciDeviceId)?; 3489 3490 (pci_segment_id, pci_device_bdf, Some(node.resources.clone())) 3491 } else { 3492 let pci_device_bdf = 3493 self.pci_segments[pci_segment_id as usize].next_device_bdf()?; 3494 3495 (pci_segment_id, pci_device_bdf, None) 3496 }, 3497 ) 3498 } 3499 3500 #[cfg(target_arch = "x86_64")] 3501 pub fn io_bus(&self) -> &Arc<Bus> { 3502 &self.address_manager.io_bus 3503 } 3504 3505 pub fn mmio_bus(&self) -> &Arc<Bus> { 3506 &self.address_manager.mmio_bus 3507 } 3508 3509 pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> { 3510 &self.address_manager.allocator 3511 } 3512 3513 pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> { 3514 self.interrupt_controller 3515 .as_ref() 3516 .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>) 3517 } 3518 3519 #[cfg(target_arch = "x86_64")] 3520 // Used to provide a 
fast path for handling PIO exits 3521 pub fn pci_config_io(&self) -> Arc<Mutex<PciConfigIo>> { 3522 Arc::clone(self.pci_segments[0].pci_config_io.as_ref().unwrap()) 3523 } 3524 3525 pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> { 3526 &self.pci_segments 3527 } 3528 3529 pub fn console(&self) -> &Arc<Console> { 3530 &self.console 3531 } 3532 3533 #[cfg(target_arch = "aarch64")] 3534 pub fn cmdline_additions(&self) -> &[String] { 3535 self.cmdline_additions.as_slice() 3536 } 3537 3538 pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> { 3539 for handle in self.virtio_devices.iter() { 3540 handle 3541 .virtio_device 3542 .lock() 3543 .unwrap() 3544 .add_memory_region(new_region) 3545 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?; 3546 3547 if let Some(dma_handler) = &handle.dma_handler { 3548 if !handle.iommu { 3549 let gpa = new_region.start_addr().0; 3550 let size = new_region.len(); 3551 dma_handler 3552 .map(gpa, gpa, size) 3553 .map_err(DeviceManagerError::VirtioDmaMap)?; 3554 } 3555 } 3556 } 3557 3558 // Take care of updating the memory for VFIO PCI devices. 3559 if let Some(vfio_container) = &self.vfio_container { 3560 vfio_container 3561 .vfio_dma_map( 3562 new_region.start_addr().raw_value(), 3563 new_region.len() as u64, 3564 new_region.as_ptr() as u64, 3565 ) 3566 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?; 3567 } 3568 3569 // Take care of updating the memory for vfio-user devices. 3570 { 3571 let device_tree = self.device_tree.lock().unwrap(); 3572 for pci_device_node in device_tree.pci_devices() { 3573 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node 3574 .pci_device_handle 3575 .as_ref() 3576 .ok_or(DeviceManagerError::MissingPciDevice)? 
3577 { 3578 vfio_user_pci_device 3579 .lock() 3580 .unwrap() 3581 .dma_map(new_region) 3582 .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?; 3583 } 3584 } 3585 } 3586 3587 Ok(()) 3588 } 3589 3590 pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> { 3591 for mut activator in self.pending_activations.lock().unwrap().drain(..) { 3592 activator 3593 .activate() 3594 .map_err(DeviceManagerError::VirtioActivate)?; 3595 } 3596 Ok(()) 3597 } 3598 3599 pub fn notify_hotplug( 3600 &self, 3601 _notification_type: AcpiNotificationFlags, 3602 ) -> DeviceManagerResult<()> { 3603 return self 3604 .ged_notification_device 3605 .as_ref() 3606 .unwrap() 3607 .lock() 3608 .unwrap() 3609 .notify(_notification_type) 3610 .map_err(DeviceManagerError::HotPlugNotification); 3611 } 3612 3613 pub fn add_device( 3614 &mut self, 3615 device_cfg: &mut DeviceConfig, 3616 ) -> DeviceManagerResult<PciDeviceInfo> { 3617 self.validate_identifier(&device_cfg.id)?; 3618 3619 if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) { 3620 return Err(DeviceManagerError::InvalidIommuHotplug); 3621 } 3622 3623 let (bdf, device_name) = self.add_passthrough_device(device_cfg)?; 3624 3625 // Update the PCIU bitmap 3626 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3627 3628 Ok(PciDeviceInfo { 3629 id: device_name, 3630 bdf, 3631 }) 3632 } 3633 3634 pub fn add_user_device( 3635 &mut self, 3636 device_cfg: &mut UserDeviceConfig, 3637 ) -> DeviceManagerResult<PciDeviceInfo> { 3638 self.validate_identifier(&device_cfg.id)?; 3639 3640 let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?; 3641 3642 // Update the PCIU bitmap 3643 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3644 3645 Ok(PciDeviceInfo { 3646 id: device_name, 3647 bdf, 3648 }) 3649 } 3650 3651 pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> { 3652 // The node can be directly a PCI node in 
case the 'id' refers to a 3653 // VFIO device or a virtio-pci one. 3654 // In case the 'id' refers to a virtio device, we must find the PCI 3655 // node by looking at the parent. 3656 let device_tree = self.device_tree.lock().unwrap(); 3657 let node = device_tree 3658 .get(&id) 3659 .ok_or(DeviceManagerError::UnknownDeviceId(id))?; 3660 3661 let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() { 3662 node 3663 } else { 3664 let parent = node 3665 .parent 3666 .as_ref() 3667 .ok_or(DeviceManagerError::MissingNode)?; 3668 device_tree 3669 .get(parent) 3670 .ok_or(DeviceManagerError::MissingNode)? 3671 }; 3672 3673 let pci_device_bdf: PciBdf = pci_device_node 3674 .pci_bdf 3675 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3676 let pci_segment_id = pci_device_bdf.segment(); 3677 3678 let pci_device_handle = pci_device_node 3679 .pci_device_handle 3680 .as_ref() 3681 .ok_or(DeviceManagerError::MissingPciDevice)?; 3682 #[allow(irrefutable_let_patterns)] 3683 if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle { 3684 let device_type = VirtioDeviceType::from( 3685 virtio_pci_device 3686 .lock() 3687 .unwrap() 3688 .virtio_device() 3689 .lock() 3690 .unwrap() 3691 .device_type(), 3692 ); 3693 match device_type { 3694 VirtioDeviceType::Net 3695 | VirtioDeviceType::Block 3696 | VirtioDeviceType::Pmem 3697 | VirtioDeviceType::Fs 3698 | VirtioDeviceType::Vsock => {} 3699 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)), 3700 } 3701 } 3702 3703 // Update the PCID bitmap 3704 self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device(); 3705 3706 Ok(()) 3707 } 3708 3709 pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> { 3710 info!( 3711 "Ejecting device_id = {} on segment_id={}", 3712 device_id, pci_segment_id 3713 ); 3714 3715 // Convert the device ID into the corresponding b/d/f. 
3716 let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0); 3717 3718 // Give the PCI device ID back to the PCI bus. 3719 self.pci_segments[pci_segment_id as usize] 3720 .pci_bus 3721 .lock() 3722 .unwrap() 3723 .put_device_id(device_id as usize) 3724 .map_err(DeviceManagerError::PutPciDeviceId)?; 3725 3726 // Remove the device from the device tree along with its children. 3727 let mut device_tree = self.device_tree.lock().unwrap(); 3728 let pci_device_node = device_tree 3729 .remove_node_by_pci_bdf(pci_device_bdf) 3730 .ok_or(DeviceManagerError::MissingPciDevice)?; 3731 3732 // For VFIO and vfio-user the PCI device id is the id. 3733 // For virtio we overwrite it later as we want the id of the 3734 // underlying device. 3735 let mut id = pci_device_node.id; 3736 let pci_device_handle = pci_device_node 3737 .pci_device_handle 3738 .ok_or(DeviceManagerError::MissingPciDevice)?; 3739 if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) { 3740 // The virtio-pci device has a single child 3741 if !pci_device_node.children.is_empty() { 3742 assert_eq!(pci_device_node.children.len(), 1); 3743 let child_id = &pci_device_node.children[0]; 3744 id = child_id.clone(); 3745 } 3746 } 3747 for child in pci_device_node.children.iter() { 3748 device_tree.remove(child); 3749 } 3750 3751 let mut iommu_attached = false; 3752 if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices { 3753 if iommu_attached_devices.contains(&pci_device_bdf) { 3754 iommu_attached = true; 3755 } 3756 } 3757 3758 let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { 3759 // No need to remove any virtio-mem mapping here as the container outlives all devices 3760 PciDeviceHandle::Vfio(vfio_pci_device) => ( 3761 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>, 3762 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>, 3763 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 3764 false, 3765 ), 3766 
PciDeviceHandle::Virtio(virtio_pci_device) => { 3767 let dev = virtio_pci_device.lock().unwrap(); 3768 let bar_addr = dev.config_bar_addr(); 3769 for (event, addr) in dev.ioeventfds(bar_addr) { 3770 let io_addr = IoEventAddress::Mmio(addr); 3771 self.address_manager 3772 .vm 3773 .unregister_ioevent(event, &io_addr) 3774 .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; 3775 } 3776 3777 if let Some(dma_handler) = dev.dma_handler() { 3778 if !iommu_attached { 3779 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3780 for region in zone.regions() { 3781 let iova = region.start_addr().0; 3782 let size = region.len(); 3783 dma_handler 3784 .unmap(iova, size) 3785 .map_err(DeviceManagerError::VirtioDmaUnmap)?; 3786 } 3787 } 3788 } 3789 } 3790 3791 ( 3792 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>, 3793 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>, 3794 Some(dev.virtio_device()), 3795 dev.dma_handler().is_some() && !iommu_attached, 3796 ) 3797 } 3798 PciDeviceHandle::VfioUser(vfio_user_pci_device) => { 3799 let mut dev = vfio_user_pci_device.lock().unwrap(); 3800 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3801 for region in zone.regions() { 3802 dev.dma_unmap(region) 3803 .map_err(DeviceManagerError::VfioUserDmaUnmap)?; 3804 } 3805 } 3806 3807 ( 3808 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>, 3809 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>, 3810 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 3811 true, 3812 ) 3813 } 3814 }; 3815 3816 if remove_dma_handler { 3817 for virtio_mem_device in self.virtio_mem_devices.iter() { 3818 virtio_mem_device 3819 .lock() 3820 .unwrap() 3821 .remove_dma_mapping_handler(VirtioMemMappingSource::Device( 3822 pci_device_bdf.into(), 3823 )) 3824 .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?; 3825 } 3826 } 3827 3828 // Free the allocated BARs 3829 pci_device 3830 
.lock() 3831 .unwrap() 3832 .free_bars( 3833 &mut self.address_manager.allocator.lock().unwrap(), 3834 &mut self.pci_segments[pci_segment_id as usize] 3835 .allocator 3836 .lock() 3837 .unwrap(), 3838 ) 3839 .map_err(DeviceManagerError::FreePciBars)?; 3840 3841 // Remove the device from the PCI bus 3842 self.pci_segments[pci_segment_id as usize] 3843 .pci_bus 3844 .lock() 3845 .unwrap() 3846 .remove_by_device(&pci_device) 3847 .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?; 3848 3849 #[cfg(target_arch = "x86_64")] 3850 // Remove the device from the IO bus 3851 self.io_bus() 3852 .remove_by_device(&bus_device) 3853 .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?; 3854 3855 // Remove the device from the MMIO bus 3856 self.mmio_bus() 3857 .remove_by_device(&bus_device) 3858 .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?; 3859 3860 // Remove the device from the list of BusDevice held by the 3861 // DeviceManager. 3862 self.bus_devices 3863 .retain(|dev| !Arc::ptr_eq(dev, &bus_device)); 3864 3865 // Shutdown and remove the underlying virtio-device if present 3866 if let Some(virtio_device) = virtio_device { 3867 for mapping in virtio_device.lock().unwrap().userspace_mappings() { 3868 self.memory_manager 3869 .lock() 3870 .unwrap() 3871 .remove_userspace_mapping( 3872 mapping.addr.raw_value(), 3873 mapping.len, 3874 mapping.host_addr, 3875 mapping.mergeable, 3876 mapping.mem_slot, 3877 ) 3878 .map_err(DeviceManagerError::MemoryManager)?; 3879 } 3880 3881 virtio_device.lock().unwrap().shutdown(); 3882 3883 self.virtio_devices 3884 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device)); 3885 } 3886 3887 event!( 3888 "vm", 3889 "device-removed", 3890 "id", 3891 &id, 3892 "bdf", 3893 pci_device_bdf.to_string() 3894 ); 3895 3896 // At this point, the device has been removed from all the list and 3897 // buses where it was stored. 
At the end of this function, after 3898 // any_device, bus_device and pci_device are released, the actual 3899 // device will be dropped. 3900 Ok(()) 3901 } 3902 3903 fn hotplug_virtio_pci_device( 3904 &mut self, 3905 handle: MetaVirtioDevice, 3906 ) -> DeviceManagerResult<PciDeviceInfo> { 3907 // Add the virtio device to the device manager list. This is important 3908 // as the list is used to notify virtio devices about memory updates 3909 // for instance. 3910 self.virtio_devices.push(handle.clone()); 3911 3912 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 3913 self.iommu_mapping.clone() 3914 } else { 3915 None 3916 }; 3917 3918 let bdf = self.add_virtio_pci_device( 3919 handle.virtio_device, 3920 &mapping, 3921 handle.id.clone(), 3922 handle.pci_segment, 3923 handle.dma_handler, 3924 )?; 3925 3926 // Update the PCIU bitmap 3927 self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3928 3929 Ok(PciDeviceInfo { id: handle.id, bdf }) 3930 } 3931 3932 fn is_iommu_segment(&self, pci_segment_id: u16) -> bool { 3933 self.config 3934 .lock() 3935 .as_ref() 3936 .unwrap() 3937 .platform 3938 .as_ref() 3939 .map(|pc| { 3940 pc.iommu_segments 3941 .as_ref() 3942 .map(|v| v.contains(&pci_segment_id)) 3943 .unwrap_or_default() 3944 }) 3945 .unwrap_or_default() 3946 } 3947 3948 pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> { 3949 self.validate_identifier(&disk_cfg.id)?; 3950 3951 if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) { 3952 return Err(DeviceManagerError::InvalidIommuHotplug); 3953 } 3954 3955 let device = self.make_virtio_block_device(disk_cfg)?; 3956 self.hotplug_virtio_pci_device(device) 3957 } 3958 3959 pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> { 3960 self.validate_identifier(&fs_cfg.id)?; 3961 3962 let device = self.make_virtio_fs_device(fs_cfg)?; 3963 self.hotplug_virtio_pci_device(device) 3964 } 3965 3966 pub 
fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> { 3967 self.validate_identifier(&pmem_cfg.id)?; 3968 3969 if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) { 3970 return Err(DeviceManagerError::InvalidIommuHotplug); 3971 } 3972 3973 let device = self.make_virtio_pmem_device(pmem_cfg)?; 3974 self.hotplug_virtio_pci_device(device) 3975 } 3976 3977 pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> { 3978 self.validate_identifier(&net_cfg.id)?; 3979 3980 if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) { 3981 return Err(DeviceManagerError::InvalidIommuHotplug); 3982 } 3983 3984 let device = self.make_virtio_net_device(net_cfg)?; 3985 self.hotplug_virtio_pci_device(device) 3986 } 3987 3988 pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> { 3989 self.validate_identifier(&vdpa_cfg.id)?; 3990 3991 if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) { 3992 return Err(DeviceManagerError::InvalidIommuHotplug); 3993 } 3994 3995 let device = self.make_vdpa_device(vdpa_cfg)?; 3996 self.hotplug_virtio_pci_device(device) 3997 } 3998 3999 pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> { 4000 self.validate_identifier(&vsock_cfg.id)?; 4001 4002 if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) { 4003 return Err(DeviceManagerError::InvalidIommuHotplug); 4004 } 4005 4006 let device = self.make_virtio_vsock_device(vsock_cfg)?; 4007 self.hotplug_virtio_pci_device(device) 4008 } 4009 4010 pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> { 4011 let mut counters = HashMap::new(); 4012 4013 for handle in &self.virtio_devices { 4014 let virtio_device = handle.virtio_device.lock().unwrap(); 4015 if let Some(device_counters) = virtio_device.counters() { 4016 counters.insert(handle.id.clone(), device_counters.clone()); 4017 } 4018 } 
4019 4020 counters 4021 } 4022 4023 pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> { 4024 if let Some(balloon) = &self.balloon { 4025 return balloon 4026 .lock() 4027 .unwrap() 4028 .resize(size) 4029 .map_err(DeviceManagerError::VirtioBalloonResize); 4030 } 4031 4032 warn!("No balloon setup: Can't resize the balloon"); 4033 Err(DeviceManagerError::MissingVirtioBalloon) 4034 } 4035 4036 pub fn balloon_size(&self) -> u64 { 4037 if let Some(balloon) = &self.balloon { 4038 return balloon.lock().unwrap().get_actual(); 4039 } 4040 4041 0 4042 } 4043 4044 pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> { 4045 self.device_tree.clone() 4046 } 4047 4048 pub fn restore_devices( 4049 &mut self, 4050 snapshot: Snapshot, 4051 ) -> std::result::Result<(), MigratableError> { 4052 // Finally, restore all devices associated with the DeviceManager. 4053 // It's important to restore devices in the right order, that's why 4054 // the device tree is the right way to ensure we restore a child before 4055 // its parent node. 4056 for node in self 4057 .device_tree 4058 .lock() 4059 .unwrap() 4060 .breadth_first_traversal() 4061 .rev() 4062 { 4063 // Restore the node 4064 if let Some(migratable) = &node.migratable { 4065 info!("Restoring {} from DeviceManager", node.id); 4066 if let Some(snapshot) = snapshot.snapshots.get(&node.id) { 4067 migratable.lock().unwrap().pause()?; 4068 migratable.lock().unwrap().restore(*snapshot.clone())?; 4069 } else { 4070 return Err(MigratableError::Restore(anyhow!( 4071 "Missing device {}", 4072 node.id 4073 ))); 4074 } 4075 } 4076 } 4077 4078 // The devices have been fully restored, we can now update the 4079 // restoring state of the DeviceManager. 
4080 self.restoring = false; 4081 4082 Ok(()) 4083 } 4084 4085 #[cfg(target_arch = "x86_64")] 4086 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 4087 self.ged_notification_device 4088 .as_ref() 4089 .unwrap() 4090 .lock() 4091 .unwrap() 4092 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4093 .map_err(DeviceManagerError::PowerButtonNotification) 4094 } 4095 4096 #[cfg(target_arch = "aarch64")] 4097 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 4098 // There are two use cases: 4099 // 1. Users will use direct kernel boot with device tree. 4100 // 2. Users will use ACPI+UEFI boot. 4101 4102 // Trigger a GPIO pin 3 event to satisify use case 1. 4103 self.gpio_device 4104 .as_ref() 4105 .unwrap() 4106 .lock() 4107 .unwrap() 4108 .trigger_key(3) 4109 .map_err(DeviceManagerError::AArch64PowerButtonNotification)?; 4110 // Trigger a GED power button event to satisify use case 2. 4111 return self 4112 .ged_notification_device 4113 .as_ref() 4114 .unwrap() 4115 .lock() 4116 .unwrap() 4117 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4118 .map_err(DeviceManagerError::PowerButtonNotification); 4119 } 4120 4121 pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> { 4122 &self.iommu_attached_devices 4123 } 4124 4125 fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> { 4126 if let Some(id) = id { 4127 if id.starts_with("__") { 4128 return Err(DeviceManagerError::InvalidIdentifier(id.clone())); 4129 } 4130 4131 if self.device_tree.lock().unwrap().contains_key(id) { 4132 return Err(DeviceManagerError::IdentifierNotUnique(id.clone())); 4133 } 4134 } 4135 4136 Ok(()) 4137 } 4138 4139 pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses { 4140 &self.acpi_platform_addresses 4141 } 4142 } 4143 4144 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> { 4145 for (numa_node_id, numa_node) in numa_nodes.iter() { 4146 if 
numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
            return Some(*numa_node_id);
        }
    }

    None
}

impl Aml for DeviceManager {
    /// Emit the DeviceManager's ACPI AML: the PCI hotplug controller, the
    /// PCI segments, the motherboard resources, the serial port (when
    /// enabled), the sleep state, the power button and the GED device.
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // One PCNT method call per PCI segment; PSCN invokes them all.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PCI{:X}.PCNT", i).as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCachable::NotCacheable,
                        true,
                        self.acpi_address.0 as u64,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    self.acpi_address.0 as usize,
                    DEVICE_MANAGER_ACPI_SIZE,
                ),
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write PCI bus number (in first argument) to I/O port via field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .append_aml_bytes(bytes);

        for segment in &self.pci_segments {
            segment.append_aml_bytes(bytes);
        }

        // Motherboard resources: one fixed memory descriptor per segment's
        // MMIO config space.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .append_aml_bytes(bytes);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EisaName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::Io::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .append_aml_bytes(bytes);
        }

        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).append_aml_bytes(bytes);

        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .append_aml_bytes(bytes);

        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .append_aml_bytes(bytes);
    }
}

impl Pausable for DeviceManager {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::new(DEVICE_MANAGER_SNAPSHOT_ID);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let device_snapshot = migratable.lock().unwrap().snapshot()?;
                snapshot.add_snapshot(device_snapshot);
            }
        }

        // Then we store the DeviceManager state.
        snapshot.add_data_section(SnapshotDataSection::new_from_state(
            DEVICE_MANAGER_SNAPSHOT_ID,
            &self.state(),
        )?);

        Ok(snapshot)
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        // Let's first restore the DeviceManager.

        self.set_state(&snapshot.to_state(DEVICE_MANAGER_SNAPSHOT_ID)?);

        // Now that DeviceManager is updated with the right states, it's time
        // to create the devices based on the configuration.
4389 self.create_devices(None, None, None) 4390 .map_err(|e| MigratableError::Restore(anyhow!("Could not create devices {:?}", e)))?; 4391 4392 Ok(()) 4393 } 4394 } 4395 4396 impl Transportable for DeviceManager {} 4397 4398 impl Migratable for DeviceManager { 4399 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 4400 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4401 if let Some(migratable) = &device_node.migratable { 4402 migratable.lock().unwrap().start_dirty_log()?; 4403 } 4404 } 4405 Ok(()) 4406 } 4407 4408 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 4409 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4410 if let Some(migratable) = &device_node.migratable { 4411 migratable.lock().unwrap().stop_dirty_log()?; 4412 } 4413 } 4414 Ok(()) 4415 } 4416 4417 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 4418 let mut tables = Vec::new(); 4419 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4420 if let Some(migratable) = &device_node.migratable { 4421 tables.push(migratable.lock().unwrap().dirty_log()?); 4422 } 4423 } 4424 Ok(MemoryRangeTable::new_from_tables(tables)) 4425 } 4426 4427 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 4428 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4429 if let Some(migratable) = &device_node.migratable { 4430 migratable.lock().unwrap().start_migration()?; 4431 } 4432 } 4433 Ok(()) 4434 } 4435 4436 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 4437 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4438 if let Some(migratable) = &device_node.migratable { 4439 migratable.lock().unwrap().complete_migration()?; 4440 } 4441 } 4442 Ok(()) 4443 } 4444 } 4445 4446 const PCIU_FIELD_OFFSET: u64 = 0; 4447 const PCID_FIELD_OFFSET: u64 = 4; 4448 const B0EJ_FIELD_OFFSET: u64 = 8; 4449 const 
PSEG_FIELD_OFFSET: u64 = 12; 4450 const PCIU_FIELD_SIZE: usize = 4; 4451 const PCID_FIELD_SIZE: usize = 4; 4452 const B0EJ_FIELD_SIZE: usize = 4; 4453 const PSEG_FIELD_SIZE: usize = 4; 4454 4455 impl BusDevice for DeviceManager { 4456 fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { 4457 match offset { 4458 PCIU_FIELD_OFFSET => { 4459 assert!(data.len() == PCIU_FIELD_SIZE); 4460 data.copy_from_slice( 4461 &self.pci_segments[self.selected_segment] 4462 .pci_devices_up 4463 .to_le_bytes(), 4464 ); 4465 // Clear the PCIU bitmap 4466 self.pci_segments[self.selected_segment].pci_devices_up = 0; 4467 } 4468 PCID_FIELD_OFFSET => { 4469 assert!(data.len() == PCID_FIELD_SIZE); 4470 data.copy_from_slice( 4471 &self.pci_segments[self.selected_segment] 4472 .pci_devices_down 4473 .to_le_bytes(), 4474 ); 4475 // Clear the PCID bitmap 4476 self.pci_segments[self.selected_segment].pci_devices_down = 0; 4477 } 4478 B0EJ_FIELD_OFFSET => { 4479 assert!(data.len() == B0EJ_FIELD_SIZE); 4480 // Always return an empty bitmap since the eject is always 4481 // taken care of right away during a write access. 
4482 data.fill(0); 4483 } 4484 PSEG_FIELD_OFFSET => { 4485 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4486 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes()); 4487 } 4488 _ => error!( 4489 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4490 base, offset 4491 ), 4492 } 4493 4494 debug!( 4495 "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}", 4496 base, offset, data 4497 ) 4498 } 4499 4500 fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> { 4501 match offset { 4502 B0EJ_FIELD_OFFSET => { 4503 assert!(data.len() == B0EJ_FIELD_SIZE); 4504 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4505 data_array.copy_from_slice(data); 4506 let mut slot_bitmap = u32::from_le_bytes(data_array); 4507 4508 while slot_bitmap > 0 { 4509 let slot_id = slot_bitmap.trailing_zeros(); 4510 if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) { 4511 error!("Failed ejecting device {}: {:?}", slot_id, e); 4512 } 4513 slot_bitmap &= !(1 << slot_id); 4514 } 4515 } 4516 PSEG_FIELD_OFFSET => { 4517 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4518 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4519 data_array.copy_from_slice(data); 4520 let selected_segment = u32::from_le_bytes(data_array) as usize; 4521 if selected_segment >= self.pci_segments.len() { 4522 error!( 4523 "Segment selection out of range: {} >= {}", 4524 selected_segment, 4525 self.pci_segments.len() 4526 ); 4527 return None; 4528 } 4529 self.selected_segment = selected_segment; 4530 } 4531 _ => error!( 4532 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4533 base, offset 4534 ), 4535 } 4536 4537 debug!( 4538 "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}", 4539 base, offset, data 4540 ); 4541 4542 None 4543 } 4544 } 4545 4546 impl Drop for DeviceManager { 4547 fn drop(&mut self) { 4548 for handle in self.virtio_devices.drain(..) { 4549 handle.virtio_device.lock().unwrap().shutdown(); 4550 } 4551 } 4552 } 4553