// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
//

use crate::config::{
    ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
    VdpaConfig, VhostMode, VmConfig, VsockConfig,
};
use crate::device_tree::{DeviceNode, DeviceTree};
#[cfg(feature = "kvm")]
use crate::interrupt::kvm::KvmMsiInterruptManager as MsiInterruptManager;
#[cfg(feature = "mshv")]
use crate::interrupt::mshv::MshvMsiInterruptManager as MsiInterruptManager;
use crate::interrupt::LegacyUserspaceInterruptManager;
use crate::memory_manager::MEMORY_MANAGER_ACPI_SIZE;
use crate::memory_manager::{Error as MemoryManagerError, MemoryManager};
use crate::pci_segment::PciSegment;
use crate::seccomp_filters::{get_seccomp_filter, Thread};
use crate::serial_manager::{Error as SerialManagerError, SerialManager};
use crate::sigwinch_listener::start_sigwinch_listener;
use crate::GuestRegionMmap;
use crate::PciDeviceInfo;
use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
use acpi_tables::{aml, aml::Aml};
use anyhow::anyhow;
#[cfg(target_arch = "aarch64")]
use arch::aarch64::gic::gicv3_its::kvm::KvmGicV3Its;
use arch::layout;
#[cfg(target_arch = "x86_64")]
use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
use arch::NumaNodes;
#[cfg(target_arch = "aarch64")]
use arch::{DeviceType, MmioDeviceInfo};
use block_util::{
    async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
    fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync,
    raw_async::RawFileDisk, raw_sync::RawFileDiskSync, vhdx_sync::VhdxDiskSync, ImageType,
};
#[cfg(target_arch = "aarch64")]
use devices::gic;
#[cfg(target_arch = "x86_64")]
use devices::ioapic;
#[cfg(target_arch = "aarch64")]
use devices::legacy::Pl011;
#[cfg(target_arch = "x86_64")]
use devices::legacy::Serial;
use devices::{
    interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
};
#[cfg(feature = "kvm")]
use hypervisor::kvm_ioctls::*;
use hypervisor::DeviceFd;
#[cfg(feature = "mshv")]
use hypervisor::IoEventAddress;
use libc::{
    cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED,
    O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW,
};
#[cfg(target_arch = "x86_64")]
use pci::PciConfigIo;
use pci::{
    DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping,
    VfioUserPciDevice, VfioUserPciDeviceError,
};
use seccompiler::SeccompAction;
use std::collections::HashMap;
use std::convert::TryInto;
use std::fs::{read_link, File, OpenOptions};
use std::io::{self, stdout, Seek, SeekFrom};
use std::mem::zeroed;
use std::num::Wrapping;
use std::os::unix::fs::OpenOptionsExt;
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::path::PathBuf;
use std::result;
use std::sync::{Arc, Mutex};
use vfio_ioctls::{VfioContainer, VfioDevice};
use virtio_devices::transport::VirtioPciDevice;
use virtio_devices::transport::VirtioTransport;
use virtio_devices::vhost_user::VhostUserConfig;
use virtio_devices::{AccessPlatformMapping, VdpaDmaMapping, VirtioMemMappingSource};
use virtio_devices::{Endpoint, IommuMapping};
use virtio_devices::{VirtioSharedMemory, VirtioSharedMemoryList};
use vm_allocator::{AddressAllocator, SystemAllocator};
use vm_device::dma_mapping::vfio::VfioDmaMapping;
use vm_device::dma_mapping::ExternalDmaMapping;
use vm_device::interrupt::{
    InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
};
use vm_device::{Bus, BusDevice, Resource};
use vm_memory::guest_memory::FileOffset;
use vm_memory::GuestMemoryRegion;
use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
#[cfg(target_arch = "x86_64")]
use vm_memory::{GuestAddressSpace, GuestMemory};
use vm_migration::{
    protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
    SnapshotDataSection, Snapshottable, Transportable,
};
use vm_virtio::AccessPlatform;
use vm_virtio::VirtioDeviceType;
use vmm_sys_util::eventfd::EventFd;

// Size of the MMIO window reserved per MMIO-transport device on AArch64.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Name prefixes/names below are appended to an incrementing counter (or used
// as-is for singleton devices) to build unique identifiers in the device tree.
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";

const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";

#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "_ioapic";

const SERIAL_DEVICE_NAME_PREFIX: &str = "_serial";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME_PREFIX: &str = "_gpio";

const CONSOLE_DEVICE_NAME: &str = "_console";
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const BALLOON_DEVICE_NAME: &str = "_balloon";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const RNG_DEVICE_NAME: &str = "_rng";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "_watchdog";

const IOMMU_DEVICE_NAME: &str = "_iommu";

const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";

/// Errors associated with device manager
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(vmm_sys_util::errno::Error),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// Incorrect device ID as it is already used by another device.
    DeviceIdAlreadyInUse,

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot generate a ResizeSender from the Resize object.
    CreateResizeSender(virtio_devices::mem::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pci could not be found.
    MissingVirtioPciResources,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,
}
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// Size of the MMIO window through which the guest notifies the DeviceManager
// (hotplug/ACPI GED accesses land here).
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

// ioctl request numbers for pty management (see ioctl_tty(2)):
// TIOCSPTLCK (un)locks the pty main side, TIOCGTPEER opens the peer (sub) side.
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
const TIOCGTPEER: libc::c_int = 0x5441;

/// Creates a pseudo-terminal pair and returns the main end, the sub (peer)
/// end, and the filesystem path of the sub end.
///
/// When `non_blocking` is true the main end is opened with O_NONBLOCK.
pub fn create_pty(non_blocking: bool) -> io::Result<(File, File, PathBuf)> {
    // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
    // This is done to try and use the devpts filesystem that
    // could be available for use in the process's namespace first.
    // Ideally these are all the same file though but different
    // kernels could have things setup differently.
    // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
    // for further details.

    let custom_flags = libc::O_NOCTTY | if non_blocking { libc::O_NONBLOCK } else { 0 };
    let main = match OpenOptions::new()
        .read(true)
        .write(true)
        .custom_flags(custom_flags)
        .open("/dev/pts/ptmx")
    {
        Ok(f) => f,
        _ => OpenOptions::new()
            .read(true)
            .write(true)
            .custom_flags(custom_flags)
            .open("/dev/ptmx")?,
    };
    // Unlock the pty so the peer side can be opened (0 == unlocked).
    // NOTE(review): the TIOCSPTLCK ioctl's return value is not checked here;
    // a failure would only surface later when TIOCGTPEER fails.
    let mut unlock: libc::c_ulong = 0;
    // SAFETY: FFI call into libc, trivially safe
    unsafe {
        libc::ioctl(
            main.as_raw_fd(),
            TIOCSPTLCK.try_into().unwrap(),
            &mut unlock,
        )
    };

    // SAFETY: FFI call into libc, trivally safe
    let sub_fd = unsafe {
        libc::ioctl(
            main.as_raw_fd(),
            TIOCGTPEER.try_into().unwrap(),
            libc::O_NOCTTY | libc::O_RDWR,
        )
    };
    if sub_fd == -1 {
        return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
    }

    // Resolve the sub end's path via /proc rather than ptsname() to avoid
    // libc's non-thread-safe static buffer.
    let proc_path = PathBuf::from(format!("/proc/self/fd/{}", sub_fd));
    let path = read_link(proc_path)?;

    // SAFETY: sub_fd is checked to be valid before being wrapped in File
    Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
}
#[derive(Default)] 537 pub struct Console { 538 console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>, 539 } 540 541 impl Console { 542 pub fn update_console_size(&self) { 543 if let Some(resizer) = self.console_resizer.as_ref() { 544 resizer.update_console_size() 545 } 546 } 547 } 548 549 pub(crate) struct AddressManager { 550 pub(crate) allocator: Arc<Mutex<SystemAllocator>>, 551 #[cfg(target_arch = "x86_64")] 552 pub(crate) io_bus: Arc<Bus>, 553 pub(crate) mmio_bus: Arc<Bus>, 554 vm: Arc<dyn hypervisor::Vm>, 555 device_tree: Arc<Mutex<DeviceTree>>, 556 pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 557 } 558 559 impl DeviceRelocation for AddressManager { 560 fn move_bar( 561 &self, 562 old_base: u64, 563 new_base: u64, 564 len: u64, 565 pci_dev: &mut dyn PciDevice, 566 region_type: PciBarRegionType, 567 ) -> std::result::Result<(), std::io::Error> { 568 match region_type { 569 PciBarRegionType::IoRegion => { 570 #[cfg(target_arch = "x86_64")] 571 { 572 // Update system allocator 573 self.allocator 574 .lock() 575 .unwrap() 576 .free_io_addresses(GuestAddress(old_base), len as GuestUsize); 577 578 self.allocator 579 .lock() 580 .unwrap() 581 .allocate_io_addresses( 582 Some(GuestAddress(new_base)), 583 len as GuestUsize, 584 None, 585 ) 586 .ok_or_else(|| { 587 io::Error::new(io::ErrorKind::Other, "failed allocating new IO range") 588 })?; 589 590 // Update PIO bus 591 self.io_bus 592 .update_range(old_base, len, new_base, len) 593 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 594 } 595 #[cfg(target_arch = "aarch64")] 596 error!("I/O region is not supported"); 597 } 598 PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => { 599 // Update system allocator 600 if region_type == PciBarRegionType::Memory32BitRegion { 601 self.allocator 602 .lock() 603 .unwrap() 604 .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize); 605 606 self.allocator 607 .lock() 608 .unwrap() 609 .allocate_mmio_hole_addresses( 
610 Some(GuestAddress(new_base)), 611 len as GuestUsize, 612 Some(len), 613 ) 614 .ok_or_else(|| { 615 io::Error::new( 616 io::ErrorKind::Other, 617 "failed allocating new 32 bits MMIO range", 618 ) 619 })?; 620 } else { 621 // Find the specific allocator that this BAR was allocated from and use it for new one 622 for allocator in &self.pci_mmio_allocators { 623 let allocator_base = allocator.lock().unwrap().base(); 624 let allocator_end = allocator.lock().unwrap().end(); 625 626 if old_base >= allocator_base.0 && old_base <= allocator_end.0 { 627 allocator 628 .lock() 629 .unwrap() 630 .free(GuestAddress(old_base), len as GuestUsize); 631 632 allocator 633 .lock() 634 .unwrap() 635 .allocate( 636 Some(GuestAddress(new_base)), 637 len as GuestUsize, 638 Some(len), 639 ) 640 .ok_or_else(|| { 641 io::Error::new( 642 io::ErrorKind::Other, 643 "failed allocating new 64 bits MMIO range", 644 ) 645 })?; 646 647 break; 648 } 649 } 650 } 651 652 // Update MMIO bus 653 self.mmio_bus 654 .update_range(old_base, len, new_base, len) 655 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 656 } 657 } 658 659 let any_dev = pci_dev.as_any(); 660 if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() { 661 // Update the device_tree resources associated with the device 662 if let Some(node) = self 663 .device_tree 664 .lock() 665 .unwrap() 666 .get_mut(&virtio_pci_dev.id()) 667 { 668 let mut resource_updated = false; 669 for resource in node.resources.iter_mut() { 670 if let Resource::MmioAddressRange { base, .. 
} = resource { 671 if *base == old_base { 672 *base = new_base; 673 resource_updated = true; 674 break; 675 } 676 } 677 } 678 679 if !resource_updated { 680 return Err(io::Error::new( 681 io::ErrorKind::Other, 682 format!( 683 "Couldn't find a resource with base 0x{:x} for device {}", 684 old_base, 685 virtio_pci_dev.id() 686 ), 687 )); 688 } 689 } else { 690 return Err(io::Error::new( 691 io::ErrorKind::Other, 692 format!( 693 "Couldn't find device {} from device tree", 694 virtio_pci_dev.id() 695 ), 696 )); 697 } 698 699 let bar_addr = virtio_pci_dev.config_bar_addr(); 700 if bar_addr == new_base { 701 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) { 702 let io_addr = IoEventAddress::Mmio(addr); 703 self.vm.unregister_ioevent(event, &io_addr).map_err(|e| { 704 io::Error::new( 705 io::ErrorKind::Other, 706 format!("failed to unregister ioevent: {:?}", e), 707 ) 708 })?; 709 } 710 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) { 711 let io_addr = IoEventAddress::Mmio(addr); 712 self.vm 713 .register_ioevent(event, &io_addr, None) 714 .map_err(|e| { 715 io::Error::new( 716 io::ErrorKind::Other, 717 format!("failed to register ioevent: {:?}", e), 718 ) 719 })?; 720 } 721 } else { 722 let virtio_dev = virtio_pci_dev.virtio_device(); 723 let mut virtio_dev = virtio_dev.lock().unwrap(); 724 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() { 725 if shm_regions.addr.raw_value() == old_base { 726 let mem_region = self.vm.make_user_memory_region( 727 shm_regions.mem_slot, 728 old_base, 729 shm_regions.len, 730 shm_regions.host_addr, 731 false, 732 false, 733 ); 734 735 self.vm.remove_user_memory_region(mem_region).map_err(|e| { 736 io::Error::new( 737 io::ErrorKind::Other, 738 format!("failed to remove user memory region: {:?}", e), 739 ) 740 })?; 741 742 // Create new mapping by inserting new region to KVM. 
743 let mem_region = self.vm.make_user_memory_region( 744 shm_regions.mem_slot, 745 new_base, 746 shm_regions.len, 747 shm_regions.host_addr, 748 false, 749 false, 750 ); 751 752 self.vm.create_user_memory_region(mem_region).map_err(|e| { 753 io::Error::new( 754 io::ErrorKind::Other, 755 format!("failed to create user memory regions: {:?}", e), 756 ) 757 })?; 758 759 // Update shared memory regions to reflect the new mapping. 760 shm_regions.addr = GuestAddress(new_base); 761 virtio_dev.set_shm_regions(shm_regions).map_err(|e| { 762 io::Error::new( 763 io::ErrorKind::Other, 764 format!("failed to update shared memory regions: {:?}", e), 765 ) 766 })?; 767 } 768 } 769 } 770 } 771 772 pci_dev.move_bar(old_base, new_base) 773 } 774 } 775 776 #[derive(Serialize, Deserialize)] 777 struct DeviceManagerState { 778 device_tree: DeviceTree, 779 device_id_cnt: Wrapping<usize>, 780 } 781 782 #[derive(Debug)] 783 pub struct PtyPair { 784 pub main: File, 785 pub sub: File, 786 pub path: PathBuf, 787 } 788 789 impl Clone for PtyPair { 790 fn clone(&self) -> Self { 791 PtyPair { 792 main: self.main.try_clone().unwrap(), 793 sub: self.sub.try_clone().unwrap(), 794 path: self.path.clone(), 795 } 796 } 797 } 798 799 #[derive(Clone)] 800 pub enum PciDeviceHandle { 801 Vfio(Arc<Mutex<VfioPciDevice>>), 802 Virtio(Arc<Mutex<VirtioPciDevice>>), 803 VfioUser(Arc<Mutex<VfioUserPciDevice>>), 804 } 805 806 #[derive(Clone)] 807 struct MetaVirtioDevice { 808 virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 809 iommu: bool, 810 id: String, 811 pci_segment: u16, 812 dma_handler: Option<Arc<dyn ExternalDmaMapping>>, 813 } 814 815 pub struct DeviceManager { 816 // Manage address space related to devices 817 address_manager: Arc<AddressManager>, 818 819 // Console abstraction 820 console: Arc<Console>, 821 822 // console PTY 823 console_pty: Option<Arc<Mutex<PtyPair>>>, 824 825 // serial PTY 826 serial_pty: Option<Arc<Mutex<PtyPair>>>, 827 828 // Serial Manager 829 serial_manager: 
Option<Arc<SerialManager>>, 830 831 // pty foreground status, 832 console_resize_pipe: Option<Arc<File>>, 833 834 // Interrupt controller 835 #[cfg(target_arch = "x86_64")] 836 interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>, 837 #[cfg(target_arch = "aarch64")] 838 interrupt_controller: Option<Arc<Mutex<gic::Gic>>>, 839 840 // Things to be added to the commandline (i.e. for virtio-mmio) 841 cmdline_additions: Vec<String>, 842 843 // ACPI GED notification device 844 ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>, 845 846 // VM configuration 847 config: Arc<Mutex<VmConfig>>, 848 849 // Memory Manager 850 memory_manager: Arc<Mutex<MemoryManager>>, 851 852 // The virtio devices on the system 853 virtio_devices: Vec<MetaVirtioDevice>, 854 855 // List of bus devices 856 // Let the DeviceManager keep strong references to the BusDevice devices. 857 // This allows the IO and MMIO buses to be provided with Weak references, 858 // which prevents cyclic dependencies. 859 bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>, 860 861 // Counter to keep track of the consumed device IDs. 862 device_id_cnt: Wrapping<usize>, 863 864 pci_segments: Vec<PciSegment>, 865 866 #[cfg_attr(target_arch = "aarch64", allow(dead_code))] 867 // MSI Interrupt Manager 868 msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, 869 870 #[cfg_attr(feature = "mshv", allow(dead_code))] 871 // Legacy Interrupt Manager 872 legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>, 873 874 // Passthrough device handle 875 passthrough_device: Option<Arc<dyn hypervisor::Device>>, 876 877 // VFIO container 878 // Only one container can be created, therefore it is stored as part of the 879 // DeviceManager to be reused. 
880 vfio_container: Option<Arc<VfioContainer>>, 881 882 // Paravirtualized IOMMU 883 iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>, 884 iommu_mapping: Option<Arc<IommuMapping>>, 885 886 // PCI information about devices attached to the paravirtualized IOMMU 887 // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF 888 // representing the devices attached to the virtual IOMMU. This is useful 889 // information for filling the ACPI VIOT table. 890 iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>, 891 892 // Tree of devices, representing the dependencies between devices. 893 // Useful for introspection, snapshot and restore. 894 device_tree: Arc<Mutex<DeviceTree>>, 895 896 // Exit event 897 exit_evt: EventFd, 898 reset_evt: EventFd, 899 900 #[cfg(target_arch = "aarch64")] 901 id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>, 902 903 // seccomp action 904 seccomp_action: SeccompAction, 905 906 // List of guest NUMA nodes. 907 numa_nodes: NumaNodes, 908 909 // Possible handle to the virtio-balloon device 910 balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>, 911 912 // Virtio Device activation EventFd to allow the VMM thread to trigger device 913 // activation and thus start the threads from the VMM thread 914 activate_evt: EventFd, 915 916 acpi_address: GuestAddress, 917 918 selected_segment: usize, 919 920 // Possible handle to the virtio-mem device 921 virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>, 922 923 #[cfg(target_arch = "aarch64")] 924 // GPIO device for AArch64 925 gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>, 926 927 // Flag to force setting the iommu on virtio devices 928 force_iommu: bool, 929 930 // Helps identify if the VM is currently being restored 931 restoring: bool, 932 933 // io_uring availability if detected 934 io_uring_supported: Option<bool>, 935 } 936 937 impl DeviceManager { 938 #[allow(clippy::too_many_arguments)] 939 pub fn new( 940 vm: Arc<dyn hypervisor::Vm>, 941 config: 
Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        exit_evt: &EventFd,
        reset_evt: &EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        restoring: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        let device_tree = Arc::new(Mutex::new(DeviceTree::new()));

        // Number of PCI segments comes from the platform configuration;
        // default to a single segment when no platform section was given.
        let num_pci_segments =
            if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
                platform_config.num_pci_segments
            } else {
                1
            };

        let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
        let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;

        // Start each PCI segment range on a 4GiB boundary: split the device
        // area evenly between segments, rounding each segment's size down to
        // a multiple of 4GiB.
        // NOTE(review): if the device area is smaller than 4GiB *
        // num_pci_segments this computes a zero-sized segment, and the
        // AddressAllocator::new(..).unwrap() below would presumably fail —
        // worth an explicit check. TODO confirm.
        let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
            / ((4 << 30) * num_pci_segments as u64)
            * (4 << 30);

        // One MMIO allocator per PCI segment, covering that segment's slice
        // of the device area.
        let mut pci_mmio_allocators = vec![];
        for i in 0..num_pci_segments as u64 {
            let mmio_start = start_of_device_area + i * pci_segment_size;
            let allocator = Arc::new(Mutex::new(
                AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
            ));
            pci_mmio_allocators.push(allocator)
        }

        let address_manager = Arc::new(AddressManager {
            allocator: memory_manager.lock().unwrap().allocator(),
            #[cfg(target_arch = "x86_64")]
            io_bus: Arc::new(Bus::new()),
            mmio_bus: Arc::new(Bus::new()),
            vm: vm.clone(),
            device_tree: Arc::clone(&device_tree),
            pci_mmio_allocators,
        });

        // First we create the MSI interrupt manager, the legacy one is created
        // later, after the IOAPIC device creation.
        // The reason we create the MSI one first is because the IOAPIC needs it,
        // and then the legacy interrupt manager needs an IOAPIC. So we're
        // handling a linear dependency chain:
        // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
        let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
            Arc::new(MsiInterruptManager::new(
                Arc::clone(&address_manager.allocator),
                vm,
            ));

        // Reserve the platform MMIO window backing the DeviceManager's own
        // ACPI region (mapped onto the MMIO bus at the end of this function).
        // NOTE(review): failure maps to AllocateIoPort although this is an
        // MMIO allocation — AllocateMmioAddress would be the consistent
        // variant (see add_acpi_devices below).
        let acpi_address = address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        // Legacy IRQ assignment for the 32 PCI device slots; the same table
        // is handed to every segment created below.
        let mut pci_irq_slots = [0; 32];
        PciSegment::reserve_legacy_interrupts_for_pci_devices(
            &address_manager,
            &mut pci_irq_slots,
        )?;

        // Segment 0 is the default segment; further segments are numbered
        // from 1 and get their own MMIO allocator.
        let mut pci_segments = vec![PciSegment::new_default_segment(
            &address_manager,
            Arc::clone(&address_manager.pci_mmio_allocators[0]),
            &pci_irq_slots,
        )?];

        for i in 1..num_pci_segments as usize {
            pci_segments.push(PciSegment::new(
                i as u16,
                &address_manager,
                Arc::clone(&address_manager.pci_mmio_allocators[i]),
                &pci_irq_slots,
            )?);
        }

        let device_manager = DeviceManager {
            address_manager: Arc::clone(&address_manager),
            console: Arc::new(Console::default()),
            interrupt_controller: None,
            cmdline_additions: Vec::new(),

            ged_notification_device: None,
            config,
            memory_manager,
            virtio_devices: Vec::new(),
            bus_devices: Vec::new(),
            device_id_cnt: Wrapping(0),
            msi_interrupt_manager,
            legacy_interrupt_manager: None,
            passthrough_device: None,
            vfio_container: None,
            iommu_device: None,
            iommu_mapping: None,
            iommu_attached_devices: None,
            pci_segments,
            device_tree,
            exit_evt: exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
            reset_evt: reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
            #[cfg(target_arch = "aarch64")]
            id_to_dev_info: HashMap::new(),
            seccomp_action,

            numa_nodes,
            balloon: None,
            activate_evt: activate_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,

            acpi_address,

            selected_segment: 0,
            serial_pty: None,
            serial_manager: None,
            console_pty: None,
            console_resize_pipe: None,
            virtio_mem_devices: Vec::new(),
            #[cfg(target_arch = "aarch64")]
            gpio_device: None,
            force_iommu,
            restoring,
            io_uring_supported: None,
        };

        let device_manager = Arc::new(Mutex::new(device_manager));

        // The DeviceManager itself acts as a BusDevice: expose its ACPI
        // region on the MMIO bus at the address reserved above.
        address_manager
            .mmio_bus
            .insert(
                Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
                acpi_address.0,
                DEVICE_MANAGER_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(device_manager)
    }

    /// Clone of the serial device's PTY pair, if the serial device was set
    /// up in PTY mode.
    pub fn serial_pty(&self) -> Option<PtyPair> {
        self.serial_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    /// Clone of the virtio-console's PTY pair, if the console was set up in
    /// PTY mode.
    pub fn console_pty(&self) -> Option<PtyPair> {
        self.console_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    /// Pipe written by the SIGWINCH listener to signal console resizes, if any.
    pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
        self.console_resize_pipe.as_ref().map(Arc::clone)
    }

    /// Create every device described by the VM config: the interrupt
    /// controller, legacy and ACPI devices, the console/serial devices and
    /// all virtio devices, then plug them onto the PCI bus(es).
    ///
    /// The optional PTY pairs / resize pipe allow reusing pre-existing
    /// endpoints (e.g. across a reboot) instead of creating fresh ones.
    pub fn create_devices(
        &mut self,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        console_resize_pipe: Option<File>,
    ) -> DeviceManagerResult<()> {
        let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();

        let interrupt_controller = self.add_interrupt_controller()?;

        // Now we can create the legacy interrupt manager, which needs the freshly
        // formed IOAPIC device.
        let legacy_interrupt_manager: Arc<
            dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
        > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
            &interrupt_controller,
        )));

        {
            // Expose the MemoryManager's ACPI region on the MMIO bus, if it
            // has one.
            if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
                self.address_manager
                    .mmio_bus
                    .insert(
                        Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
                        acpi_address.0,
                        MEMORY_MANAGER_ACPI_SIZE as u64,
                    )
                    .map_err(DeviceManagerError::BusError)?;
            }
        }

        #[cfg(target_arch = "x86_64")]
        self.add_legacy_devices(
            self.reset_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )?;

        #[cfg(target_arch = "aarch64")]
        self.add_legacy_devices(&legacy_interrupt_manager)?;

        {
            self.ged_notification_device = self.add_acpi_devices(
                &legacy_interrupt_manager,
                self.reset_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )?;
        }

        self.console = self.add_console_device(
            &legacy_interrupt_manager,
            &mut virtio_devices,
            serial_pty,
            console_pty,
            console_resize_pipe,
        )?;

        self.legacy_interrupt_manager = Some(legacy_interrupt_manager);

        virtio_devices.append(&mut self.make_virtio_devices()?);

        self.add_pci_devices(virtio_devices.clone())?;

        self.virtio_devices = virtio_devices;

        Ok(())
    }

    /// Snapshot of the pieces of DeviceManager state that need to survive a
    /// save/restore cycle: the device tree and the device-id counter.
    fn state(&self) -> DeviceManagerState {
        DeviceManagerState {
            device_tree: self.device_tree.lock().unwrap().clone(),
            device_id_cnt: self.device_id_cnt,
        }
    }

    /// Restore the state previously captured by `state()`.
    fn set_state(&mut self, state: &DeviceManagerState) {
        *self.device_tree.lock().unwrap() = state.device_tree.clone();
        self.device_id_cnt = state.device_id_cnt;
    }

    /// Guest-address range (inclusive start/end) used for MSI doorbells:
    /// on x86_64 the fixed APIC MMIO window, on aarch64 a range computed
    /// below the GICv3 distributor from the redistributor/ITS layout.
    // NOTE(review): only reads self.config — could take &self.
    fn get_msi_iova_space(&mut self) -> (u64, u64) {
        #[cfg(target_arch = "aarch64")]
        {
            let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
            let msi_start = arch::layout::GIC_V3_DIST_START.raw_value()
                - arch::layout::GIC_V3_REDIST_SIZE * (vcpus as u64)
                - arch::layout::GIC_V3_ITS_SIZE;
            let msi_end = msi_start + arch::layout::GIC_V3_ITS_SIZE - 1;
            (msi_start, msi_end)
        }
        #[cfg(target_arch = "x86_64")]
        (0xfee0_0000, 0xfeef_ffff)
    }

    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }

    /// Plug every virtio device — plus VFIO, vfio-user and the optional
    /// virtio-iommu device — onto the PCI bus, recording which BDFs are
    /// attached to the IOMMU.
    #[allow(unused_variables)]
    fn add_pci_devices(
        &mut self,
        virtio_devices: Vec<MetaVirtioDevice>,
    ) -> DeviceManagerResult<()> {
        let iommu_id = String::from(IOMMU_DEVICE_NAME);

        // Create the virtio-iommu device first (if requested) so its mapping
        // can be shared with the devices attached to it below.
        let iommu_device = if self.config.lock().unwrap().iommu {
            let (device, mapping) = virtio_devices::Iommu::new(
                iommu_id.clone(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.get_msi_iova_space(),
            )
            .map_err(DeviceManagerError::CreateVirtioIommu)?;
            let device = Arc::new(Mutex::new(device));
            self.iommu_device = Some(Arc::clone(&device));
            self.iommu_mapping = Some(mapping);

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(iommu_id.clone(), device_node!(iommu_id, device));

            Some(device)
        } else {
            None
        };

        let mut iommu_attached_devices = Vec::new();
        {
            for handle in virtio_devices {
                // Only devices flagged for IOMMU get the shared mapping.
                let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
                    self.iommu_mapping.clone()
                } else {
                    None
                };

                let dev_id = self.add_virtio_pci_device(
                    handle.virtio_device,
                    &mapping,
                    handle.id,
                    handle.pci_segment,
                    handle.dma_handler,
                )?;

                if handle.iommu {
                    iommu_attached_devices.push(dev_id);
                }
            }

            let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
            iommu_attached_devices.append(&mut vfio_iommu_device_ids);

            let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
            iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);

            // Add all devices from forced iommu segments
            if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
                if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
                    for segment in iommu_segments {
                        // Every one of the 32 PCI device slots on the segment
                        // is attached to the IOMMU.
                        for device in 0..32 {
                            let bdf = PciBdf::new(*segment, 0, device, 0);
                            if !iommu_attached_devices.contains(&bdf) {
                                iommu_attached_devices.push(bdf);
                            }
                        }
                    }
                }
            }

            // The virtio-iommu device itself always lands on segment 0 and is
            // never attached to itself.
            if let Some(iommu_device) = iommu_device {
                let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
                self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
            }
        }

        // Track the per-segment PCI config-space endpoints as bus devices.
        for segment in &self.pci_segments {
            #[cfg(target_arch = "x86_64")]
            if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
                self.bus_devices
                    .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
            }

            self.bus_devices
                .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
        }

        Ok(())
    }

    #[cfg(target_arch = "aarch64")]
    /// Create the GIC-based interrupt controller and remember it on self.
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
            gic::Gic::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        // Unlike x86_64, the "interrupt_controller" here for AArch64 is only
        // a `Gic` object that implements the `InterruptController` to provide
        // interrupt delivery service. This is not the real GIC device so that
        // we do not need to insert it to the device tree.

        Ok(interrupt_controller)
    }

    #[cfg(target_arch = "aarch64")]
    /// The GIC interrupt controller created by `add_interrupt_controller`,
    /// if any.
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }

    #[cfg(target_arch = "x86_64")]
    /// Create the IOAPIC, map it at the fixed IOAPIC MMIO window and record
    /// it both as a bus device and in the device tree.
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let id = String::from(IOAPIC_DEVICE_NAME);

        // Create IOAPIC
        let interrupt_controller = Arc::new(Mutex::new(
            ioapic::Ioapic::new(
                id.clone(),
                APIC_START,
                Arc::clone(&self.msi_interrupt_manager),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        self.address_manager
            .mmio_bus
            .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
            .map_err(DeviceManagerError::BusError)?;

        self.bus_devices
            .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// Create the ACPI devices: shutdown device, Generic Event Device (GED)
    /// and PM timer. Returns the GED device so callers can raise ACPI
    /// notifications through it.
    fn add_acpi_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        reset_evt: EventFd,
        exit_evt: EventFd,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
        let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
            exit_evt, reset_evt,
        )));

        self.bus_devices
            .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            // NOTE(review): 8 I/O ports are reserved at 0x3c0 but only 4 are
            // mapped below — confirm the extra reservation is intended.
            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(0x3c0)), 0x8, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(shutdown_device, 0x3c0, 0x4)
                .map_err(DeviceManagerError::BusError)?;
        }

        // The GED gets a dynamically allocated legacy IRQ and a platform
        // MMIO window for its ACPI region.
        let ged_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();
        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: ged_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;
        let ged_address = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(
                None,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
                None,
            )
            .ok_or(DeviceManagerError::AllocateMmioAddress)?;
        let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
            interrupt_group,
            ged_irq,
            ged_address,
        )));
        self.address_manager
            .mmio_bus
            .insert(
                ged_device.clone(),
                ged_address.0,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;
        self.bus_devices
            .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);

        let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));

        self.bus_devices
            .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            // ACPI PM timer lives at the conventional 0xb008 I/O port.
            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(0xb008)), 0x4, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(pm_timer_device, 0xb008, 0x4)
                .map_err(DeviceManagerError::BusError)?;
        }

        Ok(Some(ged_device))
    }

    #[cfg(target_arch = "x86_64")]
    /// Create the x86 legacy devices: i8042 (reset), CMOS (memory size
    /// registers) and, when enabled, the firmware debug port.
    fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
        // Add a shutdown device (i8042)
        let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
            reset_evt.try_clone().unwrap(),
        )));

        self.bus_devices
            .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .io_bus
            .insert(i8042, 0x61, 0x4)
            .map_err(DeviceManagerError::BusError)?;
        {
            // Add a CMOS emulated device
            let mem_size = self
                .memory_manager
                .lock()
                .unwrap()
                .guest_memory()
                .memory()
                .last_addr()
                .0
                + 1;
            // Split guest RAM into the below-4G and above-4G portions the
            // CMOS device reports.
            let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
            let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);

            let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
                mem_below_4g,
                mem_above_4g,
                reset_evt,
            )));

            self.bus_devices
                .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(cmos, 0x70, 0x2)
                .map_err(DeviceManagerError::BusError)?;
        }
        #[cfg(feature = "fwdebug")]
        {
            let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));

            self.bus_devices
                .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(fwdebug, 0x402, 0x1)
                .map_err(DeviceManagerError::BusError)?;
        }

        Ok(())
    }

    #[cfg(target_arch = "aarch64")]
    /// Create the AArch64 legacy devices (RTC and GPIO), mapped at their
    /// fixed MMIO addresses with dynamically allocated IRQs.
    fn add_legacy_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
    ) -> DeviceManagerResult<()> {
        // Add a RTC device
        let rtc_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: rtc_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));

        self.bus_devices
            .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(rtc_device, addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        // Record the device so it can be described in the FDT later.
        self.id_to_dev_info.insert(
            (DeviceType::Rtc, "rtc".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: rtc_irq,
            },
        );

        // Add a GPIO device
        let id = String::from(GPIO_DEVICE_NAME_PREFIX);
        let gpio_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: gpio_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
            id.clone(),
            interrupt_group,
        )));

        self.bus_devices
            .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(gpio_device.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.gpio_device = Some(gpio_device.clone());

        self.id_to_dev_info.insert(
            (DeviceType::Gpio, "gpio".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: gpio_irq,
            },
        );

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, gpio_device));

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    /// Create the 16550-style serial device on the conventional COM1
    /// resources (I/O port 0x3f8, IRQ 4).
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
        // Serial is tied to IRQ #4
        let serial_irq = 4;

        let id = String::from(SERIAL_DEVICE_NAME_PREFIX);

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(Serial::new(
            id.clone(),
            interrupt_group,
            serial_writer,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(serial.clone(), 0x3f8, 0x8)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    #[cfg(target_arch = "aarch64")]
    /// Create the PL011 serial device at its fixed MMIO address with a
    /// dynamically allocated IRQ, and add the matching earlycon kernel
    /// command-line hint.
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
        let id = String::from(SERIAL_DEVICE_NAME_PREFIX);

        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
            id.clone(),
            interrupt_group,
            serial_writer,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        // Record the device so it can be described in the FDT later.
        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: serial_irq,
            },
        );

        self.cmdline_additions
            .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// Apply `f` to the termios settings of `fd`. A no-op when `fd` is not
    /// a TTY.
    fn modify_mode<F: FnOnce(&mut termios)>(
        &self,
        fd: RawFd,
        f: F,
    ) -> vmm_sys_util::errno::Result<()> {
        // SAFETY: safe because we check the return value of isatty.
        if unsafe { isatty(fd) } != 1 {
            return Ok(());
        }

        // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr
        // and we check the return result.
        let mut termios: termios = unsafe { zeroed() };
        let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }
        f(&mut termios);
        // SAFETY: Safe because the syscall will only read the extent of termios and we check
        // the return result.
        let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }

        Ok(())
    }

    /// Put the TTY backing `f` into raw mode (no echo, no line buffering).
    fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> {
        // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
        self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
    }

    /// Spawn the SIGWINCH listener for `pty` and keep its notification pipe.
    /// Setup failures are logged and ignored (resize events are best-effort).
    fn listen_for_sigwinch_on_tty(&mut self, pty: &File) -> std::io::Result<()> {
        let seccomp_filter =
            get_seccomp_filter(&self.seccomp_action, Thread::PtyForeground).unwrap();

        match start_sigwinch_listener(seccomp_filter, pty) {
            Ok(pipe) => {
                self.console_resize_pipe = Some(Arc::new(pipe));
            }
            Err(e) => {
                warn!("Ignoring error from setting up SIGWINCH listener: {}", e)
            }
        }

        Ok(())
    }

    /// Create the virtio-console device, wiring its endpoint according to
    /// the configured console mode (file, PTY, TTY, null or off).
    fn add_virtio_console_device(
        &mut self,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_pty: Option<PtyPair>,
        resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
        let console_config = self.config.lock().unwrap().console.clone();
        let endpoint = match console_config.mode {
            ConsoleOutputMode::File => {
                let file = File::create(console_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
                Endpoint::File(file)
            }
            ConsoleOutputMode::Pty => {
                if let Some(pty) = console_pty {
                    // Reuse the PTY handed to us (e.g. across a reboot).
                    self.config.lock().unwrap().console.file = Some(pty.path.clone());
                    let file = pty.main.try_clone().unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(pty)));
                    self.console_resize_pipe = resize_pipe.map(Arc::new);
                    Endpoint::FilePair(file.try_clone().unwrap(), file)
                } else {
                    let (main, mut sub, path) =
                        create_pty(false).map_err(DeviceManagerError::ConsolePtyOpen)?;
                    self.set_raw_mode(&mut sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().console.file = Some(path.clone());
                    let file = main.try_clone().unwrap();
                    assert!(resize_pipe.is_none());
                    self.listen_for_sigwinch_on_tty(&sub).unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
                    Endpoint::FilePair(file.try_clone().unwrap(), file)
                }
            }
            ConsoleOutputMode::Tty => {
                // Duplicating the file descriptors like this is needed as otherwise
                // they will be closed on a reboot and the numbers reused

                // SAFETY: FFI call to dup. Trivially safe.
                let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
                if stdout == -1 {
                    return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
                }
                // SAFETY: stdout is valid and owned solely by us.
                let stdout = unsafe { File::from_raw_fd(stdout) };

                // If an interactive TTY then we can accept input
                // SAFETY: FFI call. Trivially safe.
                if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
                    // SAFETY: FFI call to dup. Trivially safe.
                    let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
                    if stdin == -1 {
                        return vmm_sys_util::errno::errno_result()
                            .map_err(DeviceManagerError::DupFd);
                    }
                    // SAFETY: stdin is valid and owned solely by us.
                    let stdin = unsafe { File::from_raw_fd(stdin) };

                    Endpoint::FilePair(stdout, stdin)
                } else {
                    Endpoint::File(stdout)
                }
            }
            ConsoleOutputMode::Null => Endpoint::Null,
            ConsoleOutputMode::Off => return Ok(None),
        };
        let id = String::from(CONSOLE_DEVICE_NAME);

        let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
            id.clone(),
            endpoint,
            self.console_resize_pipe
                .as_ref()
                .map(|p| p.try_clone().unwrap()),
            self.force_iommu | console_config.iommu,
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )
        .map_err(DeviceManagerError::CreateVirtioConsole)?;
        let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
        virtio_devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_console_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: console_config.iommu,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_console_device));

        // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
        Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
            Some(console_resizer)
        } else {
            None
        })
    }

    /// Create the serial device (plus its SerialManager input thread when in
    /// PTY/TTY mode) and the virtio-console device, returning the combined
    /// `Console` handle.
    fn add_console_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        console_resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
            ConsoleOutputMode::File => Some(Box::new(
                File::create(serial_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::SerialOutputFileOpen)?,
            )),
            ConsoleOutputMode::Pty => {
                if let Some(pty) = serial_pty {
                    // Reuse the PTY handed to us (e.g. across a reboot).
                    self.config.lock().unwrap().serial.file = Some(pty.path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(pty)));
                } else {
                    let (main, mut sub, path) =
                        create_pty(true).map_err(DeviceManagerError::SerialPtyOpen)?;
                    self.set_raw_mode(&mut sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().serial.file = Some(path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
                }
                // In PTY mode the SerialManager drives the output side.
                None
            }
            ConsoleOutputMode::Tty => Some(Box::new(stdout())),
            ConsoleOutputMode::Off | ConsoleOutputMode::Null => None,
        };
        if serial_config.mode != ConsoleOutputMode::Off {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            self.serial_manager = match serial_config.mode {
                ConsoleOutputMode::Pty | ConsoleOutputMode::Tty => {
                    let serial_manager =
                        SerialManager::new(serial, self.serial_pty.clone(), serial_config.mode)
                            .map_err(DeviceManagerError::CreateSerialManager)?;
                    if let Some(mut serial_manager) = serial_manager {
                        serial_manager
                            .start_thread(
                                self.exit_evt
                                    .try_clone()
                                    .map_err(DeviceManagerError::EventFd)?,
                            )
                            .map_err(DeviceManagerError::SpawnSerialManager)?;
                        Some(Arc::new(serial_manager))
                    } else {
                        None
                    }
                }
                _ => None,
            };
        }

        let console_resizer =
            self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;

        Ok(Arc::new(Console { console_resizer }))
    }

    /// Instantiate every configured virtio device, in a fixed order.
    fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices: Vec<MetaVirtioDevice> = Vec::new();

        // Create "standard" virtio devices (net/block/rng)
        devices.append(&mut self.make_virtio_block_devices()?);
        devices.append(&mut self.make_virtio_net_devices()?);
        devices.append(&mut self.make_virtio_rng_devices()?);

        // Add virtio-fs if required
        devices.append(&mut self.make_virtio_fs_devices()?);

        // Add virtio-pmem if required
        devices.append(&mut self.make_virtio_pmem_devices()?);

        // Add virtio-vsock if required
        devices.append(&mut self.make_virtio_vsock_devices()?);

        devices.append(&mut self.make_virtio_mem_devices()?);

        // Add virtio-balloon if required
        devices.append(&mut self.make_virtio_balloon_devices()?);

        // Add virtio-watchdog device
        devices.append(&mut self.make_virtio_watchdog_devices()?);

        // Add vDPA devices if required
        devices.append(&mut self.make_vdpa_devices()?);

        Ok(devices)
    }

    // Cache whether io_uring is supported to avoid probing for every block device
    fn io_uring_is_supported(&mut self) -> bool {
        if let Some(supported) = self.io_uring_supported {
            return supported;
        }

        let supported = block_io_uring_is_supported();
        self.io_uring_supported = Some(supported);
        supported
    }

    /// Create one virtio-block device from its config: either a vhost-user
    /// backend or a local disk file with a backend chosen from the detected
    /// image type (raw/fixed-VHD/QCOW2/VHDX) and io_uring availability.
    /// Assigns and records a device id when the config has none.
    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        if disk_cfg.vhost_user {
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let vhost_user_block_device = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(
                    id.clone(),
                    vu_cfg,
                    self.restoring,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, vhost_user_block_device));

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&vhost_user_block_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id,
                pci_segment: disk_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                // Bypass the host page cache when direct I/O was requested.
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            // Pick the disk backend matching the image format; raw and
            // fixed-VHD images get the asynchronous io_uring backend when
            // available (and not disabled by config).
            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if self.io_uring_is_supported() && !disk_cfg.disable_io_uring {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");
                        Box::new(
                            FixedVhdDiskAsync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                        ) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if self.io_uring_is_supported() && !disk_cfg.disable_io_uring {
                        info!("Using asynchronous RAW disk file (io_uring)");
                        Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            let dev = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    self.seccomp_action.clone(),
                    disk_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            let virtio_device = Arc::clone(&dev) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>;
            let migratable_device = dev as Arc<Mutex<dyn Migratable>>;

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, migratable_device));

            Ok(MetaVirtioDevice {
                virtio_device,
                iommu: disk_cfg.iommu,
                id,
                pci_segment: disk_cfg.pci_segment,
                dma_handler: None,
            })
        }
    }

    /// Create a virtio-block device for every disk in the VM config, writing
    /// back the (possibly id-augmented) disk configs.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        // Persist ids assigned during device creation.
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }

    /// Create one virtio-net device from its config: either a vhost-user
    /// backend (client or server mode) or a tap-backed device. Assigns and
    /// records a device id when the config has none.
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        if net_cfg.vhost_user {
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net_device = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.restoring,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                ) {
                    Ok(vun_device) => vun_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserNet(e));
                    }
                },
            ));

            // Fill the device tree with a
new node. In case of restore, we 2198 // know there is nothing to do, so we can simply override the 2199 // existing entry. 2200 self.device_tree 2201 .lock() 2202 .unwrap() 2203 .insert(id.clone(), device_node!(id, vhost_user_net_device)); 2204 2205 Ok(MetaVirtioDevice { 2206 virtio_device: Arc::clone(&vhost_user_net_device) 2207 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2208 iommu: net_cfg.iommu, 2209 id, 2210 pci_segment: net_cfg.pci_segment, 2211 dma_handler: None, 2212 }) 2213 } else { 2214 let virtio_net_device = if let Some(ref tap_if_name) = net_cfg.tap { 2215 Arc::new(Mutex::new( 2216 virtio_devices::Net::new( 2217 id.clone(), 2218 Some(tap_if_name), 2219 None, 2220 None, 2221 Some(net_cfg.mac), 2222 &mut net_cfg.host_mac, 2223 self.force_iommu | net_cfg.iommu, 2224 net_cfg.num_queues, 2225 net_cfg.queue_size, 2226 self.seccomp_action.clone(), 2227 net_cfg.rate_limiter_config, 2228 self.exit_evt 2229 .try_clone() 2230 .map_err(DeviceManagerError::EventFd)?, 2231 ) 2232 .map_err(DeviceManagerError::CreateVirtioNet)?, 2233 )) 2234 } else if let Some(fds) = &net_cfg.fds { 2235 Arc::new(Mutex::new( 2236 virtio_devices::Net::from_tap_fds( 2237 id.clone(), 2238 fds, 2239 Some(net_cfg.mac), 2240 self.force_iommu | net_cfg.iommu, 2241 net_cfg.queue_size, 2242 self.seccomp_action.clone(), 2243 net_cfg.rate_limiter_config, 2244 self.exit_evt 2245 .try_clone() 2246 .map_err(DeviceManagerError::EventFd)?, 2247 ) 2248 .map_err(DeviceManagerError::CreateVirtioNet)?, 2249 )) 2250 } else { 2251 Arc::new(Mutex::new( 2252 virtio_devices::Net::new( 2253 id.clone(), 2254 None, 2255 Some(net_cfg.ip), 2256 Some(net_cfg.mask), 2257 Some(net_cfg.mac), 2258 &mut net_cfg.host_mac, 2259 self.force_iommu | net_cfg.iommu, 2260 net_cfg.num_queues, 2261 net_cfg.queue_size, 2262 self.seccomp_action.clone(), 2263 net_cfg.rate_limiter_config, 2264 self.exit_evt 2265 .try_clone() 2266 .map_err(DeviceManagerError::EventFd)?, 2267 ) 2268 .map_err(DeviceManagerError::CreateVirtioNet)?, 
2269 )) 2270 }; 2271 2272 // Fill the device tree with a new node. In case of restore, we 2273 // know there is nothing to do, so we can simply override the 2274 // existing entry. 2275 self.device_tree 2276 .lock() 2277 .unwrap() 2278 .insert(id.clone(), device_node!(id, virtio_net_device)); 2279 2280 Ok(MetaVirtioDevice { 2281 virtio_device: Arc::clone(&virtio_net_device) 2282 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2283 iommu: net_cfg.iommu, 2284 id, 2285 pci_segment: net_cfg.pci_segment, 2286 dma_handler: None, 2287 }) 2288 } 2289 } 2290 2291 /// Add virto-net and vhost-user-net devices 2292 fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2293 let mut devices = Vec::new(); 2294 let mut net_devices = self.config.lock().unwrap().net.clone(); 2295 if let Some(net_list_cfg) = &mut net_devices { 2296 for net_cfg in net_list_cfg.iter_mut() { 2297 devices.push(self.make_virtio_net_device(net_cfg)?); 2298 } 2299 } 2300 self.config.lock().unwrap().net = net_devices; 2301 2302 Ok(devices) 2303 } 2304 2305 fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2306 let mut devices = Vec::new(); 2307 2308 // Add virtio-rng if required 2309 let rng_config = self.config.lock().unwrap().rng.clone(); 2310 if let Some(rng_path) = rng_config.src.to_str() { 2311 info!("Creating virtio-rng device: {:?}", rng_config); 2312 let id = String::from(RNG_DEVICE_NAME); 2313 2314 let virtio_rng_device = Arc::new(Mutex::new( 2315 virtio_devices::Rng::new( 2316 id.clone(), 2317 rng_path, 2318 self.force_iommu | rng_config.iommu, 2319 self.seccomp_action.clone(), 2320 self.exit_evt 2321 .try_clone() 2322 .map_err(DeviceManagerError::EventFd)?, 2323 ) 2324 .map_err(DeviceManagerError::CreateVirtioRng)?, 2325 )); 2326 devices.push(MetaVirtioDevice { 2327 virtio_device: Arc::clone(&virtio_rng_device) 2328 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2329 iommu: rng_config.iommu, 2330 id: id.clone(), 2331 
                    // virtio-rng always lives on the default PCI segment.
                    pci_segment: 0,
                    dma_handler: None,
                });

                // Fill the device tree with a new node. In case of restore, we
                // know there is nothing to do, so we can simply override the
                // existing entry.
                self.device_tree
                    .lock()
                    .unwrap()
                    .insert(id.clone(), device_node!(id, virtio_rng_device));
            }

            Ok(devices)
        }

        /// Create a vhost-user virtio-fs device. On restore, the previously
        /// allocated DAX cache window (if any) is recovered from the device
        /// tree resources so the same guest address range is reused.
        fn make_virtio_fs_device(
            &mut self,
            fs_cfg: &mut FsConfig,
        ) -> DeviceManagerResult<MetaVirtioDevice> {
            // Reuse the configured id, or generate one and write it back.
            let id = if let Some(id) = &fs_cfg.id {
                id.clone()
            } else {
                let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
                fs_cfg.id = Some(id.clone());
                id
            };

            info!("Creating virtio-fs device: {:?}", fs_cfg);

            let mut node = device_node!(id);

            // Look for the id in the device tree. If it can be found, that means
            // the device is being restored, otherwise it's created from scratch.
            let cache_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
                info!("Restoring virtio-fs {} resources", id);

                // Exactly one MMIO range (the cache window) is expected.
                let mut cache_range: Option<(u64, u64)> = None;
                for resource in node.resources.iter() {
                    match resource {
                        Resource::MmioAddressRange { base, size } => {
                            if cache_range.is_some() {
                                return Err(DeviceManagerError::ResourceAlreadyExists);
                            }

                            cache_range = Some((*base, *size));
                        }
                        _ => {
                            error!("Unexpected resource {:?} for {}", resource, id);
                        }
                    }
                }

                cache_range
            } else {
                None
            };

            // DAX is not supported, we override the config by disabling the option.
            // NOTE: this makes the `if fs_cfg.dax` branch below dead code; it is
            // kept in place for when DAX support returns.
            fs_cfg.dax = false;

            if let Some(fs_socket) = fs_cfg.socket.to_str() {
                let cache = if fs_cfg.dax {
                    let (cache_base, cache_size) = if let Some((base, size)) = cache_range {
                        // The memory needs to be 2MiB aligned in order to support
                        // hugepages.
                        // Restore path: re-reserve the exact range recovered
                        // from the device tree.
                        self.pci_segments[fs_cfg.pci_segment as usize]
                            .allocator
                            .lock()
                            .unwrap()
                            .allocate(
                                Some(GuestAddress(base)),
                                size as GuestUsize,
                                Some(0x0020_0000),
                            )
                            .ok_or(DeviceManagerError::FsRangeAllocation)?;

                        (base, size)
                    } else {
                        let size = fs_cfg.cache_size;
                        // The memory needs to be 2MiB aligned in order to support
                        // hugepages.
                        let base = self.pci_segments[fs_cfg.pci_segment as usize]
                            .allocator
                            .lock()
                            .unwrap()
                            .allocate(None, size as GuestUsize, Some(0x0020_0000))
                            .ok_or(DeviceManagerError::FsRangeAllocation)?;

                        (base.raw_value(), size)
                    };

                    // Update the node with correct resource information.
                    node.resources.push(Resource::MmioAddressRange {
                        base: cache_base,
                        size: cache_size,
                    });

                    // PROT_NONE anonymous reservation: the backend maps file
                    // contents into this window later.
                    let mmap_region = MmapRegion::build(
                        None,
                        cache_size as usize,
                        libc::PROT_NONE,
                        libc::MAP_ANONYMOUS | libc::MAP_PRIVATE,
                    )
                    .map_err(DeviceManagerError::NewMmapRegion)?;
                    let host_addr: u64 = mmap_region.as_ptr() as u64;

                    let mem_slot = self
                        .memory_manager
                        .lock()
                        .unwrap()
                        .create_userspace_mapping(
                            cache_base, cache_size, host_addr, false, false, false,
                        )
                        .map_err(DeviceManagerError::MemoryManager)?;

                    let region_list = vec![VirtioSharedMemory {
                        offset: 0,
                        len: cache_size,
                    }];

                    Some((
                        VirtioSharedMemoryList {
                            host_addr,
                            mem_slot,
                            addr: GuestAddress(cache_base),
                            len: cache_size as GuestUsize,
                            region_list,
                        },
                        mmap_region,
                    ))
                } else {
                    None
                };

                let virtio_fs_device = Arc::new(Mutex::new(
                    virtio_devices::vhost_user::Fs::new(
                        id.clone(),
                        fs_socket,
                        &fs_cfg.tag,
                        fs_cfg.num_queues,
                        fs_cfg.queue_size,
                        cache,
                        self.seccomp_action.clone(),
                        self.restoring,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        self.force_iommu,
                    )
                    .map_err(DeviceManagerError::CreateVirtioFs)?,
                ));

                // Update the device tree with the migratable device.
                node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
                self.device_tree.lock().unwrap().insert(id.clone(), node);

                Ok(MetaVirtioDevice {
                    virtio_device: Arc::clone(&virtio_fs_device)
                        as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                    // The vhost-user backend performs its own DMA; not placed
                    // behind the virtual IOMMU.
                    iommu: false,
                    id,
                    pci_segment: fs_cfg.pci_segment,
                    dma_handler: None,
                })
            } else {
                Err(DeviceManagerError::NoVirtioFsSock)
            }
        }

        /// Create one virtio-fs device per `fs` entry in the VM configuration,
        /// writing back any generated device ids.
        fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
            let mut devices = Vec::new();

            let mut fs_devices = self.config.lock().unwrap().fs.clone();
            if let Some(fs_list_cfg) = &mut fs_devices {
                for fs_cfg in fs_list_cfg.iter_mut() {
                    devices.push(self.make_virtio_fs_device(fs_cfg)?);
                }
            }
            self.config.lock().unwrap().fs = fs_devices;

            Ok(devices)
        }

        /// Create a virtio-pmem device backed by a file (or an O_TMPFILE in a
        /// directory), mapping the backing file into the guest address space.
        fn make_virtio_pmem_device(
            &mut self,
            pmem_cfg: &mut PmemConfig,
        ) -> DeviceManagerResult<MetaVirtioDevice> {
            // Reuse the configured id, or generate one and write it back.
            let id = if let Some(id) = &pmem_cfg.id {
                id.clone()
            } else {
                let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
                pmem_cfg.id = Some(id.clone());
                id
            };

            info!("Creating virtio-pmem device: {:?}", pmem_cfg);

            let mut node = device_node!(id);

            // Look for the id in the device tree. If it can be found, that means
            // the device is being restored, otherwise it's created from scratch.
            let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
                info!("Restoring virtio-pmem {} resources", id);

                // Exactly one MMIO range (the pmem region) is expected.
                let mut region_range: Option<(u64, u64)> = None;
                for resource in node.resources.iter() {
                    match resource {
                        Resource::MmioAddressRange { base, size } => {
                            if region_range.is_some() {
                                return Err(DeviceManagerError::ResourceAlreadyExists);
                            }

                            region_range = Some((*base, *size));
                        }
                        _ => {
                            error!("Unexpected resource {:?} for {}", resource, id);
                        }
                    }
                }

                // A restored pmem device without its region is unrecoverable.
                if region_range.is_none() {
                    return Err(DeviceManagerError::MissingVirtioPmemResources);
                }

                region_range
            } else {
                None
            };

            // A directory backing means an anonymous O_TMPFILE is created in
            // it, which requires an explicit size.
            let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
                if pmem_cfg.size.is_none() {
                    return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
                }
                (O_TMPFILE, true)
            } else {
                (0, false)
            };

            let mut file = OpenOptions::new()
                .read(true)
                .write(!pmem_cfg.discard_writes)
                .custom_flags(custom_flags)
                .open(&pmem_cfg.file)
                .map_err(DeviceManagerError::PmemFileOpen)?;

            // Explicit size wins; otherwise the backing file's current length
            // (via seek-to-end) is used.
            let size = if let Some(size) = pmem_cfg.size {
                if set_len {
                    file.set_len(size)
                        .map_err(DeviceManagerError::PmemFileSetLen)?;
                }
                size
            } else {
                file.seek(SeekFrom::End(0))
                    .map_err(DeviceManagerError::PmemFileSetLen)?
            };

            // The region must be a multiple of 2MiB.
            if size % 0x20_0000 != 0 {
                return Err(DeviceManagerError::PmemSizeNotAligned);
            }

            let (region_base, region_size) = if let Some((base, size)) = region_range {
                // The memory needs to be 2MiB aligned in order to support
                // hugepages.
                // Restore path: re-reserve the exact range recovered from the
                // device tree.
                self.pci_segments[pmem_cfg.pci_segment as usize]
                    .allocator
                    .lock()
                    .unwrap()
                    .allocate(
                        Some(GuestAddress(base)),
                        size as GuestUsize,
                        Some(0x0020_0000),
                    )
                    .ok_or(DeviceManagerError::PmemRangeAllocation)?;

                (base, size)
            } else {
                // The memory needs to be 2MiB aligned in order to support
                // hugepages.
                let base = self.pci_segments[pmem_cfg.pci_segment as usize]
                    .allocator
                    .lock()
                    .unwrap()
                    .allocate(None, size as GuestUsize, Some(0x0020_0000))
                    .ok_or(DeviceManagerError::PmemRangeAllocation)?;

                (base.raw_value(), size)
            };

            let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
            // MAP_PRIVATE when writes must be discarded (copy-on-write),
            // MAP_SHARED otherwise so guest writes reach the backing file.
            let mmap_region = MmapRegion::build(
                Some(FileOffset::new(cloned_file, 0)),
                region_size as usize,
                PROT_READ | PROT_WRITE,
                MAP_NORESERVE
                    | if pmem_cfg.discard_writes {
                        MAP_PRIVATE
                    } else {
                        MAP_SHARED
                    },
            )
            .map_err(DeviceManagerError::NewMmapRegion)?;
            let host_addr: u64 = mmap_region.as_ptr() as u64;

            let mem_slot = self
                .memory_manager
                .lock()
                .unwrap()
                .create_userspace_mapping(
                    region_base,
                    region_size,
                    host_addr,
                    pmem_cfg.mergeable,
                    false,
                    false,
                )
                .map_err(DeviceManagerError::MemoryManager)?;

            let mapping = virtio_devices::UserspaceMapping {
                host_addr,
                mem_slot,
                addr: GuestAddress(region_base),
                len: region_size,
                mergeable: pmem_cfg.mergeable,
            };

            let virtio_pmem_device = Arc::new(Mutex::new(
                virtio_devices::Pmem::new(
                    id.clone(),
                    file,
                    GuestAddress(region_base),
                    mapping,
                    mmap_region,
                    self.force_iommu | pmem_cfg.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                )
                .map_err(DeviceManagerError::CreateVirtioPmem)?,
            ));

            // Update the device tree with correct resource information and with
            // the migratable device.
            node.resources.push(Resource::MmioAddressRange {
                base: region_base,
                size: region_size,
            });
            node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_pmem_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: pmem_cfg.iommu,
                id,
                pci_segment: pmem_cfg.pci_segment,
                dma_handler: None,
            })
        }

        /// Create one virtio-pmem device per `pmem` entry in the VM
        /// configuration, writing back any generated device ids.
        fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
            let mut devices = Vec::new();
            // Add virtio-pmem if required
            let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
            if let Some(pmem_list_cfg) = &mut pmem_devices {
                for pmem_cfg in pmem_list_cfg.iter_mut() {
                    devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
                }
            }
            self.config.lock().unwrap().pmem = pmem_devices;

            Ok(devices)
        }

        /// Create a virtio-vsock device backed by a Unix-domain socket.
        fn make_virtio_vsock_device(
            &mut self,
            vsock_cfg: &mut VsockConfig,
        ) -> DeviceManagerResult<MetaVirtioDevice> {
            // Reuse the configured id, or generate one and write it back.
            let id = if let Some(id) = &vsock_cfg.id {
                id.clone()
            } else {
                let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
                vsock_cfg.id = Some(id.clone());
                id
            };

            info!("Creating virtio-vsock device: {:?}", vsock_cfg);

            let socket_path = vsock_cfg
                .socket
                .to_str()
                .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
            let backend =
                virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                    .map_err(DeviceManagerError::CreateVsockBackend)?;

            let vsock_device = Arc::new(Mutex::new(
                virtio_devices::Vsock::new(
                    id.clone(),
                    vsock_cfg.cid,
                    vsock_cfg.socket.clone(),
                    backend,
                    self.force_iommu | vsock_cfg.iommu,
self.seccomp_action.clone(), 2736 self.exit_evt 2737 .try_clone() 2738 .map_err(DeviceManagerError::EventFd)?, 2739 ) 2740 .map_err(DeviceManagerError::CreateVirtioVsock)?, 2741 )); 2742 2743 // Fill the device tree with a new node. In case of restore, we 2744 // know there is nothing to do, so we can simply override the 2745 // existing entry. 2746 self.device_tree 2747 .lock() 2748 .unwrap() 2749 .insert(id.clone(), device_node!(id, vsock_device)); 2750 2751 Ok(MetaVirtioDevice { 2752 virtio_device: Arc::clone(&vsock_device) 2753 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2754 iommu: vsock_cfg.iommu, 2755 id, 2756 pci_segment: vsock_cfg.pci_segment, 2757 dma_handler: None, 2758 }) 2759 } 2760 2761 fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2762 let mut devices = Vec::new(); 2763 2764 let mut vsock = self.config.lock().unwrap().vsock.clone(); 2765 if let Some(ref mut vsock_cfg) = &mut vsock { 2766 devices.push(self.make_virtio_vsock_device(vsock_cfg)?); 2767 } 2768 self.config.lock().unwrap().vsock = vsock; 2769 2770 Ok(devices) 2771 } 2772 2773 fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2774 let mut devices = Vec::new(); 2775 2776 let mm = self.memory_manager.clone(); 2777 let mm = mm.lock().unwrap(); 2778 for (memory_zone_id, memory_zone) in mm.memory_zones().iter() { 2779 if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() { 2780 info!("Creating virtio-mem device: id = {}", memory_zone_id); 2781 2782 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id) 2783 .map(|i| i as u16); 2784 2785 let virtio_mem_device = Arc::new(Mutex::new( 2786 virtio_devices::Mem::new( 2787 memory_zone_id.clone(), 2788 virtio_mem_zone.region(), 2789 virtio_mem_zone 2790 .resize_handler() 2791 .new_resize_sender() 2792 .map_err(DeviceManagerError::CreateResizeSender)?, 2793 self.seccomp_action.clone(), 2794 node_id, 2795 virtio_mem_zone.hotplugged_size(), 
2796 virtio_mem_zone.hugepages(), 2797 self.exit_evt 2798 .try_clone() 2799 .map_err(DeviceManagerError::EventFd)?, 2800 virtio_mem_zone.blocks_state().clone(), 2801 ) 2802 .map_err(DeviceManagerError::CreateVirtioMem)?, 2803 )); 2804 2805 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device)); 2806 2807 devices.push(MetaVirtioDevice { 2808 virtio_device: Arc::clone(&virtio_mem_device) 2809 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2810 iommu: false, 2811 id: memory_zone_id.clone(), 2812 pci_segment: 0, 2813 dma_handler: None, 2814 }); 2815 2816 // Fill the device tree with a new node. In case of restore, we 2817 // know there is nothing to do, so we can simply override the 2818 // existing entry. 2819 self.device_tree.lock().unwrap().insert( 2820 memory_zone_id.clone(), 2821 device_node!(memory_zone_id, virtio_mem_device), 2822 ); 2823 } 2824 } 2825 2826 Ok(devices) 2827 } 2828 2829 fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2830 let mut devices = Vec::new(); 2831 2832 if let Some(balloon_config) = &self.config.lock().unwrap().balloon { 2833 let id = String::from(BALLOON_DEVICE_NAME); 2834 info!("Creating virtio-balloon device: id = {}", id); 2835 2836 let virtio_balloon_device = Arc::new(Mutex::new( 2837 virtio_devices::Balloon::new( 2838 id.clone(), 2839 balloon_config.size, 2840 balloon_config.deflate_on_oom, 2841 balloon_config.free_page_reporting, 2842 self.seccomp_action.clone(), 2843 self.exit_evt 2844 .try_clone() 2845 .map_err(DeviceManagerError::EventFd)?, 2846 ) 2847 .map_err(DeviceManagerError::CreateVirtioBalloon)?, 2848 )); 2849 2850 self.balloon = Some(virtio_balloon_device.clone()); 2851 2852 devices.push(MetaVirtioDevice { 2853 virtio_device: Arc::clone(&virtio_balloon_device) 2854 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2855 iommu: false, 2856 id: id.clone(), 2857 pci_segment: 0, 2858 dma_handler: None, 2859 }); 2860 2861 self.device_tree 2862 .lock() 2863 .unwrap() 2864 
.insert(id.clone(), device_node!(id, virtio_balloon_device)); 2865 } 2866 2867 Ok(devices) 2868 } 2869 2870 fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2871 let mut devices = Vec::new(); 2872 2873 if !self.config.lock().unwrap().watchdog { 2874 return Ok(devices); 2875 } 2876 2877 let id = String::from(WATCHDOG_DEVICE_NAME); 2878 info!("Creating virtio-watchdog device: id = {}", id); 2879 2880 let virtio_watchdog_device = Arc::new(Mutex::new( 2881 virtio_devices::Watchdog::new( 2882 id.clone(), 2883 self.reset_evt.try_clone().unwrap(), 2884 self.seccomp_action.clone(), 2885 self.exit_evt 2886 .try_clone() 2887 .map_err(DeviceManagerError::EventFd)?, 2888 ) 2889 .map_err(DeviceManagerError::CreateVirtioWatchdog)?, 2890 )); 2891 devices.push(MetaVirtioDevice { 2892 virtio_device: Arc::clone(&virtio_watchdog_device) 2893 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2894 iommu: false, 2895 id: id.clone(), 2896 pci_segment: 0, 2897 dma_handler: None, 2898 }); 2899 2900 self.device_tree 2901 .lock() 2902 .unwrap() 2903 .insert(id.clone(), device_node!(id, virtio_watchdog_device)); 2904 2905 Ok(devices) 2906 } 2907 2908 fn make_vdpa_device( 2909 &mut self, 2910 vdpa_cfg: &mut VdpaConfig, 2911 ) -> DeviceManagerResult<MetaVirtioDevice> { 2912 let id = if let Some(id) = &vdpa_cfg.id { 2913 id.clone() 2914 } else { 2915 let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?; 2916 vdpa_cfg.id = Some(id.clone()); 2917 id 2918 }; 2919 2920 info!("Creating vDPA device: {:?}", vdpa_cfg); 2921 2922 let device_path = vdpa_cfg 2923 .path 2924 .to_str() 2925 .ok_or(DeviceManagerError::CreateVdpaConvertPath)?; 2926 2927 let vdpa_device = Arc::new(Mutex::new( 2928 virtio_devices::Vdpa::new( 2929 id.clone(), 2930 device_path, 2931 self.memory_manager.lock().unwrap().guest_memory(), 2932 vdpa_cfg.num_queues as u16, 2933 ) 2934 .map_err(DeviceManagerError::CreateVdpa)?, 2935 )); 2936 2937 // Create the DMA handler that is required by 
            // the vDPA device
            let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
                Arc::clone(&vdpa_device),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
            ));

            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id));

            Ok(MetaVirtioDevice {
                virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: vdpa_cfg.iommu,
                id,
                pci_segment: vdpa_cfg.pci_segment,
                dma_handler: Some(vdpa_mapping),
            })
        }

        /// Create one vDPA device per `vdpa` entry in the VM configuration,
        /// writing back any generated device ids.
        fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
            let mut devices = Vec::new();
            // Add vdpa if required
            let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
            if let Some(vdpa_list_cfg) = &mut vdpa_devices {
                for vdpa_cfg in vdpa_list_cfg.iter_mut() {
                    devices.push(self.make_vdpa_device(vdpa_cfg)?);
                }
            }
            self.config.lock().unwrap().vdpa = vdpa_devices;

            Ok(devices)
        }

        /// Generate the next unused device name for the given prefix using a
        /// wrapping counter. Fails only when every counter value is taken.
        fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
            let start_id = self.device_id_cnt;
            loop {
                // Generate the temporary name.
                let name = format!("{}{}", prefix, self.device_id_cnt);
                // Increment the counter.
                self.device_id_cnt += Wrapping(1);
                // Check if the name is already in use.
                if !self.device_tree.lock().unwrap().contains_key(&name) {
                    return Ok(name);
                }

                if self.device_id_cnt == start_id {
                    // We went through a full loop and there's nothing else we can
                    // do.
                    break;
                }
            }
            Err(DeviceManagerError::NoAvailableDeviceName)
        }

        /// Add a VFIO passthrough device, lazily creating the hypervisor
        /// passthrough device on first use.
        fn add_passthrough_device(
            &mut self,
            device_cfg: &mut DeviceConfig,
        ) -> DeviceManagerResult<(PciBdf, String)> {
            // If the passthrough device has not been created yet, it is created
            // here and stored in the DeviceManager structure for future needs.
            if self.passthrough_device.is_none() {
                self.passthrough_device = Some(
                    self.address_manager
                        .vm
                        .create_passthrough_device()
                        .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
                );
            }

            self.add_vfio_device(device_cfg)
        }

        /// Build a new VFIO container from a dup'ed fd of the hypervisor
        /// passthrough device, so the container outlives the DeviceManager.
        fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
            let passthrough_device = self
                .passthrough_device
                .as_ref()
                .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

            // Safe because we know the RawFd is valid.
            //
            // This dup() is mandatory to be able to give full ownership of the
            // file descriptor to the DeviceFd::from_raw_fd() function later in
            // the code.
            //
            // This is particularly needed so that VfioContainer will still have
            // a valid file descriptor even if DeviceManager, and therefore the
            // passthrough_device are dropped. In case of Drop, the file descriptor
            // would be closed, but Linux would still have the duplicated file
            // descriptor opened from DeviceFd, preventing from unexpected behavior
            // where the VfioContainer would try to use a closed file descriptor.
            let dup_device_fd = unsafe { libc::dup(passthrough_device.as_raw_fd()) };
            if dup_device_fd == -1 {
                return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
            }

            // SAFETY the raw fd conversion here is safe because:
            //   1. When running on KVM or MSHV, passthrough_device wraps around DeviceFd.
            //   2. The conversion here extracts the raw fd and then turns the raw fd into a DeviceFd
            //      of the same (correct) type.
            Ok(Arc::new(
                VfioContainer::new(Arc::new(unsafe { DeviceFd::from_raw_fd(dup_device_fd) }))
                    .map_err(DeviceManagerError::VfioCreate)?,
            ))
        }

        /// Attach a VFIO PCI device: pick a BDF, choose/create the VFIO
        /// container, set up DMA mappings and interrupts, and register the
        /// device on the PCI bus and in the device tree.
        fn add_vfio_device(
            &mut self,
            device_cfg: &mut DeviceConfig,
        ) -> DeviceManagerResult<(PciBdf, String)> {
            let pci_segment_id = device_cfg.pci_segment;
            let pci_device_bdf = self.pci_segments[pci_segment_id as usize].next_device_bdf()?;

            let mut needs_dma_mapping = false;

            // Here we create a new VFIO container for two reasons. Either this is
            // the first VFIO device, meaning we need a new VFIO container, which
            // will be shared with other VFIO devices. Or the new VFIO device is
            // attached to a vIOMMU, meaning we must create a dedicated VFIO
            // container. In the vIOMMU use case, we can't let all devices under
            // the same VFIO container since we couldn't map/unmap memory for each
            // device. That's simply because the map/unmap operations happen at the
            // VFIO container level.
            let vfio_container = if device_cfg.iommu {
                let vfio_container = self.create_vfio_container()?;

                let vfio_mapping = Arc::new(VfioDmaMapping::new(
                    Arc::clone(&vfio_container),
                    Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                ));

                if let Some(iommu) = &self.iommu_device {
                    iommu
                        .lock()
                        .unwrap()
                        .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
                } else {
                    return Err(DeviceManagerError::MissingVirtualIommu);
                }

                vfio_container
            } else if let Some(vfio_container) = &self.vfio_container {
                // Reuse the shared container created by a previous device.
                Arc::clone(vfio_container)
            } else {
                // First non-vIOMMU VFIO device: create and cache the shared
                // container, and remember to set up its DMA mappings below.
                let vfio_container = self.create_vfio_container()?;
                needs_dma_mapping = true;
                self.vfio_container = Some(Arc::clone(&vfio_container));

                vfio_container
            };

            let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
                .map_err(DeviceManagerError::VfioCreate)?;

            if needs_dma_mapping {
                // Register DMA mapping in IOMMU.
                // Do not register virtio-mem regions, as they are handled directly by
                // virtio-mem device itself.
3095 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3096 for region in zone.regions() { 3097 vfio_container 3098 .vfio_dma_map( 3099 region.start_addr().raw_value(), 3100 region.len() as u64, 3101 region.as_ptr() as u64, 3102 ) 3103 .map_err(DeviceManagerError::VfioDmaMap)?; 3104 } 3105 } 3106 3107 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3108 Arc::clone(&vfio_container), 3109 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3110 )); 3111 3112 for virtio_mem_device in self.virtio_mem_devices.iter() { 3113 virtio_mem_device 3114 .lock() 3115 .unwrap() 3116 .add_dma_mapping_handler( 3117 VirtioMemMappingSource::Container, 3118 vfio_mapping.clone(), 3119 ) 3120 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3121 } 3122 } 3123 3124 let legacy_interrupt_group = 3125 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3126 Some( 3127 legacy_interrupt_manager 3128 .create_group(LegacyIrqGroupConfig { 3129 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3130 [pci_device_bdf.device() as usize] 3131 as InterruptIndex, 3132 }) 3133 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3134 ) 3135 } else { 3136 None 3137 }; 3138 3139 let vfio_pci_device = VfioPciDevice::new( 3140 &self.address_manager.vm, 3141 vfio_device, 3142 vfio_container, 3143 &self.msi_interrupt_manager, 3144 legacy_interrupt_group, 3145 device_cfg.iommu, 3146 pci_device_bdf, 3147 ) 3148 .map_err(DeviceManagerError::VfioPciCreate)?; 3149 3150 let vfio_name = if let Some(id) = &device_cfg.id { 3151 if self.device_tree.lock().unwrap().contains_key(id) { 3152 return Err(DeviceManagerError::DeviceIdAlreadyInUse); 3153 } 3154 3155 id.clone() 3156 } else { 3157 let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?; 3158 device_cfg.id = Some(id.clone()); 3159 id 3160 }; 3161 3162 let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device)); 3163 3164 self.add_pci_device( 3165 vfio_pci_device.clone(), 3166 
vfio_pci_device.clone(), 3167 pci_segment_id, 3168 pci_device_bdf, 3169 )?; 3170 3171 vfio_pci_device 3172 .lock() 3173 .unwrap() 3174 .map_mmio_regions(&self.address_manager.vm, || { 3175 self.memory_manager.lock().unwrap().allocate_memory_slot() 3176 }) 3177 .map_err(DeviceManagerError::VfioMapRegion)?; 3178 3179 let mut node = device_node!(vfio_name); 3180 3181 for region in vfio_pci_device.lock().unwrap().mmio_regions() { 3182 node.resources.push(Resource::MmioAddressRange { 3183 base: region.start.0, 3184 size: region.length as u64, 3185 }); 3186 } 3187 3188 node.pci_bdf = Some(pci_device_bdf); 3189 node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device)); 3190 3191 self.device_tree 3192 .lock() 3193 .unwrap() 3194 .insert(vfio_name.clone(), node); 3195 3196 Ok((pci_device_bdf, vfio_name)) 3197 } 3198 3199 fn add_pci_device( 3200 &mut self, 3201 bus_device: Arc<Mutex<dyn BusDevice>>, 3202 pci_device: Arc<Mutex<dyn PciDevice>>, 3203 segment_id: u16, 3204 bdf: PciBdf, 3205 ) -> DeviceManagerResult<Vec<(GuestAddress, GuestUsize, PciBarRegionType)>> { 3206 let bars = pci_device 3207 .lock() 3208 .unwrap() 3209 .allocate_bars( 3210 &self.address_manager.allocator, 3211 &mut self.pci_segments[segment_id as usize] 3212 .allocator 3213 .lock() 3214 .unwrap(), 3215 ) 3216 .map_err(DeviceManagerError::AllocateBars)?; 3217 3218 let mut pci_bus = self.pci_segments[segment_id as usize] 3219 .pci_bus 3220 .lock() 3221 .unwrap(); 3222 3223 pci_bus 3224 .add_device(bdf.device() as u32, pci_device) 3225 .map_err(DeviceManagerError::AddPciDevice)?; 3226 3227 self.bus_devices.push(Arc::clone(&bus_device)); 3228 3229 pci_bus 3230 .register_mapping( 3231 bus_device, 3232 #[cfg(target_arch = "x86_64")] 3233 self.address_manager.io_bus.as_ref(), 3234 self.address_manager.mmio_bus.as_ref(), 3235 bars.clone(), 3236 ) 3237 .map_err(DeviceManagerError::AddPciDevice)?; 3238 3239 Ok(bars) 3240 } 3241 3242 fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 
        let mut iommu_attached_device_ids = Vec::new();
        // Clone the config out so add_passthrough_device() can be called while
        // iterating without holding the config lock.
        let mut devices = self.config.lock().unwrap().devices.clone();

        if let Some(device_list_cfg) = &mut devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (device_id, _) = self.add_passthrough_device(device_cfg)?;
                if device_cfg.iommu && self.iommu_device.is_some() {
                    iommu_attached_device_ids.push(device_id);
                }
            }
        }

        // Update the list of devices (ids may have been generated above).
        self.config.lock().unwrap().devices = devices;

        Ok(iommu_attached_device_ids)
    }

    /// Creates a vfio-user (out-of-process) PCI device from `device_cfg`,
    /// connecting to its socket, mapping its MMIO regions, wiring DMA mappings
    /// for current guest memory and registering it on the PCI bus and in the
    /// device tree.
    ///
    /// Returns the assigned BDF and the device id (generated if absent, and
    /// written back into `device_cfg`).
    fn add_vfio_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let pci_segment_id = device_cfg.pci_segment;
        let pci_device_bdf = self.pci_segments[pci_segment_id as usize].next_device_bdf()?;

        // INTx routing taken from the per-segment IRQ slot table, indexed by
        // device number.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let client = Arc::new(Mutex::new(
            vfio_user::Client::new(&device_cfg.socket)
                .map_err(DeviceManagerError::VfioUserCreateClient)?,
        ));

        let mut vfio_user_pci_device = VfioUserPciDevice::new(
            &self.address_manager.vm,
            client.clone(),
            &self.msi_interrupt_manager,
            legacy_interrupt_group,
            pci_device_bdf,
        )
        .map_err(DeviceManagerError::VfioUserCreate)?;

        vfio_user_pci_device
            .map_mmio_regions(&self.address_manager.vm, || {
                self.memory_manager.lock().unwrap().allocate_memory_slot()
            })
            .map_err(DeviceManagerError::VfioUserMapRegion)?;

        // Future hotplugged memory: let each virtio-mem device forward
        // map/unmap operations to this device's DMA mapping handler.
        let memory = self.memory_manager.lock().unwrap().guest_memory();
        let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Device(pci_device_bdf.into()),
                    vfio_user_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }

        // Current memory: map every existing guest region for DMA.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_user_pci_device
                    .dma_map(region)
                    .map_err(DeviceManagerError::VfioUserDmaMap)?;
            }
        }

        let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

        // Use the user-provided id if unique, otherwise generate one and
        // write it back into the config.
        let vfio_user_name = if let Some(id) = &device_cfg.id {
            if self.device_tree.lock().unwrap().contains_key(id) {
                return Err(DeviceManagerError::DeviceIdAlreadyInUse);
            }

            id.clone()
        } else {
            let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        self.add_pci_device(
            vfio_user_pci_device.clone(),
            vfio_user_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
        )?;

        let mut node = device_node!(vfio_user_name);

        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_user_name.clone(), node);

        Ok((pci_device_bdf, vfio_user_name))
    }

    /// Creates all vfio-user devices listed in the VM configuration. Always
    /// returns an empty list (vfio-user devices are never IOMMU-attached
    /// here, unlike add_vfio_devices()).
    fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut user_devices = self.config.lock().unwrap().user_devices.clone();

        if let Some(device_list_cfg) = &mut user_devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
            }
        }

        // Update the list of devices (ids may have been generated above).
        self.config.lock().unwrap().user_devices =
user_devices;

        Ok(vec![])
    }

    /// Wraps a virtio device into a virtio-pci transport device and plugs it
    /// into the given PCI segment.
    ///
    /// Handles both the cold/hot-add path (allocating a fresh BDF) and the
    /// restore path (reusing the BDF and config BAR address recorded in the
    /// device tree). Also wires IOMMU address translation or direct DMA
    /// mappings depending on `iommu_mapping`/`dma_handler`.
    ///
    /// Returns the BDF the device ended up on.
    fn add_virtio_pci_device(
        &mut self,
        virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu_mapping: &Option<Arc<IommuMapping>>,
        virtio_device_id: String,
        pci_segment_id: u16,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> DeviceManagerResult<PciBdf> {
        let id = format!("{}-{}", VIRTIO_PCI_DEVICE_NAME_PREFIX, virtio_device_id);

        // Add the new virtio-pci node to the device tree.
        let mut node = device_node!(id);
        node.children = vec![virtio_device_id.clone()];

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        let (pci_segment_id, pci_device_bdf, config_bar_addr) = if let Some(node) =
            self.device_tree.lock().unwrap().get(&id)
        {
            info!("Restoring virtio-pci {} resources", id);
            let pci_device_bdf: PciBdf = node
                .pci_bdf
                .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
            let pci_segment_id = pci_device_bdf.segment();

            // Reserve the same device slot on the bus as before the snapshot.
            self.pci_segments[pci_segment_id as usize]
                .pci_bus
                .lock()
                .unwrap()
                .get_device_id(pci_device_bdf.device() as usize)
                .map_err(DeviceManagerError::GetPciDeviceId)?;

            if node.resources.is_empty() {
                return Err(DeviceManagerError::MissingVirtioPciResources);
            }

            // We know the configuration BAR address is stored on the first
            // resource in the list.
            let config_bar_addr = match node.resources[0] {
                Resource::MmioAddressRange { base, .. } => Some(base),
                _ => {
                    error!("Unexpected resource {:?} for {}", node.resources[0], id);
                    return Err(DeviceManagerError::MissingVirtioPciResources);
                }
            };

            (pci_segment_id, pci_device_bdf, config_bar_addr)
        } else {
            let pci_device_bdf = self.pci_segments[pci_segment_id as usize].next_device_bdf()?;

            (pci_segment_id, pci_device_bdf, None)
        };

        // Update the existing virtio node by setting the parent.
        if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
            node.parent = Some(id.clone());
        } else {
            return Err(DeviceManagerError::MissingNode);
        }

        // Allows support for one MSI-X vector per queue. It also adds 1
        // as we need to take into account the dedicated vector to notify
        // about a virtio config change.
        let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;

        // Create the AccessPlatform trait from the implementation IommuMapping.
        // This will provide address translation for any virtio device sitting
        // behind a vIOMMU.
        let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
        {
            Some(Arc::new(AccessPlatformMapping::new(
                pci_device_bdf.into(),
                mapping.clone(),
            )))
        } else {
            None
        };

        let memory = self.memory_manager.lock().unwrap().guest_memory();

        // Map DMA ranges if a DMA handler is available and if the device is
        // not attached to a virtual IOMMU.
        if let Some(dma_handler) = &dma_handler {
            if iommu_mapping.is_some() {
                if let Some(iommu) = &self.iommu_device {
                    iommu
                        .lock()
                        .unwrap()
                        .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
                } else {
                    return Err(DeviceManagerError::MissingVirtualIommu);
                }
            } else {
                // Let every virtio-mem device handle the DMA map/unmap through the
                // DMA handler provided.
                for virtio_mem_device in self.virtio_mem_devices.iter() {
                    virtio_mem_device
                        .lock()
                        .unwrap()
                        .add_dma_mapping_handler(
                            VirtioMemMappingSource::Device(pci_device_bdf.into()),
                            dma_handler.clone(),
                        )
                        .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
                }

                // Do not register virtio-mem regions, as they are handled directly by
                // virtio-mem devices.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        // 1:1 identity mapping: IOVA == GPA.
                        let gpa = region.start_addr().0;
                        let size = region.len();
                        dma_handler
                            .map(gpa, gpa, size)
                            .map_err(DeviceManagerError::VirtioDmaMap)?;
                    }
                }
            }
        }

        let device_type = virtio_device.lock().unwrap().device_type();
        let mut virtio_pci_device = VirtioPciDevice::new(
            id.clone(),
            memory,
            virtio_device,
            msix_num,
            access_platform,
            &self.msi_interrupt_manager,
            pci_device_bdf.into(),
            self.activate_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            // All device types *except* virtio block devices should be allocated a 64-bit bar
            // The block devices should be given a 32-bit BAR so that they are easily accessible
            // to firmware without requiring excessive identity mapping.
            // The exception being if not on the default PCI segment.
            pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
            dma_handler,
        )
        .map_err(DeviceManagerError::VirtioDevice)?;

        // This is important as this will set the BAR address if it exists,
        // which is mandatory on the restore path.
        if let Some(addr) = config_bar_addr {
            virtio_pci_device.set_config_bar_addr(addr);
        }

        let virtio_pci_device = Arc::new(Mutex::new(virtio_pci_device));
        let bars = self.add_pci_device(
            virtio_pci_device.clone(),
            virtio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
        )?;

        // Register one ioeventfd per queue notification address so guest
        // kicks bypass the VMM exit path.
        let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
        for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
            let io_addr = IoEventAddress::Mmio(addr);
            self.address_manager
                .vm
                .register_ioevent(event, &io_addr, None)
                .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
        }

        // Update the device tree with correct resource information.
        for pci_bar in bars.iter() {
            node.resources.push(Resource::MmioAddressRange {
                base: pci_bar.0.raw_value(),
                size: pci_bar.1 as u64,
            });
        }
        node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
        self.device_tree.lock().unwrap().insert(id, node);

        Ok(pci_device_bdf)
    }

    /// Port I/O bus (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }

    /// MMIO bus shared by all devices.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }

    /// System-wide resource allocator.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }

    /// Interrupt controller, if one has been created.
    pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
        self.interrupt_controller
            .as_ref()
            .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
    }

    /// PCI config I/O port handler of segment 0.
    /// Panics if segment 0 has no PciConfigIo (always set up on x86_64).
    #[cfg(target_arch = "x86_64")]
    // Used to provide a fast path for handling PIO exits
    pub fn pci_config_io(&self) -> Arc<Mutex<PciConfigIo>> {
        Arc::clone(self.pci_segments[0].pci_config_io.as_ref().unwrap())
    }

    /// All PCI segments managed by this DeviceManager.
    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }

    /// The VM console abstraction.
    pub fn console(&self) -> &Arc<Console> {
        &self.console
    }

    /// Extra kernel command-line parameters accumulated while creating devices.
    pub fn cmdline_additions(&self) -> &[String] {
        self.cmdline_additions.as_slice()
    }

    /// Propagates a newly hotplugged guest memory region to every device that
    /// needs to know about it: virtio devices, their DMA handlers, the VFIO
    /// container and all vfio-user devices.
    pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
        for handle in self.virtio_devices.iter() {
            handle
                .virtio_device
                .lock()
                .unwrap()
                .add_memory_region(new_region)
                .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;

            // IOMMU-attached devices get their mappings through the vIOMMU
            // instead, so only map here when not behind the IOMMU.
            if let Some(dma_handler) = &handle.dma_handler {
                if !handle.iommu {
                    // 1:1 identity mapping: IOVA == GPA.
                    let gpa = new_region.start_addr().0;
                    let size = new_region.len();
                    dma_handler
                        .map(gpa, gpa, size)
                        .map_err(DeviceManagerError::VirtioDmaMap)?;
                }
            }
        }

        // Take care of updating the memory for VFIO PCI devices.
        if let Some(vfio_container) = &self.vfio_container {
            vfio_container
                .vfio_dma_map(
                    new_region.start_addr().raw_value(),
                    new_region.len() as u64,
                    new_region.as_ptr() as u64,
                )
                .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
        }

        // Take care of updating the memory for vfio-user devices.
        {
            let device_tree = self.device_tree.lock().unwrap();
            for pci_device_node in device_tree.pci_devices() {
                if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
                    .pci_device_handle
                    .as_ref()
                    .ok_or(DeviceManagerError::MissingPciDevice)?
                {
                    vfio_user_pci_device
                        .lock()
                        .unwrap()
                        .dma_map(new_region)
                        .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
                }
            }
        }

        Ok(())
    }

    /// Activates every virtio-pci device that has a pending activation.
    pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
        // Find virtio pci devices and activate any pending ones
        let device_tree = self.device_tree.lock().unwrap();
        for pci_device_node in device_tree.pci_devices() {
            #[allow(irrefutable_let_patterns)]
            if let PciDeviceHandle::Virtio(virtio_pci_device) = &pci_device_node
                .pci_device_handle
                .as_ref()
                .ok_or(DeviceManagerError::MissingPciDevice)?
            {
                virtio_pci_device.lock().unwrap().maybe_activate();
            }
        }
        Ok(())
    }

    /// Sends an ACPI GED notification to the guest for a hotplug event.
    /// Panics if no GED notification device was created.
    pub fn notify_hotplug(
        &self,
        _notification_type: AcpiNotificationFlags,
    ) -> DeviceManagerResult<()> {
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(_notification_type)
            .map_err(DeviceManagerError::HotPlugNotification);
    }

    /// Hotplugs a VFIO passthrough device and marks it pending-insertion in
    /// the PCIU bitmap so ACPI can report it to the guest.
    pub fn add_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;

        // Update the PCIU bitmap
        self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo {
            id: device_name,
            bdf,
        })
    }

    /// Hotplugs a vfio-user device and marks it pending-insertion in the
    /// PCIU bitmap.
    pub fn add_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;

        // Update the PCIU bitmap
        self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo
        {
            id: device_name,
            bdf,
        })
    }

    /// Requests hot-removal of the device identified by `id`: validates that
    /// the device type supports removal and flags it in the PCID bitmap so
    /// the guest performs the actual ejection (see eject_device()).
    pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
        // The node can be directly a PCI node in case the 'id' refers to a
        // VFIO device or a virtio-pci one.
        // In case the 'id' refers to a virtio device, we must find the PCI
        // node by looking at the parent.
        let device_tree = self.device_tree.lock().unwrap();
        let node = device_tree
            .get(&id)
            .ok_or(DeviceManagerError::UnknownDeviceId(id))?;

        let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
            node
        } else {
            let parent = node
                .parent
                .as_ref()
                .ok_or(DeviceManagerError::MissingNode)?;
            device_tree
                .get(parent)
                .ok_or(DeviceManagerError::MissingNode)?
        };

        let pci_device_bdf: PciBdf = pci_device_node
            .pci_bdf
            .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
        let pci_segment_id = pci_device_bdf.segment();

        let pci_device_handle = pci_device_node
            .pci_device_handle
            .as_ref()
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        // Only a subset of virtio device types may be hot-removed.
        #[allow(irrefutable_let_patterns)]
        if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
            let device_type = VirtioDeviceType::from(
                virtio_pci_device
                    .lock()
                    .unwrap()
                    .virtio_device()
                    .lock()
                    .unwrap()
                    .device_type(),
            );
            match device_type {
                VirtioDeviceType::Net
                | VirtioDeviceType::Block
                | VirtioDeviceType::Pmem
                | VirtioDeviceType::Fs
                | VirtioDeviceType::Vsock => {}
                _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
            }
        }

        // Update the PCID bitmap
        self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();

        Ok(())
    }

    /// Completes hot-removal after the guest has ejected the device: tears
    /// down DMA mappings and ioeventfds, frees BARs, detaches the device from
    /// every bus and drops it from the device tree.
    pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
        info!(
            "Ejecting device_id = {} on segment_id={}",
            device_id, pci_segment_id
        );

        // Convert the device ID into the corresponding b/d/f.
        let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);

        // Give the PCI device ID back to the PCI bus.
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .put_device_id(device_id as usize)
            .map_err(DeviceManagerError::PutPciDeviceId)?;

        // Remove the device from the device tree along with its children.
        let mut device_tree = self.device_tree.lock().unwrap();
        let pci_device_node = device_tree
            .remove_node_by_pci_bdf(pci_device_bdf)
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        for child in pci_device_node.children.iter() {
            device_tree.remove(child);
        }

        let mut iommu_attached = false;
        if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
            if iommu_attached_devices.contains(&pci_device_bdf) {
                iommu_attached = true;
            }
        }

        let pci_device_handle = pci_device_node
            .pci_device_handle
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        // Per-transport teardown: produces the trait-object views of the
        // device plus whether its virtio-mem DMA handler must be removed.
        let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
            // No need to remove any virtio-mem mapping here as the container outlives all devices
            PciDeviceHandle::Vfio(vfio_pci_device) => (
                Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                false,
            ),
            PciDeviceHandle::Virtio(virtio_pci_device) => {
                let dev = virtio_pci_device.lock().unwrap();
                let bar_addr = dev.config_bar_addr();
                // Unregister the per-queue ioeventfds set up at creation.
                for (event, addr) in dev.ioeventfds(bar_addr) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.address_manager
                        .vm
                        .unregister_ioevent(event, &io_addr)
                        .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
                }

                // Unmap the identity DMA mappings created for non-IOMMU
                // devices with an external DMA handler.
                if let Some(dma_handler) = dev.dma_handler() {
                    if !iommu_attached {
                        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                            for region in zone.regions() {
                                let iova = region.start_addr().0;
                                let size = region.len();
                                dma_handler
                                    .unmap(iova, size)
                                    .map_err(DeviceManagerError::VirtioDmaUnmap)?;
                            }
                        }
                    }
                }

                (
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    Some(dev.virtio_device()),
                    dev.dma_handler().is_some() && !iommu_attached,
                )
            }
            PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
                let mut dev = vfio_user_pci_device.lock().unwrap();
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        dev.dma_unmap(region)
                            .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
                    }
                }

                (
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    true,
                )
            }
        };

        if remove_dma_handler {
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
                        pci_device_bdf.into(),
                    ))
                    .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
            }
        }

        // Free the allocated BARs
        pci_device
            .lock()
            .unwrap()
            .free_bars(
                &mut self.address_manager.allocator.lock().unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .allocator
                    .lock()
                    .unwrap(),
            )
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        // At this point, the device has been removed from all the list and
        // buses where it was stored. At the end of this function, after
        // any_device, bus_device and pci_device are released, the actual
        // device will be dropped.
        Ok(())
    }

    /// Common hotplug path for virtio devices: registers the device handle,
    /// attaches it to the vIOMMU mapping if requested, plugs it onto the PCI
    /// bus and flags it pending-insertion in the PCIU bitmap.
    fn hotplug_virtio_pci_device(
        &mut self,
        handle: MetaVirtioDevice,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        // Add the virtio device to the device manager list. This is important
        // as the list is used to notify virtio devices about memory updates
        // for instance.
        self.virtio_devices.push(handle.clone());

        let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
            self.iommu_mapping.clone()
        } else {
            None
        };

        let bdf = self.add_virtio_pci_device(
            handle.virtio_device,
            &mapping,
            handle.id.clone(),
            handle.pci_segment,
            handle.dma_handler,
        )?;

        // Update the PCIU bitmap
        self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo { id: handle.id, bdf })
    }

    /// Returns true when `pci_segment_id` is listed in the platform config's
    /// `iommu_segments`; false when unset.
    fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
        self.config
            .lock()
            .as_ref()
            .unwrap()
            .platform
            .as_ref()
            .map(|pc| {
                pc.iommu_segments
                    .as_ref()
                    .map(|v| v.contains(&pci_segment_id))
                    .unwrap_or_default()
            })
            .unwrap_or_default()
    }

    /// Hotplugs a virtio-block device.
    pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
        if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_block_device(disk_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-fs device.
    pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
        let device = self.make_virtio_fs_device(fs_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-pmem device.
    pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
        if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_pmem_device(pmem_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-net device.
    pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
        if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_net_device(net_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a vDPA device.
    pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
        if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_vdpa_device(vdpa_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-vsock device.
    pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
        if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_vsock_device(vsock_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Collects per-device statistics counters, keyed by device id. Devices
    /// without counters are omitted.
    pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
        let mut counters = HashMap::new();

        for handle in &self.virtio_devices {
            let virtio_device = handle.virtio_device.lock().unwrap();
            if let Some(device_counters) = virtio_device.counters() {
                counters.insert(handle.id.clone(), device_counters.clone());
            }
        }

        counters
    }

    /// Resizes the virtio-balloon to `size` bytes; errors if no balloon
    /// device was configured.
    pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
        if let Some(balloon) = &self.balloon {
            return balloon
                .lock()
                .unwrap()
                .resize(size)
                .map_err(DeviceManagerError::VirtioBalloonResize);
        }

        warn!("No balloon setup: Can't resize the balloon");
        Err(DeviceManagerError::MissingVirtioBalloon)
    }

    /// Current balloon size in bytes, or 0 when no balloon is configured.
    pub fn balloon_size(&self) -> u64 {
        if let Some(balloon) = &self.balloon {
            return balloon.lock().unwrap().get_actual();
        }

        0
    }

    /// Shared handle to the device tree.
    pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
        self.device_tree.clone()
    }

    /// Restores every migratable device from `snapshot`, walking the device
    /// tree so children are restored before their parents.
    pub fn restore_devices(
        &mut self,
        snapshot: Snapshot,
    ) -> std::result::Result<(), MigratableError> {
        // Finally, restore all devices associated with the DeviceManager.
        // It's important to restore devices in the right order, that's why
        // the device tree is the right way to ensure we restore a child before
        // its parent node.
        for node in self
            .device_tree
            .lock()
            .unwrap()
            .breadth_first_traversal()
            .rev()
        {
            // Restore the node
            if let Some(migratable) = &node.migratable {
                info!("Restoring {} from DeviceManager", node.id);
                if let Some(snapshot) = snapshot.snapshots.get(&node.id) {
                    // Devices are paused before their state is restored.
                    migratable.lock().unwrap().pause()?;
                    migratable.lock().unwrap().restore(*snapshot.clone())?;
                } else {
                    return Err(MigratableError::Restore(anyhow!(
                        "Missing device {}",
                        node.id
                    )));
                }
            }
        }

        // The devices have been fully restored, we can now update the
        // restoring state of the DeviceManager.
        self.restoring = false;

        Ok(())
    }

    /// Signals a power-button press to the guest via the ACPI GED device.
    /// Panics if no GED notification device was created.
    #[cfg(target_arch = "x86_64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    /// Signals a power-button press to the guest, covering both device-tree
    /// (GPIO) and ACPI (GED) boot flows.
    /// Panics if the GPIO or GED device was not created.
    #[cfg(target_arch = "aarch64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // There are two use cases:
        // 1. Users will use direct kernel boot with device tree.
        // 2. Users will use ACPI+UEFI boot.

        // Trigger a GPIO pin 3 event to satisfy use case 1.
        self.gpio_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .trigger_key(3)
            .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
        // Trigger a GED power button event to satisfy use case 2.
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification);
    }

    /// The vIOMMU BDF and the BDFs of devices attached to it, if any.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }
}

/// Finds the NUMA node that owns the given memory zone id, if any.
fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
            return Some(*numa_node_id);
        }
    }

    None
}

impl Aml for DeviceManager {
    /// Appends the DeviceManager's ACPI AML objects (PCI hotplug controller,
    /// per-segment devices, motherboard resources, serial port, sleep state
    /// and power button) to `bytes`.
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // One PCNT (rescan) method call per PCI segment, invoked from PSCN.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PCI{:X}.PCNT", i).as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCachable::NotCacheable,
                        true,
                        self.acpi_address.0 as u64,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    self.acpi_address.0 as usize,
                    DEVICE_MANAGER_ACPI_SIZE,
                ),
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write PCI bus number (in first argument) to I/O port via field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .append_aml_bytes(bytes);

        // Per-segment PCI host bridge AML.
        for segment in &self.pci_segments {
            segment.append_aml_bytes(bytes);
        }

        // Motherboard device claiming the PCI MMIO config windows.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .append_aml_bytes(bytes);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EisaName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::Io::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .append_aml_bytes(bytes);
        }

        // S5 (soft-off) sleep state.
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).append_aml_bytes(bytes);

        // ACPI power button device.
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .append_aml_bytes(bytes);

        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .append_aml_bytes(bytes);
    }
}

impl Pausable for DeviceManager {
    /// Pauses every migratable device recorded in the device tree.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            let gic_device = Arc::clone(
                self.get_interrupt_controller()
                    .unwrap()
                    .lock()
                    .unwrap()
                    .get_gic_device()
                    .unwrap(),
            );
            if let Some(gicv3_its) = gic_device
                .lock()
                .unwrap()
                .as_any_concrete_mut()
                .downcast_mut::<KvmGicV3Its>()
            {
                gicv3_its.pause()?;
            } else {
                return Err(MigratableError::Pause(anyhow!(
                    "GicDevice downcast to KvmGicV3Its failed when pausing device manager!"
                )));
            };
        };

        Ok(())
    }

    /// Resumes every migratable device recorded in the device tree.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    /// Aggregates the snapshots of all migratable devices plus the
    /// DeviceManager's own state into one snapshot.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::new(DEVICE_MANAGER_SNAPSHOT_ID);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let device_snapshot = migratable.lock().unwrap().snapshot()?;
                snapshot.add_snapshot(device_snapshot);
            }
        }

        // Then we store the DeviceManager state.
        snapshot.add_data_section(SnapshotDataSection::new_from_state(
            DEVICE_MANAGER_SNAPSHOT_ID,
            &self.state(),
        )?);

        Ok(snapshot)
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        // Let's first restore the DeviceManager.
4401 4402 self.set_state(&snapshot.to_state(DEVICE_MANAGER_SNAPSHOT_ID)?); 4403 4404 // Now that DeviceManager is updated with the right states, it's time 4405 // to create the devices based on the configuration. 4406 self.create_devices(None, None, None) 4407 .map_err(|e| MigratableError::Restore(anyhow!("Could not create devices {:?}", e)))?; 4408 4409 Ok(()) 4410 } 4411 } 4412 4413 impl Transportable for DeviceManager {} 4414 4415 impl Migratable for DeviceManager { 4416 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 4417 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4418 if let Some(migratable) = &device_node.migratable { 4419 migratable.lock().unwrap().start_dirty_log()?; 4420 } 4421 } 4422 Ok(()) 4423 } 4424 4425 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 4426 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4427 if let Some(migratable) = &device_node.migratable { 4428 migratable.lock().unwrap().stop_dirty_log()?; 4429 } 4430 } 4431 Ok(()) 4432 } 4433 4434 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 4435 let mut tables = Vec::new(); 4436 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4437 if let Some(migratable) = &device_node.migratable { 4438 tables.push(migratable.lock().unwrap().dirty_log()?); 4439 } 4440 } 4441 Ok(MemoryRangeTable::new_from_tables(tables)) 4442 } 4443 4444 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 4445 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4446 if let Some(migratable) = &device_node.migratable { 4447 migratable.lock().unwrap().start_migration()?; 4448 } 4449 } 4450 Ok(()) 4451 } 4452 4453 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 4454 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4455 if let Some(migratable) = &device_node.migratable { 4456 
migratable.lock().unwrap().complete_migration()?; 4457 } 4458 } 4459 Ok(()) 4460 } 4461 } 4462 4463 const PCIU_FIELD_OFFSET: u64 = 0; 4464 const PCID_FIELD_OFFSET: u64 = 4; 4465 const B0EJ_FIELD_OFFSET: u64 = 8; 4466 const PSEG_FIELD_OFFSET: u64 = 12; 4467 const PCIU_FIELD_SIZE: usize = 4; 4468 const PCID_FIELD_SIZE: usize = 4; 4469 const B0EJ_FIELD_SIZE: usize = 4; 4470 const PSEG_FIELD_SIZE: usize = 4; 4471 4472 impl BusDevice for DeviceManager { 4473 fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { 4474 match offset { 4475 PCIU_FIELD_OFFSET => { 4476 assert!(data.len() == PCIU_FIELD_SIZE); 4477 data.copy_from_slice( 4478 &self.pci_segments[self.selected_segment] 4479 .pci_devices_up 4480 .to_le_bytes(), 4481 ); 4482 // Clear the PCIU bitmap 4483 self.pci_segments[self.selected_segment].pci_devices_up = 0; 4484 } 4485 PCID_FIELD_OFFSET => { 4486 assert!(data.len() == PCID_FIELD_SIZE); 4487 data.copy_from_slice( 4488 &self.pci_segments[self.selected_segment] 4489 .pci_devices_down 4490 .to_le_bytes(), 4491 ); 4492 // Clear the PCID bitmap 4493 self.pci_segments[self.selected_segment].pci_devices_down = 0; 4494 } 4495 B0EJ_FIELD_OFFSET => { 4496 assert!(data.len() == B0EJ_FIELD_SIZE); 4497 // Always return an empty bitmap since the eject is always 4498 // taken care of right away during a write access. 
4499 data.fill(0); 4500 } 4501 PSEG_FIELD_OFFSET => { 4502 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4503 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes()); 4504 } 4505 _ => error!( 4506 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4507 base, offset 4508 ), 4509 } 4510 4511 debug!( 4512 "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}", 4513 base, offset, data 4514 ) 4515 } 4516 4517 fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> { 4518 match offset { 4519 B0EJ_FIELD_OFFSET => { 4520 assert!(data.len() == B0EJ_FIELD_SIZE); 4521 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4522 data_array.copy_from_slice(data); 4523 let mut slot_bitmap = u32::from_le_bytes(data_array); 4524 4525 while slot_bitmap > 0 { 4526 let slot_id = slot_bitmap.trailing_zeros(); 4527 if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) { 4528 error!("Failed ejecting device {}: {:?}", slot_id, e); 4529 } 4530 slot_bitmap &= !(1 << slot_id); 4531 } 4532 } 4533 PSEG_FIELD_OFFSET => { 4534 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4535 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4536 data_array.copy_from_slice(data); 4537 let selected_segment = u32::from_le_bytes(data_array) as usize; 4538 if selected_segment >= self.pci_segments.len() { 4539 error!( 4540 "Segment selection out of range: {} >= {}", 4541 selected_segment, 4542 self.pci_segments.len() 4543 ); 4544 return None; 4545 } 4546 self.selected_segment = selected_segment; 4547 } 4548 _ => error!( 4549 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4550 base, offset 4551 ), 4552 } 4553 4554 debug!( 4555 "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}", 4556 base, offset, data 4557 ); 4558 4559 None 4560 } 4561 } 4562 4563 impl Drop for DeviceManager { 4564 fn drop(&mut self) { 4565 for handle in self.virtio_devices.drain(..) { 4566 handle.virtio_device.lock().unwrap().shutdown(); 4567 } 4568 } 4569 } 4570