1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use crate::config::{ 13 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 14 VdpaConfig, VhostMode, VmConfig, VsockConfig, 15 }; 16 use crate::device_tree::{DeviceNode, DeviceTree}; 17 use crate::interrupt::LegacyUserspaceInterruptManager; 18 use crate::interrupt::MsiInterruptManager; 19 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 20 use crate::pci_segment::PciSegment; 21 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 22 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 23 use crate::sigwinch_listener::start_sigwinch_listener; 24 #[cfg(target_arch = "aarch64")] 25 use crate::GuestMemoryMmap; 26 use crate::GuestRegionMmap; 27 use crate::PciDeviceInfo; 28 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; 29 use acpi_tables::{aml, aml::Aml}; 30 use anyhow::anyhow; 31 use arch::layout; 32 #[cfg(target_arch = "x86_64")] 33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 34 use arch::NumaNodes; 35 #[cfg(target_arch = "aarch64")] 36 use arch::{DeviceType, MmioDeviceInfo}; 37 use block_util::{ 38 async_io::DiskFile, block_io_uring_is_supported, detect_image_type, 39 fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync, 40 raw_async::RawFileDisk, raw_sync::RawFileDiskSync, vhdx_sync::VhdxDiskSync, ImageType, 41 }; 42 #[cfg(target_arch = "aarch64")] 43 use devices::gic; 44 #[cfg(target_arch = "x86_64")] 45 use devices::ioapic; 46 #[cfg(target_arch = "aarch64")] 47 use devices::legacy::Pl011; 48 #[cfg(target_arch = "x86_64")] 49 use devices::legacy::Serial; 50 use devices::{ 51 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 52 }; 53 use hypervisor::{DeviceFd, HypervisorVmError, IoEventAddress}; 54 use libc::{ 55 cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, 56 O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, 57 }; 58 #[cfg(target_arch = "x86_64")] 59 use pci::PciConfigIo; 60 use pci::{ 61 DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping, 62 VfioUserPciDevice, VfioUserPciDeviceError, 63 }; 64 use seccompiler::SeccompAction; 65 use serde::{Deserialize, Serialize}; 66 use std::collections::{BTreeSet, HashMap}; 67 use std::convert::TryInto; 68 use std::fs::{read_link, File, OpenOptions}; 69 use std::io::{self, stdout, Seek, SeekFrom}; 70 use std::mem::zeroed; 71 use std::num::Wrapping; 72 use std::os::unix::fs::OpenOptionsExt; 73 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 74 use std::path::PathBuf; 75 use std::result; 76 use std::sync::{Arc, Mutex}; 77 use std::time::Instant; 78 use vfio_ioctls::{VfioContainer, VfioDevice}; 79 use virtio_devices::transport::VirtioTransport; 80 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 81 use virtio_devices::vhost_user::VhostUserConfig; 82 use virtio_devices::{ 83 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 84 }; 85 use virtio_devices::{Endpoint, IommuMapping}; 86 use vm_allocator::{AddressAllocator, SystemAllocator}; 87 use vm_device::dma_mapping::vfio::VfioDmaMapping; 88 use vm_device::dma_mapping::ExternalDmaMapping; 89 use vm_device::interrupt::{ 90 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 91 }; 92 use vm_device::{Bus, BusDevice, Resource}; 93 use vm_memory::guest_memory::FileOffset; 94 #[cfg(target_arch = "aarch64")] 95 use vm_memory::GuestMemoryAtomic; 96 use vm_memory::GuestMemoryRegion; 97 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion}; 98 #[cfg(target_arch = "x86_64")] 99 use vm_memory::{GuestAddressSpace, GuestMemory}; 100 use vm_migration::{ 101 protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot, 102 SnapshotDataSection, Snapshottable, Transportable, 103 }; 104 use vm_virtio::AccessPlatform; 105 use vm_virtio::VirtioDeviceType; 106 use vmm_sys_util::eventfd::EventFd; 107 108 #[cfg(target_arch = "aarch64")] 109 const MMIO_LEN: u64 = 0x1000; 110 111 // Singleton devices / devices the user cannot name 112 #[cfg(target_arch = "x86_64")] 113 const IOAPIC_DEVICE_NAME: &str = "__ioapic"; 114 const SERIAL_DEVICE_NAME: &str = "__serial"; 115 #[cfg(target_arch = "aarch64")] 116 const GPIO_DEVICE_NAME: &str = "__gpio"; 117 const RNG_DEVICE_NAME: &str = "__rng"; 118 const IOMMU_DEVICE_NAME: &str = "__iommu"; 119 const BALLOON_DEVICE_NAME: &str = "__balloon"; 120 const CONSOLE_DEVICE_NAME: &str = "__console"; 121 122 // Devices that the user may name and for which we generate 123 // identifiers if the user doesn't give one 124 const DISK_DEVICE_NAME_PREFIX: &str = "_disk"; 125 const FS_DEVICE_NAME_PREFIX: &str = "_fs"; 126 const NET_DEVICE_NAME_PREFIX: &str = "_net"; 127 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem"; 128 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa"; 129 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock"; 130 const WATCHDOG_DEVICE_NAME: &str = "__watchdog"; 131 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio"; 132 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user"; 133 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci"; 134 135 /// Errors associated with device manager 136 #[derive(Debug)] 137 pub enum DeviceManagerError { 138 /// Cannot create EventFd. 139 EventFd(io::Error), 140 141 /// Cannot open disk path 142 Disk(io::Error), 143 144 /// Cannot create vhost-user-net device 145 CreateVhostUserNet(virtio_devices::vhost_user::Error), 146 147 /// Cannot create virtio-blk device 148 CreateVirtioBlock(io::Error), 149 150 /// Cannot create virtio-net device 151 CreateVirtioNet(virtio_devices::net::Error), 152 153 /// Cannot create virtio-console device 154 CreateVirtioConsole(io::Error), 155 156 /// Cannot create virtio-rng device 157 CreateVirtioRng(io::Error), 158 159 /// Cannot create virtio-fs device 160 CreateVirtioFs(virtio_devices::vhost_user::Error), 161 162 /// Virtio-fs device was created without a socket. 163 NoVirtioFsSock, 164 165 /// Cannot create vhost-user-blk device 166 CreateVhostUserBlk(virtio_devices::vhost_user::Error), 167 168 /// Cannot create virtio-pmem device 169 CreateVirtioPmem(io::Error), 170 171 /// Cannot create vDPA device 172 CreateVdpa(virtio_devices::vdpa::Error), 173 174 /// Cannot create virtio-vsock device 175 CreateVirtioVsock(io::Error), 176 177 /// Failed to convert Path to &str for the vDPA device. 178 CreateVdpaConvertPath, 179 180 /// Failed to convert Path to &str for the virtio-vsock device. 181 CreateVsockConvertPath, 182 183 /// Cannot create virtio-vsock backend 184 CreateVsockBackend(virtio_devices::vsock::VsockUnixError), 185 186 /// Cannot create virtio-iommu device 187 CreateVirtioIommu(io::Error), 188 189 /// Cannot create virtio-balloon device 190 CreateVirtioBalloon(io::Error), 191 192 /// Cannot create virtio-watchdog device 193 CreateVirtioWatchdog(io::Error), 194 195 /// Failed to parse disk image format 196 DetectImageType(io::Error), 197 198 /// Cannot open qcow disk path 199 QcowDeviceCreate(qcow::Error), 200 201 /// Cannot create serial manager 202 CreateSerialManager(SerialManagerError), 203 204 /// Cannot spawn the serial manager thread 205 SpawnSerialManager(SerialManagerError), 206 207 /// Cannot open tap interface 208 OpenTap(net_util::TapError), 209 210 /// Cannot allocate IRQ. 211 AllocateIrq, 212 213 /// Cannot configure the IRQ. 214 Irq(vmm_sys_util::errno::Error), 215 216 /// Cannot allocate PCI BARs 217 AllocateBars(pci::PciDeviceError), 218 219 /// Could not free the BARs associated with a PCI device. 220 FreePciBars(pci::PciDeviceError), 221 222 /// Cannot register ioevent. 223 RegisterIoevent(anyhow::Error), 224 225 /// Cannot unregister ioevent. 226 UnRegisterIoevent(anyhow::Error), 227 228 /// Cannot create virtio device 229 VirtioDevice(vmm_sys_util::errno::Error), 230 231 /// Cannot add PCI device 232 AddPciDevice(pci::PciRootError), 233 234 /// Cannot open persistent memory file 235 PmemFileOpen(io::Error), 236 237 /// Cannot set persistent memory file size 238 PmemFileSetLen(io::Error), 239 240 /// Cannot find a memory range for persistent memory 241 PmemRangeAllocation, 242 243 /// Cannot find a memory range for virtio-fs 244 FsRangeAllocation, 245 246 /// Error creating serial output file 247 SerialOutputFileOpen(io::Error), 248 249 /// Error creating console output file 250 ConsoleOutputFileOpen(io::Error), 251 252 /// Error creating serial pty 253 SerialPtyOpen(io::Error), 254 255 /// Error creating console pty 256 ConsolePtyOpen(io::Error), 257 258 /// Error setting pty raw mode 259 SetPtyRaw(vmm_sys_util::errno::Error), 260 261 /// Error getting pty peer 262 GetPtyPeer(vmm_sys_util::errno::Error), 263 264 /// Cannot create a VFIO device 265 VfioCreate(vfio_ioctls::VfioError), 266 267 /// Cannot create a VFIO PCI device 268 VfioPciCreate(pci::VfioPciError), 269 270 /// Failed to map VFIO MMIO region. 271 VfioMapRegion(pci::VfioPciError), 272 273 /// Failed to DMA map VFIO device. 274 VfioDmaMap(vfio_ioctls::VfioError), 275 276 /// Failed to DMA unmap VFIO device. 277 VfioDmaUnmap(pci::VfioPciError), 278 279 /// Failed to create the passthrough device. 280 CreatePassthroughDevice(anyhow::Error), 281 282 /// Failed to memory map. 283 Mmap(io::Error), 284 285 /// Cannot add legacy device to Bus. 286 BusError(vm_device::BusError), 287 288 /// Failed to allocate IO port 289 AllocateIoPort, 290 291 /// Failed to allocate MMIO address 292 AllocateMmioAddress, 293 294 /// Failed to make hotplug notification 295 HotPlugNotification(io::Error), 296 297 /// Error from a memory manager operation 298 MemoryManager(MemoryManagerError), 299 300 /// Failed to create new interrupt source group. 301 CreateInterruptGroup(io::Error), 302 303 /// Failed to update interrupt source group. 304 UpdateInterruptGroup(io::Error), 305 306 /// Failed to create interrupt controller. 307 CreateInterruptController(interrupt_controller::Error), 308 309 /// Failed to create a new MmapRegion instance. 310 NewMmapRegion(vm_memory::mmap::MmapRegionError), 311 312 /// Failed to clone a File. 313 CloneFile(io::Error), 314 315 /// Failed to create socket file 316 CreateSocketFile(io::Error), 317 318 /// Failed to spawn the network backend 319 SpawnNetBackend(io::Error), 320 321 /// Failed to spawn the block backend 322 SpawnBlockBackend(io::Error), 323 324 /// Missing PCI bus. 325 NoPciBus, 326 327 /// Could not find an available device name. 328 NoAvailableDeviceName, 329 330 /// Missing PCI device. 331 MissingPciDevice, 332 333 /// Failed to remove a PCI device from the PCI bus. 334 RemoveDeviceFromPciBus(pci::PciRootError), 335 336 /// Failed to remove a bus device from the IO bus. 337 RemoveDeviceFromIoBus(vm_device::BusError), 338 339 /// Failed to remove a bus device from the MMIO bus. 340 RemoveDeviceFromMmioBus(vm_device::BusError), 341 342 /// Failed to find the device corresponding to a specific PCI b/d/f. 343 UnknownPciBdf(u32), 344 345 /// Not allowed to remove this type of device from the VM. 346 RemovalNotAllowed(vm_virtio::VirtioDeviceType), 347 348 /// Failed to find device corresponding to the given identifier. 349 UnknownDeviceId(String), 350 351 /// Failed to find an available PCI device ID. 352 NextPciDeviceId(pci::PciRootError), 353 354 /// Could not reserve the PCI device ID. 355 GetPciDeviceId(pci::PciRootError), 356 357 /// Could not give the PCI device ID back. 358 PutPciDeviceId(pci::PciRootError), 359 360 /// No disk path was specified when one was expected 361 NoDiskPath, 362 363 /// Failed to update guest memory for virtio device. 364 UpdateMemoryForVirtioDevice(virtio_devices::Error), 365 366 /// Cannot create virtio-mem device 367 CreateVirtioMem(io::Error), 368 369 /// Cannot generate a ResizeSender from the Resize object. 370 CreateResizeSender(virtio_devices::mem::Error), 371 372 /// Cannot find a memory range for virtio-mem memory 373 VirtioMemRangeAllocation, 374 375 /// Failed to update guest memory for VFIO PCI device. 376 UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError), 377 378 /// Trying to use a directory for pmem but no size specified 379 PmemWithDirectorySizeMissing, 380 381 /// Trying to use a size that is not multiple of 2MiB 382 PmemSizeNotAligned, 383 384 /// Could not find the node in the device tree. 385 MissingNode, 386 387 /// Resource was already found. 388 ResourceAlreadyExists, 389 390 /// Expected resources for virtio-pmem could not be found. 391 MissingVirtioPmemResources, 392 393 /// Missing PCI b/d/f from the DeviceNode. 394 MissingDeviceNodePciBdf, 395 396 /// No support for device passthrough 397 NoDevicePassthroughSupport, 398 399 /// Failed to resize virtio-balloon 400 VirtioBalloonResize(virtio_devices::balloon::Error), 401 402 /// Missing virtio-balloon, can't proceed as expected. 403 MissingVirtioBalloon, 404 405 /// Missing virtual IOMMU device 406 MissingVirtualIommu, 407 408 /// Failed to do power button notification 409 PowerButtonNotification(io::Error), 410 411 /// Failed to do AArch64 GPIO power button notification 412 #[cfg(target_arch = "aarch64")] 413 AArch64PowerButtonNotification(devices::legacy::GpioDeviceError), 414 415 /// Failed to set O_DIRECT flag to file descriptor 416 SetDirectIo, 417 418 /// Failed to create FixedVhdDiskAsync 419 CreateFixedVhdDiskAsync(io::Error), 420 421 /// Failed to create FixedVhdDiskSync 422 CreateFixedVhdDiskSync(io::Error), 423 424 /// Failed to create QcowDiskSync 425 CreateQcowDiskSync(qcow::Error), 426 427 /// Failed to create FixedVhdxDiskSync 428 CreateFixedVhdxDiskSync(vhdx::vhdx::VhdxError), 429 430 /// Failed to add DMA mapping handler to virtio-mem device. 431 AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error), 432 433 /// Failed to remove DMA mapping handler from virtio-mem device. 434 RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error), 435 436 /// Failed to create vfio-user client 437 VfioUserCreateClient(vfio_user::Error), 438 439 /// Failed to create VFIO user device 440 VfioUserCreate(VfioUserPciDeviceError), 441 442 /// Failed to map region from VFIO user device into guest 443 VfioUserMapRegion(VfioUserPciDeviceError), 444 445 /// Failed to DMA map VFIO user device. 446 VfioUserDmaMap(VfioUserPciDeviceError), 447 448 /// Failed to DMA unmap VFIO user device. 449 VfioUserDmaUnmap(VfioUserPciDeviceError), 450 451 /// Failed to update memory mappings for VFIO user device 452 UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError), 453 454 /// Cannot duplicate file descriptor 455 DupFd(vmm_sys_util::errno::Error), 456 457 /// Failed to DMA map virtio device. 458 VirtioDmaMap(std::io::Error), 459 460 /// Failed to DMA unmap virtio device. 461 VirtioDmaUnmap(std::io::Error), 462 463 /// Cannot hotplug device behind vIOMMU 464 InvalidIommuHotplug, 465 466 /// Failed to create UEFI flash 467 CreateUefiFlash(HypervisorVmError), 468 469 /// Invalid identifier as it is not unique. 470 IdentifierNotUnique(String), 471 472 /// Invalid identifier 473 InvalidIdentifier(String), 474 475 /// Error activating virtio device 476 VirtioActivate(ActivateError), 477 } 478 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>; 479 480 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10; 481 482 const TIOCSPTLCK: libc::c_int = 0x4004_5431; 483 const TIOCGTPEER: libc::c_int = 0x5441; 484 485 pub fn create_pty(non_blocking: bool) -> io::Result<(File, File, PathBuf)> { 486 // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx 487 // This is done to try and use the devpts filesystem that 488 // could be available for use in the process's namespace first. 489 // Ideally these are all the same file though but different 490 // kernels could have things setup differently. 491 // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt 492 // for further details. 493 494 let custom_flags = libc::O_NOCTTY | if non_blocking { libc::O_NONBLOCK } else { 0 }; 495 let main = match OpenOptions::new() 496 .read(true) 497 .write(true) 498 .custom_flags(custom_flags) 499 .open("/dev/pts/ptmx") 500 { 501 Ok(f) => f, 502 _ => OpenOptions::new() 503 .read(true) 504 .write(true) 505 .custom_flags(custom_flags) 506 .open("/dev/ptmx")?, 507 }; 508 let mut unlock: libc::c_ulong = 0; 509 // SAFETY: FFI call into libc, trivially safe 510 unsafe { 511 libc::ioctl( 512 main.as_raw_fd(), 513 TIOCSPTLCK.try_into().unwrap(), 514 &mut unlock, 515 ) 516 }; 517 518 // SAFETY: FFI call into libc, trivally safe 519 let sub_fd = unsafe { 520 libc::ioctl( 521 main.as_raw_fd(), 522 TIOCGTPEER.try_into().unwrap(), 523 libc::O_NOCTTY | libc::O_RDWR, 524 ) 525 }; 526 if sub_fd == -1 { 527 return vmm_sys_util::errno::errno_result().map_err(|e| e.into()); 528 } 529 530 let proc_path = PathBuf::from(format!("/proc/self/fd/{}", sub_fd)); 531 let path = read_link(proc_path)?; 532 533 // SAFETY: sub_fd is checked to be valid before being wrapped in File 534 Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path)) 535 } 536 537 #[derive(Default)] 538 pub struct Console { 539 console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>, 540 } 541 542 impl Console { 543 pub fn update_console_size(&self) { 544 if let Some(resizer) = self.console_resizer.as_ref() { 545 resizer.update_console_size() 546 } 547 } 548 } 549 550 pub(crate) struct AddressManager { 551 pub(crate) allocator: Arc<Mutex<SystemAllocator>>, 552 #[cfg(target_arch = "x86_64")] 553 pub(crate) io_bus: Arc<Bus>, 554 pub(crate) mmio_bus: Arc<Bus>, 555 vm: Arc<dyn hypervisor::Vm>, 556 device_tree: Arc<Mutex<DeviceTree>>, 557 pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 558 } 559 560 impl DeviceRelocation for AddressManager { 561 fn move_bar( 562 &self, 563 old_base: u64, 564 new_base: u64, 565 len: u64, 566 pci_dev: &mut dyn PciDevice, 567 region_type: PciBarRegionType, 568 ) -> std::result::Result<(), std::io::Error> { 569 match region_type { 570 PciBarRegionType::IoRegion => { 571 #[cfg(target_arch = "x86_64")] 572 { 573 // Update system allocator 574 self.allocator 575 .lock() 576 .unwrap() 577 .free_io_addresses(GuestAddress(old_base), len as GuestUsize); 578 579 self.allocator 580 .lock() 581 .unwrap() 582 .allocate_io_addresses( 583 Some(GuestAddress(new_base)), 584 len as GuestUsize, 585 None, 586 ) 587 .ok_or_else(|| { 588 io::Error::new(io::ErrorKind::Other, "failed allocating new IO range") 589 })?; 590 591 // Update PIO bus 592 self.io_bus 593 .update_range(old_base, len, new_base, len) 594 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 595 } 596 #[cfg(target_arch = "aarch64")] 597 error!("I/O region is not supported"); 598 } 599 PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => { 600 // Update system allocator 601 if region_type == PciBarRegionType::Memory32BitRegion { 602 self.allocator 603 .lock() 604 .unwrap() 605 .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize); 606 607 self.allocator 608 .lock() 609 .unwrap() 610 .allocate_mmio_hole_addresses( 611 Some(GuestAddress(new_base)), 612 len as GuestUsize, 613 Some(len), 614 ) 615 .ok_or_else(|| { 616 io::Error::new( 617 io::ErrorKind::Other, 618 "failed allocating new 32 bits MMIO range", 619 ) 620 })?; 621 } else { 622 // Find the specific allocator that this BAR was allocated from and use it for new one 623 for allocator in &self.pci_mmio_allocators { 624 let allocator_base = allocator.lock().unwrap().base(); 625 let allocator_end = allocator.lock().unwrap().end(); 626 627 if old_base >= allocator_base.0 && old_base <= allocator_end.0 { 628 allocator 629 .lock() 630 .unwrap() 631 .free(GuestAddress(old_base), len as GuestUsize); 632 633 allocator 634 .lock() 635 .unwrap() 636 .allocate( 637 Some(GuestAddress(new_base)), 638 len as GuestUsize, 639 Some(len), 640 ) 641 .ok_or_else(|| { 642 io::Error::new( 643 io::ErrorKind::Other, 644 "failed allocating new 64 bits MMIO range", 645 ) 646 })?; 647 648 break; 649 } 650 } 651 } 652 653 // Update MMIO bus 654 self.mmio_bus 655 .update_range(old_base, len, new_base, len) 656 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 657 } 658 } 659 660 // Update the device_tree resources associated with the device 661 if let Some(id) = pci_dev.id() { 662 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) { 663 let mut resource_updated = false; 664 for resource in node.resources.iter_mut() { 665 if let Resource::PciBar { base, type_, .. } = resource { 666 if PciBarRegionType::from(*type_) == region_type && *base == old_base { 667 *base = new_base; 668 resource_updated = true; 669 break; 670 } 671 } 672 } 673 674 if !resource_updated { 675 return Err(io::Error::new( 676 io::ErrorKind::Other, 677 format!( 678 "Couldn't find a resource with base 0x{:x} for device {}", 679 old_base, id 680 ), 681 )); 682 } 683 } else { 684 return Err(io::Error::new( 685 io::ErrorKind::Other, 686 format!("Couldn't find device {} from device tree", id), 687 )); 688 } 689 } 690 691 let any_dev = pci_dev.as_any(); 692 if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() { 693 let bar_addr = virtio_pci_dev.config_bar_addr(); 694 if bar_addr == new_base { 695 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) { 696 let io_addr = IoEventAddress::Mmio(addr); 697 self.vm.unregister_ioevent(event, &io_addr).map_err(|e| { 698 io::Error::new( 699 io::ErrorKind::Other, 700 format!("failed to unregister ioevent: {:?}", e), 701 ) 702 })?; 703 } 704 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) { 705 let io_addr = IoEventAddress::Mmio(addr); 706 self.vm 707 .register_ioevent(event, &io_addr, None) 708 .map_err(|e| { 709 io::Error::new( 710 io::ErrorKind::Other, 711 format!("failed to register ioevent: {:?}", e), 712 ) 713 })?; 714 } 715 } else { 716 let virtio_dev = virtio_pci_dev.virtio_device(); 717 let mut virtio_dev = virtio_dev.lock().unwrap(); 718 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() { 719 if shm_regions.addr.raw_value() == old_base { 720 let mem_region = self.vm.make_user_memory_region( 721 shm_regions.mem_slot, 722 old_base, 723 shm_regions.len, 724 shm_regions.host_addr, 725 false, 726 false, 727 ); 728 729 self.vm.remove_user_memory_region(mem_region).map_err(|e| { 730 io::Error::new( 731 io::ErrorKind::Other, 732 format!("failed to remove user memory region: {:?}", e), 733 ) 734 })?; 735 736 // Create new mapping by inserting new region to KVM. 737 let mem_region = self.vm.make_user_memory_region( 738 shm_regions.mem_slot, 739 new_base, 740 shm_regions.len, 741 shm_regions.host_addr, 742 false, 743 false, 744 ); 745 746 self.vm.create_user_memory_region(mem_region).map_err(|e| { 747 io::Error::new( 748 io::ErrorKind::Other, 749 format!("failed to create user memory regions: {:?}", e), 750 ) 751 })?; 752 753 // Update shared memory regions to reflect the new mapping. 754 shm_regions.addr = GuestAddress(new_base); 755 virtio_dev.set_shm_regions(shm_regions).map_err(|e| { 756 io::Error::new( 757 io::ErrorKind::Other, 758 format!("failed to update shared memory regions: {:?}", e), 759 ) 760 })?; 761 } 762 } 763 } 764 } 765 766 pci_dev.move_bar(old_base, new_base) 767 } 768 } 769 770 #[derive(Serialize, Deserialize)] 771 struct DeviceManagerState { 772 device_tree: DeviceTree, 773 device_id_cnt: Wrapping<usize>, 774 } 775 776 #[derive(Debug)] 777 pub struct PtyPair { 778 pub main: File, 779 pub sub: File, 780 pub path: PathBuf, 781 } 782 783 impl Clone for PtyPair { 784 fn clone(&self) -> Self { 785 PtyPair { 786 main: self.main.try_clone().unwrap(), 787 sub: self.sub.try_clone().unwrap(), 788 path: self.path.clone(), 789 } 790 } 791 } 792 793 #[derive(Clone)] 794 pub enum PciDeviceHandle { 795 Vfio(Arc<Mutex<VfioPciDevice>>), 796 Virtio(Arc<Mutex<VirtioPciDevice>>), 797 VfioUser(Arc<Mutex<VfioUserPciDevice>>), 798 } 799 800 #[derive(Clone)] 801 struct MetaVirtioDevice { 802 virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 803 iommu: bool, 804 id: String, 805 pci_segment: u16, 806 dma_handler: Option<Arc<dyn ExternalDmaMapping>>, 807 } 808 809 pub struct DeviceManager { 810 // Manage address space related to devices 811 address_manager: Arc<AddressManager>, 812 813 // Console abstraction 814 console: Arc<Console>, 815 816 // console PTY 817 console_pty: Option<Arc<Mutex<PtyPair>>>, 818 819 // serial PTY 820 serial_pty: Option<Arc<Mutex<PtyPair>>>, 821 822 // Serial Manager 823 serial_manager: Option<Arc<SerialManager>>, 824 825 // pty foreground status, 826 console_resize_pipe: Option<Arc<File>>, 827 828 // Interrupt controller 829 #[cfg(target_arch = "x86_64")] 830 interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>, 831 #[cfg(target_arch = "aarch64")] 832 interrupt_controller: Option<Arc<Mutex<gic::Gic>>>, 833 834 // Things to be added to the commandline (e.g. aarch64 early console) 835 #[cfg(target_arch = "aarch64")] 836 cmdline_additions: Vec<String>, 837 838 // ACPI GED notification device 839 ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>, 840 841 // VM configuration 842 config: Arc<Mutex<VmConfig>>, 843 844 // Memory Manager 845 memory_manager: Arc<Mutex<MemoryManager>>, 846 847 // The virtio devices on the system 848 virtio_devices: Vec<MetaVirtioDevice>, 849 850 // List of bus devices 851 // Let the DeviceManager keep strong references to the BusDevice devices. 852 // This allows the IO and MMIO buses to be provided with Weak references, 853 // which prevents cyclic dependencies. 854 bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>, 855 856 // Counter to keep track of the consumed device IDs. 857 device_id_cnt: Wrapping<usize>, 858 859 pci_segments: Vec<PciSegment>, 860 861 #[cfg_attr(target_arch = "aarch64", allow(dead_code))] 862 // MSI Interrupt Manager 863 msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, 864 865 #[cfg_attr(feature = "mshv", allow(dead_code))] 866 // Legacy Interrupt Manager 867 legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>, 868 869 // Passthrough device handle 870 passthrough_device: Option<Arc<dyn hypervisor::Device>>, 871 872 // VFIO container 873 // Only one container can be created, therefore it is stored as part of the 874 // DeviceManager to be reused. 875 vfio_container: Option<Arc<VfioContainer>>, 876 877 // Paravirtualized IOMMU 878 iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>, 879 iommu_mapping: Option<Arc<IommuMapping>>, 880 881 // PCI information about devices attached to the paravirtualized IOMMU 882 // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF 883 // representing the devices attached to the virtual IOMMU. This is useful 884 // information for filling the ACPI VIOT table. 885 iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>, 886 887 // Tree of devices, representing the dependencies between devices. 888 // Useful for introspection, snapshot and restore. 889 device_tree: Arc<Mutex<DeviceTree>>, 890 891 // Exit event 892 exit_evt: EventFd, 893 reset_evt: EventFd, 894 895 #[cfg(target_arch = "aarch64")] 896 id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>, 897 898 // seccomp action 899 seccomp_action: SeccompAction, 900 901 // List of guest NUMA nodes. 902 numa_nodes: NumaNodes, 903 904 // Possible handle to the virtio-balloon device 905 balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>, 906 907 // Virtio Device activation EventFd to allow the VMM thread to trigger device 908 // activation and thus start the threads from the VMM thread 909 activate_evt: EventFd, 910 911 acpi_address: GuestAddress, 912 913 selected_segment: usize, 914 915 // Possible handle to the virtio-mem device 916 virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>, 917 918 #[cfg(target_arch = "aarch64")] 919 // GPIO device for AArch64 920 gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>, 921 922 #[cfg(target_arch = "aarch64")] 923 // Flash device for UEFI on AArch64 924 uefi_flash: Option<GuestMemoryAtomic<GuestMemoryMmap>>, 925 926 // Flag to force setting the iommu on virtio devices 927 force_iommu: bool, 928 929 // Helps identify if the VM is currently being restored 930 restoring: bool, 931 932 // io_uring availability if detected 933 io_uring_supported: Option<bool>, 934 935 // List of unique identifiers provided at boot through the configuration. 936 boot_id_list: BTreeSet<String>, 937 938 // Start time of the VM 939 timestamp: Instant, 940 941 // Pending activations 942 pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>, 943 } 944 945 impl DeviceManager { 946 #[allow(clippy::too_many_arguments)] 947 pub fn new( 948 vm: Arc<dyn hypervisor::Vm>, 949 config: Arc<Mutex<VmConfig>>, 950 memory_manager: Arc<Mutex<MemoryManager>>, 951 exit_evt: &EventFd, 952 reset_evt: &EventFd, 953 seccomp_action: SeccompAction, 954 numa_nodes: NumaNodes, 955 activate_evt: &EventFd, 956 force_iommu: bool, 957 restoring: bool, 958 boot_id_list: BTreeSet<String>, 959 timestamp: Instant, 960 ) -> DeviceManagerResult<Arc<Mutex<Self>>> { 961 let device_tree = Arc::new(Mutex::new(DeviceTree::new())); 962 963 let num_pci_segments = 964 if let Some(platform_config) = config.lock().unwrap().platform.as_ref() { 965 platform_config.num_pci_segments 966 } else { 967 1 968 }; 969 970 let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0; 971 let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0; 972 973 // Start each PCI segment range on a 4GiB boundary 974 let pci_segment_size = (end_of_device_area - start_of_device_area + 1) 975 / ((4 << 30) * num_pci_segments as u64) 976 * (4 << 30); 977 978 let mut pci_mmio_allocators = vec![]; 979 for i in 0..num_pci_segments as u64 { 980 let mmio_start = start_of_device_area + i * pci_segment_size; 981 let allocator = Arc::new(Mutex::new( 982 AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(), 983 )); 984 pci_mmio_allocators.push(allocator) 985 } 986 987 let address_manager = Arc::new(AddressManager { 988 allocator: memory_manager.lock().unwrap().allocator(), 989 #[cfg(target_arch = "x86_64")] 990 io_bus: Arc::new(Bus::new()), 991 mmio_bus: Arc::new(Bus::new()), 992 vm: vm.clone(), 993 device_tree: Arc::clone(&device_tree), 994 pci_mmio_allocators, 995 }); 996 997 // First we create the MSI interrupt manager, the legacy one is created 998 // later, after the IOAPIC device creation. 999 // The reason we create the MSI one first is because the IOAPIC needs it, 1000 // and then the legacy interrupt manager needs an IOAPIC. So we're 1001 // handling a linear dependency chain: 1002 // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager. 1003 let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> = 1004 Arc::new(MsiInterruptManager::new( 1005 Arc::clone(&address_manager.allocator), 1006 vm, 1007 )); 1008 1009 let acpi_address = address_manager 1010 .allocator 1011 .lock() 1012 .unwrap() 1013 .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None) 1014 .ok_or(DeviceManagerError::AllocateIoPort)?; 1015 1016 let mut pci_irq_slots = [0; 32]; 1017 PciSegment::reserve_legacy_interrupts_for_pci_devices( 1018 &address_manager, 1019 &mut pci_irq_slots, 1020 )?; 1021 1022 let mut pci_segments = vec![PciSegment::new_default_segment( 1023 &address_manager, 1024 Arc::clone(&address_manager.pci_mmio_allocators[0]), 1025 &pci_irq_slots, 1026 )?]; 1027 1028 for i in 1..num_pci_segments as usize { 1029 pci_segments.push(PciSegment::new( 1030 i as u16, 1031 &address_manager, 1032 Arc::clone(&address_manager.pci_mmio_allocators[i]), 1033 &pci_irq_slots, 1034 )?); 1035 } 1036 1037 let device_manager = DeviceManager { 1038 address_manager: Arc::clone(&address_manager), 1039 console: Arc::new(Console::default()), 1040 interrupt_controller: None, 1041 #[cfg(target_arch = "aarch64")] 1042 cmdline_additions: Vec::new(), 1043 ged_notification_device: None, 1044 config, 1045 memory_manager, 1046 virtio_devices: Vec::new(), 1047 bus_devices: Vec::new(), 1048 device_id_cnt: Wrapping(0), 1049 msi_interrupt_manager, 1050 legacy_interrupt_manager: None, 1051 passthrough_device: None, 1052 vfio_container: None, 1053 iommu_device: None, 1054 iommu_mapping: None, 1055 iommu_attached_devices: None, 1056 pci_segments, 1057 device_tree, 1058 exit_evt: exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?, 1059 reset_evt: reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?, 1060 #[cfg(target_arch = "aarch64")] 1061 id_to_dev_info: HashMap::new(), 1062 seccomp_action, 1063 numa_nodes, 1064 balloon: None, 1065 activate_evt: activate_evt 1066 .try_clone() 1067 .map_err(DeviceManagerError::EventFd)?, 1068 acpi_address, 1069 selected_segment: 0, 1070 serial_pty: None, 1071 serial_manager: None, 1072 console_pty: None, 1073 console_resize_pipe: None, 1074 virtio_mem_devices: Vec::new(), 1075 #[cfg(target_arch = "aarch64")] 1076 gpio_device: None, 1077 #[cfg(target_arch = "aarch64")] 1078 uefi_flash: None, 1079 force_iommu, 1080 restoring, 1081 io_uring_supported: None, 1082 boot_id_list, 1083 timestamp, 1084 pending_activations: Arc::new(Mutex::new(Vec::default())), 1085 }; 1086 1087 let device_manager = Arc::new(Mutex::new(device_manager)); 1088 1089 address_manager 1090 .mmio_bus 1091 .insert( 1092 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>, 1093 acpi_address.0, 1094 DEVICE_MANAGER_ACPI_SIZE as u64, 1095 ) 1096 .map_err(DeviceManagerError::BusError)?; 1097 1098 Ok(device_manager) 1099 } 1100 1101 pub fn serial_pty(&self) -> Option<PtyPair> { 1102 self.serial_pty 1103 .as_ref() 1104 .map(|pty| pty.lock().unwrap().clone()) 1105 } 1106 1107 pub fn console_pty(&self) -> Option<PtyPair> { 1108 self.console_pty 1109 .as_ref() 1110 .map(|pty| pty.lock().unwrap().clone()) 1111 } 1112 1113 pub fn console_resize_pipe(&self) -> Option<Arc<File>> { 1114 self.console_resize_pipe.as_ref().map(Arc::clone) 1115 } 1116 1117 pub fn create_devices( 1118 &mut self, 1119 serial_pty: Option<PtyPair>, 1120 console_pty: Option<PtyPair>, 1121 console_resize_pipe: Option<File>, 1122 ) -> DeviceManagerResult<()> { 1123 let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new(); 1124 1125 let interrupt_controller = self.add_interrupt_controller()?; 1126 1127 // Now we can create the legacy interrupt manager, which needs the freshly 1128 // formed IOAPIC device. 1129 let legacy_interrupt_manager: Arc< 1130 dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>, 1131 > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone( 1132 &interrupt_controller, 1133 ))); 1134 1135 { 1136 if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() { 1137 self.address_manager 1138 .mmio_bus 1139 .insert( 1140 Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>, 1141 acpi_address.0, 1142 MEMORY_MANAGER_ACPI_SIZE as u64, 1143 ) 1144 .map_err(DeviceManagerError::BusError)?; 1145 } 1146 } 1147 1148 #[cfg(target_arch = "x86_64")] 1149 self.add_legacy_devices( 1150 self.reset_evt 1151 .try_clone() 1152 .map_err(DeviceManagerError::EventFd)?, 1153 )?; 1154 1155 #[cfg(target_arch = "aarch64")] 1156 self.add_legacy_devices(&legacy_interrupt_manager)?; 1157 1158 { 1159 self.ged_notification_device = self.add_acpi_devices( 1160 &legacy_interrupt_manager, 1161 self.reset_evt 1162 .try_clone() 1163 .map_err(DeviceManagerError::EventFd)?, 1164 self.exit_evt 1165 .try_clone() 1166 .map_err(DeviceManagerError::EventFd)?, 1167 )?; 1168 } 1169 1170 self.console = self.add_console_device( 1171 &legacy_interrupt_manager, 1172 &mut virtio_devices, 1173 serial_pty, 1174 console_pty, 1175 console_resize_pipe, 1176 )?; 1177 1178 self.legacy_interrupt_manager = Some(legacy_interrupt_manager); 1179 1180 virtio_devices.append(&mut self.make_virtio_devices()?); 1181 1182 self.add_pci_devices(virtio_devices.clone())?; 1183 1184 self.virtio_devices = virtio_devices; 1185 1186 Ok(()) 1187 } 1188 1189 fn state(&self) -> DeviceManagerState { 1190 DeviceManagerState { 1191 device_tree: self.device_tree.lock().unwrap().clone(), 1192 device_id_cnt: self.device_id_cnt, 1193 } 1194 } 1195 1196 fn set_state(&mut self, state: &DeviceManagerState) { 1197 *self.device_tree.lock().unwrap() = state.device_tree.clone(); 1198 self.device_id_cnt = state.device_id_cnt; 1199 } 1200 1201 fn get_msi_iova_space(&mut self) -> (u64, u64) { 1202 #[cfg(target_arch = "aarch64")] 1203 { 1204 let vcpus = self.config.lock().unwrap().cpus.boot_vcpus; 1205 let msi_start = arch::layout::GIC_V3_DIST_START.raw_value() 1206 - arch::layout::GIC_V3_REDIST_SIZE * (vcpus as u64) 1207 - arch::layout::GIC_V3_ITS_SIZE; 1208 let msi_end = msi_start + arch::layout::GIC_V3_ITS_SIZE - 1; 1209 (msi_start, msi_end) 1210 } 1211 #[cfg(target_arch = "x86_64")] 1212 (0xfee0_0000, 0xfeef_ffff) 1213 } 1214 1215 #[cfg(target_arch = "aarch64")] 1216 /// Gets the information of the devices registered up to some point in time. 1217 pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> { 1218 &self.id_to_dev_info 1219 } 1220 1221 #[allow(unused_variables)] 1222 fn add_pci_devices( 1223 &mut self, 1224 virtio_devices: Vec<MetaVirtioDevice>, 1225 ) -> DeviceManagerResult<()> { 1226 let iommu_id = String::from(IOMMU_DEVICE_NAME); 1227 1228 let iommu_device = if self.config.lock().unwrap().iommu { 1229 let (device, mapping) = virtio_devices::Iommu::new( 1230 iommu_id.clone(), 1231 self.seccomp_action.clone(), 1232 self.exit_evt 1233 .try_clone() 1234 .map_err(DeviceManagerError::EventFd)?, 1235 self.get_msi_iova_space(), 1236 ) 1237 .map_err(DeviceManagerError::CreateVirtioIommu)?; 1238 let device = Arc::new(Mutex::new(device)); 1239 self.iommu_device = Some(Arc::clone(&device)); 1240 self.iommu_mapping = Some(mapping); 1241 1242 // Fill the device tree with a new node. In case of restore, we 1243 // know there is nothing to do, so we can simply override the 1244 // existing entry. 1245 self.device_tree 1246 .lock() 1247 .unwrap() 1248 .insert(iommu_id.clone(), device_node!(iommu_id, device)); 1249 1250 Some(device) 1251 } else { 1252 None 1253 }; 1254 1255 let mut iommu_attached_devices = Vec::new(); 1256 { 1257 for handle in virtio_devices { 1258 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 1259 self.iommu_mapping.clone() 1260 } else { 1261 None 1262 }; 1263 1264 let dev_id = self.add_virtio_pci_device( 1265 handle.virtio_device, 1266 &mapping, 1267 handle.id, 1268 handle.pci_segment, 1269 handle.dma_handler, 1270 )?; 1271 1272 if handle.iommu { 1273 iommu_attached_devices.push(dev_id); 1274 } 1275 } 1276 1277 let mut vfio_iommu_device_ids = self.add_vfio_devices()?; 1278 iommu_attached_devices.append(&mut vfio_iommu_device_ids); 1279 1280 let mut vfio_user_iommu_device_ids = self.add_user_devices()?; 1281 iommu_attached_devices.append(&mut vfio_user_iommu_device_ids); 1282 1283 // Add all devices from forced iommu segments 1284 if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() { 1285 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { 1286 for segment in iommu_segments { 1287 for device in 0..32 { 1288 let bdf = PciBdf::new(*segment, 0, device, 0); 1289 if !iommu_attached_devices.contains(&bdf) { 1290 iommu_attached_devices.push(bdf); 1291 } 1292 } 1293 } 1294 } 1295 } 1296 1297 if let Some(iommu_device) = iommu_device { 1298 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?; 1299 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices)); 1300 } 1301 } 1302 1303 for segment in &self.pci_segments { 1304 #[cfg(target_arch = "x86_64")] 1305 if let Some(pci_config_io) = segment.pci_config_io.as_ref() { 1306 self.bus_devices 1307 .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>); 1308 } 1309 1310 self.bus_devices 1311 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>); 1312 } 1313 1314 Ok(()) 1315 } 1316 1317 #[cfg(target_arch = "aarch64")] 1318 fn add_interrupt_controller( 1319 &mut self, 1320 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1321 let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new( 1322 gic::Gic::new( 1323 self.config.lock().unwrap().cpus.boot_vcpus, 1324 Arc::clone(&self.msi_interrupt_manager), 1325 ) 1326 .map_err(DeviceManagerError::CreateInterruptController)?, 1327 )); 1328 1329 self.interrupt_controller = Some(interrupt_controller.clone()); 1330 1331 // Unlike x86_64, the "interrupt_controller" here for AArch64 is only 1332 // a `Gic` object that implements the `InterruptController` to provide 1333 // interrupt delivery service. This is not the real GIC device so that 1334 // we do not need to insert it to the device tree. 1335 1336 Ok(interrupt_controller) 1337 } 1338 1339 #[cfg(target_arch = "aarch64")] 1340 pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> { 1341 self.interrupt_controller.as_ref() 1342 } 1343 1344 #[cfg(target_arch = "x86_64")] 1345 fn add_interrupt_controller( 1346 &mut self, 1347 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1348 let id = String::from(IOAPIC_DEVICE_NAME); 1349 1350 // Create IOAPIC 1351 let interrupt_controller = Arc::new(Mutex::new( 1352 ioapic::Ioapic::new( 1353 id.clone(), 1354 APIC_START, 1355 Arc::clone(&self.msi_interrupt_manager), 1356 ) 1357 .map_err(DeviceManagerError::CreateInterruptController)?, 1358 )); 1359 1360 self.interrupt_controller = Some(interrupt_controller.clone()); 1361 1362 self.address_manager 1363 .mmio_bus 1364 .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE) 1365 .map_err(DeviceManagerError::BusError)?; 1366 1367 self.bus_devices 1368 .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>); 1369 1370 // Fill the device tree with a new node. In case of restore, we 1371 // know there is nothing to do, so we can simply override the 1372 // existing entry. 1373 self.device_tree 1374 .lock() 1375 .unwrap() 1376 .insert(id.clone(), device_node!(id, interrupt_controller)); 1377 1378 Ok(interrupt_controller) 1379 } 1380 1381 fn add_acpi_devices( 1382 &mut self, 1383 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1384 reset_evt: EventFd, 1385 exit_evt: EventFd, 1386 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> { 1387 let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new( 1388 exit_evt, reset_evt, 1389 ))); 1390 1391 self.bus_devices 1392 .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>); 1393 1394 #[cfg(target_arch = "x86_64")] 1395 { 1396 self.address_manager 1397 .allocator 1398 .lock() 1399 .unwrap() 1400 .allocate_io_addresses(Some(GuestAddress(0x3c0)), 0x8, None) 1401 .ok_or(DeviceManagerError::AllocateIoPort)?; 1402 1403 self.address_manager 1404 .io_bus 1405 .insert(shutdown_device, 0x3c0, 0x4) 1406 .map_err(DeviceManagerError::BusError)?; 1407 } 1408 1409 let ged_irq = self 1410 .address_manager 1411 .allocator 1412 .lock() 1413 .unwrap() 1414 .allocate_irq() 1415 .unwrap(); 1416 let interrupt_group = interrupt_manager 1417 .create_group(LegacyIrqGroupConfig { 1418 irq: ged_irq as InterruptIndex, 1419 }) 1420 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1421 let ged_address = self 1422 .address_manager 1423 .allocator 1424 .lock() 1425 .unwrap() 1426 .allocate_platform_mmio_addresses( 1427 None, 1428 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1429 None, 1430 ) 1431 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 1432 let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new( 1433 interrupt_group, 1434 ged_irq, 1435 ged_address, 1436 ))); 1437 self.address_manager 1438 .mmio_bus 1439 .insert( 1440 ged_device.clone(), 1441 ged_address.0, 1442 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1443 ) 1444 .map_err(DeviceManagerError::BusError)?; 1445 self.bus_devices 1446 .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>); 1447 1448 let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new())); 1449 1450 self.bus_devices 1451 .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>); 1452 1453 #[cfg(target_arch = "x86_64")] 1454 { 1455 self.address_manager 1456 .allocator 1457 .lock() 1458 .unwrap() 1459 .allocate_io_addresses(Some(GuestAddress(0xb008)), 0x4, None) 1460 .ok_or(DeviceManagerError::AllocateIoPort)?; 1461 1462 self.address_manager 1463 .io_bus 1464 .insert(pm_timer_device, 0xb008, 0x4) 1465 .map_err(DeviceManagerError::BusError)?; 1466 } 1467 1468 Ok(Some(ged_device)) 1469 } 1470 1471 #[cfg(target_arch = "x86_64")] 1472 fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> { 1473 // Add a shutdown device (i8042) 1474 let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new( 1475 reset_evt.try_clone().unwrap(), 1476 ))); 1477 1478 self.bus_devices 1479 .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>); 1480 1481 self.address_manager 1482 .io_bus 1483 .insert(i8042, 0x61, 0x4) 1484 .map_err(DeviceManagerError::BusError)?; 1485 { 1486 // Add a CMOS emulated device 1487 let mem_size = self 1488 .memory_manager 1489 .lock() 1490 .unwrap() 1491 .guest_memory() 1492 .memory() 1493 .last_addr() 1494 .0 1495 + 1; 1496 let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size); 1497 let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0); 1498 1499 let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new( 1500 mem_below_4g, 1501 mem_above_4g, 1502 reset_evt, 1503 ))); 1504 1505 self.bus_devices 1506 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>); 1507 1508 self.address_manager 1509 .io_bus 1510 .insert(cmos, 0x70, 0x2) 1511 .map_err(DeviceManagerError::BusError)?; 1512 } 1513 #[cfg(feature = "fwdebug")] 1514 { 1515 let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new())); 1516 1517 self.bus_devices 1518 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>); 1519 1520 self.address_manager 1521 .io_bus 1522 .insert(fwdebug, 0x402, 0x1) 1523 .map_err(DeviceManagerError::BusError)?; 1524 } 1525 1526 // 0x80 debug port 1527 let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp))); 1528 self.bus_devices 1529 .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>); 1530 self.address_manager 1531 .io_bus 1532 .insert(debug_port, 0x80, 0x1) 1533 .map_err(DeviceManagerError::BusError)?; 1534 1535 Ok(()) 1536 } 1537 1538 #[cfg(target_arch = "aarch64")] 1539 fn add_legacy_devices( 1540 &mut self, 1541 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1542 ) -> DeviceManagerResult<()> { 1543 // Add a RTC device 1544 let rtc_irq = self 1545 .address_manager 1546 .allocator 1547 .lock() 1548 .unwrap() 1549 .allocate_irq() 1550 .unwrap(); 1551 1552 let interrupt_group = interrupt_manager 1553 .create_group(LegacyIrqGroupConfig { 1554 irq: rtc_irq as InterruptIndex, 1555 }) 1556 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1557 1558 let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group))); 1559 1560 self.bus_devices 1561 .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>); 1562 1563 let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START; 1564 1565 self.address_manager 1566 .mmio_bus 1567 .insert(rtc_device, addr.0, MMIO_LEN) 1568 .map_err(DeviceManagerError::BusError)?; 1569 1570 self.id_to_dev_info.insert( 1571 (DeviceType::Rtc, "rtc".to_string()), 1572 MmioDeviceInfo { 1573 addr: addr.0, 1574 len: MMIO_LEN, 1575 irq: rtc_irq, 1576 }, 1577 ); 1578 1579 // Add a GPIO device 1580 let id = String::from(GPIO_DEVICE_NAME); 1581 let gpio_irq = self 1582 .address_manager 1583 .allocator 1584 .lock() 1585 .unwrap() 1586 .allocate_irq() 1587 .unwrap(); 1588 1589 let interrupt_group = interrupt_manager 1590 .create_group(LegacyIrqGroupConfig { 1591 irq: gpio_irq as InterruptIndex, 1592 }) 1593 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1594 1595 let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new( 1596 id.clone(), 1597 interrupt_group, 1598 ))); 1599 1600 self.bus_devices 1601 .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>); 1602 1603 let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START; 1604 1605 self.address_manager 1606 .mmio_bus 1607 .insert(gpio_device.clone(), addr.0, MMIO_LEN) 1608 .map_err(DeviceManagerError::BusError)?; 1609 1610 self.gpio_device = Some(gpio_device.clone()); 1611 1612 self.id_to_dev_info.insert( 1613 (DeviceType::Gpio, "gpio".to_string()), 1614 MmioDeviceInfo { 1615 addr: addr.0, 1616 len: MMIO_LEN, 1617 irq: gpio_irq, 1618 }, 1619 ); 1620 1621 self.device_tree 1622 .lock() 1623 .unwrap() 1624 .insert(id.clone(), device_node!(id, gpio_device)); 1625 1626 // On AArch64, the UEFI binary requires a flash device at address 0. 1627 // 4 MiB memory is mapped to simulate the flash. 1628 let uefi_mem_slot = self.memory_manager.lock().unwrap().allocate_memory_slot(); 1629 let uefi_region = GuestRegionMmap::new( 1630 MmapRegion::new(arch::layout::UEFI_SIZE as usize).unwrap(), 1631 arch::layout::UEFI_START, 1632 ) 1633 .unwrap(); 1634 let uefi_mem_region = self 1635 .memory_manager 1636 .lock() 1637 .unwrap() 1638 .vm 1639 .make_user_memory_region( 1640 uefi_mem_slot, 1641 uefi_region.start_addr().raw_value(), 1642 uefi_region.len() as u64, 1643 uefi_region.as_ptr() as u64, 1644 false, 1645 false, 1646 ); 1647 self.memory_manager 1648 .lock() 1649 .unwrap() 1650 .vm 1651 .create_user_memory_region(uefi_mem_region) 1652 .map_err(DeviceManagerError::CreateUefiFlash)?; 1653 1654 let uefi_flash = 1655 GuestMemoryAtomic::new(GuestMemoryMmap::from_regions(vec![uefi_region]).unwrap()); 1656 self.uefi_flash = Some(uefi_flash); 1657 1658 Ok(()) 1659 } 1660 1661 #[cfg(target_arch = "x86_64")] 1662 fn add_serial_device( 1663 &mut self, 1664 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1665 serial_writer: Option<Box<dyn io::Write + Send>>, 1666 ) -> DeviceManagerResult<Arc<Mutex<Serial>>> { 1667 // Serial is tied to IRQ #4 1668 let serial_irq = 4; 1669 1670 let id = String::from(SERIAL_DEVICE_NAME); 1671 1672 let interrupt_group = interrupt_manager 1673 .create_group(LegacyIrqGroupConfig { 1674 irq: serial_irq as InterruptIndex, 1675 }) 1676 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1677 1678 let serial = Arc::new(Mutex::new(Serial::new( 1679 id.clone(), 1680 interrupt_group, 1681 serial_writer, 1682 ))); 1683 1684 self.bus_devices 1685 .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>); 1686 1687 self.address_manager 1688 .allocator 1689 .lock() 1690 .unwrap() 1691 .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None) 1692 .ok_or(DeviceManagerError::AllocateIoPort)?; 1693 1694 self.address_manager 1695 .io_bus 1696 .insert(serial.clone(), 0x3f8, 0x8) 1697 .map_err(DeviceManagerError::BusError)?; 1698 1699 // Fill the device tree with a new node. In case of restore, we 1700 // know there is nothing to do, so we can simply override the 1701 // existing entry. 1702 self.device_tree 1703 .lock() 1704 .unwrap() 1705 .insert(id.clone(), device_node!(id, serial)); 1706 1707 Ok(serial) 1708 } 1709 1710 #[cfg(target_arch = "aarch64")] 1711 fn add_serial_device( 1712 &mut self, 1713 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1714 serial_writer: Option<Box<dyn io::Write + Send>>, 1715 ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> { 1716 let id = String::from(SERIAL_DEVICE_NAME); 1717 1718 let serial_irq = self 1719 .address_manager 1720 .allocator 1721 .lock() 1722 .unwrap() 1723 .allocate_irq() 1724 .unwrap(); 1725 1726 let interrupt_group = interrupt_manager 1727 .create_group(LegacyIrqGroupConfig { 1728 irq: serial_irq as InterruptIndex, 1729 }) 1730 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1731 1732 let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new( 1733 id.clone(), 1734 interrupt_group, 1735 serial_writer, 1736 self.timestamp, 1737 ))); 1738 1739 self.bus_devices 1740 .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>); 1741 1742 let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START; 1743 1744 self.address_manager 1745 .mmio_bus 1746 .insert(serial.clone(), addr.0, MMIO_LEN) 1747 .map_err(DeviceManagerError::BusError)?; 1748 1749 self.id_to_dev_info.insert( 1750 (DeviceType::Serial, DeviceType::Serial.to_string()), 1751 MmioDeviceInfo { 1752 addr: addr.0, 1753 len: MMIO_LEN, 1754 irq: serial_irq, 1755 }, 1756 ); 1757 1758 self.cmdline_additions 1759 .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0)); 1760 1761 // Fill the device tree with a new node. In case of restore, we 1762 // know there is nothing to do, so we can simply override the 1763 // existing entry. 1764 self.device_tree 1765 .lock() 1766 .unwrap() 1767 .insert(id.clone(), device_node!(id, serial)); 1768 1769 Ok(serial) 1770 } 1771 1772 fn modify_mode<F: FnOnce(&mut termios)>( 1773 &self, 1774 fd: RawFd, 1775 f: F, 1776 ) -> vmm_sys_util::errno::Result<()> { 1777 // SAFETY: safe because we check the return value of isatty. 1778 if unsafe { isatty(fd) } != 1 { 1779 return Ok(()); 1780 } 1781 1782 // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr 1783 // and we check the return result. 1784 let mut termios: termios = unsafe { zeroed() }; 1785 let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) }; 1786 if ret < 0 { 1787 return vmm_sys_util::errno::errno_result(); 1788 } 1789 f(&mut termios); 1790 // SAFETY: Safe because the syscall will only read the extent of termios and we check 1791 // the return result. 1792 let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) }; 1793 if ret < 0 { 1794 return vmm_sys_util::errno::errno_result(); 1795 } 1796 1797 Ok(()) 1798 } 1799 1800 fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> { 1801 // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode. 1802 self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) }) 1803 } 1804 1805 fn listen_for_sigwinch_on_tty(&mut self, pty: &File) -> std::io::Result<()> { 1806 let seccomp_filter = 1807 get_seccomp_filter(&self.seccomp_action, Thread::PtyForeground).unwrap(); 1808 1809 match start_sigwinch_listener(seccomp_filter, pty) { 1810 Ok(pipe) => { 1811 self.console_resize_pipe = Some(Arc::new(pipe)); 1812 } 1813 Err(e) => { 1814 warn!("Ignoring error from setting up SIGWINCH listener: {}", e) 1815 } 1816 } 1817 1818 Ok(()) 1819 } 1820 1821 fn add_virtio_console_device( 1822 &mut self, 1823 virtio_devices: &mut Vec<MetaVirtioDevice>, 1824 console_pty: Option<PtyPair>, 1825 resize_pipe: Option<File>, 1826 ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> { 1827 let console_config = self.config.lock().unwrap().console.clone(); 1828 let endpoint = match console_config.mode { 1829 ConsoleOutputMode::File => { 1830 let file = File::create(console_config.file.as_ref().unwrap()) 1831 .map_err(DeviceManagerError::ConsoleOutputFileOpen)?; 1832 Endpoint::File(file) 1833 } 1834 ConsoleOutputMode::Pty => { 1835 if let Some(pty) = console_pty { 1836 self.config.lock().unwrap().console.file = Some(pty.path.clone()); 1837 let file = pty.main.try_clone().unwrap(); 1838 self.console_pty = Some(Arc::new(Mutex::new(pty))); 1839 self.console_resize_pipe = resize_pipe.map(Arc::new); 1840 Endpoint::FilePair(file.try_clone().unwrap(), file) 1841 } else { 1842 let (main, mut sub, path) = 1843 create_pty(false).map_err(DeviceManagerError::ConsolePtyOpen)?; 1844 self.set_raw_mode(&mut sub) 1845 .map_err(DeviceManagerError::SetPtyRaw)?; 1846 self.config.lock().unwrap().console.file = Some(path.clone()); 1847 let file = main.try_clone().unwrap(); 1848 assert!(resize_pipe.is_none()); 1849 self.listen_for_sigwinch_on_tty(&sub).unwrap(); 1850 self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path }))); 1851 Endpoint::FilePair(file.try_clone().unwrap(), file) 1852 } 1853 } 1854 ConsoleOutputMode::Tty => { 1855 // Duplicating the file descriptors like this is needed as otherwise 1856 // they will be closed on a reboot and the numbers reused 1857 1858 // SAFETY: FFI call to dup. Trivially safe. 1859 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) }; 1860 if stdout == -1 { 1861 return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd); 1862 } 1863 // SAFETY: stdout is valid and owned solely by us. 1864 let stdout = unsafe { File::from_raw_fd(stdout) }; 1865 1866 // If an interactive TTY then we can accept input 1867 // SAFETY: FFI call. Trivially safe. 1868 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } { 1869 // SAFETY: FFI call to dup. Trivially safe. 1870 let stdin = unsafe { libc::dup(libc::STDIN_FILENO) }; 1871 if stdin == -1 { 1872 return vmm_sys_util::errno::errno_result() 1873 .map_err(DeviceManagerError::DupFd); 1874 } 1875 // SAFETY: stdin is valid and owned solely by us. 1876 let stdin = unsafe { File::from_raw_fd(stdin) }; 1877 1878 Endpoint::FilePair(stdout, stdin) 1879 } else { 1880 Endpoint::File(stdout) 1881 } 1882 } 1883 ConsoleOutputMode::Null => Endpoint::Null, 1884 ConsoleOutputMode::Off => return Ok(None), 1885 }; 1886 let id = String::from(CONSOLE_DEVICE_NAME); 1887 1888 let (virtio_console_device, console_resizer) = virtio_devices::Console::new( 1889 id.clone(), 1890 endpoint, 1891 self.console_resize_pipe 1892 .as_ref() 1893 .map(|p| p.try_clone().unwrap()), 1894 self.force_iommu | console_config.iommu, 1895 self.seccomp_action.clone(), 1896 self.exit_evt 1897 .try_clone() 1898 .map_err(DeviceManagerError::EventFd)?, 1899 ) 1900 .map_err(DeviceManagerError::CreateVirtioConsole)?; 1901 let virtio_console_device = Arc::new(Mutex::new(virtio_console_device)); 1902 virtio_devices.push(MetaVirtioDevice { 1903 virtio_device: Arc::clone(&virtio_console_device) 1904 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 1905 iommu: console_config.iommu, 1906 id: id.clone(), 1907 pci_segment: 0, 1908 dma_handler: None, 1909 }); 1910 1911 // Fill the device tree with a new node. In case of restore, we 1912 // know there is nothing to do, so we can simply override the 1913 // existing entry. 1914 self.device_tree 1915 .lock() 1916 .unwrap() 1917 .insert(id.clone(), device_node!(id, virtio_console_device)); 1918 1919 // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY 1920 Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) { 1921 Some(console_resizer) 1922 } else { 1923 None 1924 }) 1925 } 1926 1927 fn add_console_device( 1928 &mut self, 1929 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1930 virtio_devices: &mut Vec<MetaVirtioDevice>, 1931 serial_pty: Option<PtyPair>, 1932 console_pty: Option<PtyPair>, 1933 console_resize_pipe: Option<File>, 1934 ) -> DeviceManagerResult<Arc<Console>> { 1935 let serial_config = self.config.lock().unwrap().serial.clone(); 1936 let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode { 1937 ConsoleOutputMode::File => Some(Box::new( 1938 File::create(serial_config.file.as_ref().unwrap()) 1939 .map_err(DeviceManagerError::SerialOutputFileOpen)?, 1940 )), 1941 ConsoleOutputMode::Pty => { 1942 if let Some(pty) = serial_pty { 1943 self.config.lock().unwrap().serial.file = Some(pty.path.clone()); 1944 self.serial_pty = Some(Arc::new(Mutex::new(pty))); 1945 } else { 1946 let (main, mut sub, path) = 1947 create_pty(true).map_err(DeviceManagerError::SerialPtyOpen)?; 1948 self.set_raw_mode(&mut sub) 1949 .map_err(DeviceManagerError::SetPtyRaw)?; 1950 self.config.lock().unwrap().serial.file = Some(path.clone()); 1951 self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path }))); 1952 } 1953 None 1954 } 1955 ConsoleOutputMode::Tty => Some(Box::new(stdout())), 1956 ConsoleOutputMode::Off | ConsoleOutputMode::Null => None, 1957 }; 1958 if serial_config.mode != ConsoleOutputMode::Off { 1959 let serial = self.add_serial_device(interrupt_manager, serial_writer)?; 1960 self.serial_manager = match serial_config.mode { 1961 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty => { 1962 let serial_manager = 1963 SerialManager::new(serial, self.serial_pty.clone(), serial_config.mode) 1964 .map_err(DeviceManagerError::CreateSerialManager)?; 1965 if let Some(mut serial_manager) = serial_manager { 1966 serial_manager 1967 .start_thread( 1968 self.exit_evt 1969 .try_clone() 1970 .map_err(DeviceManagerError::EventFd)?, 1971 ) 1972 .map_err(DeviceManagerError::SpawnSerialManager)?; 1973 Some(Arc::new(serial_manager)) 1974 } else { 1975 None 1976 } 1977 } 1978 _ => None, 1979 }; 1980 } 1981 1982 let console_resizer = 1983 self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?; 1984 1985 Ok(Arc::new(Console { console_resizer })) 1986 } 1987 1988 fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 1989 let mut devices: Vec<MetaVirtioDevice> = Vec::new(); 1990 1991 // Create "standard" virtio devices (net/block/rng) 1992 devices.append(&mut self.make_virtio_block_devices()?); 1993 devices.append(&mut self.make_virtio_net_devices()?); 1994 devices.append(&mut self.make_virtio_rng_devices()?); 1995 1996 // Add virtio-fs if required 1997 devices.append(&mut self.make_virtio_fs_devices()?); 1998 1999 // Add virtio-pmem if required 2000 devices.append(&mut self.make_virtio_pmem_devices()?); 2001 2002 // Add virtio-vsock if required 2003 devices.append(&mut self.make_virtio_vsock_devices()?); 2004 2005 devices.append(&mut self.make_virtio_mem_devices()?); 2006 2007 // Add virtio-balloon if required 2008 devices.append(&mut self.make_virtio_balloon_devices()?); 2009 2010 // Add virtio-watchdog device 2011 devices.append(&mut self.make_virtio_watchdog_devices()?); 2012 2013 // Add vDPA devices if required 2014 devices.append(&mut self.make_vdpa_devices()?); 2015 2016 Ok(devices) 2017 } 2018 2019 // Cache whether io_uring is supported to avoid probing for very block device 2020 fn io_uring_is_supported(&mut self) -> bool { 2021 if let Some(supported) = self.io_uring_supported { 2022 return supported; 2023 } 2024 2025 let supported = block_io_uring_is_supported(); 2026 self.io_uring_supported = Some(supported); 2027 supported 2028 } 2029 2030 fn make_virtio_block_device( 2031 &mut self, 2032 disk_cfg: &mut DiskConfig, 2033 ) -> DeviceManagerResult<MetaVirtioDevice> { 2034 let id = if let Some(id) = &disk_cfg.id { 2035 id.clone() 2036 } else { 2037 let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?; 2038 disk_cfg.id = Some(id.clone()); 2039 id 2040 }; 2041 2042 info!("Creating virtio-block device: {:?}", disk_cfg); 2043 2044 let (virtio_device, migratable_device) = if disk_cfg.vhost_user { 2045 let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone(); 2046 let vu_cfg = VhostUserConfig { 2047 socket, 2048 num_queues: disk_cfg.num_queues, 2049 queue_size: disk_cfg.queue_size, 2050 }; 2051 let vhost_user_block = Arc::new(Mutex::new( 2052 match virtio_devices::vhost_user::Blk::new( 2053 id.clone(), 2054 vu_cfg, 2055 self.restoring, 2056 self.seccomp_action.clone(), 2057 self.exit_evt 2058 .try_clone() 2059 .map_err(DeviceManagerError::EventFd)?, 2060 self.force_iommu, 2061 ) { 2062 Ok(vub_device) => vub_device, 2063 Err(e) => { 2064 return Err(DeviceManagerError::CreateVhostUserBlk(e)); 2065 } 2066 }, 2067 )); 2068 2069 ( 2070 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2071 vhost_user_block as Arc<Mutex<dyn Migratable>>, 2072 ) 2073 } else { 2074 let mut options = OpenOptions::new(); 2075 options.read(true); 2076 options.write(!disk_cfg.readonly); 2077 if disk_cfg.direct { 2078 options.custom_flags(libc::O_DIRECT); 2079 } 2080 // Open block device path 2081 let mut file: File = options 2082 .open( 2083 disk_cfg 2084 .path 2085 .as_ref() 2086 .ok_or(DeviceManagerError::NoDiskPath)? 2087 .clone(), 2088 ) 2089 .map_err(DeviceManagerError::Disk)?; 2090 let image_type = 2091 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?; 2092 2093 let image = match image_type { 2094 ImageType::FixedVhd => { 2095 // Use asynchronous backend relying on io_uring if the 2096 // syscalls are supported. 2097 if self.io_uring_is_supported() && !disk_cfg.disable_io_uring { 2098 info!("Using asynchronous fixed VHD disk file (io_uring)"); 2099 Box::new( 2100 FixedVhdDiskAsync::new(file) 2101 .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?, 2102 ) as Box<dyn DiskFile> 2103 } else { 2104 info!("Using synchronous fixed VHD disk file"); 2105 Box::new( 2106 FixedVhdDiskSync::new(file) 2107 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?, 2108 ) as Box<dyn DiskFile> 2109 } 2110 } 2111 ImageType::Raw => { 2112 // Use asynchronous backend relying on io_uring if the 2113 // syscalls are supported. 2114 if self.io_uring_is_supported() && !disk_cfg.disable_io_uring { 2115 info!("Using asynchronous RAW disk file (io_uring)"); 2116 Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile> 2117 } else { 2118 info!("Using synchronous RAW disk file"); 2119 Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile> 2120 } 2121 } 2122 ImageType::Qcow2 => { 2123 info!("Using synchronous QCOW disk file"); 2124 Box::new( 2125 QcowDiskSync::new(file, disk_cfg.direct) 2126 .map_err(DeviceManagerError::CreateQcowDiskSync)?, 2127 ) as Box<dyn DiskFile> 2128 } 2129 ImageType::Vhdx => { 2130 info!("Using synchronous VHDX disk file"); 2131 Box::new( 2132 VhdxDiskSync::new(file) 2133 .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?, 2134 ) as Box<dyn DiskFile> 2135 } 2136 }; 2137 2138 let virtio_block = Arc::new(Mutex::new( 2139 virtio_devices::Block::new( 2140 id.clone(), 2141 image, 2142 disk_cfg 2143 .path 2144 .as_ref() 2145 .ok_or(DeviceManagerError::NoDiskPath)? 2146 .clone(), 2147 disk_cfg.readonly, 2148 self.force_iommu | disk_cfg.iommu, 2149 disk_cfg.num_queues, 2150 disk_cfg.queue_size, 2151 self.seccomp_action.clone(), 2152 disk_cfg.rate_limiter_config, 2153 self.exit_evt 2154 .try_clone() 2155 .map_err(DeviceManagerError::EventFd)?, 2156 ) 2157 .map_err(DeviceManagerError::CreateVirtioBlock)?, 2158 )); 2159 2160 ( 2161 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2162 virtio_block as Arc<Mutex<dyn Migratable>>, 2163 ) 2164 }; 2165 2166 // Fill the device tree with a new node. In case of restore, we 2167 // know there is nothing to do, so we can simply override the 2168 // existing entry. 2169 self.device_tree 2170 .lock() 2171 .unwrap() 2172 .insert(id.clone(), device_node!(id, migratable_device)); 2173 2174 Ok(MetaVirtioDevice { 2175 virtio_device, 2176 iommu: disk_cfg.iommu, 2177 id, 2178 pci_segment: disk_cfg.pci_segment, 2179 dma_handler: None, 2180 }) 2181 } 2182 2183 fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2184 let mut devices = Vec::new(); 2185 2186 let mut block_devices = self.config.lock().unwrap().disks.clone(); 2187 if let Some(disk_list_cfg) = &mut block_devices { 2188 for disk_cfg in disk_list_cfg.iter_mut() { 2189 devices.push(self.make_virtio_block_device(disk_cfg)?); 2190 } 2191 } 2192 self.config.lock().unwrap().disks = block_devices; 2193 2194 Ok(devices) 2195 } 2196 2197 fn make_virtio_net_device( 2198 &mut self, 2199 net_cfg: &mut NetConfig, 2200 ) -> DeviceManagerResult<MetaVirtioDevice> { 2201 let id = if let Some(id) = &net_cfg.id { 2202 id.clone() 2203 } else { 2204 let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?; 2205 net_cfg.id = Some(id.clone()); 2206 id 2207 }; 2208 info!("Creating virtio-net device: {:?}", net_cfg); 2209 2210 let (virtio_device, migratable_device) = if net_cfg.vhost_user { 2211 let socket = net_cfg.vhost_socket.as_ref().unwrap().clone(); 2212 let vu_cfg = VhostUserConfig { 2213 socket, 2214 num_queues: net_cfg.num_queues, 2215 queue_size: net_cfg.queue_size, 2216 }; 2217 let server = match net_cfg.vhost_mode { 2218 VhostMode::Client => false, 2219 VhostMode::Server => true, 2220 }; 2221 let vhost_user_net = Arc::new(Mutex::new( 2222 match virtio_devices::vhost_user::Net::new( 2223 id.clone(), 2224 net_cfg.mac, 2225 vu_cfg, 2226 server, 2227 self.seccomp_action.clone(), 2228 self.restoring, 2229 self.exit_evt 2230 .try_clone() 2231 .map_err(DeviceManagerError::EventFd)?, 2232 self.force_iommu, 2233 ) { 2234 Ok(vun_device) => vun_device, 2235 Err(e) => { 2236 return Err(DeviceManagerError::CreateVhostUserNet(e)); 2237 } 2238 }, 2239 )); 2240 2241 ( 2242 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2243 vhost_user_net as Arc<Mutex<dyn Migratable>>, 2244 ) 2245 } else { 2246 let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap { 2247 Arc::new(Mutex::new( 2248 virtio_devices::Net::new( 2249 id.clone(), 2250 Some(tap_if_name), 2251 None, 2252 None, 2253 Some(net_cfg.mac), 2254 &mut net_cfg.host_mac, 2255 self.force_iommu | net_cfg.iommu, 2256 net_cfg.num_queues, 2257 net_cfg.queue_size, 2258 self.seccomp_action.clone(), 2259 net_cfg.rate_limiter_config, 2260 self.exit_evt 2261 .try_clone() 2262 .map_err(DeviceManagerError::EventFd)?, 2263 ) 2264 .map_err(DeviceManagerError::CreateVirtioNet)?, 2265 )) 2266 } else if let Some(fds) = &net_cfg.fds { 2267 Arc::new(Mutex::new( 2268 virtio_devices::Net::from_tap_fds( 2269 id.clone(), 2270 fds, 2271 Some(net_cfg.mac), 2272 self.force_iommu | net_cfg.iommu, 2273 net_cfg.queue_size, 2274 self.seccomp_action.clone(), 2275 net_cfg.rate_limiter_config, 2276 self.exit_evt 2277 .try_clone() 2278 .map_err(DeviceManagerError::EventFd)?, 2279 ) 2280 .map_err(DeviceManagerError::CreateVirtioNet)?, 2281 )) 2282 } else { 2283 Arc::new(Mutex::new( 2284 virtio_devices::Net::new( 2285 id.clone(), 2286 None, 2287 Some(net_cfg.ip), 2288 Some(net_cfg.mask), 2289 Some(net_cfg.mac), 2290 &mut net_cfg.host_mac, 2291 self.force_iommu | net_cfg.iommu, 2292 net_cfg.num_queues, 2293 net_cfg.queue_size, 2294 self.seccomp_action.clone(), 2295 net_cfg.rate_limiter_config, 2296 self.exit_evt 2297 .try_clone() 2298 .map_err(DeviceManagerError::EventFd)?, 2299 ) 2300 .map_err(DeviceManagerError::CreateVirtioNet)?, 2301 )) 2302 }; 2303 2304 ( 2305 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2306 virtio_net as Arc<Mutex<dyn Migratable>>, 2307 ) 2308 }; 2309 2310 // Fill the device tree with a new node. In case of restore, we 2311 // know there is nothing to do, so we can simply override the 2312 // existing entry. 2313 self.device_tree 2314 .lock() 2315 .unwrap() 2316 .insert(id.clone(), device_node!(id, migratable_device)); 2317 2318 Ok(MetaVirtioDevice { 2319 virtio_device, 2320 iommu: net_cfg.iommu, 2321 id, 2322 pci_segment: net_cfg.pci_segment, 2323 dma_handler: None, 2324 }) 2325 } 2326 2327 /// Add virto-net and vhost-user-net devices 2328 fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2329 let mut devices = Vec::new(); 2330 let mut net_devices = self.config.lock().unwrap().net.clone(); 2331 if let Some(net_list_cfg) = &mut net_devices { 2332 for net_cfg in net_list_cfg.iter_mut() { 2333 devices.push(self.make_virtio_net_device(net_cfg)?); 2334 } 2335 } 2336 self.config.lock().unwrap().net = net_devices; 2337 2338 Ok(devices) 2339 } 2340 2341 fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2342 let mut devices = Vec::new(); 2343 2344 // Add virtio-rng if required 2345 let rng_config = self.config.lock().unwrap().rng.clone(); 2346 if let Some(rng_path) = rng_config.src.to_str() { 2347 info!("Creating virtio-rng device: {:?}", rng_config); 2348 let id = String::from(RNG_DEVICE_NAME); 2349 2350 let virtio_rng_device = Arc::new(Mutex::new( 2351 virtio_devices::Rng::new( 2352 id.clone(), 2353 rng_path, 2354 self.force_iommu | rng_config.iommu, 2355 self.seccomp_action.clone(), 2356 self.exit_evt 2357 .try_clone() 2358 .map_err(DeviceManagerError::EventFd)?, 2359 ) 2360 .map_err(DeviceManagerError::CreateVirtioRng)?, 2361 )); 2362 devices.push(MetaVirtioDevice { 2363 virtio_device: Arc::clone(&virtio_rng_device) 2364 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2365 iommu: rng_config.iommu, 2366 id: id.clone(), 2367 pci_segment: 0, 2368 dma_handler: None, 2369 }); 2370 2371 // Fill the device tree with a new node. In case of restore, we 2372 // know there is nothing to do, so we can simply override the 2373 // existing entry. 2374 self.device_tree 2375 .lock() 2376 .unwrap() 2377 .insert(id.clone(), device_node!(id, virtio_rng_device)); 2378 } 2379 2380 Ok(devices) 2381 } 2382 2383 fn make_virtio_fs_device( 2384 &mut self, 2385 fs_cfg: &mut FsConfig, 2386 ) -> DeviceManagerResult<MetaVirtioDevice> { 2387 let id = if let Some(id) = &fs_cfg.id { 2388 id.clone() 2389 } else { 2390 let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?; 2391 fs_cfg.id = Some(id.clone()); 2392 id 2393 }; 2394 2395 info!("Creating virtio-fs device: {:?}", fs_cfg); 2396 2397 let mut node = device_node!(id); 2398 2399 if let Some(fs_socket) = fs_cfg.socket.to_str() { 2400 let virtio_fs_device = Arc::new(Mutex::new( 2401 virtio_devices::vhost_user::Fs::new( 2402 id.clone(), 2403 fs_socket, 2404 &fs_cfg.tag, 2405 fs_cfg.num_queues, 2406 fs_cfg.queue_size, 2407 None, 2408 self.seccomp_action.clone(), 2409 self.restoring, 2410 self.exit_evt 2411 .try_clone() 2412 .map_err(DeviceManagerError::EventFd)?, 2413 self.force_iommu, 2414 ) 2415 .map_err(DeviceManagerError::CreateVirtioFs)?, 2416 )); 2417 2418 // Update the device tree with the migratable device. 2419 node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>); 2420 self.device_tree.lock().unwrap().insert(id.clone(), node); 2421 2422 Ok(MetaVirtioDevice { 2423 virtio_device: Arc::clone(&virtio_fs_device) 2424 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2425 iommu: false, 2426 id, 2427 pci_segment: fs_cfg.pci_segment, 2428 dma_handler: None, 2429 }) 2430 } else { 2431 Err(DeviceManagerError::NoVirtioFsSock) 2432 } 2433 } 2434 2435 fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2436 let mut devices = Vec::new(); 2437 2438 let mut fs_devices = self.config.lock().unwrap().fs.clone(); 2439 if let Some(fs_list_cfg) = &mut fs_devices { 2440 for fs_cfg in fs_list_cfg.iter_mut() { 2441 devices.push(self.make_virtio_fs_device(fs_cfg)?); 2442 } 2443 } 2444 self.config.lock().unwrap().fs = fs_devices; 2445 2446 Ok(devices) 2447 } 2448 2449 fn make_virtio_pmem_device( 2450 &mut self, 2451 pmem_cfg: &mut PmemConfig, 2452 ) -> DeviceManagerResult<MetaVirtioDevice> { 2453 let id = if let Some(id) = &pmem_cfg.id { 2454 id.clone() 2455 } else { 2456 let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?; 2457 pmem_cfg.id = Some(id.clone()); 2458 id 2459 }; 2460 2461 info!("Creating virtio-pmem device: {:?}", pmem_cfg); 2462 2463 let mut node = device_node!(id); 2464 2465 // Look for the id in the device tree. If it can be found, that means 2466 // the device is being restored, otherwise it's created from scratch. 2467 let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) { 2468 info!("Restoring virtio-pmem {} resources", id); 2469 2470 let mut region_range: Option<(u64, u64)> = None; 2471 for resource in node.resources.iter() { 2472 match resource { 2473 Resource::MmioAddressRange { base, size } => { 2474 if region_range.is_some() { 2475 return Err(DeviceManagerError::ResourceAlreadyExists); 2476 } 2477 2478 region_range = Some((*base, *size)); 2479 } 2480 _ => { 2481 error!("Unexpected resource {:?} for {}", resource, id); 2482 } 2483 } 2484 } 2485 2486 if region_range.is_none() { 2487 return Err(DeviceManagerError::MissingVirtioPmemResources); 2488 } 2489 2490 region_range 2491 } else { 2492 None 2493 }; 2494 2495 let (custom_flags, set_len) = if pmem_cfg.file.is_dir() { 2496 if pmem_cfg.size.is_none() { 2497 return Err(DeviceManagerError::PmemWithDirectorySizeMissing); 2498 } 2499 (O_TMPFILE, true) 2500 } else { 2501 (0, false) 2502 }; 2503 2504 let mut file = OpenOptions::new() 2505 .read(true) 2506 .write(!pmem_cfg.discard_writes) 2507 .custom_flags(custom_flags) 2508 .open(&pmem_cfg.file) 2509 .map_err(DeviceManagerError::PmemFileOpen)?; 2510 2511 let size = if let Some(size) = pmem_cfg.size { 2512 if set_len { 2513 file.set_len(size) 2514 .map_err(DeviceManagerError::PmemFileSetLen)?; 2515 } 2516 size 2517 } else { 2518 file.seek(SeekFrom::End(0)) 2519 .map_err(DeviceManagerError::PmemFileSetLen)? 2520 }; 2521 2522 if size % 0x20_0000 != 0 { 2523 return Err(DeviceManagerError::PmemSizeNotAligned); 2524 } 2525 2526 let (region_base, region_size) = if let Some((base, size)) = region_range { 2527 // The memory needs to be 2MiB aligned in order to support 2528 // hugepages. 2529 self.pci_segments[pmem_cfg.pci_segment as usize] 2530 .allocator 2531 .lock() 2532 .unwrap() 2533 .allocate( 2534 Some(GuestAddress(base)), 2535 size as GuestUsize, 2536 Some(0x0020_0000), 2537 ) 2538 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2539 2540 (base, size) 2541 } else { 2542 // The memory needs to be 2MiB aligned in order to support 2543 // hugepages. 2544 let base = self.pci_segments[pmem_cfg.pci_segment as usize] 2545 .allocator 2546 .lock() 2547 .unwrap() 2548 .allocate(None, size as GuestUsize, Some(0x0020_0000)) 2549 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2550 2551 (base.raw_value(), size) 2552 }; 2553 2554 let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?; 2555 let mmap_region = MmapRegion::build( 2556 Some(FileOffset::new(cloned_file, 0)), 2557 region_size as usize, 2558 PROT_READ | PROT_WRITE, 2559 MAP_NORESERVE 2560 | if pmem_cfg.discard_writes { 2561 MAP_PRIVATE 2562 } else { 2563 MAP_SHARED 2564 }, 2565 ) 2566 .map_err(DeviceManagerError::NewMmapRegion)?; 2567 let host_addr: u64 = mmap_region.as_ptr() as u64; 2568 2569 let mem_slot = self 2570 .memory_manager 2571 .lock() 2572 .unwrap() 2573 .create_userspace_mapping(region_base, region_size, host_addr, false, false, false) 2574 .map_err(DeviceManagerError::MemoryManager)?; 2575 2576 let mapping = virtio_devices::UserspaceMapping { 2577 host_addr, 2578 mem_slot, 2579 addr: GuestAddress(region_base), 2580 len: region_size, 2581 mergeable: false, 2582 }; 2583 2584 let virtio_pmem_device = Arc::new(Mutex::new( 2585 virtio_devices::Pmem::new( 2586 id.clone(), 2587 file, 2588 GuestAddress(region_base), 2589 mapping, 2590 mmap_region, 2591 self.force_iommu | pmem_cfg.iommu, 2592 self.seccomp_action.clone(), 2593 self.exit_evt 2594 .try_clone() 2595 .map_err(DeviceManagerError::EventFd)?, 2596 ) 2597 .map_err(DeviceManagerError::CreateVirtioPmem)?, 2598 )); 2599 2600 // Update the device tree with correct resource information and with 2601 // the migratable device. 2602 node.resources.push(Resource::MmioAddressRange { 2603 base: region_base, 2604 size: region_size, 2605 }); 2606 node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>); 2607 self.device_tree.lock().unwrap().insert(id.clone(), node); 2608 2609 Ok(MetaVirtioDevice { 2610 virtio_device: Arc::clone(&virtio_pmem_device) 2611 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2612 iommu: pmem_cfg.iommu, 2613 id, 2614 pci_segment: pmem_cfg.pci_segment, 2615 dma_handler: None, 2616 }) 2617 } 2618 2619 fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2620 let mut devices = Vec::new(); 2621 // Add virtio-pmem if required 2622 let mut pmem_devices = self.config.lock().unwrap().pmem.clone(); 2623 if let Some(pmem_list_cfg) = &mut pmem_devices { 2624 for pmem_cfg in pmem_list_cfg.iter_mut() { 2625 devices.push(self.make_virtio_pmem_device(pmem_cfg)?); 2626 } 2627 } 2628 self.config.lock().unwrap().pmem = pmem_devices; 2629 2630 Ok(devices) 2631 } 2632 2633 fn make_virtio_vsock_device( 2634 &mut self, 2635 vsock_cfg: &mut VsockConfig, 2636 ) -> DeviceManagerResult<MetaVirtioDevice> { 2637 let id = if let Some(id) = &vsock_cfg.id { 2638 id.clone() 2639 } else { 2640 let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?; 2641 vsock_cfg.id = Some(id.clone()); 2642 id 2643 }; 2644 2645 info!("Creating virtio-vsock device: {:?}", vsock_cfg); 2646 2647 let socket_path = vsock_cfg 2648 .socket 2649 .to_str() 2650 .ok_or(DeviceManagerError::CreateVsockConvertPath)?; 2651 let backend = 2652 virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string()) 2653 .map_err(DeviceManagerError::CreateVsockBackend)?; 2654 2655 let vsock_device = Arc::new(Mutex::new( 2656 virtio_devices::Vsock::new( 2657 id.clone(), 2658 vsock_cfg.cid, 2659 vsock_cfg.socket.clone(), 2660 backend, 2661 self.force_iommu | vsock_cfg.iommu, 2662 self.seccomp_action.clone(), 2663 self.exit_evt 2664 .try_clone() 2665 .map_err(DeviceManagerError::EventFd)?, 2666 ) 2667 .map_err(DeviceManagerError::CreateVirtioVsock)?, 2668 )); 2669 2670 // Fill the device tree with a new node. In case of restore, we 2671 // know there is nothing to do, so we can simply override the 2672 // existing entry. 2673 self.device_tree 2674 .lock() 2675 .unwrap() 2676 .insert(id.clone(), device_node!(id, vsock_device)); 2677 2678 Ok(MetaVirtioDevice { 2679 virtio_device: Arc::clone(&vsock_device) 2680 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2681 iommu: vsock_cfg.iommu, 2682 id, 2683 pci_segment: vsock_cfg.pci_segment, 2684 dma_handler: None, 2685 }) 2686 } 2687 2688 fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2689 let mut devices = Vec::new(); 2690 2691 let mut vsock = self.config.lock().unwrap().vsock.clone(); 2692 if let Some(ref mut vsock_cfg) = &mut vsock { 2693 devices.push(self.make_virtio_vsock_device(vsock_cfg)?); 2694 } 2695 self.config.lock().unwrap().vsock = vsock; 2696 2697 Ok(devices) 2698 } 2699 2700 fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2701 let mut devices = Vec::new(); 2702 2703 let mm = self.memory_manager.clone(); 2704 let mm = mm.lock().unwrap(); 2705 for (memory_zone_id, memory_zone) in mm.memory_zones().iter() { 2706 if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() { 2707 info!("Creating virtio-mem device: id = {}", memory_zone_id); 2708 2709 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id) 2710 .map(|i| i as u16); 2711 2712 let virtio_mem_device = Arc::new(Mutex::new( 2713 virtio_devices::Mem::new( 2714 memory_zone_id.clone(), 2715 virtio_mem_zone.region(), 2716 virtio_mem_zone 2717 .resize_handler() 2718 .new_resize_sender() 2719 .map_err(DeviceManagerError::CreateResizeSender)?, 2720 self.seccomp_action.clone(), 2721 node_id, 2722 virtio_mem_zone.hotplugged_size(), 2723 virtio_mem_zone.hugepages(), 2724 self.exit_evt 2725 .try_clone() 2726 .map_err(DeviceManagerError::EventFd)?, 2727 virtio_mem_zone.blocks_state().clone(), 2728 ) 2729 .map_err(DeviceManagerError::CreateVirtioMem)?, 2730 )); 2731 2732 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device)); 2733 2734 devices.push(MetaVirtioDevice { 2735 virtio_device: Arc::clone(&virtio_mem_device) 2736 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2737 iommu: false, 2738 id: memory_zone_id.clone(), 2739 pci_segment: 0, 2740 dma_handler: None, 2741 }); 2742 2743 // Fill the device tree with a new node. In case of restore, we 2744 // know there is nothing to do, so we can simply override the 2745 // existing entry. 2746 self.device_tree.lock().unwrap().insert( 2747 memory_zone_id.clone(), 2748 device_node!(memory_zone_id, virtio_mem_device), 2749 ); 2750 } 2751 } 2752 2753 Ok(devices) 2754 } 2755 2756 fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2757 let mut devices = Vec::new(); 2758 2759 if let Some(balloon_config) = &self.config.lock().unwrap().balloon { 2760 let id = String::from(BALLOON_DEVICE_NAME); 2761 info!("Creating virtio-balloon device: id = {}", id); 2762 2763 let virtio_balloon_device = Arc::new(Mutex::new( 2764 virtio_devices::Balloon::new( 2765 id.clone(), 2766 balloon_config.size, 2767 balloon_config.deflate_on_oom, 2768 balloon_config.free_page_reporting, 2769 self.seccomp_action.clone(), 2770 self.exit_evt 2771 .try_clone() 2772 .map_err(DeviceManagerError::EventFd)?, 2773 ) 2774 .map_err(DeviceManagerError::CreateVirtioBalloon)?, 2775 )); 2776 2777 self.balloon = Some(virtio_balloon_device.clone()); 2778 2779 devices.push(MetaVirtioDevice { 2780 virtio_device: Arc::clone(&virtio_balloon_device) 2781 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2782 iommu: false, 2783 id: id.clone(), 2784 pci_segment: 0, 2785 dma_handler: None, 2786 }); 2787 2788 self.device_tree 2789 .lock() 2790 .unwrap() 2791 .insert(id.clone(), device_node!(id, virtio_balloon_device)); 2792 } 2793 2794 Ok(devices) 2795 } 2796 2797 fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2798 let mut devices = Vec::new(); 2799 2800 if !self.config.lock().unwrap().watchdog { 2801 return Ok(devices); 2802 } 2803 2804 let id = String::from(WATCHDOG_DEVICE_NAME); 2805 info!("Creating virtio-watchdog device: id = {}", id); 2806 2807 let virtio_watchdog_device = Arc::new(Mutex::new( 2808 virtio_devices::Watchdog::new( 2809 id.clone(), 2810 self.reset_evt.try_clone().unwrap(), 2811 self.seccomp_action.clone(), 2812 self.exit_evt 2813 .try_clone() 2814 .map_err(DeviceManagerError::EventFd)?, 2815 ) 2816 .map_err(DeviceManagerError::CreateVirtioWatchdog)?, 2817 )); 2818 devices.push(MetaVirtioDevice { 2819 virtio_device: Arc::clone(&virtio_watchdog_device) 2820 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2821 iommu: false, 2822 id: id.clone(), 2823 pci_segment: 0, 2824 dma_handler: None, 2825 }); 2826 2827 self.device_tree 2828 .lock() 2829 .unwrap() 2830 .insert(id.clone(), device_node!(id, virtio_watchdog_device)); 2831 2832 Ok(devices) 2833 } 2834 2835 fn make_vdpa_device( 2836 &mut self, 2837 vdpa_cfg: &mut VdpaConfig, 2838 ) -> DeviceManagerResult<MetaVirtioDevice> { 2839 let id = if let Some(id) = &vdpa_cfg.id { 2840 id.clone() 2841 } else { 2842 let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?; 2843 vdpa_cfg.id = Some(id.clone()); 2844 id 2845 }; 2846 2847 info!("Creating vDPA device: {:?}", vdpa_cfg); 2848 2849 let device_path = vdpa_cfg 2850 .path 2851 .to_str() 2852 .ok_or(DeviceManagerError::CreateVdpaConvertPath)?; 2853 2854 let vdpa_device = Arc::new(Mutex::new( 2855 virtio_devices::Vdpa::new( 2856 id.clone(), 2857 device_path, 2858 self.memory_manager.lock().unwrap().guest_memory(), 2859 vdpa_cfg.num_queues as u16, 2860 ) 2861 .map_err(DeviceManagerError::CreateVdpa)?, 2862 )); 2863 2864 // Create the DMA handler that is required by the vDPA device 2865 let vdpa_mapping = Arc::new(VdpaDmaMapping::new( 2866 Arc::clone(&vdpa_device), 2867 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 2868 )); 2869 2870 self.device_tree 2871 .lock() 2872 .unwrap() 2873 .insert(id.clone(), device_node!(id)); 2874 2875 Ok(MetaVirtioDevice { 2876 virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2877 iommu: vdpa_cfg.iommu, 2878 id, 2879 pci_segment: vdpa_cfg.pci_segment, 2880 dma_handler: Some(vdpa_mapping), 2881 }) 2882 } 2883 2884 fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2885 let mut devices = Vec::new(); 2886 // Add vdpa if required 2887 let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone(); 2888 if let Some(vdpa_list_cfg) = &mut vdpa_devices { 2889 for vdpa_cfg in vdpa_list_cfg.iter_mut() { 2890 devices.push(self.make_vdpa_device(vdpa_cfg)?); 2891 } 2892 } 2893 self.config.lock().unwrap().vdpa = vdpa_devices; 2894 2895 Ok(devices) 2896 } 2897 2898 fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> { 2899 let start_id = self.device_id_cnt; 2900 loop { 2901 // Generate the temporary name. 2902 let name = format!("{}{}", prefix, self.device_id_cnt); 2903 // Increment the counter. 2904 self.device_id_cnt += Wrapping(1); 2905 // Check if the name is already in use. 2906 if !self.boot_id_list.contains(&name) 2907 && !self.device_tree.lock().unwrap().contains_key(&name) 2908 { 2909 return Ok(name); 2910 } 2911 2912 if self.device_id_cnt == start_id { 2913 // We went through a full loop and there's nothing else we can 2914 // do. 2915 break; 2916 } 2917 } 2918 Err(DeviceManagerError::NoAvailableDeviceName) 2919 } 2920 2921 fn add_passthrough_device( 2922 &mut self, 2923 device_cfg: &mut DeviceConfig, 2924 ) -> DeviceManagerResult<(PciBdf, String)> { 2925 // If the passthrough device has not been created yet, it is created 2926 // here and stored in the DeviceManager structure for future needs. 2927 if self.passthrough_device.is_none() { 2928 self.passthrough_device = Some( 2929 self.address_manager 2930 .vm 2931 .create_passthrough_device() 2932 .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?, 2933 ); 2934 } 2935 2936 self.add_vfio_device(device_cfg) 2937 } 2938 2939 fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> { 2940 let passthrough_device = self 2941 .passthrough_device 2942 .as_ref() 2943 .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?; 2944 2945 // Safe because we know the RawFd is valid. 2946 // 2947 // This dup() is mandatory to be able to give full ownership of the 2948 // file descriptor to the DeviceFd::from_raw_fd() function later in 2949 // the code. 2950 // 2951 // This is particularly needed so that VfioContainer will still have 2952 // a valid file descriptor even if DeviceManager, and therefore the 2953 // passthrough_device are dropped. In case of Drop, the file descriptor 2954 // would be closed, but Linux would still have the duplicated file 2955 // descriptor opened from DeviceFd, preventing from unexpected behavior 2956 // where the VfioContainer would try to use a closed file descriptor. 2957 let dup_device_fd = unsafe { libc::dup(passthrough_device.as_raw_fd()) }; 2958 if dup_device_fd == -1 { 2959 return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd); 2960 } 2961 2962 assert!(passthrough_device.as_any().is::<DeviceFd>()); 2963 2964 // SAFETY the raw fd conversion here is safe because: 2965 // 1. When running on KVM or MSHV, passthrough_device wraps around DeviceFd. 2966 // 2. The conversion here extracts the raw fd and then turns the raw fd into a DeviceFd 2967 // of the same (correct) type. 2968 Ok(Arc::new( 2969 VfioContainer::new(Arc::new(unsafe { DeviceFd::from_raw_fd(dup_device_fd) })) 2970 .map_err(DeviceManagerError::VfioCreate)?, 2971 )) 2972 } 2973 2974 fn add_vfio_device( 2975 &mut self, 2976 device_cfg: &mut DeviceConfig, 2977 ) -> DeviceManagerResult<(PciBdf, String)> { 2978 let vfio_name = if let Some(id) = &device_cfg.id { 2979 id.clone() 2980 } else { 2981 let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?; 2982 device_cfg.id = Some(id.clone()); 2983 id 2984 }; 2985 2986 let (pci_segment_id, pci_device_bdf, resources) = 2987 self.pci_resources(&vfio_name, device_cfg.pci_segment)?; 2988 2989 let mut needs_dma_mapping = false; 2990 2991 // Here we create a new VFIO container for two reasons. Either this is 2992 // the first VFIO device, meaning we need a new VFIO container, which 2993 // will be shared with other VFIO devices. Or the new VFIO device is 2994 // attached to a vIOMMU, meaning we must create a dedicated VFIO 2995 // container. In the vIOMMU use case, we can't let all devices under 2996 // the same VFIO container since we couldn't map/unmap memory for each 2997 // device. That's simply because the map/unmap operations happen at the 2998 // VFIO container level. 2999 let vfio_container = if device_cfg.iommu { 3000 let vfio_container = self.create_vfio_container()?; 3001 3002 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3003 Arc::clone(&vfio_container), 3004 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3005 )); 3006 3007 if let Some(iommu) = &self.iommu_device { 3008 iommu 3009 .lock() 3010 .unwrap() 3011 .add_external_mapping(pci_device_bdf.into(), vfio_mapping); 3012 } else { 3013 return Err(DeviceManagerError::MissingVirtualIommu); 3014 } 3015 3016 vfio_container 3017 } else if let Some(vfio_container) = &self.vfio_container { 3018 Arc::clone(vfio_container) 3019 } else { 3020 let vfio_container = self.create_vfio_container()?; 3021 needs_dma_mapping = true; 3022 self.vfio_container = Some(Arc::clone(&vfio_container)); 3023 3024 vfio_container 3025 }; 3026 3027 let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container)) 3028 .map_err(DeviceManagerError::VfioCreate)?; 3029 3030 if needs_dma_mapping { 3031 // Register DMA mapping in IOMMU. 3032 // Do not register virtio-mem regions, as they are handled directly by 3033 // virtio-mem device itself. 3034 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3035 for region in zone.regions() { 3036 vfio_container 3037 .vfio_dma_map( 3038 region.start_addr().raw_value(), 3039 region.len() as u64, 3040 region.as_ptr() as u64, 3041 ) 3042 .map_err(DeviceManagerError::VfioDmaMap)?; 3043 } 3044 } 3045 3046 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3047 Arc::clone(&vfio_container), 3048 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3049 )); 3050 3051 for virtio_mem_device in self.virtio_mem_devices.iter() { 3052 virtio_mem_device 3053 .lock() 3054 .unwrap() 3055 .add_dma_mapping_handler( 3056 VirtioMemMappingSource::Container, 3057 vfio_mapping.clone(), 3058 ) 3059 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3060 } 3061 } 3062 3063 let legacy_interrupt_group = 3064 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3065 Some( 3066 legacy_interrupt_manager 3067 .create_group(LegacyIrqGroupConfig { 3068 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3069 [pci_device_bdf.device() as usize] 3070 as InterruptIndex, 3071 }) 3072 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3073 ) 3074 } else { 3075 None 3076 }; 3077 3078 let memory_manager = self.memory_manager.clone(); 3079 3080 let vfio_pci_device = VfioPciDevice::new( 3081 vfio_name.clone(), 3082 &self.address_manager.vm, 3083 vfio_device, 3084 vfio_container, 3085 self.msi_interrupt_manager.clone(), 3086 legacy_interrupt_group, 3087 device_cfg.iommu, 3088 pci_device_bdf, 3089 self.restoring, 3090 Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()), 3091 ) 3092 .map_err(DeviceManagerError::VfioPciCreate)?; 3093 3094 let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device)); 3095 3096 let new_resources = self.add_pci_device( 3097 vfio_pci_device.clone(), 3098 vfio_pci_device.clone(), 3099 pci_segment_id, 3100 pci_device_bdf, 3101 resources, 3102 )?; 3103 3104 // When restoring a VM, the restore codepath will take care of mapping 3105 // the MMIO regions based on the information from the snapshot. 3106 if !self.restoring { 3107 vfio_pci_device 3108 .lock() 3109 .unwrap() 3110 .map_mmio_regions() 3111 .map_err(DeviceManagerError::VfioMapRegion)?; 3112 } 3113 3114 let mut node = device_node!(vfio_name, vfio_pci_device); 3115 3116 // Update the device tree with correct resource information. 3117 node.resources = new_resources; 3118 node.pci_bdf = Some(pci_device_bdf); 3119 node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device)); 3120 3121 self.device_tree 3122 .lock() 3123 .unwrap() 3124 .insert(vfio_name.clone(), node); 3125 3126 Ok((pci_device_bdf, vfio_name)) 3127 } 3128 3129 fn add_pci_device( 3130 &mut self, 3131 bus_device: Arc<Mutex<dyn BusDevice>>, 3132 pci_device: Arc<Mutex<dyn PciDevice>>, 3133 segment_id: u16, 3134 bdf: PciBdf, 3135 resources: Option<Vec<Resource>>, 3136 ) -> DeviceManagerResult<Vec<Resource>> { 3137 let bars = pci_device 3138 .lock() 3139 .unwrap() 3140 .allocate_bars( 3141 &self.address_manager.allocator, 3142 &mut self.pci_segments[segment_id as usize] 3143 .allocator 3144 .lock() 3145 .unwrap(), 3146 resources, 3147 ) 3148 .map_err(DeviceManagerError::AllocateBars)?; 3149 3150 let mut pci_bus = self.pci_segments[segment_id as usize] 3151 .pci_bus 3152 .lock() 3153 .unwrap(); 3154 3155 pci_bus 3156 .add_device(bdf.device() as u32, pci_device) 3157 .map_err(DeviceManagerError::AddPciDevice)?; 3158 3159 self.bus_devices.push(Arc::clone(&bus_device)); 3160 3161 pci_bus 3162 .register_mapping( 3163 bus_device, 3164 #[cfg(target_arch = "x86_64")] 3165 self.address_manager.io_bus.as_ref(), 3166 self.address_manager.mmio_bus.as_ref(), 3167 bars.clone(), 3168 ) 3169 .map_err(DeviceManagerError::AddPciDevice)?; 3170 3171 let mut new_resources = Vec::new(); 3172 for bar in bars { 3173 new_resources.push(Resource::PciBar { 3174 index: bar.idx(), 3175 base: bar.addr(), 3176 size: bar.size(), 3177 type_: bar.region_type().into(), 3178 prefetchable: bar.prefetchable().into(), 3179 }); 3180 } 3181 3182 Ok(new_resources) 3183 } 3184 3185 fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3186 let mut iommu_attached_device_ids = Vec::new(); 3187 let mut devices = self.config.lock().unwrap().devices.clone(); 3188 3189 if let Some(device_list_cfg) = &mut devices { 3190 for device_cfg in device_list_cfg.iter_mut() { 3191 let (device_id, _) = self.add_passthrough_device(device_cfg)?; 3192 if device_cfg.iommu && self.iommu_device.is_some() { 3193 iommu_attached_device_ids.push(device_id); 3194 } 3195 } 3196 } 3197 3198 // Update the list of devices 3199 self.config.lock().unwrap().devices = devices; 3200 3201 Ok(iommu_attached_device_ids) 3202 } 3203 3204 fn add_vfio_user_device( 3205 &mut self, 3206 device_cfg: &mut UserDeviceConfig, 3207 ) -> DeviceManagerResult<(PciBdf, String)> { 3208 let vfio_user_name = if let Some(id) = &device_cfg.id { 3209 id.clone() 3210 } else { 3211 let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?; 3212 device_cfg.id = Some(id.clone()); 3213 id 3214 }; 3215 3216 let (pci_segment_id, pci_device_bdf, resources) = 3217 self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?; 3218 3219 let legacy_interrupt_group = 3220 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3221 Some( 3222 legacy_interrupt_manager 3223 .create_group(LegacyIrqGroupConfig { 3224 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3225 [pci_device_bdf.device() as usize] 3226 as InterruptIndex, 3227 }) 3228 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3229 ) 3230 } else { 3231 None 3232 }; 3233 3234 let client = Arc::new(Mutex::new( 3235 vfio_user::Client::new(&device_cfg.socket) 3236 .map_err(DeviceManagerError::VfioUserCreateClient)?, 3237 )); 3238 3239 let memory_manager = self.memory_manager.clone(); 3240 3241 let mut vfio_user_pci_device = VfioUserPciDevice::new( 3242 vfio_user_name.clone(), 3243 &self.address_manager.vm, 3244 client.clone(), 3245 self.msi_interrupt_manager.clone(), 3246 legacy_interrupt_group, 3247 pci_device_bdf, 3248 self.restoring, 3249 Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()), 3250 ) 3251 .map_err(DeviceManagerError::VfioUserCreate)?; 3252 3253 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3254 let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory))); 3255 for virtio_mem_device in self.virtio_mem_devices.iter() { 3256 virtio_mem_device 3257 .lock() 3258 .unwrap() 3259 .add_dma_mapping_handler( 3260 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3261 vfio_user_mapping.clone(), 3262 ) 3263 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3264 } 3265 3266 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3267 for region in zone.regions() { 3268 vfio_user_pci_device 3269 .dma_map(region) 3270 .map_err(DeviceManagerError::VfioUserDmaMap)?; 3271 } 3272 } 3273 3274 let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device)); 3275 3276 let new_resources = self.add_pci_device( 3277 vfio_user_pci_device.clone(), 3278 vfio_user_pci_device.clone(), 3279 pci_segment_id, 3280 pci_device_bdf, 3281 resources, 3282 )?; 3283 3284 // When restoring a VM, the restore codepath will take care of mapping 3285 // the MMIO regions based on the information from the snapshot. 3286 if !self.restoring { 3287 // Note it is required to call 'add_pci_device()' in advance to have the list of 3288 // mmio regions provisioned correctly 3289 vfio_user_pci_device 3290 .lock() 3291 .unwrap() 3292 .map_mmio_regions() 3293 .map_err(DeviceManagerError::VfioUserMapRegion)?; 3294 } 3295 3296 let mut node = device_node!(vfio_user_name, vfio_user_pci_device); 3297 3298 // Update the device tree with correct resource information. 3299 node.resources = new_resources; 3300 node.pci_bdf = Some(pci_device_bdf); 3301 node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device)); 3302 3303 self.device_tree 3304 .lock() 3305 .unwrap() 3306 .insert(vfio_user_name.clone(), node); 3307 3308 Ok((pci_device_bdf, vfio_user_name)) 3309 } 3310 3311 fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3312 let mut user_devices = self.config.lock().unwrap().user_devices.clone(); 3313 3314 if let Some(device_list_cfg) = &mut user_devices { 3315 for device_cfg in device_list_cfg.iter_mut() { 3316 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?; 3317 } 3318 } 3319 3320 // Update the list of devices 3321 self.config.lock().unwrap().user_devices = user_devices; 3322 3323 Ok(vec![]) 3324 } 3325 3326 fn add_virtio_pci_device( 3327 &mut self, 3328 virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3329 iommu_mapping: &Option<Arc<IommuMapping>>, 3330 virtio_device_id: String, 3331 pci_segment_id: u16, 3332 dma_handler: Option<Arc<dyn ExternalDmaMapping>>, 3333 ) -> DeviceManagerResult<PciBdf> { 3334 let id = format!("{}-{}", VIRTIO_PCI_DEVICE_NAME_PREFIX, virtio_device_id); 3335 3336 // Add the new virtio-pci node to the device tree. 3337 let mut node = device_node!(id); 3338 node.children = vec![virtio_device_id.clone()]; 3339 3340 let (pci_segment_id, pci_device_bdf, resources) = 3341 self.pci_resources(&id, pci_segment_id)?; 3342 3343 // Update the existing virtio node by setting the parent. 3344 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) { 3345 node.parent = Some(id.clone()); 3346 } else { 3347 return Err(DeviceManagerError::MissingNode); 3348 } 3349 3350 // Allows support for one MSI-X vector per queue. It also adds 1 3351 // as we need to take into account the dedicated vector to notify 3352 // about a virtio config change. 3353 let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16; 3354 3355 // Create the AccessPlatform trait from the implementation IommuMapping. 3356 // This will provide address translation for any virtio device sitting 3357 // behind a vIOMMU. 3358 let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping 3359 { 3360 Some(Arc::new(AccessPlatformMapping::new( 3361 pci_device_bdf.into(), 3362 mapping.clone(), 3363 ))) 3364 } else { 3365 None 3366 }; 3367 3368 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3369 3370 // Map DMA ranges if a DMA handler is available and if the device is 3371 // not attached to a virtual IOMMU. 3372 if let Some(dma_handler) = &dma_handler { 3373 if iommu_mapping.is_some() { 3374 if let Some(iommu) = &self.iommu_device { 3375 iommu 3376 .lock() 3377 .unwrap() 3378 .add_external_mapping(pci_device_bdf.into(), dma_handler.clone()); 3379 } else { 3380 return Err(DeviceManagerError::MissingVirtualIommu); 3381 } 3382 } else { 3383 // Let every virtio-mem device handle the DMA map/unmap through the 3384 // DMA handler provided. 3385 for virtio_mem_device in self.virtio_mem_devices.iter() { 3386 virtio_mem_device 3387 .lock() 3388 .unwrap() 3389 .add_dma_mapping_handler( 3390 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3391 dma_handler.clone(), 3392 ) 3393 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3394 } 3395 3396 // Do not register virtio-mem regions, as they are handled directly by 3397 // virtio-mem devices. 3398 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3399 for region in zone.regions() { 3400 let gpa = region.start_addr().0; 3401 let size = region.len(); 3402 dma_handler 3403 .map(gpa, gpa, size) 3404 .map_err(DeviceManagerError::VirtioDmaMap)?; 3405 } 3406 } 3407 } 3408 } 3409 3410 let device_type = virtio_device.lock().unwrap().device_type(); 3411 let virtio_pci_device = Arc::new(Mutex::new( 3412 VirtioPciDevice::new( 3413 id.clone(), 3414 memory, 3415 virtio_device, 3416 msix_num, 3417 access_platform, 3418 &self.msi_interrupt_manager, 3419 pci_device_bdf.into(), 3420 self.activate_evt 3421 .try_clone() 3422 .map_err(DeviceManagerError::EventFd)?, 3423 // All device types *except* virtio block devices should be allocated a 64-bit bar 3424 // The block devices should be given a 32-bit BAR so that they are easily accessible 3425 // to firmware without requiring excessive identity mapping. 3426 // The exception being if not on the default PCI segment. 3427 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32, 3428 dma_handler, 3429 self.pending_activations.clone(), 3430 ) 3431 .map_err(DeviceManagerError::VirtioDevice)?, 3432 )); 3433 3434 let new_resources = self.add_pci_device( 3435 virtio_pci_device.clone(), 3436 virtio_pci_device.clone(), 3437 pci_segment_id, 3438 pci_device_bdf, 3439 resources, 3440 )?; 3441 3442 let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr(); 3443 for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) { 3444 let io_addr = IoEventAddress::Mmio(addr); 3445 self.address_manager 3446 .vm 3447 .register_ioevent(event, &io_addr, None) 3448 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?; 3449 } 3450 3451 // Update the device tree with correct resource information. 3452 node.resources = new_resources; 3453 node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>); 3454 node.pci_bdf = Some(pci_device_bdf); 3455 node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device)); 3456 self.device_tree.lock().unwrap().insert(id, node); 3457 3458 Ok(pci_device_bdf) 3459 } 3460 3461 fn pci_resources( 3462 &self, 3463 id: &str, 3464 pci_segment_id: u16, 3465 ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> { 3466 // Look for the id in the device tree. If it can be found, that means 3467 // the device is being restored, otherwise it's created from scratch. 3468 Ok( 3469 if let Some(node) = self.device_tree.lock().unwrap().get(id) { 3470 info!("Restoring virtio-pci {} resources", id); 3471 let pci_device_bdf: PciBdf = node 3472 .pci_bdf 3473 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3474 let pci_segment_id = pci_device_bdf.segment(); 3475 3476 self.pci_segments[pci_segment_id as usize] 3477 .pci_bus 3478 .lock() 3479 .unwrap() 3480 .get_device_id(pci_device_bdf.device() as usize) 3481 .map_err(DeviceManagerError::GetPciDeviceId)?; 3482 3483 (pci_segment_id, pci_device_bdf, Some(node.resources.clone())) 3484 } else { 3485 let pci_device_bdf = 3486 self.pci_segments[pci_segment_id as usize].next_device_bdf()?; 3487 3488 (pci_segment_id, pci_device_bdf, None) 3489 }, 3490 ) 3491 } 3492 3493 #[cfg(target_arch = "x86_64")] 3494 pub fn io_bus(&self) -> &Arc<Bus> { 3495 &self.address_manager.io_bus 3496 } 3497 3498 pub fn mmio_bus(&self) -> &Arc<Bus> { 3499 &self.address_manager.mmio_bus 3500 } 3501 3502 pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> { 3503 &self.address_manager.allocator 3504 } 3505 3506 pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> { 3507 self.interrupt_controller 3508 .as_ref() 3509 .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>) 3510 } 3511 3512 #[cfg(target_arch = "x86_64")] 3513 // Used to provide a fast path for handling PIO exits 3514 pub fn pci_config_io(&self) -> Arc<Mutex<PciConfigIo>> { 3515 Arc::clone(self.pci_segments[0].pci_config_io.as_ref().unwrap()) 3516 } 3517 3518 pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> { 3519 &self.pci_segments 3520 } 3521 3522 pub fn console(&self) -> &Arc<Console> { 3523 &self.console 3524 } 3525 3526 #[cfg(target_arch = "aarch64")] 3527 pub fn cmdline_additions(&self) -> &[String] { 3528 self.cmdline_additions.as_slice() 3529 } 3530 3531 pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> { 3532 for handle in self.virtio_devices.iter() { 3533 handle 3534 .virtio_device 3535 .lock() 3536 .unwrap() 3537 .add_memory_region(new_region) 3538 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?; 3539 3540 if let Some(dma_handler) = &handle.dma_handler { 3541 if !handle.iommu { 3542 let gpa = new_region.start_addr().0; 3543 let size = new_region.len(); 3544 dma_handler 3545 .map(gpa, gpa, size) 3546 .map_err(DeviceManagerError::VirtioDmaMap)?; 3547 } 3548 } 3549 } 3550 3551 // Take care of updating the memory for VFIO PCI devices. 3552 if let Some(vfio_container) = &self.vfio_container { 3553 vfio_container 3554 .vfio_dma_map( 3555 new_region.start_addr().raw_value(), 3556 new_region.len() as u64, 3557 new_region.as_ptr() as u64, 3558 ) 3559 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?; 3560 } 3561 3562 // Take care of updating the memory for vfio-user devices. 3563 { 3564 let device_tree = self.device_tree.lock().unwrap(); 3565 for pci_device_node in device_tree.pci_devices() { 3566 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node 3567 .pci_device_handle 3568 .as_ref() 3569 .ok_or(DeviceManagerError::MissingPciDevice)? 3570 { 3571 vfio_user_pci_device 3572 .lock() 3573 .unwrap() 3574 .dma_map(new_region) 3575 .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?; 3576 } 3577 } 3578 } 3579 3580 Ok(()) 3581 } 3582 3583 pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> { 3584 for mut activator in self.pending_activations.lock().unwrap().drain(..) { 3585 activator 3586 .activate() 3587 .map_err(DeviceManagerError::VirtioActivate)?; 3588 } 3589 Ok(()) 3590 } 3591 3592 pub fn notify_hotplug( 3593 &self, 3594 _notification_type: AcpiNotificationFlags, 3595 ) -> DeviceManagerResult<()> { 3596 return self 3597 .ged_notification_device 3598 .as_ref() 3599 .unwrap() 3600 .lock() 3601 .unwrap() 3602 .notify(_notification_type) 3603 .map_err(DeviceManagerError::HotPlugNotification); 3604 } 3605 3606 pub fn add_device( 3607 &mut self, 3608 device_cfg: &mut DeviceConfig, 3609 ) -> DeviceManagerResult<PciDeviceInfo> { 3610 self.validate_identifier(&device_cfg.id)?; 3611 3612 if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) { 3613 return Err(DeviceManagerError::InvalidIommuHotplug); 3614 } 3615 3616 let (bdf, device_name) = self.add_passthrough_device(device_cfg)?; 3617 3618 // Update the PCIU bitmap 3619 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3620 3621 Ok(PciDeviceInfo { 3622 id: device_name, 3623 bdf, 3624 }) 3625 } 3626 3627 pub fn add_user_device( 3628 &mut self, 3629 device_cfg: &mut UserDeviceConfig, 3630 ) -> DeviceManagerResult<PciDeviceInfo> { 3631 self.validate_identifier(&device_cfg.id)?; 3632 3633 let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?; 3634 3635 // Update the PCIU bitmap 3636 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3637 3638 Ok(PciDeviceInfo { 3639 id: device_name, 3640 bdf, 3641 }) 3642 } 3643 3644 pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> { 3645 // The node can be directly a PCI node in case the 'id' refers to a 3646 // VFIO device or a virtio-pci one. 3647 // In case the 'id' refers to a virtio device, we must find the PCI 3648 // node by looking at the parent. 3649 let device_tree = self.device_tree.lock().unwrap(); 3650 let node = device_tree 3651 .get(&id) 3652 .ok_or(DeviceManagerError::UnknownDeviceId(id))?; 3653 3654 let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() { 3655 node 3656 } else { 3657 let parent = node 3658 .parent 3659 .as_ref() 3660 .ok_or(DeviceManagerError::MissingNode)?; 3661 device_tree 3662 .get(parent) 3663 .ok_or(DeviceManagerError::MissingNode)? 3664 }; 3665 3666 let pci_device_bdf: PciBdf = pci_device_node 3667 .pci_bdf 3668 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3669 let pci_segment_id = pci_device_bdf.segment(); 3670 3671 let pci_device_handle = pci_device_node 3672 .pci_device_handle 3673 .as_ref() 3674 .ok_or(DeviceManagerError::MissingPciDevice)?; 3675 #[allow(irrefutable_let_patterns)] 3676 if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle { 3677 let device_type = VirtioDeviceType::from( 3678 virtio_pci_device 3679 .lock() 3680 .unwrap() 3681 .virtio_device() 3682 .lock() 3683 .unwrap() 3684 .device_type(), 3685 ); 3686 match device_type { 3687 VirtioDeviceType::Net 3688 | VirtioDeviceType::Block 3689 | VirtioDeviceType::Pmem 3690 | VirtioDeviceType::Fs 3691 | VirtioDeviceType::Vsock => {} 3692 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)), 3693 } 3694 } 3695 3696 // Update the PCID bitmap 3697 self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device(); 3698 3699 Ok(()) 3700 } 3701 3702 pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> { 3703 info!( 3704 "Ejecting device_id = {} on segment_id={}", 3705 device_id, pci_segment_id 3706 ); 3707 3708 // Convert the device ID into the corresponding b/d/f. 3709 let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0); 3710 3711 // Give the PCI device ID back to the PCI bus. 3712 self.pci_segments[pci_segment_id as usize] 3713 .pci_bus 3714 .lock() 3715 .unwrap() 3716 .put_device_id(device_id as usize) 3717 .map_err(DeviceManagerError::PutPciDeviceId)?; 3718 3719 // Remove the device from the device tree along with its children. 3720 let mut device_tree = self.device_tree.lock().unwrap(); 3721 let pci_device_node = device_tree 3722 .remove_node_by_pci_bdf(pci_device_bdf) 3723 .ok_or(DeviceManagerError::MissingPciDevice)?; 3724 3725 // For VFIO and vfio-user the PCI device id is the id. 3726 // For virtio we overwrite it later as we want the id of the 3727 // underlying device. 3728 let mut id = pci_device_node.id; 3729 let pci_device_handle = pci_device_node 3730 .pci_device_handle 3731 .ok_or(DeviceManagerError::MissingPciDevice)?; 3732 if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) { 3733 // The virtio-pci device has a single child 3734 if !pci_device_node.children.is_empty() { 3735 assert_eq!(pci_device_node.children.len(), 1); 3736 let child_id = &pci_device_node.children[0]; 3737 id = child_id.clone(); 3738 } 3739 } 3740 for child in pci_device_node.children.iter() { 3741 device_tree.remove(child); 3742 } 3743 3744 let mut iommu_attached = false; 3745 if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices { 3746 if iommu_attached_devices.contains(&pci_device_bdf) { 3747 iommu_attached = true; 3748 } 3749 } 3750 3751 let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { 3752 // No need to remove any virtio-mem mapping here as the container outlives all devices 3753 PciDeviceHandle::Vfio(vfio_pci_device) => ( 3754 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>, 3755 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>, 3756 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 3757 false, 3758 ), 3759 PciDeviceHandle::Virtio(virtio_pci_device) => { 3760 let dev = virtio_pci_device.lock().unwrap(); 3761 let bar_addr = dev.config_bar_addr(); 3762 for (event, addr) in dev.ioeventfds(bar_addr) { 3763 let io_addr = IoEventAddress::Mmio(addr); 3764 self.address_manager 3765 .vm 3766 .unregister_ioevent(event, &io_addr) 3767 .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; 3768 } 3769 3770 if let Some(dma_handler) = dev.dma_handler() { 3771 if !iommu_attached { 3772 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3773 for region in zone.regions() { 3774 let iova = region.start_addr().0; 3775 let size = region.len(); 3776 dma_handler 3777 .unmap(iova, size) 3778 .map_err(DeviceManagerError::VirtioDmaUnmap)?; 3779 } 3780 } 3781 } 3782 } 3783 3784 ( 3785 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>, 3786 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>, 3787 Some(dev.virtio_device()), 3788 dev.dma_handler().is_some() && !iommu_attached, 3789 ) 3790 } 3791 PciDeviceHandle::VfioUser(vfio_user_pci_device) => { 3792 let mut dev = vfio_user_pci_device.lock().unwrap(); 3793 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3794 for region in zone.regions() { 3795 dev.dma_unmap(region) 3796 .map_err(DeviceManagerError::VfioUserDmaUnmap)?; 3797 } 3798 } 3799 3800 ( 3801 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>, 3802 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>, 3803 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 3804 true, 3805 ) 3806 } 3807 }; 3808 3809 if remove_dma_handler { 3810 for virtio_mem_device in self.virtio_mem_devices.iter() { 3811 virtio_mem_device 3812 .lock() 3813 .unwrap() 3814 .remove_dma_mapping_handler(VirtioMemMappingSource::Device( 3815 pci_device_bdf.into(), 3816 )) 3817 .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?; 3818 } 3819 } 3820 3821 // Free the allocated BARs 3822 pci_device 3823 .lock() 3824 .unwrap() 3825 .free_bars( 3826 &mut self.address_manager.allocator.lock().unwrap(), 3827 &mut self.pci_segments[pci_segment_id as usize] 3828 .allocator 3829 .lock() 3830 .unwrap(), 3831 ) 3832 .map_err(DeviceManagerError::FreePciBars)?; 3833 3834 // Remove the device from the PCI bus 3835 self.pci_segments[pci_segment_id as usize] 3836 .pci_bus 3837 .lock() 3838 .unwrap() 3839 .remove_by_device(&pci_device) 3840 .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?; 3841 3842 #[cfg(target_arch = "x86_64")] 3843 // Remove the device from the IO bus 3844 self.io_bus() 3845 .remove_by_device(&bus_device) 3846 .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?; 3847 3848 // Remove the device from the MMIO bus 3849 self.mmio_bus() 3850 .remove_by_device(&bus_device) 3851 .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?; 3852 3853 // Remove the device from the list of BusDevice held by the 3854 // DeviceManager. 3855 self.bus_devices 3856 .retain(|dev| !Arc::ptr_eq(dev, &bus_device)); 3857 3858 // Shutdown and remove the underlying virtio-device if present 3859 if let Some(virtio_device) = virtio_device { 3860 for mapping in virtio_device.lock().unwrap().userspace_mappings() { 3861 self.memory_manager 3862 .lock() 3863 .unwrap() 3864 .remove_userspace_mapping( 3865 mapping.addr.raw_value(), 3866 mapping.len, 3867 mapping.host_addr, 3868 mapping.mergeable, 3869 mapping.mem_slot, 3870 ) 3871 .map_err(DeviceManagerError::MemoryManager)?; 3872 } 3873 3874 virtio_device.lock().unwrap().shutdown(); 3875 3876 self.virtio_devices 3877 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device)); 3878 } 3879 3880 event!( 3881 "vm", 3882 "device-removed", 3883 "id", 3884 &id, 3885 "bdf", 3886 pci_device_bdf.to_string() 3887 ); 3888 3889 // At this point, the device has been removed from all the list and 3890 // buses where it was stored. At the end of this function, after 3891 // any_device, bus_device and pci_device are released, the actual 3892 // device will be dropped. 3893 Ok(()) 3894 } 3895 3896 fn hotplug_virtio_pci_device( 3897 &mut self, 3898 handle: MetaVirtioDevice, 3899 ) -> DeviceManagerResult<PciDeviceInfo> { 3900 // Add the virtio device to the device manager list. This is important 3901 // as the list is used to notify virtio devices about memory updates 3902 // for instance. 3903 self.virtio_devices.push(handle.clone()); 3904 3905 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 3906 self.iommu_mapping.clone() 3907 } else { 3908 None 3909 }; 3910 3911 let bdf = self.add_virtio_pci_device( 3912 handle.virtio_device, 3913 &mapping, 3914 handle.id.clone(), 3915 handle.pci_segment, 3916 handle.dma_handler, 3917 )?; 3918 3919 // Update the PCIU bitmap 3920 self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3921 3922 Ok(PciDeviceInfo { id: handle.id, bdf }) 3923 } 3924 3925 fn is_iommu_segment(&self, pci_segment_id: u16) -> bool { 3926 self.config 3927 .lock() 3928 .as_ref() 3929 .unwrap() 3930 .platform 3931 .as_ref() 3932 .map(|pc| { 3933 pc.iommu_segments 3934 .as_ref() 3935 .map(|v| v.contains(&pci_segment_id)) 3936 .unwrap_or_default() 3937 }) 3938 .unwrap_or_default() 3939 } 3940 3941 pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> { 3942 self.validate_identifier(&disk_cfg.id)?; 3943 3944 if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) { 3945 return Err(DeviceManagerError::InvalidIommuHotplug); 3946 } 3947 3948 let device = self.make_virtio_block_device(disk_cfg)?; 3949 self.hotplug_virtio_pci_device(device) 3950 } 3951 3952 pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> { 3953 self.validate_identifier(&fs_cfg.id)?; 3954 3955 let device = self.make_virtio_fs_device(fs_cfg)?; 3956 self.hotplug_virtio_pci_device(device) 3957 } 3958 3959 pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> { 3960 self.validate_identifier(&pmem_cfg.id)?; 3961 3962 if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) { 3963 return Err(DeviceManagerError::InvalidIommuHotplug); 3964 } 3965 3966 let device = self.make_virtio_pmem_device(pmem_cfg)?; 3967 self.hotplug_virtio_pci_device(device) 3968 } 3969 3970 pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> { 3971 self.validate_identifier(&net_cfg.id)?; 3972 3973 if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) { 3974 return Err(DeviceManagerError::InvalidIommuHotplug); 3975 } 3976 3977 let device = self.make_virtio_net_device(net_cfg)?; 3978 self.hotplug_virtio_pci_device(device) 3979 } 3980 3981 pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> { 3982 self.validate_identifier(&vdpa_cfg.id)?; 3983 3984 if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) { 3985 return Err(DeviceManagerError::InvalidIommuHotplug); 3986 } 3987 3988 let device = self.make_vdpa_device(vdpa_cfg)?; 3989 self.hotplug_virtio_pci_device(device) 3990 } 3991 3992 pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> { 3993 self.validate_identifier(&vsock_cfg.id)?; 3994 3995 if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) { 3996 return Err(DeviceManagerError::InvalidIommuHotplug); 3997 } 3998 3999 let device = self.make_virtio_vsock_device(vsock_cfg)?; 4000 self.hotplug_virtio_pci_device(device) 4001 } 4002 4003 pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> { 4004 let mut counters = HashMap::new(); 4005 4006 for handle in &self.virtio_devices { 4007 let virtio_device = handle.virtio_device.lock().unwrap(); 4008 if let Some(device_counters) = virtio_device.counters() { 4009 counters.insert(handle.id.clone(), device_counters.clone()); 4010 } 4011 } 4012 4013 counters 4014 } 4015 4016 pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> { 4017 if let Some(balloon) = &self.balloon { 4018 return balloon 4019 .lock() 4020 .unwrap() 4021 .resize(size) 4022 .map_err(DeviceManagerError::VirtioBalloonResize); 4023 } 4024 4025 warn!("No balloon setup: Can't resize the balloon"); 4026 Err(DeviceManagerError::MissingVirtioBalloon) 4027 } 4028 4029 pub fn balloon_size(&self) -> u64 { 4030 if let Some(balloon) = &self.balloon { 4031 return balloon.lock().unwrap().get_actual(); 4032 } 4033 4034 0 4035 } 4036 4037 pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> { 4038 self.device_tree.clone() 4039 } 4040 4041 pub fn restore_devices( 4042 &mut self, 4043 snapshot: Snapshot, 4044 ) -> std::result::Result<(), MigratableError> { 4045 // Finally, restore all devices associated with the DeviceManager. 4046 // It's important to restore devices in the right order, that's why 4047 // the device tree is the right way to ensure we restore a child before 4048 // its parent node. 4049 for node in self 4050 .device_tree 4051 .lock() 4052 .unwrap() 4053 .breadth_first_traversal() 4054 .rev() 4055 { 4056 // Restore the node 4057 if let Some(migratable) = &node.migratable { 4058 info!("Restoring {} from DeviceManager", node.id); 4059 if let Some(snapshot) = snapshot.snapshots.get(&node.id) { 4060 migratable.lock().unwrap().pause()?; 4061 migratable.lock().unwrap().restore(*snapshot.clone())?; 4062 } else { 4063 return Err(MigratableError::Restore(anyhow!( 4064 "Missing device {}", 4065 node.id 4066 ))); 4067 } 4068 } 4069 } 4070 4071 // The devices have been fully restored, we can now update the 4072 // restoring state of the DeviceManager. 4073 self.restoring = false; 4074 4075 Ok(()) 4076 } 4077 4078 #[cfg(target_arch = "x86_64")] 4079 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 4080 self.ged_notification_device 4081 .as_ref() 4082 .unwrap() 4083 .lock() 4084 .unwrap() 4085 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4086 .map_err(DeviceManagerError::PowerButtonNotification) 4087 } 4088 4089 #[cfg(target_arch = "aarch64")] 4090 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 4091 // There are two use cases: 4092 // 1. Users will use direct kernel boot with device tree. 4093 // 2. Users will use ACPI+UEFI boot. 4094 4095 // Trigger a GPIO pin 3 event to satisify use case 1. 4096 self.gpio_device 4097 .as_ref() 4098 .unwrap() 4099 .lock() 4100 .unwrap() 4101 .trigger_key(3) 4102 .map_err(DeviceManagerError::AArch64PowerButtonNotification)?; 4103 // Trigger a GED power button event to satisify use case 2. 4104 return self 4105 .ged_notification_device 4106 .as_ref() 4107 .unwrap() 4108 .lock() 4109 .unwrap() 4110 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4111 .map_err(DeviceManagerError::PowerButtonNotification); 4112 } 4113 4114 pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> { 4115 &self.iommu_attached_devices 4116 } 4117 4118 #[cfg(target_arch = "aarch64")] 4119 pub fn uefi_flash(&self) -> GuestMemoryAtomic<GuestMemoryMmap> { 4120 self.uefi_flash.as_ref().unwrap().clone() 4121 } 4122 4123 fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> { 4124 if let Some(id) = id { 4125 if id.starts_with("__") { 4126 return Err(DeviceManagerError::InvalidIdentifier(id.clone())); 4127 } 4128 4129 if self.device_tree.lock().unwrap().contains_key(id) { 4130 return Err(DeviceManagerError::IdentifierNotUnique(id.clone())); 4131 } 4132 } 4133 4134 Ok(()) 4135 } 4136 } 4137 4138 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> { 4139 for (numa_node_id, numa_node) in numa_nodes.iter() { 4140 if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) { 4141 return Some(*numa_node_id); 4142 } 4143 } 4144 4145 None 4146 } 4147 4148 impl Aml for DeviceManager { 4149 fn append_aml_bytes(&self, bytes: &mut Vec<u8>) { 4150 #[cfg(target_arch = "aarch64")] 4151 use arch::aarch64::DeviceInfoForFdt; 4152 4153 let mut pci_scan_methods = Vec::new(); 4154 for i in 0..self.pci_segments.len() { 4155 pci_scan_methods.push(aml::MethodCall::new( 4156 format!("\\_SB_.PCI{:X}.PCNT", i).as_str().into(), 4157 vec![], 4158 )); 4159 } 4160 let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new(); 4161 for method in &pci_scan_methods { 4162 pci_scan_inner.push(method) 4163 } 4164 4165 // PCI hotplug controller 4166 aml::Device::new( 4167 "_SB_.PHPR".into(), 4168 vec![ 4169 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")), 4170 &aml::Name::new("_STA".into(), &0x0bu8), 4171 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"), 4172 &aml::Mutex::new("BLCK".into(), 0), 4173 &aml::Name::new( 4174 "_CRS".into(), 4175 &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory( 4176 aml::AddressSpaceCachable::NotCacheable, 4177 true, 4178 self.acpi_address.0 as u64, 4179 self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1, 4180 )]), 4181 ), 4182 // OpRegion and Fields map MMIO range into individual field values 4183 &aml::OpRegion::new( 4184 "PCST".into(), 4185 aml::OpRegionSpace::SystemMemory, 4186 self.acpi_address.0 as usize, 4187 DEVICE_MANAGER_ACPI_SIZE, 4188 ), 4189 &aml::Field::new( 4190 "PCST".into(), 4191 aml::FieldAccessType::DWord, 4192 aml::FieldUpdateRule::WriteAsZeroes, 4193 vec![ 4194 aml::FieldEntry::Named(*b"PCIU", 32), 4195 aml::FieldEntry::Named(*b"PCID", 32), 4196 aml::FieldEntry::Named(*b"B0EJ", 32), 4197 aml::FieldEntry::Named(*b"PSEG", 32), 4198 ], 4199 ), 4200 &aml::Method::new( 4201 "PCEJ".into(), 4202 2, 4203 true, 4204 vec![ 4205 // Take lock defined above 4206 &aml::Acquire::new("BLCK".into(), 0xffff), 4207 // Choose the current segment 4208 &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)), 4209 // Write PCI bus number (in first argument) to I/O port via field 4210 &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)), 4211 // Release lock 4212 &aml::Release::new("BLCK".into()), 4213 // Return 0 4214 &aml::Return::new(&aml::ZERO), 4215 ], 4216 ), 4217 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner), 4218 ], 4219 ) 4220 .append_aml_bytes(bytes); 4221 4222 for segment in &self.pci_segments { 4223 segment.append_aml_bytes(bytes); 4224 } 4225 4226 let mut mbrd_memory = Vec::new(); 4227 4228 for segment in &self.pci_segments { 4229 mbrd_memory.push(aml::Memory32Fixed::new( 4230 true, 4231 segment.mmio_config_address as u32, 4232 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32, 4233 )) 4234 } 4235 4236 let mut mbrd_memory_refs = Vec::new(); 4237 for mbrd_memory_ref in &mbrd_memory { 4238 mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml); 4239 } 4240 4241 aml::Device::new( 4242 "_SB_.MBRD".into(), 4243 vec![ 4244 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")), 4245 &aml::Name::new("_UID".into(), &aml::ZERO), 4246 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)), 4247 ], 4248 ) 4249 .append_aml_bytes(bytes); 4250 4251 // Serial device 4252 #[cfg(target_arch = "x86_64")] 4253 let serial_irq = 4; 4254 #[cfg(target_arch = "aarch64")] 4255 let serial_irq = 4256 if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off { 4257 self.get_device_info() 4258 .clone() 4259 .get(&(DeviceType::Serial, DeviceType::Serial.to_string())) 4260 .unwrap() 4261 .irq() 4262 } else { 4263 // If serial is turned off, add a fake device with invalid irq. 4264 31 4265 }; 4266 if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off { 4267 aml::Device::new( 4268 "_SB_.COM1".into(), 4269 vec![ 4270 &aml::Name::new( 4271 "_HID".into(), 4272 #[cfg(target_arch = "x86_64")] 4273 &aml::EisaName::new("PNP0501"), 4274 #[cfg(target_arch = "aarch64")] 4275 &"ARMH0011", 4276 ), 4277 &aml::Name::new("_UID".into(), &aml::ZERO), 4278 &aml::Name::new("_DDN".into(), &"COM1"), 4279 &aml::Name::new( 4280 "_CRS".into(), 4281 &aml::ResourceTemplate::new(vec![ 4282 &aml::Interrupt::new(true, true, false, false, serial_irq), 4283 #[cfg(target_arch = "x86_64")] 4284 &aml::Io::new(0x3f8, 0x3f8, 0, 0x8), 4285 #[cfg(target_arch = "aarch64")] 4286 &aml::Memory32Fixed::new( 4287 true, 4288 arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32, 4289 MMIO_LEN as u32, 4290 ), 4291 ]), 4292 ), 4293 ], 4294 ) 4295 .append_aml_bytes(bytes); 4296 } 4297 4298 aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).append_aml_bytes(bytes); 4299 4300 aml::Device::new( 4301 "_SB_.PWRB".into(), 4302 vec![ 4303 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")), 4304 &aml::Name::new("_UID".into(), &aml::ZERO), 4305 ], 4306 ) 4307 .append_aml_bytes(bytes); 4308 4309 self.ged_notification_device 4310 .as_ref() 4311 .unwrap() 4312 .lock() 4313 .unwrap() 4314 .append_aml_bytes(bytes); 4315 } 4316 } 4317 4318 impl Pausable for DeviceManager { 4319 fn pause(&mut self) -> result::Result<(), MigratableError> { 4320 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4321 if let Some(migratable) = &device_node.migratable { 4322 migratable.lock().unwrap().pause()?; 4323 } 4324 } 4325 // On AArch64, the pause of device manager needs to trigger 4326 // a "pause" of GIC, which will flush the GIC pending tables 4327 // and ITS tables to guest RAM. 4328 #[cfg(target_arch = "aarch64")] 4329 { 4330 self.get_interrupt_controller() 4331 .unwrap() 4332 .lock() 4333 .unwrap() 4334 .pause()?; 4335 }; 4336 4337 Ok(()) 4338 } 4339 4340 fn resume(&mut self) -> result::Result<(), MigratableError> { 4341 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4342 if let Some(migratable) = &device_node.migratable { 4343 migratable.lock().unwrap().resume()?; 4344 } 4345 } 4346 4347 Ok(()) 4348 } 4349 } 4350 4351 impl Snapshottable for DeviceManager { 4352 fn id(&self) -> String { 4353 DEVICE_MANAGER_SNAPSHOT_ID.to_string() 4354 } 4355 4356 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 4357 let mut snapshot = Snapshot::new(DEVICE_MANAGER_SNAPSHOT_ID); 4358 4359 // We aggregate all devices snapshots. 4360 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4361 if let Some(migratable) = &device_node.migratable { 4362 let device_snapshot = migratable.lock().unwrap().snapshot()?; 4363 snapshot.add_snapshot(device_snapshot); 4364 } 4365 } 4366 4367 // Then we store the DeviceManager state. 4368 snapshot.add_data_section(SnapshotDataSection::new_from_state( 4369 DEVICE_MANAGER_SNAPSHOT_ID, 4370 &self.state(), 4371 )?); 4372 4373 Ok(snapshot) 4374 } 4375 4376 fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> { 4377 // Let's first restore the DeviceManager. 4378 4379 self.set_state(&snapshot.to_state(DEVICE_MANAGER_SNAPSHOT_ID)?); 4380 4381 // Now that DeviceManager is updated with the right states, it's time 4382 // to create the devices based on the configuration. 4383 self.create_devices(None, None, None) 4384 .map_err(|e| MigratableError::Restore(anyhow!("Could not create devices {:?}", e)))?; 4385 4386 Ok(()) 4387 } 4388 } 4389 4390 impl Transportable for DeviceManager {} 4391 4392 impl Migratable for DeviceManager { 4393 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 4394 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4395 if let Some(migratable) = &device_node.migratable { 4396 migratable.lock().unwrap().start_dirty_log()?; 4397 } 4398 } 4399 Ok(()) 4400 } 4401 4402 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 4403 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4404 if let Some(migratable) = &device_node.migratable { 4405 migratable.lock().unwrap().stop_dirty_log()?; 4406 } 4407 } 4408 Ok(()) 4409 } 4410 4411 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 4412 let mut tables = Vec::new(); 4413 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4414 if let Some(migratable) = &device_node.migratable { 4415 tables.push(migratable.lock().unwrap().dirty_log()?); 4416 } 4417 } 4418 Ok(MemoryRangeTable::new_from_tables(tables)) 4419 } 4420 4421 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 4422 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4423 if let Some(migratable) = &device_node.migratable { 4424 migratable.lock().unwrap().start_migration()?; 4425 } 4426 } 4427 Ok(()) 4428 } 4429 4430 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 4431 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4432 if let Some(migratable) = &device_node.migratable { 4433 migratable.lock().unwrap().complete_migration()?; 4434 } 4435 } 4436 Ok(()) 4437 } 4438 } 4439 4440 const PCIU_FIELD_OFFSET: u64 = 0; 4441 const PCID_FIELD_OFFSET: u64 = 4; 4442 const B0EJ_FIELD_OFFSET: u64 = 8; 4443 const PSEG_FIELD_OFFSET: u64 = 12; 4444 const PCIU_FIELD_SIZE: usize = 4; 4445 const PCID_FIELD_SIZE: usize = 4; 4446 const B0EJ_FIELD_SIZE: usize = 4; 4447 const PSEG_FIELD_SIZE: usize = 4; 4448 4449 impl BusDevice for DeviceManager { 4450 fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { 4451 match offset { 4452 PCIU_FIELD_OFFSET => { 4453 assert!(data.len() == PCIU_FIELD_SIZE); 4454 data.copy_from_slice( 4455 &self.pci_segments[self.selected_segment] 4456 .pci_devices_up 4457 .to_le_bytes(), 4458 ); 4459 // Clear the PCIU bitmap 4460 self.pci_segments[self.selected_segment].pci_devices_up = 0; 4461 } 4462 PCID_FIELD_OFFSET => { 4463 assert!(data.len() == PCID_FIELD_SIZE); 4464 data.copy_from_slice( 4465 &self.pci_segments[self.selected_segment] 4466 .pci_devices_down 4467 .to_le_bytes(), 4468 ); 4469 // Clear the PCID bitmap 4470 self.pci_segments[self.selected_segment].pci_devices_down = 0; 4471 } 4472 B0EJ_FIELD_OFFSET => { 4473 assert!(data.len() == B0EJ_FIELD_SIZE); 4474 // Always return an empty bitmap since the eject is always 4475 // taken care of right away during a write access. 4476 data.fill(0); 4477 } 4478 PSEG_FIELD_OFFSET => { 4479 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4480 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes()); 4481 } 4482 _ => error!( 4483 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4484 base, offset 4485 ), 4486 } 4487 4488 debug!( 4489 "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}", 4490 base, offset, data 4491 ) 4492 } 4493 4494 fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> { 4495 match offset { 4496 B0EJ_FIELD_OFFSET => { 4497 assert!(data.len() == B0EJ_FIELD_SIZE); 4498 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4499 data_array.copy_from_slice(data); 4500 let mut slot_bitmap = u32::from_le_bytes(data_array); 4501 4502 while slot_bitmap > 0 { 4503 let slot_id = slot_bitmap.trailing_zeros(); 4504 if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) { 4505 error!("Failed ejecting device {}: {:?}", slot_id, e); 4506 } 4507 slot_bitmap &= !(1 << slot_id); 4508 } 4509 } 4510 PSEG_FIELD_OFFSET => { 4511 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4512 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4513 data_array.copy_from_slice(data); 4514 let selected_segment = u32::from_le_bytes(data_array) as usize; 4515 if selected_segment >= self.pci_segments.len() { 4516 error!( 4517 "Segment selection out of range: {} >= {}", 4518 selected_segment, 4519 self.pci_segments.len() 4520 ); 4521 return None; 4522 } 4523 self.selected_segment = selected_segment; 4524 } 4525 _ => error!( 4526 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4527 base, offset 4528 ), 4529 } 4530 4531 debug!( 4532 "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}", 4533 base, offset, data 4534 ); 4535 4536 None 4537 } 4538 } 4539 4540 impl Drop for DeviceManager { 4541 fn drop(&mut self) { 4542 for handle in self.virtio_devices.drain(..) { 4543 handle.virtio_device.lock().unwrap().shutdown(); 4544 } 4545 } 4546 } 4547