1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use std::collections::{BTreeMap, BTreeSet, HashMap}; 13 use std::fs::{File, OpenOptions}; 14 use std::io::{self, stdout, IsTerminal, Seek, SeekFrom}; 15 use std::num::Wrapping; 16 use std::os::unix::fs::OpenOptionsExt; 17 use std::os::unix::io::{AsRawFd, FromRawFd}; 18 use std::path::PathBuf; 19 use std::result; 20 use std::sync::{Arc, Mutex}; 21 use std::time::Instant; 22 23 use acpi_tables::sdt::GenericAddress; 24 use acpi_tables::{aml, Aml}; 25 use anyhow::anyhow; 26 use arch::layout; 27 #[cfg(target_arch = "x86_64")] 28 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 29 use arch::NumaNodes; 30 #[cfg(target_arch = "aarch64")] 31 use arch::{DeviceType, MmioDeviceInfo}; 32 use block::{ 33 async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type, 34 fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio, 35 raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType, 36 }; 37 #[cfg(feature = "io_uring")] 38 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; 39 #[cfg(target_arch = "x86_64")] 40 use devices::debug_console::DebugConsole; 41 #[cfg(target_arch = "aarch64")] 42 use devices::gic; 43 #[cfg(target_arch = "x86_64")] 44 use devices::ioapic; 45 #[cfg(target_arch = "aarch64")] 46 use devices::legacy::Pl011; 47 #[cfg(feature = "pvmemcontrol")] 48 use devices::pvmemcontrol::{PvmemcontrolBusDevice, PvmemcontrolPciDevice}; 49 use devices::{ 50 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 51 }; 52 use 
hypervisor::IoEventAddress; 53 use libc::{ 54 tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE, 55 TCSANOW, 56 }; 57 use pci::{ 58 DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping, 59 VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError, 60 }; 61 use rate_limiter::group::RateLimiterGroup; 62 use seccompiler::SeccompAction; 63 use serde::{Deserialize, Serialize}; 64 use tracer::trace_scoped; 65 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 66 use virtio_devices::transport::VirtioTransport; 67 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 68 use virtio_devices::vhost_user::VhostUserConfig; 69 use virtio_devices::{ 70 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 71 }; 72 use virtio_devices::{Endpoint, IommuMapping}; 73 use vm_allocator::{AddressAllocator, SystemAllocator}; 74 use vm_device::dma_mapping::ExternalDmaMapping; 75 use vm_device::interrupt::{ 76 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 77 }; 78 use vm_device::{Bus, BusDevice, BusDeviceSync, Resource}; 79 use vm_memory::guest_memory::FileOffset; 80 use vm_memory::GuestMemoryRegion; 81 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion}; 82 #[cfg(target_arch = "x86_64")] 83 use vm_memory::{GuestAddressSpace, GuestMemory}; 84 use vm_migration::{ 85 protocol::MemoryRangeTable, snapshot_from_id, state_from_id, Migratable, MigratableError, 86 Pausable, Snapshot, SnapshotData, Snapshottable, Transportable, 87 }; 88 use vm_virtio::AccessPlatform; 89 use vm_virtio::VirtioDeviceType; 90 use vmm_sys_util::eventfd::EventFd; 91 #[cfg(target_arch = "x86_64")] 92 use {devices::debug_console, devices::legacy::Serial}; 93 94 use crate::config::{ 95 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 96 VdpaConfig, VhostMode, VmConfig, VsockConfig, 97 }; 98 use 
crate::console_devices::{ConsoleDeviceError, ConsoleInfo, ConsoleOutput};
use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE};
use crate::device_tree::{DeviceNode, DeviceTree};
use crate::interrupt::LegacyUserspaceInterruptManager;
use crate::interrupt::MsiInterruptManager;
use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
use crate::pci_segment::PciSegment;
use crate::serial_manager::{Error as SerialManagerError, SerialManager};
use crate::vm_config::DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT;
use crate::GuestRegionMmap;
use crate::PciDeviceInfo;
use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};

// 4 KiB (0x1000) MMIO region length used on AArch64 builds.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name.
// The double-underscore prefix keeps these identifiers disjoint from any
// user-supplied device id.
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "x86_64")]
const DEBUGCON_DEVICE_NAME: &str = "__debug_console";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
#[cfg(feature = "pvmemcontrol")]
const PVMEMCONTROL_DEVICE_NAME: &str = "__pvmemcontrol";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
const PVPANIC_DEVICE_NAME: &str = "__pvpanic";

// Devices that the user may name and for which we generate
// identifiers if the user doesn't give one.
// Single-underscore prefixes are used for the auto-generated ids.
// NOTE(review): WATCHDOG_DEVICE_NAME uses the singleton-style "__" prefix
// although it sits in this group — confirm whether that is intentional.
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";

/// Errors associated with device manager
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Cannot create tpm device
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create pvmemcontrol device
    #[cfg(feature = "pvmemcontrol")]
    CreatePvmemcontrol(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    #[cfg(target_arch = "x86_64")]
    /// Error creating debug-console output file
    DebugconOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error creating debug-console pty
    DebugconPtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(ConsoleDeviceError),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// No socket option support for console device
    NoSocketOptionSupportForConsoleDevice,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
483 VirtioDmaUnmap(std::io::Error), 484 485 /// Cannot hotplug device behind vIOMMU 486 InvalidIommuHotplug, 487 488 /// Invalid identifier as it is not unique. 489 IdentifierNotUnique(String), 490 491 /// Invalid identifier 492 InvalidIdentifier(String), 493 494 /// Error activating virtio device 495 VirtioActivate(ActivateError), 496 497 /// Failed retrieving device state from snapshot 498 RestoreGetState(MigratableError), 499 500 /// Cannot create a PvPanic device 501 PvPanicCreate(devices::pvpanic::PvPanicError), 502 503 /// Cannot create a RateLimiterGroup 504 RateLimiterGroupCreate(rate_limiter::group::Error), 505 506 /// Cannot start sigwinch listener 507 StartSigwinchListener(std::io::Error), 508 509 // Invalid console info 510 InvalidConsoleInfo, 511 512 // Invalid console fd 513 InvalidConsoleFd, 514 } 515 516 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>; 517 518 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10; 519 520 #[derive(Default)] 521 pub struct Console { 522 console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>, 523 } 524 525 impl Console { 526 pub fn need_resize(&self) -> bool { 527 if let Some(_resizer) = self.console_resizer.as_ref() { 528 return true; 529 } 530 531 false 532 } 533 534 pub fn update_console_size(&self) { 535 if let Some(resizer) = self.console_resizer.as_ref() { 536 resizer.update_console_size() 537 } 538 } 539 } 540 541 pub(crate) struct AddressManager { 542 pub(crate) allocator: Arc<Mutex<SystemAllocator>>, 543 #[cfg(target_arch = "x86_64")] 544 pub(crate) io_bus: Arc<Bus>, 545 pub(crate) mmio_bus: Arc<Bus>, 546 pub(crate) vm: Arc<dyn hypervisor::Vm>, 547 device_tree: Arc<Mutex<DeviceTree>>, 548 pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 549 pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 550 } 551 552 impl DeviceRelocation for AddressManager { 553 fn move_bar( 554 &self, 555 old_base: u64, 556 new_base: u64, 557 len: u64, 558 pci_dev: &mut dyn PciDevice, 559 
region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator: release the old PIO range first,
                    // then claim the exact new range.
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
                    &self.pci_mmio32_allocators
                } else {
                    &self.pci_mmio64_allocators
                };

                // Find the specific allocator that this BAR was allocated from and use it for new one
                for allocator in allocators {
                    let allocator_base = allocator.lock().unwrap().base();
                    let allocator_end = allocator.lock().unwrap().end();

                    if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                        // Same free-then-allocate dance as the PIO case above,
                        // against the matching per-segment MMIO allocator.
                        allocator
                            .lock()
                            .unwrap()
                            .free(GuestAddress(old_base), len as GuestUsize);

                        allocator
                            .lock()
                            .unwrap()
                            .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
                            .ok_or_else(|| {
                                io::Error::new(
                                    io::ErrorKind::Other,
                                    "failed allocating new MMIO range",
                                )
                            })?;

                        break;
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                // Rewrite the PciBar resource whose type and base match the
                // BAR being moved; it is an error if none matches.
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {id} from device tree"),
                ));
            }
        }

        // virtio-pci devices need extra care: their ioeventfds and shared
        // memory regions are registered against guest-physical addresses.
        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The moved BAR is the config BAR: re-register every ioeventfd
                // at its new address (unregister old, then register new).
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {e:?}"),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {e:?}"),
                            )
                        })?;
                }
            } else {
                // Otherwise the moved BAR may back the device's shared memory
                // regions; if so the hypervisor mapping must be recreated.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        // Describe the existing mapping so it can be removed.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {e:?}"),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {e:?}"),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {e:?}"),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the device itself update its internal BAR bookkeeping.
        pci_dev.move_bar(old_base, new_base)
    }
}

// Serialized state captured in / restored from a snapshot: the device tree
// plus the counter used to generate new device ids.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

// Main side of a pseudo-terminal, together with the filesystem path of its
// peer end.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    fn clone(&self) -> Self {
        PtyPair {
            // NOTE(review): try_clone() duplicates the fd and can fail
            // (e.g. fd limit); the unwrap turns that into a panic — confirm
            // this is acceptable for all callers.
            main: self.main.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

// Strongly-typed handle to the different kinds of PCI devices the manager
// can hold: VFIO passthrough, paravirtualized virtio, or vfio-user.
#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}

// A virtio device plus the placement/IOMMU metadata needed to plug it on a
// PCI segment.
#[derive(Clone)]
struct MetaVirtioDevice {
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // Whether the device sits behind the virtual IOMMU.
    iommu: bool,
    id: String,
    pci_segment: u16,
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}

// Register addresses of ACPI platform devices, collected while devices are
// created so the ACPI tables can reference them later.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub
sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

// Proxy used on MSHV + SEV-SNP builds to request page access from the
// hypervisor before a guest virtual address is touched.
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
struct SevSnpPageAccessProxy {
    vm: Arc<dyn hypervisor::Vm>,
}

#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl std::fmt::Debug for SevSnpPageAccessProxy {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "SNP Page access proxy")
    }
}

#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl SevSnpPageAccessProxy {
    fn new(vm: Arc<dyn hypervisor::Vm>) -> SevSnpPageAccessProxy {
        SevSnpPageAccessProxy { vm }
    }
}

#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl AccessPlatform for SevSnpPageAccessProxy {
    // GPAs need no translation on this platform; identity mapping.
    fn translate_gpa(&self, base: u64, _size: u64) -> std::result::Result<u64, std::io::Error> {
        Ok(base)
    }

    // Ask the hypervisor to grant access to the pages before returning the
    // (identity-translated) address.
    fn translate_gva(&self, base: u64, size: u64) -> std::result::Result<u64, std::io::Error> {
        self.vm
            .gain_page_access(base, size as u32)
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
        Ok(base)
    }
}

pub struct DeviceManager {
    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // pty foreground status,
    console_resize_pipe: Option<Arc<File>>,

    // To restore on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<dyn BusDeviceSync>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    exit_evt: EventFd,
    reset_evt: EventFd,

    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    acpi_address: GuestAddress,

    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    #[cfg(feature = "pvmemcontrol")]
    pvmemcontrol_devices: Option<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // aio availability if detected
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g.
// ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    snapshot: Option<Snapshot>,

    rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,

    mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
}

/// Split the MMIO window `[start, end]` into one `AddressAllocator` per PCI
/// segment, sized proportionally to each segment's weight.
///
/// `weights` holds one entry per segment (indexed by segment id). Each
/// aperture begins on an `alignment` boundary because the per-unit-weight
/// size is rounded down to a multiple of `alignment`.
fn create_mmio_allocators(
    start: u64,
    end: u64,
    num_pci_segments: u16,
    weights: Vec<u32>,
    alignment: u64,
) -> Vec<Arc<Mutex<AddressAllocator>>> {
    let total_weight: u32 = weights.iter().sum();

    // Start each PCI segment mmio range on an aligned boundary
    let pci_segment_mmio_size = (end - start + 1) / (alignment * total_weight as u64) * alignment;

    let mut mmio_allocators = vec![];
    // `i` accumulates the weight consumed so far, i.e. the offset (in
    // weight units) of the next segment's aperture.
    let mut i = 0;
    for segment_id in 0..num_pci_segments as u64 {
        let weight = weights[segment_id as usize] as u64;
        let mmio_start = start + i * pci_segment_mmio_size;
        let mmio_size = pci_segment_mmio_size * weight;
        let allocator = Arc::new(Mutex::new(
            AddressAllocator::new(GuestAddress(mmio_start), mmio_size).unwrap(),
        ));
        mmio_allocators.push(allocator);
        i += weight;
    }

    mmio_allocators
}

impl DeviceManager {
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
        mmio_bus: Arc<Bus>,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        cpu_manager: Arc<Mutex<CpuManager>>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
        snapshot: Option<Snapshot>,
        dynamic: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        // When restoring from a snapshot, resume from the saved device tree
        // and id counter; otherwise start from scratch.
        let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
            let state: DeviceManagerState = snapshot.to_state().unwrap();
            (
                Arc::new(Mutex::new(state.device_tree.clone())),
                state.device_id_cnt,
            )
        } else {
            (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
        };

        let num_pci_segments =
            if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
                platform_config.num_pci_segments
            } else {
                1
            };

        // Per-segment 32-bit aperture weights: default for every segment,
        // then overridden by any explicit per-segment configuration.
        let mut mmio32_aperture_weights: Vec<u32> =
            std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
                .take(num_pci_segments.into())
                .collect();
        if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
            for pci_segment in pci_segments.iter() {
                mmio32_aperture_weights[pci_segment.pci_segment as usize] =
                    pci_segment.mmio32_aperture_weight
            }
        }

        // 32-bit MMIO apertures are aligned to 4 KiB.
        let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0;
        let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE;
        let pci_mmio32_allocators = create_mmio_allocators(
            start_of_mmio32_area,
            end_of_mmio32_area,
            num_pci_segments,
            mmio32_aperture_weights,
            4 << 10,
        );

        // Same for the 64-bit apertures, aligned to 4 GiB.
        let mut mmio64_aperture_weights: Vec<u32> =
            std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
                .take(num_pci_segments.into())
                .collect();
        if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
            for pci_segment in pci_segments.iter() {
                mmio64_aperture_weights[pci_segment.pci_segment as usize] =
                    pci_segment.mmio64_aperture_weight
            }
        }

        let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0;
        let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0;
        let pci_mmio64_allocators = create_mmio_allocators(
            start_of_mmio64_area,
            end_of_mmio64_area,
            num_pci_segments,
            mmio64_aperture_weights,
            4 << 30,
        );

        let address_manager = Arc::new(AddressManager {
            allocator: memory_manager.lock().unwrap().allocator(),
            #[cfg(target_arch = "x86_64")]
            io_bus,
            mmio_bus,
            vm: vm.clone(),
            device_tree: Arc::clone(&device_tree),
            pci_mmio32_allocators,
            pci_mmio64_allocators,
        });

        // First we create the MSI interrupt manager, the legacy one is created
        // later, after the IOAPIC device creation.
        // The reason we create the MSI one first is because the IOAPIC needs it,
        // and then the legacy interrupt manager needs an IOAPIC. So we're
        // handling a linear dependency chain:
        // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
        let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
            Arc::new(MsiInterruptManager::new(
                Arc::clone(&address_manager.allocator),
                vm,
            ));

        // NOTE(review): a platform MMIO allocation failure here is reported
        // as AllocateIoPort — AllocateMmioAddress looks like the intended
        // variant; confirm before changing.
        let acpi_address = address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        let mut pci_irq_slots = [0; 32];
        PciSegment::reserve_legacy_interrupts_for_pci_devices(
            &address_manager,
            &mut pci_irq_slots,
        )?;

        // Segment 0 is the default segment; additional segments are created
        // with their NUMA node derived from the configuration.
        let mut pci_segments = vec![PciSegment::new_default_segment(
            &address_manager,
            Arc::clone(&address_manager.pci_mmio32_allocators[0]),
            Arc::clone(&address_manager.pci_mmio64_allocators[0]),
            &pci_irq_slots,
        )?];

        for i in 1..num_pci_segments as usize {
            pci_segments.push(PciSegment::new(
                i as u16,
                numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
                &address_manager,
                Arc::clone(&address_manager.pci_mmio32_allocators[i]),
                Arc::clone(&address_manager.pci_mmio64_allocators[i]),
                &pci_irq_slots,
            )?);
        }

        if dynamic {
            let acpi_address = address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
                .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1138 1139 address_manager 1140 .mmio_bus 1141 .insert( 1142 cpu_manager.clone(), 1143 acpi_address.0, 1144 CPU_MANAGER_ACPI_SIZE as u64, 1145 ) 1146 .map_err(DeviceManagerError::BusError)?; 1147 1148 cpu_manager.lock().unwrap().set_acpi_address(acpi_address); 1149 } 1150 1151 let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new(); 1152 if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() { 1153 for rate_limit_group_cfg in rate_limit_groups_cfg { 1154 let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config; 1155 let bw = rate_limit_cfg.bandwidth.unwrap_or_default(); 1156 let ops = rate_limit_cfg.ops.unwrap_or_default(); 1157 let mut rate_limit_group = RateLimiterGroup::new( 1158 &rate_limit_group_cfg.id, 1159 bw.size, 1160 bw.one_time_burst.unwrap_or(0), 1161 bw.refill_time, 1162 ops.size, 1163 ops.one_time_burst.unwrap_or(0), 1164 ops.refill_time, 1165 ) 1166 .map_err(DeviceManagerError::RateLimiterGroupCreate)?; 1167 1168 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?; 1169 1170 rate_limit_group.start_thread(exit_evt).unwrap(); 1171 rate_limit_groups 1172 .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group)); 1173 } 1174 } 1175 1176 let device_manager = DeviceManager { 1177 address_manager: Arc::clone(&address_manager), 1178 console: Arc::new(Console::default()), 1179 interrupt_controller: None, 1180 #[cfg(target_arch = "aarch64")] 1181 cmdline_additions: Vec::new(), 1182 ged_notification_device: None, 1183 config, 1184 memory_manager, 1185 cpu_manager, 1186 virtio_devices: Vec::new(), 1187 bus_devices: Vec::new(), 1188 device_id_cnt, 1189 msi_interrupt_manager, 1190 legacy_interrupt_manager: None, 1191 passthrough_device: None, 1192 vfio_container: None, 1193 iommu_device: None, 1194 iommu_mapping: None, 1195 iommu_attached_devices: None, 1196 pci_segments, 1197 device_tree, 1198 exit_evt, 1199 reset_evt, 1200 #[cfg(target_arch = "aarch64")] 1201 id_to_dev_info: 
HashMap::new(), 1202 seccomp_action, 1203 numa_nodes, 1204 balloon: None, 1205 activate_evt: activate_evt 1206 .try_clone() 1207 .map_err(DeviceManagerError::EventFd)?, 1208 acpi_address, 1209 selected_segment: 0, 1210 serial_manager: None, 1211 console_resize_pipe: None, 1212 original_termios_opt: Arc::new(Mutex::new(None)), 1213 virtio_mem_devices: Vec::new(), 1214 #[cfg(target_arch = "aarch64")] 1215 gpio_device: None, 1216 #[cfg(feature = "pvmemcontrol")] 1217 pvmemcontrol_devices: None, 1218 pvpanic_device: None, 1219 force_iommu, 1220 io_uring_supported: None, 1221 aio_supported: None, 1222 boot_id_list, 1223 timestamp, 1224 pending_activations: Arc::new(Mutex::new(Vec::default())), 1225 acpi_platform_addresses: AcpiPlatformAddresses::default(), 1226 snapshot, 1227 rate_limit_groups, 1228 mmio_regions: Arc::new(Mutex::new(Vec::new())), 1229 }; 1230 1231 let device_manager = Arc::new(Mutex::new(device_manager)); 1232 1233 address_manager 1234 .mmio_bus 1235 .insert( 1236 Arc::clone(&device_manager) as Arc<dyn BusDeviceSync>, 1237 acpi_address.0, 1238 DEVICE_MANAGER_ACPI_SIZE as u64, 1239 ) 1240 .map_err(DeviceManagerError::BusError)?; 1241 1242 Ok(device_manager) 1243 } 1244 1245 pub fn console_resize_pipe(&self) -> Option<Arc<File>> { 1246 self.console_resize_pipe.clone() 1247 } 1248 1249 pub fn create_devices( 1250 &mut self, 1251 console_info: Option<ConsoleInfo>, 1252 console_resize_pipe: Option<Arc<File>>, 1253 original_termios_opt: Arc<Mutex<Option<termios>>>, 1254 ) -> DeviceManagerResult<()> { 1255 trace_scoped!("create_devices"); 1256 1257 let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new(); 1258 1259 let interrupt_controller = self.add_interrupt_controller()?; 1260 1261 self.cpu_manager 1262 .lock() 1263 .unwrap() 1264 .set_interrupt_controller(interrupt_controller.clone()); 1265 1266 // Now we can create the legacy interrupt manager, which needs the freshly 1267 // formed IOAPIC device. 
        let legacy_interrupt_manager: Arc<
            dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
        > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
            &interrupt_controller,
        )));

        {
            // Expose the MemoryManager's ACPI window (hotplug) on the MMIO
            // bus, if it has one.
            if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
                self.address_manager
                    .mmio_bus
                    .insert(
                        Arc::clone(&self.memory_manager) as Arc<dyn BusDeviceSync>,
                        acpi_address.0,
                        MEMORY_MANAGER_ACPI_SIZE as u64,
                    )
                    .map_err(DeviceManagerError::BusError)?;
            }
        }

        #[cfg(target_arch = "x86_64")]
        self.add_legacy_devices(
            self.reset_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )?;

        #[cfg(target_arch = "aarch64")]
        self.add_legacy_devices(&legacy_interrupt_manager)?;

        {
            self.ged_notification_device = self.add_acpi_devices(
                &legacy_interrupt_manager,
                self.reset_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )?;
        }

        // Keep the original terminal settings so they can be restored later.
        self.original_termios_opt = original_termios_opt;

        self.console = self.add_console_devices(
            &legacy_interrupt_manager,
            &mut virtio_devices,
            console_info,
            console_resize_pipe,
        )?;

        if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
            let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
            self.bus_devices
                .push(Arc::clone(&tpm_dev) as Arc<dyn BusDeviceSync>)
        }
        self.legacy_interrupt_manager = Some(legacy_interrupt_manager);

        virtio_devices.append(&mut self.make_virtio_devices()?);

        self.add_pci_devices(virtio_devices.clone())?;

        self.virtio_devices = virtio_devices;

        // Add pvmemcontrol if required
        #[cfg(feature = "pvmemcontrol")]
        {
            if self.config.lock().unwrap().pvmemcontrol.is_some() {
                let (pvmemcontrol_bus_device, pvmemcontrol_pci_device) =
                    self.make_pvmemcontrol_device()?;
                self.pvmemcontrol_devices =
                    Some((pvmemcontrol_bus_device, pvmemcontrol_pci_device));
            }
        }

        if self.config.clone().lock().unwrap().pvpanic {
            self.pvpanic_device = self.add_pvpanic_device()?;
        }

        Ok(())
    }

    /// Snapshot of the manager's serializable state (device tree + id
    /// counter), used when snapshotting the VM.
    fn state(&self) -> DeviceManagerState {
        DeviceManagerState {
            device_tree: self.device_tree.lock().unwrap().clone(),
            device_id_cnt: self.device_id_cnt,
        }
    }

    /// Returns the inclusive guest-physical range used for MSI doorbells:
    /// derived from the vGIC configuration on aarch64, the fixed APIC
    /// 0xfee0_0000..=0xfeef_ffff range on x86_64.
    fn get_msi_iova_space(&mut self) -> (u64, u64) {
        #[cfg(target_arch = "aarch64")]
        {
            let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
            let vgic_config = gic::Gic::create_default_config(vcpus.into());
            (
                vgic_config.msi_addr,
                vgic_config.msi_addr + vgic_config.msi_size - 1,
            )
        }
        #[cfg(target_arch = "x86_64")]
        (0xfee0_0000, 0xfeef_ffff)
    }

    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }

    /// Plugs every prepared virtio device (plus VFIO / vfio-user devices)
    /// onto the PCI segments, and when an IOMMU is configured, creates the
    /// virtio-iommu device and records which BDFs sit behind it. The iommu
    /// device itself is added last so it knows its attached device list.
    #[allow(unused_variables)]
    fn add_pci_devices(
        &mut self,
        virtio_devices: Vec<MetaVirtioDevice>,
    ) -> DeviceManagerResult<()> {
        let iommu_id = String::from(IOMMU_DEVICE_NAME);

        let iommu_device = if self.config.lock().unwrap().iommu {
            let (device, mapping) = virtio_devices::Iommu::new(
                iommu_id.clone(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.get_msi_iova_space(),
                state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioIommu)?;
            let device = Arc::new(Mutex::new(device));
            self.iommu_device = Some(Arc::clone(&device));
            self.iommu_mapping = Some(mapping);

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(iommu_id.clone(), device_node!(iommu_id, device));

            Some(device)
        } else {
            None
        };

        let mut iommu_attached_devices = Vec::new();
        {
            for handle in virtio_devices {
                // Only devices requesting IOMMU protection get the mapping.
                let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
                    self.iommu_mapping.clone()
                } else {
                    None
                };

                let dev_id = self.add_virtio_pci_device(
                    handle.virtio_device,
                    &mapping,
                    handle.id,
                    handle.pci_segment,
                    handle.dma_handler,
                )?;

                if handle.iommu {
                    iommu_attached_devices.push(dev_id);
                }
            }

            let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
            iommu_attached_devices.append(&mut vfio_iommu_device_ids);

            let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
            iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);

            // Add all devices from forced iommu segments: every possible
            // device slot (0..32) on those segments is treated as attached.
            if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
                if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
                    for segment in iommu_segments {
                        for device in 0..32 {
                            let bdf = PciBdf::new(*segment, 0, device, 0);
                            if !iommu_attached_devices.contains(&bdf) {
                                iommu_attached_devices.push(bdf);
                            }
                        }
                    }
                }
            }

            // The iommu device always lands on segment 0 and is never
            // behind itself (mapping is None).
            if let Some(iommu_device) = iommu_device {
                let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
                self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
            }
        }

        // Register the per-segment PCI config-space accessors on the buses.
        for segment in &self.pci_segments {
            #[cfg(target_arch = "x86_64")]
            if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
                self.bus_devices
                    .push(Arc::clone(pci_config_io) as Arc<dyn BusDeviceSync>);
            }

            self.bus_devices
                .push(Arc::clone(&segment.pci_config_mmio) as Arc<dyn BusDeviceSync>);
        }

        Ok(())
    }
    /// aarch64: creates the vGIC interrupt controller, restoring its state
    /// (and optionally the PMU) when a snapshot is present.
    #[cfg(target_arch = "aarch64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
            gic::Gic::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
                self.address_manager.vm.clone(),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        // Restore the vGic if this is in the process of restoration
        let id = String::from(gic::GIC_SNAPSHOT_ID);
        if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
            // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
            if self
                .cpu_manager
                .lock()
                .unwrap()
                .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
                .is_err()
            {
                info!("Failed to initialize PMU");
            }

            let vgic_state = vgic_snapshot
                .to_state()
                .map_err(DeviceManagerError::RestoreGetState)?;
            let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
            interrupt_controller
                .lock()
                .unwrap()
                .restore_vgic(vgic_state, &saved_vcpu_states)
                .unwrap();
        }

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// aarch64: accessor for the vGIC, populated by `add_interrupt_controller`.
    #[cfg(target_arch = "aarch64")]
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }

    /// x86_64: creates the IOAPIC, registers it on the MMIO bus at
    /// IOAPIC_START and records it in the device tree.
    #[cfg(target_arch = "x86_64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let id = String::from(IOAPIC_DEVICE_NAME);

        // Create IOAPIC
        let interrupt_controller = Arc::new(Mutex::new(
            ioapic::Ioapic::new(
                id.clone(),
                APIC_START,
                Arc::clone(&self.msi_interrupt_manager),
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        self.address_manager
            .mmio_bus
            .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
            .map_err(DeviceManagerError::BusError)?;

        self.bus_devices
            .push(Arc::clone(&interrupt_controller) as Arc<dyn BusDeviceSync>);

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// Creates the ACPI platform devices: the shutdown/reset device, the GED
    /// (Generic Event Device) used for hotplug notifications, and the ACPI PM
    /// timer. On x86_64 the shutdown and PM-timer devices are also wired to
    /// fixed PIO ports (0x600 and 0x608) and their addresses recorded for the
    /// ACPI tables. Returns the GED device.
    fn add_acpi_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        reset_evt: EventFd,
        exit_evt: EventFd,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
            exit_evt,
            reset_evt,
            vcpus_kill_signalled,
        )));

        self.bus_devices
            .push(Arc::clone(&shutdown_device) as Arc<dyn BusDeviceSync>);

        #[cfg(target_arch = "x86_64")]
        {
            let shutdown_pio_address: u16 = 0x600;

            // NOTE(review): 0x8 ports are reserved in the allocator but only
            // 0x4 are registered on the bus below — confirm intentional.
            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            // Sleep control/status and reset registers all share this port.
            self.acpi_platform_addresses.sleep_control_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.sleep_status_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.reset_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
        }

        let ged_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();
        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: ged_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;
        let ged_address = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(
                None,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
                None,
            )
            .ok_or(DeviceManagerError::AllocateMmioAddress)?;
        let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
            interrupt_group,
            ged_irq,
            ged_address,
        )));
        self.address_manager
            .mmio_bus
            .insert(
                ged_device.clone(),
                ged_address.0,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;
        self.bus_devices
            .push(Arc::clone(&ged_device) as Arc<dyn BusDeviceSync>);

        let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));

        self.bus_devices
            .push(Arc::clone(&pm_timer_device) as Arc<dyn BusDeviceSync>);

        #[cfg(target_arch = "x86_64")]
        {
            let pm_timer_pio_address: u16 = 0x608;

            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            self.acpi_platform_addresses.pm_timer_address =
                Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
        }

        Ok(Some(ged_device))
    }

    /// x86_64: adds the legacy PIO devices — i8042 (reset), CMOS (RTC plus
    /// memory-size registers), fw-debug port (0x402) and the 0x80 debug port.
    #[cfg(target_arch = "x86_64")]
    fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        // Add a shutdown device (i8042)
        let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
            reset_evt.try_clone().unwrap(),
            vcpus_kill_signalled.clone(),
        )));

        self.bus_devices
            .push(Arc::clone(&i8042) as Arc<dyn BusDeviceSync>);

        self.address_manager
            .io_bus
            .insert(i8042, 0x61, 0x4)
            .map_err(DeviceManagerError::BusError)?;
        {
            // Add a CMOS emulated device; it reports the RAM split below and
            // above the 32-bit hole to the guest firmware.
            let mem_size = self
                .memory_manager
                .lock()
                .unwrap()
                .guest_memory()
                .memory()
                .last_addr()
                .0
                + 1;
            let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
            let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);

            let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
                mem_below_4g,
                mem_above_4g,
                reset_evt,
                Some(vcpus_kill_signalled),
            )));

            self.bus_devices
                .push(Arc::clone(&cmos) as Arc<dyn BusDeviceSync>);

            self.address_manager
                .io_bus
                .insert(cmos, 0x70, 0x2)
                .map_err(DeviceManagerError::BusError)?;

            let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));

            self.bus_devices
                .push(Arc::clone(&fwdebug) as Arc<dyn BusDeviceSync>);

            self.address_manager
                .io_bus
                .insert(fwdebug, 0x402, 0x1)
                .map_err(DeviceManagerError::BusError)?;
        }

        // 0x80 debug port
        let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
        self.bus_devices
            .push(Arc::clone(&debug_port) as Arc<dyn BusDeviceSync>);
        self.address_manager
            .io_bus
            .insert(debug_port, 0x80, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        Ok(())
    }

    /// aarch64: adds the legacy MMIO devices — an RTC and a GPIO controller —
    /// each with a freshly allocated IRQ, recorded in `id_to_dev_info` for
    /// FDT generation. The GPIO device is also restorable from snapshot.
    #[cfg(target_arch = "aarch64")]
    fn add_legacy_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
    ) -> DeviceManagerResult<()> {
        // Add a RTC device
        let rtc_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: rtc_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));

        self.bus_devices
            .push(Arc::clone(&rtc_device) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(rtc_device, addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        // Record device info so the FDT can describe the RTC to the guest.
        self.id_to_dev_info.insert(
            (DeviceType::Rtc, "rtc".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: rtc_irq,
            },
        );

        // Add a GPIO device
        let id = String::from(GPIO_DEVICE_NAME);
        let gpio_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: gpio_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
            id.clone(),
            interrupt_group,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&gpio_device) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(gpio_device.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.gpio_device = Some(gpio_device.clone());

        self.id_to_dev_info.insert(
            (DeviceType::Gpio, "gpio".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: gpio_irq,
            },
        );

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, gpio_device));

        Ok(())
    }

    /// x86_64: adds the debug-console (debugcon) device on the configured
    /// PIO port (or the default port when none is configured).
    #[cfg(target_arch = "x86_64")]
    fn add_debug_console_device(
        &mut self,
        debug_console_writer: Box<dyn io::Write + Send>,
    ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> {
        let id = String::from(DEBUGCON_DEVICE_NAME);
        let debug_console = Arc::new(Mutex::new(DebugConsole::new(
            id.clone(),
            debug_console_writer,
        )));

        let port = self
            .config
            .lock()
            .unwrap()
            .debug_console
            .clone()
            .iobase
            .map(|port| port as u64)
            .unwrap_or(debug_console::DEFAULT_PORT);

        self.bus_devices
            .push(Arc::clone(&debug_console) as Arc<dyn BusDeviceSync>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(debug_console.clone(), port, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, debug_console));

        Ok(debug_console)
    }

    /// x86_64: adds the 16550-style serial device at the conventional COM1
    /// location (PIO 0x3f8, IRQ 4), restorable from snapshot.
    #[cfg(target_arch = "x86_64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
        // Serial is tied to IRQ #4
        let serial_irq = 4;

        let id = String::from(SERIAL_DEVICE_NAME);

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(Serial::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(serial.clone(), 0x3f8, 0x8)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// aarch64: adds a PL011 UART at the legacy serial MMIO location with a
    /// freshly allocated IRQ; records it for FDT generation and appends an
    /// `earlycon` hint to the kernel command line.
    #[cfg(target_arch = "aarch64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
        let id = String::from(SERIAL_DEVICE_NAME);

        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            self.timestamp,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: serial_irq,
            },
        );

        self.cmdline_additions
            .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// Creates the virtio-console device, choosing the backend endpoint from
    /// the resolved console output mode (file, PTY, TTY with optional stdin,
    /// null, or off). Returns the resizer only when the console is a TTY.
    fn add_virtio_console_device(
        &mut self,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_fd: ConsoleOutput,
        resize_pipe: Option<Arc<File>>,
    ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
        let console_config = self.config.lock().unwrap().console.clone();
        let endpoint = match console_fd {
            ConsoleOutput::File(file) => Endpoint::File(file),
            ConsoleOutput::Pty(file) => {
                self.console_resize_pipe = resize_pipe;
                Endpoint::PtyPair(Arc::new(file.try_clone().unwrap()), file)
            }
            ConsoleOutput::Tty(stdout) => {
                // Only a real terminal can deliver resize (SIGWINCH) events.
                if stdout.is_terminal() {
                    self.console_resize_pipe = resize_pipe;
                }

                // If an interactive TTY then we can accept input
                // SAFETY: FFI call. Trivially safe.
                if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
                    // SAFETY: FFI call to dup. Trivially safe.
                    let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
                    if stdin == -1 {
                        return vmm_sys_util::errno::errno_result()
                            .map_err(DeviceManagerError::DupFd);
                    }
                    // SAFETY: stdin is valid and owned solely by us.
                    let stdin = unsafe { File::from_raw_fd(stdin) };
                    Endpoint::FilePair(stdout, Arc::new(stdin))
                } else {
                    Endpoint::File(stdout)
                }
            }
            ConsoleOutput::Socket(_) => {
                return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
            }
            ConsoleOutput::Null => Endpoint::Null,
            ConsoleOutput::Off => return Ok(None),
        };
        let id = String::from(CONSOLE_DEVICE_NAME);

        let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
            id.clone(),
            endpoint,
            self.console_resize_pipe
                .as_ref()
                .map(|p| p.try_clone().unwrap()),
            self.force_iommu | console_config.iommu,
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateVirtioConsole)?;
        let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
        virtio_devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_console_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: console_config.iommu,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_console_device));

        // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
        Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
            Some(console_resizer)
        } else {
            None
        })
    }

    /// Adds all devices that behave like a console with respect to the VM
    /// configuration. This includes:
    /// - debug-console
    /// - serial-console
    /// - virtio-console
    fn add_console_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_info: Option<ConsoleInfo>,
        console_resize_pipe: Option<Arc<File>>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        if console_info.is_none() {
            return Err(DeviceManagerError::InvalidConsoleInfo);
        }

        // SAFETY: console_info is Some, so it's safe to unwrap.
        let console_info = console_info.unwrap();

        // File/TTY outputs get a writer; the other modes are driven by the
        // SerialManager (or are simply off).
        let serial_writer: Option<Box<dyn io::Write + Send>> = match console_info.serial_main_fd {
            ConsoleOutput::File(ref file) | ConsoleOutput::Tty(ref file) => {
                Some(Box::new(Arc::clone(file)))
            }
            ConsoleOutput::Off
            | ConsoleOutput::Null
            | ConsoleOutput::Pty(_)
            | ConsoleOutput::Socket(_) => None,
        };

        if !matches!(console_info.serial_main_fd, ConsoleOutput::Off) {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            // Input-capable modes need a SerialManager thread to pump data
            // into the serial device.
            self.serial_manager = match console_info.serial_main_fd {
                ConsoleOutput::Pty(_) | ConsoleOutput::Tty(_) | ConsoleOutput::Socket(_) => {
                    let serial_manager = SerialManager::new(
                        serial,
                        console_info.serial_main_fd,
                        serial_config.socket,
                    )
                    .map_err(DeviceManagerError::CreateSerialManager)?;
                    if let Some(mut serial_manager) = serial_manager {
                        serial_manager
                            .start_thread(
                                self.exit_evt
                                    .try_clone()
                                    .map_err(DeviceManagerError::EventFd)?,
                            )
                            .map_err(DeviceManagerError::SpawnSerialManager)?;
                        Some(Arc::new(serial_manager))
                    } else {
                        None
                    }
                }
                _ => None,
            };
        }

        #[cfg(target_arch = "x86_64")]
        {
            let debug_console_writer: Option<Box<dyn io::Write + Send>> =
                match
console_info.debug_main_fd { 2150 ConsoleOutput::File(file) | ConsoleOutput::Tty(file) => Some(Box::new(file)), 2151 ConsoleOutput::Off 2152 | ConsoleOutput::Null 2153 | ConsoleOutput::Pty(_) 2154 | ConsoleOutput::Socket(_) => None, 2155 }; 2156 if let Some(writer) = debug_console_writer { 2157 let _ = self.add_debug_console_device(writer)?; 2158 } 2159 } 2160 2161 let console_resizer = self.add_virtio_console_device( 2162 virtio_devices, 2163 console_info.console_main_fd, 2164 console_resize_pipe, 2165 )?; 2166 2167 Ok(Arc::new(Console { console_resizer })) 2168 } 2169 2170 fn add_tpm_device( 2171 &mut self, 2172 tpm_path: PathBuf, 2173 ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> { 2174 // Create TPM Device 2175 let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| { 2176 DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e)) 2177 })?; 2178 let tpm = Arc::new(Mutex::new(tpm)); 2179 2180 // Add TPM Device to mmio 2181 self.address_manager 2182 .mmio_bus 2183 .insert( 2184 tpm.clone(), 2185 arch::layout::TPM_START.0, 2186 arch::layout::TPM_SIZE, 2187 ) 2188 .map_err(DeviceManagerError::BusError)?; 2189 2190 Ok(tpm) 2191 } 2192 2193 fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2194 let mut devices: Vec<MetaVirtioDevice> = Vec::new(); 2195 2196 // Create "standard" virtio devices (net/block/rng) 2197 devices.append(&mut self.make_virtio_block_devices()?); 2198 devices.append(&mut self.make_virtio_net_devices()?); 2199 devices.append(&mut self.make_virtio_rng_devices()?); 2200 2201 // Add virtio-fs if required 2202 devices.append(&mut self.make_virtio_fs_devices()?); 2203 2204 // Add virtio-pmem if required 2205 devices.append(&mut self.make_virtio_pmem_devices()?); 2206 2207 // Add virtio-vsock if required 2208 devices.append(&mut self.make_virtio_vsock_devices()?); 2209 2210 devices.append(&mut self.make_virtio_mem_devices()?); 2211 2212 // Add 
virtio-balloon if required 2213 devices.append(&mut self.make_virtio_balloon_devices()?); 2214 2215 // Add virtio-watchdog device 2216 devices.append(&mut self.make_virtio_watchdog_devices()?); 2217 2218 // Add vDPA devices if required 2219 devices.append(&mut self.make_vdpa_devices()?); 2220 2221 Ok(devices) 2222 } 2223 2224 // Cache whether aio is supported to avoid checking for very block device 2225 fn aio_is_supported(&mut self) -> bool { 2226 if let Some(supported) = self.aio_supported { 2227 return supported; 2228 } 2229 2230 let supported = block_aio_is_supported(); 2231 self.aio_supported = Some(supported); 2232 supported 2233 } 2234 2235 // Cache whether io_uring is supported to avoid probing for very block device 2236 fn io_uring_is_supported(&mut self) -> bool { 2237 if let Some(supported) = self.io_uring_supported { 2238 return supported; 2239 } 2240 2241 let supported = block_io_uring_is_supported(); 2242 self.io_uring_supported = Some(supported); 2243 supported 2244 } 2245 2246 fn make_virtio_block_device( 2247 &mut self, 2248 disk_cfg: &mut DiskConfig, 2249 ) -> DeviceManagerResult<MetaVirtioDevice> { 2250 let id = if let Some(id) = &disk_cfg.id { 2251 id.clone() 2252 } else { 2253 let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?; 2254 disk_cfg.id = Some(id.clone()); 2255 id 2256 }; 2257 2258 info!("Creating virtio-block device: {:?}", disk_cfg); 2259 2260 let (virtio_device, migratable_device) = if disk_cfg.vhost_user { 2261 let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone(); 2262 let vu_cfg = VhostUserConfig { 2263 socket, 2264 num_queues: disk_cfg.num_queues, 2265 queue_size: disk_cfg.queue_size, 2266 }; 2267 let vhost_user_block = Arc::new(Mutex::new( 2268 match virtio_devices::vhost_user::Blk::new( 2269 id.clone(), 2270 vu_cfg, 2271 self.seccomp_action.clone(), 2272 self.exit_evt 2273 .try_clone() 2274 .map_err(DeviceManagerError::EventFd)?, 2275 self.force_iommu, 2276 state_from_id(self.snapshot.as_ref(), id.as_str()) 2277 
    /// Creates a single virtio-block (or vhost-user-block) device from
    /// `disk_cfg`, choosing the most efficient disk backend available
    /// (io_uring > aio > synchronous), and registers it in the device tree.
    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate (and persist) a fresh one.
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
            // vhost-user path: the disk is served by an external backend
            // reachable through the configured socket.
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let vhost_user_block = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(
                    id.clone(),
                    vu_cfg,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            // Pick the disk backend based on image format and the I/O
            // facilities supported by the host kernel.
            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(
                                FixedVhdDiskAsync::new(file)
                                    .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                            ) as Box<dyn DiskFile>
                        }
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous RAW disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                        }
                    } else if !disk_cfg.disable_aio && self.aio_is_supported() {
                        info!("Using asynchronous RAW disk file (aio)");
                        Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            // Per-disk rate limiter takes precedence over a named, shared
            // rate-limit group from the VM configuration.
            let rate_limit_group =
                if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
                    // Create an anonymous RateLimiterGroup that is dropped when the Disk
                    // is dropped.
                    let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
                    let ops = rate_limiter_cfg.ops.unwrap_or_default();
                    let mut rate_limit_group = RateLimiterGroup::new(
                        disk_cfg.id.as_ref().unwrap(),
                        bw.size,
                        bw.one_time_burst.unwrap_or(0),
                        bw.refill_time,
                        ops.size,
                        ops.one_time_burst.unwrap_or(0),
                        ops.refill_time,
                    )
                    .map_err(DeviceManagerError::RateLimiterGroupCreate)?;

                    rate_limit_group
                        .start_thread(
                            self.exit_evt
                                .try_clone()
                                .map_err(DeviceManagerError::EventFd)?,
                        )
                        .unwrap();

                    Some(Arc::new(rate_limit_group))
                } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
                    self.rate_limit_groups.get(rate_limit_group).cloned()
                } else {
                    None
                };

            let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() {
                queue_affinity
                    .iter()
                    .map(|a| (a.queue_index, a.host_cpus.clone()))
                    .collect()
            } else {
                BTreeMap::new()
            };

            let virtio_block = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    disk_cfg.serial.clone(),
                    self.seccomp_action.clone(),
                    rate_limit_group,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    queue_affinity,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Creates every disk from the VM configuration, writing back the ids
    /// that were generated along the way.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }
    /// Creates a single virtio-net (or vhost-user-net) device from
    /// `net_cfg`, backed by a named TAP interface, pre-opened TAP fds, or a
    /// freshly created TAP device, and registers it in the device tree.
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate (and persist) a fresh one.
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            // vhost-user path: networking is served by an external backend.
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    net_cfg.mtu,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                ) {
                    Ok(vun_device) => vun_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserNet(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_net as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let state = state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?;
            let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
                // Attach to an existing, named TAP interface.
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        Some(tap_if_name),
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else if let Some(fds) = &net_cfg.fds {
                // Reuse TAP file descriptors that were handed to us.
                let net = virtio_devices::Net::from_tap_fds(
                    id.clone(),
                    fds,
                    Some(net_cfg.mac),
                    net_cfg.mtu,
                    self.force_iommu | net_cfg.iommu,
                    net_cfg.queue_size,
                    self.seccomp_action.clone(),
                    net_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                )
                .map_err(DeviceManagerError::CreateVirtioNet)?;

                // SAFETY: 'fds' are valid because TAP devices are created successfully
                unsafe {
                    self.config.lock().unwrap().add_preserved_fds(fds.clone());
                }

                Arc::new(Mutex::new(net))
            } else {
                // No TAP name or fds: create a fresh TAP device.
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        None,
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            };

            (
                Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_net as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: net_cfg.iommu,
            id,
            pci_segment: net_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add virtio-net and vhost-user-net devices
    fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        let mut net_devices = self.config.lock().unwrap().net.clone();
        if let Some(net_list_cfg) = &mut net_devices {
            for net_cfg in net_list_cfg.iter_mut() {
                devices.push(self.make_virtio_net_device(net_cfg)?);
            }
        }
        self.config.lock().unwrap().net = net_devices;

        Ok(devices)
    }
    /// Creates the virtio-rng device if the configured entropy source path
    /// is valid UTF-8; otherwise no device is created.
    fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        if let Some(rng_path) = rng_config.src.to_str() {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_rng_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: rng_config.iommu,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_rng_device));
        }

        Ok(devices)
    }
    /// Creates a single vhost-user-fs (virtio-fs) device from `fs_cfg`,
    /// connected to the virtiofsd backend through the configured socket.
    fn make_virtio_fs_device(
        &mut self,
        fs_cfg: &mut FsConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate (and persist) a fresh one.
        let id = if let Some(id) = &fs_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
            fs_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-fs device: {:?}", fs_cfg);

        let mut node = device_node!(id);

        if let Some(fs_socket) = fs_cfg.socket.to_str() {
            let virtio_fs_device = Arc::new(Mutex::new(
                virtio_devices::vhost_user::Fs::new(
                    id.clone(),
                    fs_socket,
                    &fs_cfg.tag,
                    fs_cfg.num_queues,
                    fs_cfg.queue_size,
                    None,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioFs)?,
            ));

            // Update the device tree with the migratable device.
            node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_fs_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id,
                pci_segment: fs_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            Err(DeviceManagerError::NoVirtioFsSock)
        }
    }

    /// Creates every virtio-fs device from the VM configuration, writing
    /// back the ids that were generated along the way.
    fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut fs_devices = self.config.lock().unwrap().fs.clone();
        if let Some(fs_list_cfg) = &mut fs_devices {
            for fs_cfg in fs_list_cfg.iter_mut() {
                devices.push(self.make_virtio_fs_device(fs_cfg)?);
            }
        }
        self.config.lock().unwrap().fs = fs_devices;

        Ok(devices)
    }
    /// Creates a single virtio-pmem device from `pmem_cfg`: opens (or, for a
    /// directory, creates a temporary) backing file, maps it into the guest
    /// address space, and registers the device in the device tree.
    fn make_virtio_pmem_device(
        &mut self,
        pmem_cfg: &mut PmemConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate (and persist) a fresh one.
        let id = if let Some(id) = &pmem_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
            pmem_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-pmem device: {:?}", pmem_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
            info!("Restoring virtio-pmem {} resources", id);

            // On restore, exactly one MMIO address range resource is expected.
            let mut region_range: Option<(u64, u64)> = None;
            for resource in node.resources.iter() {
                match resource {
                    Resource::MmioAddressRange { base, size } => {
                        if region_range.is_some() {
                            return Err(DeviceManagerError::ResourceAlreadyExists);
                        }

                        region_range = Some((*base, *size));
                    }
                    _ => {
                        error!("Unexpected resource {:?} for {}", resource, id);
                    }
                }
            }

            if region_range.is_none() {
                return Err(DeviceManagerError::MissingVirtioPmemResources);
            }

            region_range
        } else {
            None
        };

        // A directory target requires an explicit size and an anonymous
        // temporary file (O_TMPFILE) created inside it.
        let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
            if pmem_cfg.size.is_none() {
                return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
            }
            (O_TMPFILE, true)
        } else {
            (0, false)
        };

        let mut file = OpenOptions::new()
            .read(true)
            .write(!pmem_cfg.discard_writes)
            .custom_flags(custom_flags)
            .open(&pmem_cfg.file)
            .map_err(DeviceManagerError::PmemFileOpen)?;

        // Use the configured size (resizing a fresh temp file if needed),
        // otherwise derive the size from the existing file.
        let size = if let Some(size) = pmem_cfg.size {
            if set_len {
                file.set_len(size)
                    .map_err(DeviceManagerError::PmemFileSetLen)?;
            }
            size
        } else {
            file.seek(SeekFrom::End(0))
                .map_err(DeviceManagerError::PmemFileSetLen)?
        };

        if size % 0x20_0000 != 0 {
            return Err(DeviceManagerError::PmemSizeNotAligned);
        }

        let (region_base, region_size) = if let Some((base, size)) = region_range {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(
                    Some(GuestAddress(base)),
                    size as GuestUsize,
                    Some(0x0020_0000),
                )
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base, size)
        } else {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            let base = self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(None, size as GuestUsize, Some(0x0020_0000))
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base.raw_value(), size)
        };

        let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
        // MAP_PRIVATE when writes must be discarded, MAP_SHARED otherwise.
        let mmap_region = MmapRegion::build(
            Some(FileOffset::new(cloned_file, 0)),
            region_size as usize,
            PROT_READ | PROT_WRITE,
            MAP_NORESERVE
                | if pmem_cfg.discard_writes {
                    MAP_PRIVATE
                } else {
                    MAP_SHARED
                },
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;
        let host_addr: u64 = mmap_region.as_ptr() as u64;

        let mem_slot = self
            .memory_manager
            .lock()
            .unwrap()
            .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
            .map_err(DeviceManagerError::MemoryManager)?;

        let mapping = virtio_devices::UserspaceMapping {
            host_addr,
            mem_slot,
            addr: GuestAddress(region_base),
            len: region_size,
            mergeable: false,
        };

        let virtio_pmem_device = Arc::new(Mutex::new(
            virtio_devices::Pmem::new(
                id.clone(),
                file,
                GuestAddress(region_base),
                mapping,
                mmap_region,
                self.force_iommu | pmem_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioPmem)?,
        ));

        // Update the device tree with correct resource information and with
        // the migratable device.
        node.resources.push(Resource::MmioAddressRange {
            base: region_base,
            size: region_size,
        });
        node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_pmem_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: pmem_cfg.iommu,
            id,
            pci_segment: pmem_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Creates every virtio-pmem device from the VM configuration, writing
    /// back the ids that were generated along the way.
    fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add virtio-pmem if required
        let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
        if let Some(pmem_list_cfg) = &mut pmem_devices {
            for pmem_cfg in pmem_list_cfg.iter_mut() {
                devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
            }
        }
        self.config.lock().unwrap().pmem = pmem_devices;

        Ok(devices)
    }
    /// Creates a single virtio-vsock device from `vsock_cfg`, backed by a
    /// Unix socket on the host side.
    fn make_virtio_vsock_device(
        &mut self,
        vsock_cfg: &mut VsockConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate (and persist) a fresh one.
        let id = if let Some(id) = &vsock_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
            vsock_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-vsock device: {:?}", vsock_cfg);

        let socket_path = vsock_cfg
            .socket
            .to_str()
            .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
        let backend =
            virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                .map_err(DeviceManagerError::CreateVsockBackend)?;

        let vsock_device = Arc::new(Mutex::new(
            virtio_devices::Vsock::new(
                id.clone(),
                vsock_cfg.cid,
                vsock_cfg.socket.clone(),
                backend,
                self.force_iommu | vsock_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioVsock)?,
        ));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vsock_device));

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&vsock_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vsock_cfg.iommu,
            id,
            pci_segment: vsock_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Creates the (at most one) virtio-vsock device from the VM
    /// configuration, writing back a generated id if needed.
    fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut vsock = self.config.lock().unwrap().vsock.clone();
        if let Some(ref mut vsock_cfg) = &mut vsock {
            devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
        }
        self.config.lock().unwrap().vsock = vsock;

        Ok(devices)
    }
    /// Creates one virtio-mem device per memory zone that has a virtio-mem
    /// region, wiring each device back into its zone so memory resize
    /// requests can be forwarded to it.
    fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mm = self.memory_manager.clone();
        let mut mm = mm.lock().unwrap();
        for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
            if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
                info!("Creating virtio-mem device: id = {}", memory_zone_id);

                // Resolve the NUMA node this zone belongs to, if any.
                let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
                    .map(|i| i as u16);

                let virtio_mem_device = Arc::new(Mutex::new(
                    virtio_devices::Mem::new(
                        memory_zone_id.clone(),
                        virtio_mem_zone.region(),
                        self.seccomp_action.clone(),
                        node_id,
                        virtio_mem_zone.hotplugged_size(),
                        virtio_mem_zone.hugepages(),
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        virtio_mem_zone.blocks_state().clone(),
                        state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
                            .map_err(DeviceManagerError::RestoreGetState)?,
                    )
                    .map_err(DeviceManagerError::CreateVirtioMem)?,
                ));

                // Update the virtio-mem zone so that it has a handle onto the
                // virtio-mem device, which will be used for triggering a resize
                // if needed.
                virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));

                self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));

                devices.push(MetaVirtioDevice {
                    virtio_device: Arc::clone(&virtio_mem_device)
                        as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                    iommu: false,
                    id: memory_zone_id.clone(),
                    pci_segment: 0,
                    dma_handler: None,
                });

                // Fill the device tree with a new node. In case of restore, we
                // know there is nothing to do, so we can simply override the
                // existing entry.
                self.device_tree.lock().unwrap().insert(
                    memory_zone_id.clone(),
                    device_node!(memory_zone_id, virtio_mem_device),
                );
            }
        }

        Ok(devices)
    }

    /// Creates the pvmemcontrol device (PCI device plus its bus-facing
    /// backend) and registers it on the PCI bus and in the device tree.
    #[cfg(feature = "pvmemcontrol")]
    fn make_pvmemcontrol_device(
        &mut self,
    ) -> DeviceManagerResult<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )> {
        let id = String::from(PVMEMCONTROL_DEVICE_NAME);
        // pvmemcontrol is always placed on PCI segment 0.
        let pci_segment_id = 0x0_u16;

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        info!("Creating pvmemcontrol device: id = {}", id);
        let (pvmemcontrol_pci_device, pvmemcontrol_bus_device) =
            devices::pvmemcontrol::PvmemcontrolDevice::make_device(
                id.clone(),
                self.memory_manager.lock().unwrap().guest_memory(),
            );

        let pvmemcontrol_pci_device = Arc::new(Mutex::new(pvmemcontrol_pci_device));
        let pvmemcontrol_bus_device = Arc::new(pvmemcontrol_bus_device);

        let new_resources = self.add_pci_device(
            pvmemcontrol_bus_device.clone(),
            pvmemcontrol_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let mut node = device_node!(id, pvmemcontrol_pci_device);

        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = None;

        self.device_tree.lock().unwrap().insert(id, node);

        Ok((pvmemcontrol_bus_device, pvmemcontrol_pci_device))
    }
    /// Creates the virtio-balloon device if the VM configuration requests
    /// one, keeping a handle on it for later resize operations.
    fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
            let id = String::from(BALLOON_DEVICE_NAME);
            info!("Creating virtio-balloon device: id = {}", id);

            let virtio_balloon_device = Arc::new(Mutex::new(
                virtio_devices::Balloon::new(
                    id.clone(),
                    balloon_config.size,
                    balloon_config.deflate_on_oom,
                    balloon_config.free_page_reporting,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBalloon)?,
            ));

            // Keep a reference for balloon resize requests.
            self.balloon = Some(virtio_balloon_device.clone());

            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_balloon_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_balloon_device));
        }

        Ok(devices)
    }
    /// Creates the virtio-watchdog device if enabled in the VM
    /// configuration; it triggers a VM reset when the guest stops pinging.
    fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if !self.config.lock().unwrap().watchdog {
            return Ok(devices);
        }

        let id = String::from(WATCHDOG_DEVICE_NAME);
        info!("Creating virtio-watchdog device: id = {}", id);

        let virtio_watchdog_device = Arc::new(Mutex::new(
            virtio_devices::Watchdog::new(
                id.clone(),
                self.reset_evt.try_clone().unwrap(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
        ));
        devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_watchdog_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: false,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_watchdog_device));

        Ok(devices)
    }

    /// Creates a single vDPA device from `vdpa_cfg`, together with the DMA
    /// mapping handler the device requires.
    fn make_vdpa_device(
        &mut self,
        vdpa_cfg: &mut VdpaConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate (and persist) a fresh one.
        let id = if let Some(id) = &vdpa_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
            vdpa_cfg.id = Some(id.clone());
            id
        };

        info!("Creating vDPA device: {:?}", vdpa_cfg);

        let device_path = vdpa_cfg
            .path
            .to_str()
            .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;

        let vdpa_device = Arc::new(Mutex::new(
            virtio_devices::Vdpa::new(
                id.clone(),
                device_path,
                self.memory_manager.lock().unwrap().guest_memory(),
                vdpa_cfg.num_queues as u16,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVdpa)?,
        ));

        // Create the DMA handler that is required by the vDPA device
        let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
            Arc::clone(&vdpa_device),
            Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
        ));

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vdpa_device));

        Ok(MetaVirtioDevice {
            virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vdpa_cfg.iommu,
            id,
            pci_segment: vdpa_cfg.pci_segment,
            dma_handler: Some(vdpa_mapping),
        })
    }

    /// Creates every vDPA device from the VM configuration, writing back
    /// the ids that were generated along the way.
    fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add vdpa if required
        let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
        if let Some(vdpa_list_cfg) = &mut vdpa_devices {
            for vdpa_cfg in vdpa_list_cfg.iter_mut() {
                devices.push(self.make_vdpa_device(vdpa_cfg)?);
            }
        }
        self.config.lock().unwrap().vdpa = vdpa_devices;

        Ok(devices)
    }
self, prefix: &str) -> DeviceManagerResult<String> { 3267 let start_id = self.device_id_cnt; 3268 loop { 3269 // Generate the temporary name. 3270 let name = format!("{}{}", prefix, self.device_id_cnt); 3271 // Increment the counter. 3272 self.device_id_cnt += Wrapping(1); 3273 // Check if the name is already in use. 3274 if !self.boot_id_list.contains(&name) 3275 && !self.device_tree.lock().unwrap().contains_key(&name) 3276 { 3277 return Ok(name); 3278 } 3279 3280 if self.device_id_cnt == start_id { 3281 // We went through a full loop and there's nothing else we can 3282 // do. 3283 break; 3284 } 3285 } 3286 Err(DeviceManagerError::NoAvailableDeviceName) 3287 } 3288 3289 fn add_passthrough_device( 3290 &mut self, 3291 device_cfg: &mut DeviceConfig, 3292 ) -> DeviceManagerResult<(PciBdf, String)> { 3293 // If the passthrough device has not been created yet, it is created 3294 // here and stored in the DeviceManager structure for future needs. 3295 if self.passthrough_device.is_none() { 3296 self.passthrough_device = Some( 3297 self.address_manager 3298 .vm 3299 .create_passthrough_device() 3300 .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?, 3301 ); 3302 } 3303 3304 self.add_vfio_device(device_cfg) 3305 } 3306 3307 fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> { 3308 let passthrough_device = self 3309 .passthrough_device 3310 .as_ref() 3311 .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?; 3312 3313 let dup = passthrough_device 3314 .try_clone() 3315 .map_err(DeviceManagerError::VfioCreate)?; 3316 3317 Ok(Arc::new( 3318 VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?, 3319 )) 3320 } 3321 3322 fn add_vfio_device( 3323 &mut self, 3324 device_cfg: &mut DeviceConfig, 3325 ) -> DeviceManagerResult<(PciBdf, String)> { 3326 let vfio_name = if let Some(id) = &device_cfg.id { 3327 id.clone() 3328 } else { 3329 let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?; 3330 
device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't let all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device. That's simply because the map/unmap operations happen at the
        // VFIO container level.
        let vfio_container = if device_cfg.iommu {
            let vfio_container = self.create_vfio_container()?;

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }

            vfio_container
        } else if let Some(vfio_container) = &self.vfio_container {
            Arc::clone(vfio_container)
        } else {
            let vfio_container = self.create_vfio_container()?;
            needs_dma_mapping = true;
            self.vfio_container = Some(Arc::clone(&vfio_container));

            vfio_container
        };

        let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
            .map_err(DeviceManagerError::VfioCreate)?;

        if needs_dma_mapping {
            // Register DMA mapping in IOMMU.
            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem device itself.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    vfio_container
                        .vfio_dma_map(
                            region.start_addr().raw_value(),
                            region.len(),
                            region.as_ptr() as u64,
                        )
                        .map_err(DeviceManagerError::VfioDmaMap)?;
                }
            }

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Container,
                        vfio_mapping.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }
        }

        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let memory_manager = self.memory_manager.clone();

        let vfio_pci_device = VfioPciDevice::new(
            vfio_name.clone(),
            &self.address_manager.vm,
            vfio_device,
            vfio_container,
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            device_cfg.iommu,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
            device_cfg.x_nv_gpudirect_clique,
        )
        .map_err(DeviceManagerError::VfioPciCreate)?;

        let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

        let new_resources = self.add_pci_device(
            vfio_pci_device.clone(),
            vfio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        vfio_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioMapRegion)?;

        for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
            self.mmio_regions.lock().unwrap().push(mmio_region);
        }

        let mut node = device_node!(vfio_name, vfio_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_name.clone(), node);

        Ok((pci_device_bdf, vfio_name))
    }

    /// Allocate BARs for a PCI device, add it to the PCI bus of its segment,
    /// and register its bus mappings. Returns the resources actually
    /// allocated so the caller can record them in the device tree.
    fn add_pci_device(
        &mut self,
        bus_device: Arc<dyn BusDeviceSync>,
        pci_device: Arc<Mutex<dyn PciDevice>>,
        segment_id: u16,
        bdf: PciBdf,
        resources: Option<Vec<Resource>>,
    ) -> DeviceManagerResult<Vec<Resource>> {
        let bars = pci_device
            .lock()
            .unwrap()
            .allocate_bars(
                &self.address_manager.allocator,
                &mut self.pci_segments[segment_id as usize]
                    .mem32_allocator
                    .lock()
                    .unwrap(),
                &mut self.pci_segments[segment_id as usize]
                    .mem64_allocator
                    .lock()
                    .unwrap(),
                resources,
            )
            .map_err(DeviceManagerError::AllocateBars)?;

        let mut pci_bus = self.pci_segments[segment_id as usize]
            .pci_bus
            .lock()
            .unwrap();

        pci_bus
            .add_device(bdf.device() as u32, pci_device)
            .map_err(DeviceManagerError::AddPciDevice)?;

        self.bus_devices.push(Arc::clone(&bus_device));

        pci_bus
            .register_mapping(
                bus_device,
                #[cfg(target_arch = "x86_64")]
                self.address_manager.io_bus.as_ref(),
                self.address_manager.mmio_bus.as_ref(),
                bars.clone(),
            )
            .map_err(DeviceManagerError::AddPciDevice)?;

        let mut new_resources = Vec::new();
        for bar in bars {
            new_resources.push(Resource::PciBar {
                index: bar.idx(),
                base: bar.addr(),
                size: bar.size(),
                type_: bar.region_type().into(),
                prefetchable: bar.prefetchable().into(),
            });
        }

        Ok(new_resources)
    }

    /// Add every VFIO device from the VM config, returning the BDFs of the
    /// devices that ended up attached to the virtual IOMMU.
    fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut iommu_attached_device_ids = Vec::new();
        let mut devices = self.config.lock().unwrap().devices.clone();

        if let Some(device_list_cfg) = &mut devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (device_id, _) = self.add_passthrough_device(device_cfg)?;
                if device_cfg.iommu && self.iommu_device.is_some() {
                    iommu_attached_device_ids.push(device_id);
                }
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().devices = devices;

        Ok(iommu_attached_device_ids)
    }

    /// Add a vfio-user PCI device connected through the socket described by
    /// `device_cfg`, wiring it into the virtio-mem DMA handlers and mapping
    /// the existing guest memory zones.
    fn add_vfio_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_user_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;

        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let client = Arc::new(Mutex::new(
            vfio_user::Client::new(&device_cfg.socket)
.map_err(DeviceManagerError::VfioUserCreateClient)?,
        ));

        let memory_manager = self.memory_manager.clone();

        let mut vfio_user_pci_device = VfioUserPciDevice::new(
            vfio_user_name.clone(),
            &self.address_manager.vm,
            client.clone(),
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
        )
        .map_err(DeviceManagerError::VfioUserCreate)?;

        let memory = self.memory_manager.lock().unwrap().guest_memory();
        let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Device(pci_device_bdf.into()),
                    vfio_user_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }

        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_user_pci_device
                    .dma_map(region)
                    .map_err(DeviceManagerError::VfioUserDmaMap)?;
            }
        }

        let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

        let new_resources = self.add_pci_device(
            vfio_user_pci_device.clone(),
            vfio_user_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Note it is required to call 'add_pci_device()' in advance to have the list of
        // mmio regions provisioned correctly
        vfio_user_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioUserMapRegion)?;

        let mut node = device_node!(vfio_user_name, vfio_user_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_user_name.clone(), node);

        Ok((pci_device_bdf, vfio_user_name))
    }

    /// Add every vfio-user device from the VM config, writing any generated
    /// ids back into the config.
    fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut user_devices = self.config.lock().unwrap().user_devices.clone();

        if let Some(device_list_cfg) = &mut user_devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().user_devices = user_devices;

        Ok(vec![])
    }

    /// Wrap a virtio device into a virtio-pci transport, allocate its PCI
    /// resources and ioeventfds, hook up DMA/IOMMU handling, and record the
    /// new node in the device tree. Returns the device's BDF.
    fn add_virtio_pci_device(
        &mut self,
        virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu_mapping: &Option<Arc<IommuMapping>>,
        virtio_device_id: String,
        pci_segment_id: u16,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> DeviceManagerResult<PciBdf> {
        let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");

        // Add the new virtio-pci node to the device tree.
        let mut node = device_node!(id);
        node.children = vec![virtio_device_id.clone()];

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        // Update the existing virtio node by setting the parent.
        if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
            node.parent = Some(id.clone());
        } else {
            return Err(DeviceManagerError::MissingNode);
        }

        // Allows support for one MSI-X vector per queue. It also adds 1
        // as we need to take into account the dedicated vector to notify
        // about a virtio config change.
let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;

        // Create the AccessPlatform trait from the implementation IommuMapping.
        // This will provide address translation for any virtio device sitting
        // behind a vIOMMU.
        let mut access_platform: Option<Arc<dyn AccessPlatform>> = None;

        if let Some(mapping) = iommu_mapping {
            access_platform = Some(Arc::new(AccessPlatformMapping::new(
                pci_device_bdf.into(),
                mapping.clone(),
            )));
        }

        // If SEV-SNP is enabled create the AccessPlatform from SevSnpPageAccessProxy
        #[cfg(feature = "sev_snp")]
        if self.config.lock().unwrap().is_sev_snp_enabled() {
            access_platform = Some(Arc::new(SevSnpPageAccessProxy::new(
                self.address_manager.vm.clone(),
            )));
        }

        let memory = self.memory_manager.lock().unwrap().guest_memory();

        // Map DMA ranges if a DMA handler is available and if the device is
        // not attached to a virtual IOMMU.
        if let Some(dma_handler) = &dma_handler {
            if iommu_mapping.is_some() {
                if let Some(iommu) = &self.iommu_device {
                    iommu
                        .lock()
                        .unwrap()
                        .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
                } else {
                    return Err(DeviceManagerError::MissingVirtualIommu);
                }
            } else {
                // Let every virtio-mem device handle the DMA map/unmap through the
                // DMA handler provided.
                for virtio_mem_device in self.virtio_mem_devices.iter() {
                    virtio_mem_device
                        .lock()
                        .unwrap()
                        .add_dma_mapping_handler(
                            VirtioMemMappingSource::Device(pci_device_bdf.into()),
                            dma_handler.clone(),
                        )
                        .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
                }

                // Do not register virtio-mem regions, as they are handled directly by
                // virtio-mem devices.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        let gpa = region.start_addr().0;
                        let size = region.len();
                        dma_handler
                            .map(gpa, gpa, size)
                            .map_err(DeviceManagerError::VirtioDmaMap)?;
                    }
                }
            }
        }

        let device_type = virtio_device.lock().unwrap().device_type();
        let virtio_pci_device = Arc::new(Mutex::new(
            VirtioPciDevice::new(
                id.clone(),
                memory,
                virtio_device,
                msix_num,
                access_platform,
                &self.msi_interrupt_manager,
                pci_device_bdf.into(),
                self.activate_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                // All device types *except* virtio block devices should be allocated a 64-bit bar
                // The block devices should be given a 32-bit BAR so that they are easily accessible
                // to firmware without requiring excessive identity mapping.
                // The exception being if not on the default PCI segment.
                pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
                dma_handler,
                self.pending_activations.clone(),
                vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
            )
            .map_err(DeviceManagerError::VirtioDevice)?,
        ));

        let new_resources = self.add_pci_device(
            virtio_pci_device.clone(),
            virtio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
        for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
            let io_addr = IoEventAddress::Mmio(addr);
            self.address_manager
                .vm
                .register_ioevent(event, &io_addr, None)
                .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
        }

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
        self.device_tree.lock().unwrap().insert(id, node);

        Ok(pci_device_bdf)
    }

    /// Create the pvpanic device on PCI segment 0 and register it on the bus
    /// and in the device tree.
    fn add_pvpanic_device(
        &mut self,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
        let id = String::from(PVPANIC_DEVICE_NAME);
        let pci_segment_id = 0x0_u16;

        info!("Creating pvpanic device {}", id);

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
            .map_err(DeviceManagerError::PvPanicCreate)?;

        let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));

        let new_resources = self.add_pci_device(
            pvpanic_device.clone(),
            pvpanic_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let mut node = device_node!(id, pvpanic_device);

        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = None;

        self.device_tree.lock().unwrap().insert(id, node);

        Ok(Some(pvpanic_device))
    }

    /// Resolve the PCI segment, BDF and resources for the device `id`:
    /// reuse them from the device tree when restoring, otherwise allocate a
    /// fresh BDF on the requested segment.
    fn pci_resources(
        &self,
        id: &str,
        pci_segment_id: u16,
    ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
Ok(
            if let Some(node) = self.device_tree.lock().unwrap().get(id) {
                info!("Restoring virtio-pci {} resources", id);
                let pci_device_bdf: PciBdf = node
                    .pci_bdf
                    .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
                let pci_segment_id = pci_device_bdf.segment();

                self.pci_segments[pci_segment_id as usize]
                    .pci_bus
                    .lock()
                    .unwrap()
                    .get_device_id(pci_device_bdf.device() as usize)
                    .map_err(DeviceManagerError::GetPciDeviceId)?;

                (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
            } else {
                let pci_device_bdf =
                    self.pci_segments[pci_segment_id as usize].next_device_bdf()?;

                (pci_segment_id, pci_device_bdf, None)
            },
        )
    }

    /// PIO bus (x86 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }

    /// MMIO bus.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }

    /// System resource allocator.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }

    /// Interrupt controller, if one has been created.
    pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
        self.interrupt_controller
            .as_ref()
            .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
    }

    /// The list of PCI segments managed by this DeviceManager.
    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }

    /// Extra kernel command line entries collected while creating devices
    /// (aarch64 only).
    #[cfg(target_arch = "aarch64")]
    pub fn cmdline_additions(&self) -> &[String] {
        self.cmdline_additions.as_slice()
    }

    /// Propagate a newly added guest memory region to every virtio device,
    /// to the VFIO container and to all vfio-user devices so their DMA
    /// mappings stay in sync with guest memory.
    pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
        for handle in self.virtio_devices.iter() {
            handle
                .virtio_device
                .lock()
                .unwrap()
                .add_memory_region(new_region)
                .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;

            if let Some(dma_handler) = &handle.dma_handler {
                if !handle.iommu {
                    let gpa = new_region.start_addr().0;
                    let size = new_region.len();
                    dma_handler
                        .map(gpa, gpa, size)
                        .map_err(DeviceManagerError::VirtioDmaMap)?;
                }
            }
        }

        // Take care of updating the memory for VFIO PCI devices.
        if let Some(vfio_container) = &self.vfio_container {
            vfio_container
                .vfio_dma_map(
                    new_region.start_addr().raw_value(),
                    new_region.len(),
                    new_region.as_ptr() as u64,
                )
                .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
        }

        // Take care of updating the memory for vfio-user devices.
        {
            let device_tree = self.device_tree.lock().unwrap();
            for pci_device_node in device_tree.pci_devices() {
                if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
                    .pci_device_handle
                    .as_ref()
                    .ok_or(DeviceManagerError::MissingPciDevice)?
                {
                    vfio_user_pci_device
                        .lock()
                        .unwrap()
                        .dma_map(new_region)
                        .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
                }
            }
        }

        Ok(())
    }

    /// Activate every virtio device with a pending activation request.
    pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
        for mut activator in self.pending_activations.lock().unwrap().drain(..)
{
            activator
                .activate()
                .map_err(DeviceManagerError::VirtioActivate)?;
        }
        Ok(())
    }

    /// Forward a hotplug notification to the GED ACPI device.
    ///
    /// Panics if no GED notification device exists (it is created at boot).
    pub fn notify_hotplug(
        &self,
        notification_type: AcpiNotificationFlags,
    ) -> DeviceManagerResult<()> {
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(notification_type)
            .map_err(DeviceManagerError::HotPlugNotification)
    }

    /// Hotplug a VFIO passthrough device and flag it as plugged-in (PCIU)
    /// for the guest.
    pub fn add_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&device_cfg.id)?;

        if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;

        // Update the PCIU bitmap
        self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo {
            id: device_name,
            bdf,
        })
    }

    /// Hotplug a vfio-user device and flag it as plugged-in (PCIU) for the
    /// guest.
    pub fn add_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&device_cfg.id)?;

        let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;

        // Update the PCIU bitmap
        self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo {
            id: device_name,
            bdf,
        })
    }

    /// Request removal of the device `id`: validates the device is removable
    /// and marks it pending-removal (PCID) so the guest can release it.
    pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
        // The node can be directly a PCI node in case the 'id' refers to a
        // VFIO device or a virtio-pci one.
        // In case the 'id' refers to a virtio device, we must find the PCI
        // node by looking at the parent.
        let device_tree = self.device_tree.lock().unwrap();
        let node = device_tree
            .get(&id)
            .ok_or(DeviceManagerError::UnknownDeviceId(id))?;

        let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
            node
        } else {
            let parent = node
                .parent
                .as_ref()
                .ok_or(DeviceManagerError::MissingNode)?;
            device_tree
                .get(parent)
                .ok_or(DeviceManagerError::MissingNode)?
        };

        let pci_device_bdf: PciBdf = pci_device_node
            .pci_bdf
            .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
        let pci_segment_id = pci_device_bdf.segment();

        let pci_device_handle = pci_device_node
            .pci_device_handle
            .as_ref()
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        #[allow(irrefutable_let_patterns)]
        if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
            let device_type = VirtioDeviceType::from(
                virtio_pci_device
                    .lock()
                    .unwrap()
                    .virtio_device()
                    .lock()
                    .unwrap()
                    .device_type(),
            );
            match device_type {
                VirtioDeviceType::Net
                | VirtioDeviceType::Block
                | VirtioDeviceType::Pmem
                | VirtioDeviceType::Fs
                | VirtioDeviceType::Vsock => {}
                _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
            }
        }

        // Update the PCID bitmap
        self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();

        Ok(())
    }

    /// Finish ejecting the device at (segment, device): tear down its DMA
    /// mappings, free its BARs, and remove it from all buses and from the
    /// device tree.
    pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
        info!(
            "Ejecting device_id = {} on segment_id={}",
            device_id, pci_segment_id
        );

        // Convert the device ID into the corresponding b/d/f.
        let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);

        // Give the PCI device ID back to the PCI bus.
4098 self.pci_segments[pci_segment_id as usize] 4099 .pci_bus 4100 .lock() 4101 .unwrap() 4102 .put_device_id(device_id as usize) 4103 .map_err(DeviceManagerError::PutPciDeviceId)?; 4104 4105 // Remove the device from the device tree along with its children. 4106 let mut device_tree = self.device_tree.lock().unwrap(); 4107 let pci_device_node = device_tree 4108 .remove_node_by_pci_bdf(pci_device_bdf) 4109 .ok_or(DeviceManagerError::MissingPciDevice)?; 4110 4111 // For VFIO and vfio-user the PCI device id is the id. 4112 // For virtio we overwrite it later as we want the id of the 4113 // underlying device. 4114 let mut id = pci_device_node.id; 4115 let pci_device_handle = pci_device_node 4116 .pci_device_handle 4117 .ok_or(DeviceManagerError::MissingPciDevice)?; 4118 if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) { 4119 // The virtio-pci device has a single child 4120 if !pci_device_node.children.is_empty() { 4121 assert_eq!(pci_device_node.children.len(), 1); 4122 let child_id = &pci_device_node.children[0]; 4123 id.clone_from(child_id); 4124 } 4125 } 4126 for child in pci_device_node.children.iter() { 4127 device_tree.remove(child); 4128 } 4129 4130 let mut iommu_attached = false; 4131 if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices { 4132 if iommu_attached_devices.contains(&pci_device_bdf) { 4133 iommu_attached = true; 4134 } 4135 } 4136 4137 let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { 4138 // No need to remove any virtio-mem mapping here as the container outlives all devices 4139 PciDeviceHandle::Vfio(vfio_pci_device) => { 4140 for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() { 4141 self.mmio_regions 4142 .lock() 4143 .unwrap() 4144 .retain(|x| x.start != mmio_region.start) 4145 } 4146 4147 ( 4148 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4149 Arc::clone(&vfio_pci_device) as Arc<dyn BusDeviceSync>, 4150 None as Option<Arc<Mutex<dyn 
virtio_devices::VirtioDevice>>>, 4151 false, 4152 ) 4153 } 4154 PciDeviceHandle::Virtio(virtio_pci_device) => { 4155 let dev = virtio_pci_device.lock().unwrap(); 4156 let bar_addr = dev.config_bar_addr(); 4157 for (event, addr) in dev.ioeventfds(bar_addr) { 4158 let io_addr = IoEventAddress::Mmio(addr); 4159 self.address_manager 4160 .vm 4161 .unregister_ioevent(event, &io_addr) 4162 .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; 4163 } 4164 4165 if let Some(dma_handler) = dev.dma_handler() { 4166 if !iommu_attached { 4167 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4168 for region in zone.regions() { 4169 let iova = region.start_addr().0; 4170 let size = region.len(); 4171 dma_handler 4172 .unmap(iova, size) 4173 .map_err(DeviceManagerError::VirtioDmaUnmap)?; 4174 } 4175 } 4176 } 4177 } 4178 4179 ( 4180 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4181 Arc::clone(&virtio_pci_device) as Arc<dyn BusDeviceSync>, 4182 Some(dev.virtio_device()), 4183 dev.dma_handler().is_some() && !iommu_attached, 4184 ) 4185 } 4186 PciDeviceHandle::VfioUser(vfio_user_pci_device) => { 4187 let mut dev = vfio_user_pci_device.lock().unwrap(); 4188 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4189 for region in zone.regions() { 4190 dev.dma_unmap(region) 4191 .map_err(DeviceManagerError::VfioUserDmaUnmap)?; 4192 } 4193 } 4194 4195 ( 4196 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>, 4197 Arc::clone(&vfio_user_pci_device) as Arc<dyn BusDeviceSync>, 4198 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 4199 true, 4200 ) 4201 } 4202 }; 4203 4204 if remove_dma_handler { 4205 for virtio_mem_device in self.virtio_mem_devices.iter() { 4206 virtio_mem_device 4207 .lock() 4208 .unwrap() 4209 .remove_dma_mapping_handler(VirtioMemMappingSource::Device( 4210 pci_device_bdf.into(), 4211 )) 4212 .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?; 4213 } 4214 } 
4215 4216 // Free the allocated BARs 4217 pci_device 4218 .lock() 4219 .unwrap() 4220 .free_bars( 4221 &mut self.address_manager.allocator.lock().unwrap(), 4222 &mut self.pci_segments[pci_segment_id as usize] 4223 .mem32_allocator 4224 .lock() 4225 .unwrap(), 4226 &mut self.pci_segments[pci_segment_id as usize] 4227 .mem64_allocator 4228 .lock() 4229 .unwrap(), 4230 ) 4231 .map_err(DeviceManagerError::FreePciBars)?; 4232 4233 // Remove the device from the PCI bus 4234 self.pci_segments[pci_segment_id as usize] 4235 .pci_bus 4236 .lock() 4237 .unwrap() 4238 .remove_by_device(&pci_device) 4239 .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?; 4240 4241 #[cfg(target_arch = "x86_64")] 4242 // Remove the device from the IO bus 4243 self.io_bus() 4244 .remove_by_device(&bus_device) 4245 .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?; 4246 4247 // Remove the device from the MMIO bus 4248 self.mmio_bus() 4249 .remove_by_device(&bus_device) 4250 .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?; 4251 4252 // Remove the device from the list of BusDevice held by the 4253 // DeviceManager. 
4254 self.bus_devices 4255 .retain(|dev| !Arc::ptr_eq(dev, &bus_device)); 4256 4257 // Shutdown and remove the underlying virtio-device if present 4258 if let Some(virtio_device) = virtio_device { 4259 for mapping in virtio_device.lock().unwrap().userspace_mappings() { 4260 self.memory_manager 4261 .lock() 4262 .unwrap() 4263 .remove_userspace_mapping( 4264 mapping.addr.raw_value(), 4265 mapping.len, 4266 mapping.host_addr, 4267 mapping.mergeable, 4268 mapping.mem_slot, 4269 ) 4270 .map_err(DeviceManagerError::MemoryManager)?; 4271 } 4272 4273 virtio_device.lock().unwrap().shutdown(); 4274 4275 self.virtio_devices 4276 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device)); 4277 } 4278 4279 event!( 4280 "vm", 4281 "device-removed", 4282 "id", 4283 &id, 4284 "bdf", 4285 pci_device_bdf.to_string() 4286 ); 4287 4288 // At this point, the device has been removed from all the list and 4289 // buses where it was stored. At the end of this function, after 4290 // any_device, bus_device and pci_device are released, the actual 4291 // device will be dropped. 4292 Ok(()) 4293 } 4294 4295 fn hotplug_virtio_pci_device( 4296 &mut self, 4297 handle: MetaVirtioDevice, 4298 ) -> DeviceManagerResult<PciDeviceInfo> { 4299 // Add the virtio device to the device manager list. This is important 4300 // as the list is used to notify virtio devices about memory updates 4301 // for instance. 
4302 self.virtio_devices.push(handle.clone()); 4303 4304 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 4305 self.iommu_mapping.clone() 4306 } else { 4307 None 4308 }; 4309 4310 let bdf = self.add_virtio_pci_device( 4311 handle.virtio_device, 4312 &mapping, 4313 handle.id.clone(), 4314 handle.pci_segment, 4315 handle.dma_handler, 4316 )?; 4317 4318 // Update the PCIU bitmap 4319 self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 4320 4321 Ok(PciDeviceInfo { id: handle.id, bdf }) 4322 } 4323 4324 fn is_iommu_segment(&self, pci_segment_id: u16) -> bool { 4325 self.config 4326 .lock() 4327 .as_ref() 4328 .unwrap() 4329 .platform 4330 .as_ref() 4331 .map(|pc| { 4332 pc.iommu_segments 4333 .as_ref() 4334 .map(|v| v.contains(&pci_segment_id)) 4335 .unwrap_or_default() 4336 }) 4337 .unwrap_or_default() 4338 } 4339 4340 pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> { 4341 self.validate_identifier(&disk_cfg.id)?; 4342 4343 if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) { 4344 return Err(DeviceManagerError::InvalidIommuHotplug); 4345 } 4346 4347 let device = self.make_virtio_block_device(disk_cfg)?; 4348 self.hotplug_virtio_pci_device(device) 4349 } 4350 4351 pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> { 4352 self.validate_identifier(&fs_cfg.id)?; 4353 4354 let device = self.make_virtio_fs_device(fs_cfg)?; 4355 self.hotplug_virtio_pci_device(device) 4356 } 4357 4358 pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> { 4359 self.validate_identifier(&pmem_cfg.id)?; 4360 4361 if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) { 4362 return Err(DeviceManagerError::InvalidIommuHotplug); 4363 } 4364 4365 let device = self.make_virtio_pmem_device(pmem_cfg)?; 4366 self.hotplug_virtio_pci_device(device) 4367 } 4368 4369 pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> 
DeviceManagerResult<PciDeviceInfo> { 4370 self.validate_identifier(&net_cfg.id)?; 4371 4372 if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) { 4373 return Err(DeviceManagerError::InvalidIommuHotplug); 4374 } 4375 4376 let device = self.make_virtio_net_device(net_cfg)?; 4377 self.hotplug_virtio_pci_device(device) 4378 } 4379 4380 pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> { 4381 self.validate_identifier(&vdpa_cfg.id)?; 4382 4383 if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) { 4384 return Err(DeviceManagerError::InvalidIommuHotplug); 4385 } 4386 4387 let device = self.make_vdpa_device(vdpa_cfg)?; 4388 self.hotplug_virtio_pci_device(device) 4389 } 4390 4391 pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> { 4392 self.validate_identifier(&vsock_cfg.id)?; 4393 4394 if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) { 4395 return Err(DeviceManagerError::InvalidIommuHotplug); 4396 } 4397 4398 let device = self.make_virtio_vsock_device(vsock_cfg)?; 4399 self.hotplug_virtio_pci_device(device) 4400 } 4401 4402 pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> { 4403 let mut counters = HashMap::new(); 4404 4405 for handle in &self.virtio_devices { 4406 let virtio_device = handle.virtio_device.lock().unwrap(); 4407 if let Some(device_counters) = virtio_device.counters() { 4408 counters.insert(handle.id.clone(), device_counters.clone()); 4409 } 4410 } 4411 4412 counters 4413 } 4414 4415 pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> { 4416 if let Some(balloon) = &self.balloon { 4417 return balloon 4418 .lock() 4419 .unwrap() 4420 .resize(size) 4421 .map_err(DeviceManagerError::VirtioBalloonResize); 4422 } 4423 4424 warn!("No balloon setup: Can't resize the balloon"); 4425 Err(DeviceManagerError::MissingVirtioBalloon) 4426 } 4427 4428 pub fn balloon_size(&self) -> u64 { 4429 if let 
Some(balloon) = &self.balloon {
            return balloon.lock().unwrap().get_actual();
        }

        0
    }

    /// Hand out a shared reference to the device tree.
    pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
        Arc::clone(&self.device_tree)
    }

    /// Signal a power button press to the guest through the GED device.
    #[cfg(target_arch = "x86_64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        let ged = self.ged_notification_device.as_ref().unwrap();
        ged.lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    /// Signal a power button press to the guest.
    #[cfg(target_arch = "aarch64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // Both boot flows must observe the power button:
        // 1. Direct kernel boot with device tree.
        // 2. ACPI + UEFI boot.

        // A GPIO pin 3 event covers the device tree flow.
        self.gpio_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .trigger_key(3)
            .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;

        // A GED power button event covers the ACPI flow.
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    /// Devices attached to the virtual IOMMU, if one was created.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }

    // A device id must not use the reserved "__" prefix and must not
    // already be present in the device tree.
    fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
        match id {
            Some(id) if id.starts_with("__") => {
                Err(DeviceManagerError::InvalidIdentifier(id.clone()))
            }
            Some(id) if self.device_tree.lock().unwrap().contains_key(id) => {
                Err(DeviceManagerError::IdentifierNotUnique(id.clone()))
            }
            _ => Ok(()),
        }
    }

    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
}

// Look up the NUMA node that owns the given memory zone, if any.
fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
    numa_nodes
        .iter()
        .find(|(_, numa_node)| numa_node.memory_zones.contains(&memory_zone_id.to_owned()))
        .map(|(numa_node_id, _)| *numa_node_id)
}

// Look up the NUMA node that owns the given PCI segment, defaulting to
// node 0 when no node claims it.
fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
    numa_nodes
        .iter()
        .find(|(_, numa_node)| numa_node.pci_segments.contains(&pci_segment_id))
        .map(|(numa_node_id, _)| *numa_node_id)
        .unwrap_or(0)
}

// Marker type emitting the ACPI description of the TPM device.
struct TpmDevice {}

impl Aml for TpmDevice {
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        aml::Device::new(
            "TPM2".into(),
            vec![
                &aml::Name::new("_HID".into(), &"MSFT0101"),
                &aml::Name::new("_STA".into(), &(0xF_usize)),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                        true,
                        layout::TPM_START.0 as u32,
                        layout::TPM_SIZE as u32,
                    )]),
                ),
            ],
        )
        .to_aml_bytes(sink)
    }
}

impl Aml for DeviceManager {
    // Emits the DSDT fragments owned by the DeviceManager: the PCI hotplug
    // controller (PHPR), every PCI segment, the motherboard resources
    // (MBRD), the serial port (COM1), the S5 sleep state, the power button
    // (PWRB), the optional TPM and the GED notification device.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // One PCNT method call per PCI segment; PSCN invokes them all.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                // (these must match the *_FIELD_OFFSET constants used by the
                // BusDevice implementation below).
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write PCI bus number (in first argument) to I/O port via field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        // Motherboard resources: one fixed memory descriptor per PCI
        // segment's MMCONFIG window.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}

impl Pausable for DeviceManager {
    // Pause every migratable device registered in the device tree.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    // Resume every migratable device registered in the device tree.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    // Snapshot of the DeviceManager state plus one nested snapshot per
    // migratable device.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let mut migratable = migratable.lock().unwrap();
                snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
            }
        }

        Ok(snapshot)
    }
}

impl Transportable for DeviceManager {}

// Each Migratable hook simply fans out to every migratable device found in
// the device tree, failing fast on the first error.
impl Migratable for DeviceManager {
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_migration()?;
            }
        }
        Ok(())
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}

// Layout of the PCI hotplug register block exposed to the guest via the
// PCST OpRegion declared in the AML above. Offsets/sizes must stay in sync
// with the FieldEntry declarations there.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;

impl BusDevice for DeviceManager {
    // Guest reads of the PCI hotplug registers. PCIU/PCID are
    // read-to-clear; B0EJ always reads as zero; PSEG returns the currently
    // selected segment.
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_up
                        .to_le_bytes(),
                );
                // Clear the PCIU bitmap
                self.pci_segments[self.selected_segment].pci_devices_up = 0;
            }
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_down
                        .to_le_bytes(),
                );
                // Clear the PCID bitmap
                self.pci_segments[self.selected_segment].pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty bitmap since the eject is always
                // taken care of right away during a write access.
                data.fill(0);
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        )
    }

    // Guest writes: B0EJ ejects every device whose slot bit is set on the
    // selected segment; PSEG selects the segment subsequent accesses act on.
    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
        match offset {
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let mut slot_bitmap = u32::from_le_bytes(data_array);

                // Eject each requested slot, clearing its bit as we go.
                while slot_bitmap > 0 {
                    let slot_id = slot_bitmap.trailing_zeros();
                    if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
                        error!("Failed ejecting device {}: {:?}", slot_id, e);
                    }
                    slot_bitmap &= !(1 << slot_id);
                }
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let selected_segment = u32::from_le_bytes(data_array) as usize;
                // Reject out-of-range segment selection from the guest.
                if selected_segment >= self.pci_segments.len() {
                    error!(
                        "Segment selection out of range: {} >= {}",
                        selected_segment,
                        self.pci_segments.len()
                    );
                    return None;
                }
                self.selected_segment = selected_segment;
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        );

        None
    }
}

impl Drop for DeviceManager {
    fn drop(&mut self) {
        // Wake up the DeviceManager threads (mainly virtio device workers),
        // to avoid deadlock on waiting for paused/parked worker threads.
        if let Err(e) = self.resume() {
            error!("Error resuming DeviceManager: {:?}", e);
        }

        for handle in self.virtio_devices.drain(..) {
            handle.virtio_device.lock().unwrap().shutdown();
        }

        // Restore the terminal settings that were saved when the console
        // was set up, if any.
        if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
            // SAFETY: FFI call
            let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Checks that the per-segment MMIO windows split the [start, end)
    // range proportionally to the requested weights, on aligned
    // boundaries.
    #[test]
    fn test_create_mmio_allocators() {
        let res = create_mmio_allocators(0x100000, 0x400000, 1, vec![1], 4 << 10);
        assert_eq!(res.len(), 1);
        assert_eq!(
            res[0].lock().unwrap().base(),
            vm_memory::GuestAddress(0x100000)
        );
        assert_eq!(
            res[0].lock().unwrap().end(),
            vm_memory::GuestAddress(0x3fffff)
        );

        let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![1, 1], 4 << 10);
        assert_eq!(res.len(), 2);
        assert_eq!(
            res[0].lock().unwrap().base(),
            vm_memory::GuestAddress(0x100000)
        );
        assert_eq!(
            res[0].lock().unwrap().end(),
            vm_memory::GuestAddress(0x27ffff)
        );
        assert_eq!(
            res[1].lock().unwrap().base(),
            vm_memory::GuestAddress(0x280000)
        );
        assert_eq!(
            res[1].lock().unwrap().end(),
            vm_memory::GuestAddress(0x3fffff)
        );

        let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![2, 1], 4 << 10);
        assert_eq!(res.len(), 2);
        assert_eq!(
            res[0].lock().unwrap().base(),
            vm_memory::GuestAddress(0x100000)
        );
        assert_eq!(
            res[0].lock().unwrap().end(),
            vm_memory::GuestAddress(0x2fffff)
        );
        assert_eq!(
            res[1].lock().unwrap().base(),
            vm_memory::GuestAddress(0x300000)
        );
        assert_eq!(
            res[1].lock().unwrap().end(),
            vm_memory::GuestAddress(0x3fffff)
        );
    }
}