1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use crate::config::{ 13 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 14 VdpaConfig, VhostMode, VmConfig, VsockConfig, 15 }; 16 use crate::console_devices::{ConsoleDeviceError, ConsoleInfo}; 17 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; 18 use crate::device_tree::{DeviceNode, DeviceTree}; 19 use crate::interrupt::LegacyUserspaceInterruptManager; 20 use crate::interrupt::MsiInterruptManager; 21 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 22 use crate::pci_segment::PciSegment; 23 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 24 use crate::vm_config::DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT; 25 use crate::GuestRegionMmap; 26 use crate::PciDeviceInfo; 27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; 28 use acpi_tables::sdt::GenericAddress; 29 use acpi_tables::{aml, Aml}; 30 use anyhow::anyhow; 31 use arch::layout; 32 #[cfg(target_arch = "x86_64")] 33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 34 use arch::NumaNodes; 35 #[cfg(target_arch = "aarch64")] 36 use arch::{DeviceType, MmioDeviceInfo}; 37 use block::{ 38 async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type, 39 fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio, 40 raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType, 41 }; 42 #[cfg(feature = "io_uring")] 43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; 44 #[cfg(target_arch = "x86_64")] 45 use 
devices::debug_console::DebugConsole; 46 #[cfg(target_arch = "aarch64")] 47 use devices::gic; 48 #[cfg(target_arch = "x86_64")] 49 use devices::ioapic; 50 #[cfg(target_arch = "aarch64")] 51 use devices::legacy::Pl011; 52 #[cfg(feature = "pvmemcontrol")] 53 use devices::pvmemcontrol::{PvmemcontrolBusDevice, PvmemcontrolPciDevice}; 54 use devices::{ 55 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 56 }; 57 use hypervisor::IoEventAddress; 58 use libc::{ 59 tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE, 60 TCSANOW, 61 }; 62 use pci::{ 63 DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping, 64 VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError, 65 }; 66 use rate_limiter::group::RateLimiterGroup; 67 use seccompiler::SeccompAction; 68 use serde::{Deserialize, Serialize}; 69 use std::collections::{BTreeMap, BTreeSet, HashMap}; 70 use std::fs::{File, OpenOptions}; 71 use std::io::{self, stdout, Seek, SeekFrom}; 72 use std::num::Wrapping; 73 use std::os::fd::RawFd; 74 use std::os::unix::fs::OpenOptionsExt; 75 use std::os::unix::io::{AsRawFd, FromRawFd}; 76 use std::path::PathBuf; 77 use std::result; 78 use std::sync::{Arc, Mutex}; 79 use std::time::Instant; 80 use tracer::trace_scoped; 81 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 82 use virtio_devices::transport::VirtioTransport; 83 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 84 use virtio_devices::vhost_user::VhostUserConfig; 85 use virtio_devices::{ 86 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 87 }; 88 use virtio_devices::{Endpoint, IommuMapping}; 89 use vm_allocator::{AddressAllocator, SystemAllocator}; 90 use vm_device::dma_mapping::ExternalDmaMapping; 91 use vm_device::interrupt::{ 92 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 93 }; 94 use vm_device::{Bus, 
BusDevice, BusDeviceSync, Resource};
use vm_memory::guest_memory::FileOffset;
use vm_memory::GuestMemoryRegion;
use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
#[cfg(target_arch = "x86_64")]
use vm_memory::{GuestAddressSpace, GuestMemory};
use vm_migration::{
    protocol::MemoryRangeTable, snapshot_from_id, state_from_id, Migratable, MigratableError,
    Pausable, Snapshot, SnapshotData, Snapshottable, Transportable,
};
use vm_virtio::AccessPlatform;
use vm_virtio::VirtioDeviceType;
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
use {devices::debug_console, devices::legacy::Serial};

// Size of each MMIO window assigned to an AArch64 MMIO device.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name.
// The double underscore prefix keeps these identifiers out of the namespace
// of user-provided device names.
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "x86_64")]
const DEBUGCON_DEVICE_NAME: &str = "__debug_console";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
#[cfg(feature = "pvmemcontrol")]
const PVMEMCONTROL_DEVICE_NAME: &str = "__pvmemcontrol";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
const PVPANIC_DEVICE_NAME: &str = "__pvpanic";

// Devices that the user may name and for which we generate
// identifiers if the user doesn't give one.
// A single leading underscore distinguishes generated names while still
// allowing user-supplied names without that prefix.
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const
VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";

/// Errors associated with device manager
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Cannot create tpm device
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create pvmemcontrol device
    #[cfg(feature = "pvmemcontrol")]
    CreatePvmemcontrol(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    #[cfg(target_arch = "x86_64")]
    /// Error creating debug-console output file
    DebugconOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error creating debug-console pty
    DebugconPtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(ConsoleDeviceError),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// No socket option support for console device
    NoSocketOptionSupportForConsoleDevice,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier
    InvalidIdentifier(String),

    /// Error activating virtio device
    VirtioActivate(ActivateError),

    /// Failed retrieving device state from snapshot
    RestoreGetState(MigratableError),

    /// Cannot create a PvPanic device
    PvPanicCreate(devices::pvpanic::PvPanicError),

    /// Cannot create a RateLimiterGroup
    RateLimiterGroupCreate(rate_limiter::group::Error),

    /// Cannot start sigwinch listener
    StartSigwinchListener(std::io::Error),

    /// Invalid console info
    InvalidConsoleInfo,

    /// Invalid console fd
    InvalidConsoleFd,
}

/// Convenience alias for results produced by the device manager.
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// Size of the platform MMIO window reserved for the DeviceManager's own
// ACPI device.
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

/// Console abstraction: holds an optional handle to the virtio-console
/// resizer used to propagate terminal size changes to the guest.
#[derive(Default)]
pub struct Console {
    console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
}

impl Console {
    /// Returns true if a console resizer is present, i.e. console size
    /// updates can be forwarded to the guest.
    pub fn need_resize(&self) -> bool {
        if let Some(_resizer) = self.console_resizer.as_ref() {
            return true;
        }

        false
    }

    /// Forwards the current console size to the guest, if a resizer exists.
    pub fn update_console_size(&self) {
        if let Some(resizer) = self.console_resizer.as_ref() {
            resizer.update_console_size()
        }
    }
}

/// Owns the guest address-space resources used by devices: the system
/// allocator, the PIO/MMIO buses, and the per-PCI-segment MMIO allocators.
pub(crate) struct AddressManager {
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    #[cfg(target_arch = "x86_64")]
    pub(crate) io_bus: Arc<Bus>,
    pub(crate) mmio_bus: Arc<Bus>,
    pub(crate) vm: Arc<dyn hypervisor::Vm>,
    device_tree: Arc<Mutex<DeviceTree>>,
    // One MMIO allocator per PCI segment, indexed by segment id.
    pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
    pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}

impl DeviceRelocation for AddressManager {
    // Relocate a PCI BAR from `old_base` to `new_base`:
    // 1. free the old range and claim the new one in the matching allocator,
    // 2. update the PIO/MMIO bus mapping,
    // 3. patch the device tree resource entry,
    // 4. for virtio-pci devices, re-register ioeventfds or remap shared
    //    memory regions as needed,
    // 5. finally let the device itself update its BAR state.
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
                    &self.pci_mmio32_allocators
                } else {
                    &self.pci_mmio64_allocators
                };

                // Find the specific allocator that this BAR was allocated from and use it for new one
                for allocator in allocators {
                    let allocator_base = allocator.lock().unwrap().base();
                    let allocator_end = allocator.lock().unwrap().end();

                    if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                        allocator
                            .lock()
                            .unwrap()
                            .free(GuestAddress(old_base), len as GuestUsize);

                        allocator
                            .lock()
                            .unwrap()
                            .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
                            .ok_or_else(|| {
                                io::Error::new(
                                    io::ErrorKind::Other,
                                    "failed allocating new MMIO range",
                                )
                            })?;

                        break;
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        // Only the BAR matching both the region type and the
                        // old base address is rewritten.
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {id} from device tree"),
                ));
            }
        }

        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The moved BAR is the config BAR: move the ioeventfds with
                // it by unregistering them at the old address and
                // re-registering at the new one.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {e:?}"),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {e:?}"),
                            )
                        })?;
                }
            } else {
                // Otherwise, if the device exposes shared memory regions at
                // the old base, the user memory region must be remapped.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {e:?}"),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {e:?}"),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {e:?}"),
                            )
                        })?;
                    }
                }
            }
        }

        // Let the device update its own internal BAR bookkeeping last.
        pci_dev.move_bar(old_base, new_base)
    }
}

/// Snapshot state of the DeviceManager: the device tree and the counter
/// used to generate device identifiers.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

/// The main side of a pseudo-terminal, together with the filesystem path of
/// its peer.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    // Cloning duplicates the underlying file descriptor (try_clone) and
    // panics if the duplication fails.
    fn clone(&self) -> Self {
        PtyPair {
            main: self.main.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

/// Handle to a PCI device, discriminated by how the device is backed.
#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}

/// A virtio device bundled with its placement metadata (identifier, PCI
/// segment, IOMMU attachment flag and optional DMA mapping handler).
#[derive(Clone)]
struct MetaVirtioDevice {
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    iommu: bool,
    id: String,
    pci_segment: u16,
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}

/// Optional register addresses for ACPI platform devices (PM timer, reset,
/// sleep control/status).
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub
sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

// Proxy used on MSHV with SEV-SNP to gain page access when translating
// guest virtual addresses.
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
struct SevSnpPageAccessProxy {
    vm: Arc<dyn hypervisor::Vm>,
}

#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl std::fmt::Debug for SevSnpPageAccessProxy {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "SNP Page access proxy")
    }
}

#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl SevSnpPageAccessProxy {
    fn new(vm: Arc<dyn hypervisor::Vm>) -> SevSnpPageAccessProxy {
        SevSnpPageAccessProxy { vm }
    }
}

#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl AccessPlatform for SevSnpPageAccessProxy {
    // GPA translation is the identity here.
    fn translate_gpa(&self, base: u64, _size: u64) -> std::result::Result<u64, std::io::Error> {
        Ok(base)
    }

    // GVA translation requests page access from the hypervisor first, then
    // returns the address unchanged.
    fn translate_gva(&self, base: u64, size: u64) -> std::result::Result<u64, std::io::Error> {
        self.vm
            .gain_page_access(base, size as u32)
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
        Ok(base)
    }
}

/// Top-level owner of guest device state: address space, buses, PCI
/// segments, interrupt managers and handles to the individual devices.
pub struct DeviceManager {
    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // pty foreground status,
    console_resize_pipe: Option<Arc<File>>,

    // To restore on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<dyn BusDeviceSync>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    exit_evt: EventFd,
    reset_evt: EventFd,

    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    acpi_address: GuestAddress,

    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    #[cfg(feature = "pvmemcontrol")]
    pvmemcontrol_devices: Option<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // aio availability if detected
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    snapshot: Option<Snapshot>,

    rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,

    mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
}

/// Split the MMIO range from `start` to `end` into one `AddressAllocator`
/// per PCI segment.
///
/// Each segment receives a share of the range proportional to its entry in
/// `weights` (which must contain one entry per segment; fewer entries will
/// panic on indexing), and every segment's range begins on an `alignment`
/// boundary.
fn create_mmio_allocators(
    start: u64,
    end: u64,
    num_pci_segments: u16,
    weights: Vec<u32>,
    alignment: u64,
) -> Vec<Arc<Mutex<AddressAllocator>>> {
    let total_weight: u32 = weights.iter().sum();

    // Start each PCI segment mmio range on an aligned boundary
    let pci_segment_mmio_size = (end - start + 1) / (alignment * total_weight as u64) * alignment;

    let mut mmio_allocators = vec![];
    // `i` accumulates the weights already consumed, so each segment starts
    // right after the previous segment's weighted share.
    let mut i = 0;
    for segment_id in 0..num_pci_segments as u64 {
        let weight = weights[segment_id as usize] as u64;
        let mmio_start = start + i * pci_segment_mmio_size;
        let mmio_size = pci_segment_mmio_size * weight;
        let allocator = Arc::new(Mutex::new(
            AddressAllocator::new(GuestAddress(mmio_start), mmio_size).unwrap(),
        ));
        mmio_allocators.push(allocator);
        i += weight;
    }

    mmio_allocators
}

impl DeviceManager {
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
        mmio_bus: Arc<Bus>,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        cpu_manager: Arc<Mutex<CpuManager>>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
        snapshot: Option<Snapshot>,
        dynamic: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        // Restore the device tree and ID counter from the snapshot when one
        // is provided, otherwise start from an empty tree.
        let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
            let state: DeviceManagerState = snapshot.to_state().unwrap();
            (
Arc::new(Mutex::new(state.device_tree.clone())), 1021 state.device_id_cnt, 1022 ) 1023 } else { 1024 (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0)) 1025 }; 1026 1027 let num_pci_segments = 1028 if let Some(platform_config) = config.lock().unwrap().platform.as_ref() { 1029 platform_config.num_pci_segments 1030 } else { 1031 1 1032 }; 1033 1034 let mut mmio32_aperture_weights: Vec<u32> = 1035 std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT) 1036 .take(num_pci_segments.into()) 1037 .collect(); 1038 if let Some(pci_segments) = &config.lock().unwrap().pci_segments { 1039 for pci_segment in pci_segments.iter() { 1040 mmio32_aperture_weights[pci_segment.pci_segment as usize] = 1041 pci_segment.mmio32_aperture_weight 1042 } 1043 } 1044 1045 let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0; 1046 let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE; 1047 let pci_mmio32_allocators = create_mmio_allocators( 1048 start_of_mmio32_area, 1049 end_of_mmio32_area, 1050 num_pci_segments, 1051 mmio32_aperture_weights, 1052 4 << 10, 1053 ); 1054 1055 let mut mmio64_aperture_weights: Vec<u32> = 1056 std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT) 1057 .take(num_pci_segments.into()) 1058 .collect(); 1059 if let Some(pci_segments) = &config.lock().unwrap().pci_segments { 1060 for pci_segment in pci_segments.iter() { 1061 mmio64_aperture_weights[pci_segment.pci_segment as usize] = 1062 pci_segment.mmio64_aperture_weight 1063 } 1064 } 1065 1066 let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0; 1067 let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0; 1068 let pci_mmio64_allocators = create_mmio_allocators( 1069 start_of_mmio64_area, 1070 end_of_mmio64_area, 1071 num_pci_segments, 1072 mmio64_aperture_weights, 1073 4 << 30, 1074 ); 1075 1076 let address_manager = Arc::new(AddressManager { 1077 allocator: memory_manager.lock().unwrap().allocator(), 1078 
#[cfg(target_arch = "x86_64")] 1079 io_bus, 1080 mmio_bus, 1081 vm: vm.clone(), 1082 device_tree: Arc::clone(&device_tree), 1083 pci_mmio32_allocators, 1084 pci_mmio64_allocators, 1085 }); 1086 1087 // First we create the MSI interrupt manager, the legacy one is created 1088 // later, after the IOAPIC device creation. 1089 // The reason we create the MSI one first is because the IOAPIC needs it, 1090 // and then the legacy interrupt manager needs an IOAPIC. So we're 1091 // handling a linear dependency chain: 1092 // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager. 1093 let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> = 1094 Arc::new(MsiInterruptManager::new( 1095 Arc::clone(&address_manager.allocator), 1096 vm, 1097 )); 1098 1099 let acpi_address = address_manager 1100 .allocator 1101 .lock() 1102 .unwrap() 1103 .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None) 1104 .ok_or(DeviceManagerError::AllocateIoPort)?; 1105 1106 let mut pci_irq_slots = [0; 32]; 1107 PciSegment::reserve_legacy_interrupts_for_pci_devices( 1108 &address_manager, 1109 &mut pci_irq_slots, 1110 )?; 1111 1112 let mut pci_segments = vec![PciSegment::new_default_segment( 1113 &address_manager, 1114 Arc::clone(&address_manager.pci_mmio32_allocators[0]), 1115 Arc::clone(&address_manager.pci_mmio64_allocators[0]), 1116 &pci_irq_slots, 1117 )?]; 1118 1119 for i in 1..num_pci_segments as usize { 1120 pci_segments.push(PciSegment::new( 1121 i as u16, 1122 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16), 1123 &address_manager, 1124 Arc::clone(&address_manager.pci_mmio32_allocators[i]), 1125 Arc::clone(&address_manager.pci_mmio64_allocators[i]), 1126 &pci_irq_slots, 1127 )?); 1128 } 1129 1130 if dynamic { 1131 let acpi_address = address_manager 1132 .allocator 1133 .lock() 1134 .unwrap() 1135 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None) 1136 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 
1137 1138 address_manager 1139 .mmio_bus 1140 .insert( 1141 cpu_manager.clone(), 1142 acpi_address.0, 1143 CPU_MANAGER_ACPI_SIZE as u64, 1144 ) 1145 .map_err(DeviceManagerError::BusError)?; 1146 1147 cpu_manager.lock().unwrap().set_acpi_address(acpi_address); 1148 } 1149 1150 let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new(); 1151 if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() { 1152 for rate_limit_group_cfg in rate_limit_groups_cfg { 1153 let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config; 1154 let bw = rate_limit_cfg.bandwidth.unwrap_or_default(); 1155 let ops = rate_limit_cfg.ops.unwrap_or_default(); 1156 let mut rate_limit_group = RateLimiterGroup::new( 1157 &rate_limit_group_cfg.id, 1158 bw.size, 1159 bw.one_time_burst.unwrap_or(0), 1160 bw.refill_time, 1161 ops.size, 1162 ops.one_time_burst.unwrap_or(0), 1163 ops.refill_time, 1164 ) 1165 .map_err(DeviceManagerError::RateLimiterGroupCreate)?; 1166 1167 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?; 1168 1169 rate_limit_group.start_thread(exit_evt).unwrap(); 1170 rate_limit_groups 1171 .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group)); 1172 } 1173 } 1174 1175 let device_manager = DeviceManager { 1176 address_manager: Arc::clone(&address_manager), 1177 console: Arc::new(Console::default()), 1178 interrupt_controller: None, 1179 #[cfg(target_arch = "aarch64")] 1180 cmdline_additions: Vec::new(), 1181 ged_notification_device: None, 1182 config, 1183 memory_manager, 1184 cpu_manager, 1185 virtio_devices: Vec::new(), 1186 bus_devices: Vec::new(), 1187 device_id_cnt, 1188 msi_interrupt_manager, 1189 legacy_interrupt_manager: None, 1190 passthrough_device: None, 1191 vfio_container: None, 1192 iommu_device: None, 1193 iommu_mapping: None, 1194 iommu_attached_devices: None, 1195 pci_segments, 1196 device_tree, 1197 exit_evt, 1198 reset_evt, 1199 #[cfg(target_arch = "aarch64")] 1200 id_to_dev_info: 
HashMap::new(), 1201 seccomp_action, 1202 numa_nodes, 1203 balloon: None, 1204 activate_evt: activate_evt 1205 .try_clone() 1206 .map_err(DeviceManagerError::EventFd)?, 1207 acpi_address, 1208 selected_segment: 0, 1209 serial_manager: None, 1210 console_resize_pipe: None, 1211 original_termios_opt: Arc::new(Mutex::new(None)), 1212 virtio_mem_devices: Vec::new(), 1213 #[cfg(target_arch = "aarch64")] 1214 gpio_device: None, 1215 #[cfg(feature = "pvmemcontrol")] 1216 pvmemcontrol_devices: None, 1217 pvpanic_device: None, 1218 force_iommu, 1219 io_uring_supported: None, 1220 aio_supported: None, 1221 boot_id_list, 1222 timestamp, 1223 pending_activations: Arc::new(Mutex::new(Vec::default())), 1224 acpi_platform_addresses: AcpiPlatformAddresses::default(), 1225 snapshot, 1226 rate_limit_groups, 1227 mmio_regions: Arc::new(Mutex::new(Vec::new())), 1228 }; 1229 1230 let device_manager = Arc::new(Mutex::new(device_manager)); 1231 1232 address_manager 1233 .mmio_bus 1234 .insert( 1235 Arc::clone(&device_manager) as Arc<dyn BusDeviceSync>, 1236 acpi_address.0, 1237 DEVICE_MANAGER_ACPI_SIZE as u64, 1238 ) 1239 .map_err(DeviceManagerError::BusError)?; 1240 1241 Ok(device_manager) 1242 } 1243 1244 pub fn console_resize_pipe(&self) -> Option<Arc<File>> { 1245 self.console_resize_pipe.clone() 1246 } 1247 1248 pub fn create_devices( 1249 &mut self, 1250 console_info: Option<ConsoleInfo>, 1251 console_resize_pipe: Option<Arc<File>>, 1252 original_termios_opt: Arc<Mutex<Option<termios>>>, 1253 ) -> DeviceManagerResult<()> { 1254 trace_scoped!("create_devices"); 1255 1256 let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new(); 1257 1258 let interrupt_controller = self.add_interrupt_controller()?; 1259 1260 self.cpu_manager 1261 .lock() 1262 .unwrap() 1263 .set_interrupt_controller(interrupt_controller.clone()); 1264 1265 // Now we can create the legacy interrupt manager, which needs the freshly 1266 // formed IOAPIC device. 
1267 let legacy_interrupt_manager: Arc< 1268 dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>, 1269 > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone( 1270 &interrupt_controller, 1271 ))); 1272 1273 { 1274 if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() { 1275 self.address_manager 1276 .mmio_bus 1277 .insert( 1278 Arc::clone(&self.memory_manager) as Arc<dyn BusDeviceSync>, 1279 acpi_address.0, 1280 MEMORY_MANAGER_ACPI_SIZE as u64, 1281 ) 1282 .map_err(DeviceManagerError::BusError)?; 1283 } 1284 } 1285 1286 #[cfg(target_arch = "x86_64")] 1287 self.add_legacy_devices( 1288 self.reset_evt 1289 .try_clone() 1290 .map_err(DeviceManagerError::EventFd)?, 1291 )?; 1292 1293 #[cfg(target_arch = "aarch64")] 1294 self.add_legacy_devices(&legacy_interrupt_manager)?; 1295 1296 { 1297 self.ged_notification_device = self.add_acpi_devices( 1298 &legacy_interrupt_manager, 1299 self.reset_evt 1300 .try_clone() 1301 .map_err(DeviceManagerError::EventFd)?, 1302 self.exit_evt 1303 .try_clone() 1304 .map_err(DeviceManagerError::EventFd)?, 1305 )?; 1306 } 1307 1308 self.original_termios_opt = original_termios_opt; 1309 1310 self.console = self.add_console_devices( 1311 &legacy_interrupt_manager, 1312 &mut virtio_devices, 1313 console_info, 1314 console_resize_pipe, 1315 )?; 1316 1317 if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() { 1318 let tpm_dev = self.add_tpm_device(tpm.socket.clone())?; 1319 self.bus_devices 1320 .push(Arc::clone(&tpm_dev) as Arc<dyn BusDeviceSync>) 1321 } 1322 self.legacy_interrupt_manager = Some(legacy_interrupt_manager); 1323 1324 virtio_devices.append(&mut self.make_virtio_devices()?); 1325 1326 self.add_pci_devices(virtio_devices.clone())?; 1327 1328 self.virtio_devices = virtio_devices; 1329 1330 // Add pvmemcontrol if required 1331 #[cfg(feature = "pvmemcontrol")] 1332 { 1333 if self.config.lock().unwrap().pvmemcontrol.is_some() { 1334 let (pvmemcontrol_bus_device, pvmemcontrol_pci_device) 
= 1335 self.make_pvmemcontrol_device()?; 1336 self.pvmemcontrol_devices = 1337 Some((pvmemcontrol_bus_device, pvmemcontrol_pci_device)); 1338 } 1339 } 1340 1341 if self.config.clone().lock().unwrap().pvpanic { 1342 self.pvpanic_device = self.add_pvpanic_device()?; 1343 } 1344 1345 Ok(()) 1346 } 1347 1348 fn state(&self) -> DeviceManagerState { 1349 DeviceManagerState { 1350 device_tree: self.device_tree.lock().unwrap().clone(), 1351 device_id_cnt: self.device_id_cnt, 1352 } 1353 } 1354 1355 fn get_msi_iova_space(&mut self) -> (u64, u64) { 1356 #[cfg(target_arch = "aarch64")] 1357 { 1358 let vcpus = self.config.lock().unwrap().cpus.boot_vcpus; 1359 let vgic_config = gic::Gic::create_default_config(vcpus.into()); 1360 ( 1361 vgic_config.msi_addr, 1362 vgic_config.msi_addr + vgic_config.msi_size - 1, 1363 ) 1364 } 1365 #[cfg(target_arch = "x86_64")] 1366 (0xfee0_0000, 0xfeef_ffff) 1367 } 1368 1369 #[cfg(target_arch = "aarch64")] 1370 /// Gets the information of the devices registered up to some point in time. 
1371 pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> { 1372 &self.id_to_dev_info 1373 } 1374 1375 #[allow(unused_variables)] 1376 fn add_pci_devices( 1377 &mut self, 1378 virtio_devices: Vec<MetaVirtioDevice>, 1379 ) -> DeviceManagerResult<()> { 1380 let iommu_id = String::from(IOMMU_DEVICE_NAME); 1381 1382 let iommu_device = if self.config.lock().unwrap().iommu { 1383 let (device, mapping) = virtio_devices::Iommu::new( 1384 iommu_id.clone(), 1385 self.seccomp_action.clone(), 1386 self.exit_evt 1387 .try_clone() 1388 .map_err(DeviceManagerError::EventFd)?, 1389 self.get_msi_iova_space(), 1390 state_from_id(self.snapshot.as_ref(), iommu_id.as_str()) 1391 .map_err(DeviceManagerError::RestoreGetState)?, 1392 ) 1393 .map_err(DeviceManagerError::CreateVirtioIommu)?; 1394 let device = Arc::new(Mutex::new(device)); 1395 self.iommu_device = Some(Arc::clone(&device)); 1396 self.iommu_mapping = Some(mapping); 1397 1398 // Fill the device tree with a new node. In case of restore, we 1399 // know there is nothing to do, so we can simply override the 1400 // existing entry. 
1401 self.device_tree 1402 .lock() 1403 .unwrap() 1404 .insert(iommu_id.clone(), device_node!(iommu_id, device)); 1405 1406 Some(device) 1407 } else { 1408 None 1409 }; 1410 1411 let mut iommu_attached_devices = Vec::new(); 1412 { 1413 for handle in virtio_devices { 1414 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 1415 self.iommu_mapping.clone() 1416 } else { 1417 None 1418 }; 1419 1420 let dev_id = self.add_virtio_pci_device( 1421 handle.virtio_device, 1422 &mapping, 1423 handle.id, 1424 handle.pci_segment, 1425 handle.dma_handler, 1426 )?; 1427 1428 if handle.iommu { 1429 iommu_attached_devices.push(dev_id); 1430 } 1431 } 1432 1433 let mut vfio_iommu_device_ids = self.add_vfio_devices()?; 1434 iommu_attached_devices.append(&mut vfio_iommu_device_ids); 1435 1436 let mut vfio_user_iommu_device_ids = self.add_user_devices()?; 1437 iommu_attached_devices.append(&mut vfio_user_iommu_device_ids); 1438 1439 // Add all devices from forced iommu segments 1440 if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() { 1441 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { 1442 for segment in iommu_segments { 1443 for device in 0..32 { 1444 let bdf = PciBdf::new(*segment, 0, device, 0); 1445 if !iommu_attached_devices.contains(&bdf) { 1446 iommu_attached_devices.push(bdf); 1447 } 1448 } 1449 } 1450 } 1451 } 1452 1453 if let Some(iommu_device) = iommu_device { 1454 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?; 1455 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices)); 1456 } 1457 } 1458 1459 for segment in &self.pci_segments { 1460 #[cfg(target_arch = "x86_64")] 1461 if let Some(pci_config_io) = segment.pci_config_io.as_ref() { 1462 self.bus_devices 1463 .push(Arc::clone(pci_config_io) as Arc<dyn BusDeviceSync>); 1464 } 1465 1466 self.bus_devices 1467 .push(Arc::clone(&segment.pci_config_mmio) as Arc<dyn BusDeviceSync>); 1468 } 1469 1470 Ok(()) 1471 } 1472 1473 
    /// Creates the aarch64 interrupt controller (vGIC), restoring its state
    /// (and initializing the PMU) when this VM is being restored from a
    /// snapshot, and records it in the device tree.
    #[cfg(target_arch = "aarch64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
            gic::Gic::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
                self.address_manager.vm.clone(),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        // Restore the vGic if this is in the process of restoration
        let id = String::from(gic::GIC_SNAPSHOT_ID);
        if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
            // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
            if self
                .cpu_manager
                .lock()
                .unwrap()
                .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
                .is_err()
            {
                info!("Failed to initialize PMU");
            }

            let vgic_state = vgic_snapshot
                .to_state()
                .map_err(DeviceManagerError::RestoreGetState)?;
            let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
            interrupt_controller
                .lock()
                .unwrap()
                .restore_vgic(vgic_state, &saved_vcpu_states)
                .unwrap();
        }

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// Returns the vGIC, if `add_interrupt_controller` has already run.
    #[cfg(target_arch = "aarch64")]
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }

    /// Creates the x86_64 interrupt controller (IOAPIC), maps it on the MMIO
    /// bus at the fixed IOAPIC address and records it in the device tree.
    #[cfg(target_arch = "x86_64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let id = String::from(IOAPIC_DEVICE_NAME);

        // Create IOAPIC
        let interrupt_controller = Arc::new(Mutex::new(
            ioapic::Ioapic::new(
                id.clone(),
                APIC_START,
                Arc::clone(&self.msi_interrupt_manager),
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        self.address_manager
            .mmio_bus
            .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
            .map_err(DeviceManagerError::BusError)?;

        self.bus_devices
            .push(Arc::clone(&interrupt_controller) as Arc<dyn BusDeviceSync>);

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// Creates the ACPI platform devices: a shutdown/reset device (also
    /// wired to the sleep/reset register addresses on x86_64), the GED
    /// device used for hotplug notifications (returned to the caller), and
    /// the PM timer. Port numbers 0x600/0x608 are the fixed PIO addresses
    /// advertised in the ACPI tables.
    fn add_acpi_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        reset_evt: EventFd,
        exit_evt: EventFd,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
            exit_evt,
            reset_evt,
            vcpus_kill_signalled,
        )));

        self.bus_devices
            .push(Arc::clone(&shutdown_device) as Arc<dyn BusDeviceSync>);

        #[cfg(target_arch = "x86_64")]
        {
            let shutdown_pio_address: u16 = 0x600;

            // NOTE(review): 8 ports are reserved here but only 4 are mapped
            // on the bus below — confirm the asymmetry is intentional.
            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            // Sleep control/status and reset registers all alias the same
            // shutdown device port.
            self.acpi_platform_addresses.sleep_control_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.sleep_status_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.reset_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
        }

        let ged_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();
        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: ged_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;
        let ged_address = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(
                None,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
                None,
            )
            .ok_or(DeviceManagerError::AllocateMmioAddress)?;
        let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
            interrupt_group,
            ged_irq,
            ged_address,
        )));
        self.address_manager
            .mmio_bus
            .insert(
                ged_device.clone(),
                ged_address.0,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;
        self.bus_devices
            .push(Arc::clone(&ged_device) as Arc<dyn BusDeviceSync>);

        let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));

        self.bus_devices
            .push(Arc::clone(&pm_timer_device) as Arc<dyn BusDeviceSync>);

        #[cfg(target_arch = "x86_64")]
        {
            let pm_timer_pio_address: u16 = 0x608;

            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            self.acpi_platform_addresses.pm_timer_address =
                Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
        }

        Ok(Some(ged_device))
    }

    /// Creates the x86_64 legacy PIO devices: i8042 (reset), CMOS/RTC with
    /// the guest memory split below/above 4 GiB, the 0x402 firmware debug
    /// device and the 0x80 POST/debug port.
    #[cfg(target_arch = "x86_64")]
    fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        // Add a shutdown device (i8042)
        let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
            reset_evt.try_clone().unwrap(),
            vcpus_kill_signalled.clone(),
        )));

        self.bus_devices
            .push(Arc::clone(&i8042) as Arc<dyn BusDeviceSync>);

        self.address_manager
            .io_bus
            .insert(i8042, 0x61, 0x4)
            .map_err(DeviceManagerError::BusError)?;
        {
            // Add a CMOS emulated device
            let mem_size = self
                .memory_manager
                .lock()
                .unwrap()
                .guest_memory()
                .memory()
                .last_addr()
                .0
                + 1;
            let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
            let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);

            let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
                mem_below_4g,
                mem_above_4g,
                reset_evt,
                Some(vcpus_kill_signalled),
            )));

            self.bus_devices
                .push(Arc::clone(&cmos) as Arc<dyn BusDeviceSync>);

            self.address_manager
                .io_bus
                .insert(cmos, 0x70, 0x2)
                .map_err(DeviceManagerError::BusError)?;

            let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));

            self.bus_devices
                .push(Arc::clone(&fwdebug) as Arc<dyn BusDeviceSync>);

            self.address_manager
                .io_bus
                .insert(fwdebug, 0x402, 0x1)
                .map_err(DeviceManagerError::BusError)?;
        }

        // 0x80 debug port
        let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
        self.bus_devices
            .push(Arc::clone(&debug_port) as Arc<dyn BusDeviceSync>);
        self.address_manager
            .io_bus
            .insert(debug_port, 0x80, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        Ok(())
    }

    /// Creates the aarch64 legacy MMIO devices (RTC and GPIO), registering
    /// each in `id_to_dev_info` so the FDT can describe them; the GPIO
    /// device state is restored from the snapshot when present.
    #[cfg(target_arch = "aarch64")]
    fn add_legacy_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
    ) -> DeviceManagerResult<()> {
        // Add a RTC device
        let rtc_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: rtc_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));

        self.bus_devices
            .push(Arc::clone(&rtc_device) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(rtc_device, addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Rtc, "rtc".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: rtc_irq,
            },
        );

        // Add a GPIO device
        let id = String::from(GPIO_DEVICE_NAME);
        let gpio_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: gpio_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
            id.clone(),
            interrupt_group,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&gpio_device) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(gpio_device.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.gpio_device = Some(gpio_device.clone());

        self.id_to_dev_info.insert(
            (DeviceType::Gpio, "gpio".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: gpio_irq,
            },
        );

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, gpio_device));

        Ok(())
    }

    /// Creates the debugcon device at the configured (or default) PIO port
    /// and routes its output to `debug_console_writer`.
    #[cfg(target_arch = "x86_64")]
    fn add_debug_console_device(
        &mut self,
        debug_console_writer: Box<dyn io::Write + Send>,
    ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> {
        let id = String::from(DEBUGCON_DEVICE_NAME);
        let debug_console = Arc::new(Mutex::new(DebugConsole::new(
            id.clone(),
            debug_console_writer,
        )));

        let port = self
            .config
            .lock()
            .unwrap()
            .debug_console
            .clone()
            .iobase
            .map(|port| port as u64)
            .unwrap_or(debug_console::DEFAULT_PORT);

        self.bus_devices
            .push(Arc::clone(&debug_console) as Arc<dyn BusDeviceSync>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(debug_console.clone(), port, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, debug_console));

        Ok(debug_console)
    }

    /// Creates the x86_64 16550-style serial device at the standard COM1
    /// location (ports 0x3f8-0x3ff, IRQ 4), restoring state from a snapshot
    /// when present.
    #[cfg(target_arch = "x86_64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
        // Serial is tied to IRQ #4
        let serial_irq = 4;

        let id = String::from(SERIAL_DEVICE_NAME);

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(Serial::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(serial.clone(), 0x3f8, 0x8)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// Creates the aarch64 PL011 serial device at the legacy serial MMIO
    /// address with a dynamically allocated IRQ, records it for the FDT and
    /// appends the matching `earlycon` kernel command line fragment.
    #[cfg(target_arch = "aarch64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
        let id = String::from(SERIAL_DEVICE_NAME);

        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            self.timestamp,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: serial_irq,
            },
        );

        self.cmdline_additions
            .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// Creates the virtio-console device, taking ownership of `console_fd`
    /// according to the configured output mode, and returns the resizer used
    /// for SIGWINCH handling (only when attached to a TTY). Returns
    /// `Ok(None)` when the console is configured off.
    fn add_virtio_console_device(
        &mut self,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_fd: Option<RawFd>,
        resize_pipe: Option<Arc<File>>,
    ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
        let console_config = self.config.lock().unwrap().console.clone();
        let endpoint = match console_config.mode {
            ConsoleOutputMode::File => {
                if let Some(file_fd) = console_fd {
                    // SAFETY: file_fd is guaranteed to be a valid fd from
                    // pre_create_console_devices() in vmm/src/console_devices.rs
                    Endpoint::File(unsafe { File::from_raw_fd(file_fd) })
                } else {
                    return Err(DeviceManagerError::InvalidConsoleFd);
                }
            }
            ConsoleOutputMode::Pty => {
                if let Some(pty_fd) = console_fd {
                    // SAFETY: pty_fd is guaranteed to be a valid fd from
                    // pre_create_console_devices() in vmm/src/console_devices.rs
                    let file = unsafe { File::from_raw_fd(pty_fd) };
                    self.console_resize_pipe = resize_pipe;
                    Endpoint::PtyPair(file.try_clone().unwrap(), file)
                } else {
                    return Err(DeviceManagerError::InvalidConsoleFd);
                }
            }
            ConsoleOutputMode::Tty => {
                if let Some(tty_fd) = console_fd {
                    // SAFETY: tty_fd is guaranteed to be a valid fd from
                    // pre_create_console_devices() in vmm/src/console_devices.rs
                    let stdout = unsafe { File::from_raw_fd(tty_fd) };
                    // If an interactive TTY then we can accept input
                    // SAFETY: FFI call. Trivially safe.
                    if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
                        // SAFETY: FFI call to dup. Trivially safe.
                        let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
                        if stdin == -1 {
                            return vmm_sys_util::errno::errno_result()
                                .map_err(DeviceManagerError::DupFd);
                        }
                        // SAFETY: stdin is valid and owned solely by us.
                        let stdin = unsafe { File::from_raw_fd(stdin) };
                        Endpoint::FilePair(stdout, stdin)
                    } else {
                        Endpoint::File(stdout)
                    }
                } else {
                    return Err(DeviceManagerError::InvalidConsoleFd);
                }
            }
            ConsoleOutputMode::Socket => {
                return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
            }
            ConsoleOutputMode::Null => Endpoint::Null,
            ConsoleOutputMode::Off => return Ok(None),
        };
        let id = String::from(CONSOLE_DEVICE_NAME);

        let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
            id.clone(),
            endpoint,
            self.console_resize_pipe
                .as_ref()
                .map(|p| p.try_clone().unwrap()),
            self.force_iommu | console_config.iommu,
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateVirtioConsole)?;
        let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
        virtio_devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_console_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: console_config.iommu,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_console_device));

        // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
        Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
            Some(console_resizer)
        } else {
            None
        })
    }

    /// Adds all devices that behave like a console with respect to the VM
    /// configuration. This includes:
    /// - debug-console
    /// - serial-console
    /// - virtio-console
    fn add_console_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_info: Option<ConsoleInfo>,
        console_resize_pipe: Option<Arc<File>>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        if console_info.is_none() {
            return Err(DeviceManagerError::InvalidConsoleInfo);
        }

        // SAFETY: console_info is Some, so it's safe to unwrap.
        let console_info = console_info.unwrap();
        let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
            ConsoleOutputMode::File | ConsoleOutputMode::Tty => {
                if console_info.serial_main_fd.is_none() {
                    return Err(DeviceManagerError::InvalidConsoleInfo);
                }
                // SAFETY: serial_main_fd is Some, so it's safe to unwrap.
                // SAFETY: serial_main_fd is guaranteed to be a valid fd from
                // pre_create_console_devices() in vmm/src/console_devices.rs
                Some(Box::new(unsafe {
                    File::from_raw_fd(console_info.serial_main_fd.unwrap())
                }))
            }
            ConsoleOutputMode::Off
            | ConsoleOutputMode::Null
            | ConsoleOutputMode::Pty
            | ConsoleOutputMode::Socket => None,
        };
        if serial_config.mode != ConsoleOutputMode::Off {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            // Input-capable modes additionally need a SerialManager thread
            // pumping data into the device.
            self.serial_manager = match serial_config.mode {
                ConsoleOutputMode::Pty | ConsoleOutputMode::Tty | ConsoleOutputMode::Socket => {
                    let serial_manager = SerialManager::new(
                        serial,
                        console_info.serial_main_fd,
                        serial_config.mode,
                        serial_config.socket,
                    )
                    .map_err(DeviceManagerError::CreateSerialManager)?;
                    if let Some(mut serial_manager) = serial_manager {
                        serial_manager
                            .start_thread(
                                self.exit_evt
                                    .try_clone()
                                    .map_err(DeviceManagerError::EventFd)?,
                            )
                            .map_err(DeviceManagerError::SpawnSerialManager)?;
                        Some(Arc::new(serial_manager))
                    } else {
                        None
                    }
                }
                _ => None,
            };
        }

        #[cfg(target_arch = "x86_64")]
        {
            let debug_console_config = self.config.lock().unwrap().debug_console.clone();
            let debug_console_writer: Option<Box<dyn io::Write + Send>> =
                match debug_console_config.mode {
                    ConsoleOutputMode::File | ConsoleOutputMode::Tty => {
                        if console_info.debug_main_fd.is_none() {
                            return Err(DeviceManagerError::InvalidConsoleInfo);
                        }
                        // SAFETY: debug_main_fd is Some, so it's safe to unwrap.
                        // SAFETY: debug_main_fd is guaranteed to be a valid fd from
                        // pre_create_console_devices() in vmm/src/console_devices.rs
                        Some(Box::new(unsafe {
                            File::from_raw_fd(console_info.debug_main_fd.unwrap())
                        }))
                    }
                    ConsoleOutputMode::Off
                    | ConsoleOutputMode::Null
                    | ConsoleOutputMode::Pty
                    | ConsoleOutputMode::Socket => None,
                };
            if let Some(writer) = debug_console_writer {
                let _ = self.add_debug_console_device(writer)?;
            }
        }

        let console_resizer = self.add_virtio_console_device(
            virtio_devices,
            console_info.console_main_fd,
            console_resize_pipe,
        )?;

        Ok(Arc::new(Console { console_resizer }))
    }

    /// Creates the TPM device backed by the swtpm socket at `tpm_path` and
    /// maps it at the fixed TPM MMIO window.
    fn add_tpm_device(
        &mut self,
        tpm_path: PathBuf,
    ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
        // Create TPM Device
        let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
            DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
        })?;
        let tpm = Arc::new(Mutex::new(tpm));

        // Add TPM Device to mmio
        self.address_manager
            .mmio_bus
            .insert(
                tpm.clone(),
                arch::layout::TPM_START.0,
                arch::layout::TPM_SIZE,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(tpm)
    }

    /// Instantiates every configured virtio device, in a fixed order so that
    /// PCI slot assignment stays stable across boots.
    fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices: Vec<MetaVirtioDevice> = Vec::new();

        // Create "standard" virtio devices (net/block/rng)
        devices.append(&mut self.make_virtio_block_devices()?);
        devices.append(&mut self.make_virtio_net_devices()?);
        devices.append(&mut self.make_virtio_rng_devices()?);

        // Add virtio-fs if required
        devices.append(&mut self.make_virtio_fs_devices()?);

        // Add virtio-pmem if required
        devices.append(&mut self.make_virtio_pmem_devices()?);

        // Add virtio-vsock if required
        devices.append(&mut self.make_virtio_vsock_devices()?);

        devices.append(&mut self.make_virtio_mem_devices()?);

        // Add virtio-balloon if required
        devices.append(&mut self.make_virtio_balloon_devices()?);

        // Add virtio-watchdog device
        devices.append(&mut self.make_virtio_watchdog_devices()?);

        // Add vDPA devices if required
        devices.append(&mut self.make_vdpa_devices()?);

        Ok(devices)
    }

    // Cache whether aio is supported to avoid checking for every block device
    fn aio_is_supported(&mut self) -> bool {
        if let Some(supported) = self.aio_supported {
            return supported;
        }

        let supported = block_aio_is_supported();
        self.aio_supported = Some(supported);
        supported
    }

    // Cache whether io_uring is supported to avoid probing for every block device
    fn io_uring_is_supported(&mut self) -> bool {
        if let Some(supported) = self.io_uring_supported {
            return supported;
        }

        let supported = block_io_uring_is_supported();
        self.io_uring_supported = Some(supported);
        supported
    }

    /// Create a single virtio-block device from `disk_cfg`, assigning an id
    /// if the config does not carry one. Uses a vhost-user backend when
    /// `disk_cfg.vhost_user` is set; otherwise opens the disk file and picks
    /// a backend (io_uring/aio/sync) based on the detected image type and
    /// host support.
    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let vhost_user_block = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(
                    id.clone(),
                    vu_cfg,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(
                                FixedVhdDiskAsync::new(file)
                                    .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                            ) as Box<dyn DiskFile>
                        }
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous RAW disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                        }
                    } else if !disk_cfg.disable_aio && self.aio_is_supported() {
                        info!("Using asynchronous RAW disk file (aio)");
                        Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            let rate_limit_group =
                if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
                    // Create an anonymous RateLimiterGroup that is dropped when the Disk
                    // is dropped.
                    let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
                    let ops = rate_limiter_cfg.ops.unwrap_or_default();
                    let mut rate_limit_group = RateLimiterGroup::new(
                        disk_cfg.id.as_ref().unwrap(),
                        bw.size,
                        bw.one_time_burst.unwrap_or(0),
                        bw.refill_time,
                        ops.size,
                        ops.one_time_burst.unwrap_or(0),
                        ops.refill_time,
                    )
                    .map_err(DeviceManagerError::RateLimiterGroupCreate)?;

                    rate_limit_group
                        .start_thread(
                            self.exit_evt
                                .try_clone()
                                .map_err(DeviceManagerError::EventFd)?,
                        )
                        .unwrap();

                    Some(Arc::new(rate_limit_group))
                } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
                    // Named group shared with other disks; looked up from the
                    // manager's registry.
                    self.rate_limit_groups.get(rate_limit_group).cloned()
                } else {
                    None
                };

            let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() {
                queue_affinity
                    .iter()
                    .map(|a| (a.queue_index, a.host_cpus.clone()))
                    .collect()
            } else {
                BTreeMap::new()
            };

            let virtio_block = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    disk_cfg.serial.clone(),
                    self.seccomp_action.clone(),
                    rate_limit_group,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    queue_affinity,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add virtio-block and vhost-user-block devices for every disk in the
    /// VM configuration, writing back the (possibly id-assigned) configs.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }

    /// Create a single virtio-net device from `net_cfg`, assigning an id if
    /// the config does not carry one. Supports vhost-user, an existing TAP
    /// interface, pre-opened TAP fds, or a freshly created TAP backend.
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    net_cfg.mtu,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(),
                    id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                ) {
                    Ok(vun_device) => vun_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserNet(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_net as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let state = state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?;
            let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
                // Attach to an already-existing host TAP interface.
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        Some(tap_if_name),
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else if let Some(fds) = &net_cfg.fds {
                // Build the device from TAP fds handed to us by the caller.
                let net = virtio_devices::Net::from_tap_fds(
                    id.clone(),
                    fds,
                    Some(net_cfg.mac),
                    net_cfg.mtu,
                    self.force_iommu | net_cfg.iommu,
                    net_cfg.queue_size,
                    self.seccomp_action.clone(),
                    net_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                )
                .map_err(DeviceManagerError::CreateVirtioNet)?;

                // SAFETY: 'fds' are valid because TAP devices are created successfully
                unsafe {
                    self.config.lock().unwrap().add_preserved_fds(fds.clone());
                }

                Arc::new(Mutex::new(net))
            } else {
                // No TAP name and no fds: create a fresh TAP backend.
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        None,
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            };

            (
                Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_net as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: net_cfg.iommu,
            id,
            pci_segment: net_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add virtio-net and vhost-user-net devices
    fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        let mut net_devices = self.config.lock().unwrap().net.clone();
        if let Some(net_list_cfg) = &mut net_devices {
            for net_cfg in net_list_cfg.iter_mut() {
                devices.push(self.make_virtio_net_device(net_cfg)?);
            }
        }
        self.config.lock().unwrap().net = net_devices;

        Ok(devices)
    }

    /// Add the (single, optional) virtio-rng device backed by the entropy
    /// source path from the VM configuration.
    fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        if let Some(rng_path) = rng_config.src.to_str() {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_rng_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: rng_config.iommu,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_rng_device));
        }

        Ok(devices)
    }

    /// Create a single vhost-user-fs (virtio-fs) device from `fs_cfg`,
    /// assigning an id if absent. Fails with `NoVirtioFsSock` when the
    /// socket path is not valid UTF-8.
    fn make_virtio_fs_device(
        &mut self,
        fs_cfg: &mut FsConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &fs_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
            fs_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-fs device: {:?}", fs_cfg);

        let mut node = device_node!(id);

        if let Some(fs_socket) = fs_cfg.socket.to_str() {
            let virtio_fs_device = Arc::new(Mutex::new(
                virtio_devices::vhost_user::Fs::new(
                    id.clone(),
                    fs_socket,
                    &fs_cfg.tag,
                    fs_cfg.num_queues,
                    fs_cfg.queue_size,
                    None,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioFs)?,
            ));

            // Update the device tree with the migratable device.
            node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_fs_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id,
                pci_segment: fs_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            Err(DeviceManagerError::NoVirtioFsSock)
        }
    }

    /// Add a virtio-fs device for every fs entry in the VM configuration,
    /// writing back the (possibly id-assigned) configs.
    fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut fs_devices = self.config.lock().unwrap().fs.clone();
        if let Some(fs_list_cfg) = &mut fs_devices {
            for fs_cfg in fs_list_cfg.iter_mut() {
                devices.push(self.make_virtio_fs_device(fs_cfg)?);
            }
        }
        self.config.lock().unwrap().fs = fs_devices;

        Ok(devices)
    }

    /// Create a single virtio-pmem device from `pmem_cfg`: open (or create,
    /// for a directory backing) the file, allocate/restore a 2MiB-aligned
    /// guest range, mmap the file and register the userspace mapping.
    fn make_virtio_pmem_device(
        &mut self,
        pmem_cfg: &mut PmemConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &pmem_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
            pmem_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-pmem device: {:?}", pmem_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
            info!("Restoring virtio-pmem {} resources", id);

            let mut region_range: Option<(u64, u64)> = None;
            for resource in node.resources.iter() {
                match resource {
                    Resource::MmioAddressRange { base, size } => {
                        if region_range.is_some() {
                            return Err(DeviceManagerError::ResourceAlreadyExists);
                        }

                        region_range = Some((*base, *size));
                    }
                    _ => {
                        error!("Unexpected resource {:?} for {}", resource, id);
                    }
                }
            }

            if region_range.is_none() {
                return Err(DeviceManagerError::MissingVirtioPmemResources);
            }

            region_range
        } else {
            None
        };

        // A directory backing requires an explicit size and an O_TMPFILE
        // anonymous file created inside it.
        let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
            if pmem_cfg.size.is_none() {
                return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
            }
            (O_TMPFILE, true)
        } else {
            (0, false)
        };

        let mut file = OpenOptions::new()
            .read(true)
            .write(!pmem_cfg.discard_writes)
            .custom_flags(custom_flags)
            .open(&pmem_cfg.file)
            .map_err(DeviceManagerError::PmemFileOpen)?;

        // Use the configured size, or derive it from the backing file length.
        let size = if let Some(size) = pmem_cfg.size {
            if set_len {
                file.set_len(size)
                    .map_err(DeviceManagerError::PmemFileSetLen)?;
            }
            size
        } else {
            file.seek(SeekFrom::End(0))
                .map_err(DeviceManagerError::PmemFileSetLen)?
        };

        // The region size must be a multiple of 2MiB.
        if size % 0x20_0000 != 0 {
            return Err(DeviceManagerError::PmemSizeNotAligned);
        }

        let (region_base, region_size) = if let Some((base, size)) = region_range {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(
                    Some(GuestAddress(base)),
                    size as GuestUsize,
                    Some(0x0020_0000),
                )
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base, size)
        } else {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            let base = self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(None, size as GuestUsize, Some(0x0020_0000))
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base.raw_value(), size)
        };

        let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
        // MAP_PRIVATE when writes must be discarded, MAP_SHARED otherwise.
        let mmap_region = MmapRegion::build(
            Some(FileOffset::new(cloned_file, 0)),
            region_size as usize,
            PROT_READ | PROT_WRITE,
            MAP_NORESERVE
                | if pmem_cfg.discard_writes {
                    MAP_PRIVATE
                } else {
                    MAP_SHARED
                },
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;
        let host_addr: u64 = mmap_region.as_ptr() as u64;

        let mem_slot = self
            .memory_manager
            .lock()
            .unwrap()
            .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
            .map_err(DeviceManagerError::MemoryManager)?;

        let mapping = virtio_devices::UserspaceMapping {
            host_addr,
            mem_slot,
            addr: GuestAddress(region_base),
            len: region_size,
            mergeable: false,
        };

        let virtio_pmem_device = Arc::new(Mutex::new(
            virtio_devices::Pmem::new(
                id.clone(),
                file,
                GuestAddress(region_base),
                mapping,
                mmap_region,
                self.force_iommu | pmem_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioPmem)?,
        ));

        // Update the device tree with correct resource information and with
        // the migratable device.
        node.resources.push(Resource::MmioAddressRange {
            base: region_base,
            size: region_size,
        });
        node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_pmem_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: pmem_cfg.iommu,
            id,
            pci_segment: pmem_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add a virtio-pmem device for every pmem entry in the VM
    /// configuration, writing back the (possibly id-assigned) configs.
    fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add virtio-pmem if required
        let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
        if let Some(pmem_list_cfg) = &mut pmem_devices {
            for pmem_cfg in pmem_list_cfg.iter_mut() {
                devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
            }
        }
        self.config.lock().unwrap().pmem = pmem_devices;

        Ok(devices)
    }

    /// Create a single virtio-vsock device from `vsock_cfg` with a Unix
    /// socket backend, assigning an id if absent.
    fn make_virtio_vsock_device(
        &mut self,
        vsock_cfg: &mut VsockConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vsock_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
            vsock_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-vsock device: {:?}", vsock_cfg);

        let socket_path = vsock_cfg
            .socket
            .to_str()
            .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
        let backend =
            virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                .map_err(DeviceManagerError::CreateVsockBackend)?;

        let vsock_device = Arc::new(Mutex::new(
            virtio_devices::Vsock::new(
                id.clone(),
                vsock_cfg.cid,
                vsock_cfg.socket.clone(),
                backend,
                self.force_iommu | vsock_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioVsock)?,
        ));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vsock_device));

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&vsock_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vsock_cfg.iommu,
            id,
            pci_segment: vsock_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add the (single, optional) virtio-vsock device from the VM
    /// configuration, writing back the (possibly id-assigned) config.
    fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut vsock = self.config.lock().unwrap().vsock.clone();
        if let Some(ref mut vsock_cfg) = &mut vsock {
            devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
        }
        self.config.lock().unwrap().vsock = vsock;

        Ok(devices)
    }

    /// Create one virtio-mem device per memory zone that carries a
    /// virtio-mem zone, hooking each device back into its zone for resizes.
    fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mm = self.memory_manager.clone();
        let mut mm = mm.lock().unwrap();
        for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
            if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
                info!("Creating virtio-mem device: id = {}", memory_zone_id);

                // Map the memory zone to its NUMA node, if any.
                let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
                    .map(|i| i as u16);

                let virtio_mem_device = Arc::new(Mutex::new(
                    virtio_devices::Mem::new(
                        memory_zone_id.clone(),
                        virtio_mem_zone.region(),
                        self.seccomp_action.clone(),
                        node_id,
                        virtio_mem_zone.hotplugged_size(),
                        virtio_mem_zone.hugepages(),
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        virtio_mem_zone.blocks_state().clone(),
                        state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
                            .map_err(DeviceManagerError::RestoreGetState)?,
                    )
                    .map_err(DeviceManagerError::CreateVirtioMem)?,
                ));

                // Update the virtio-mem zone so that it has a handle onto the
                // virtio-mem device, which will be used for triggering a resize
                // if needed.
                virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));

                self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));

                devices.push(MetaVirtioDevice {
                    virtio_device: Arc::clone(&virtio_mem_device)
                        as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                    iommu: false,
                    id: memory_zone_id.clone(),
                    pci_segment: 0,
                    dma_handler: None,
                });

                // Fill the device tree with a new node. In case of restore, we
                // know there is nothing to do, so we can simply override the
                // existing entry.
                self.device_tree.lock().unwrap().insert(
                    memory_zone_id.clone(),
                    device_node!(memory_zone_id, virtio_mem_device),
                );
            }
        }

        Ok(devices)
    }

    /// Create the pvmemcontrol PCI device pair (bus device + PCI device),
    /// register it on PCI segment 0 and record it in the device tree.
    #[cfg(feature = "pvmemcontrol")]
    fn make_pvmemcontrol_device(
        &mut self,
    ) -> DeviceManagerResult<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )> {
        let id = String::from(PVMEMCONTROL_DEVICE_NAME);
        let pci_segment_id = 0x0_u16;

        // pci_resources() may resolve a different segment/bdf on restore;
        // the shadowed bindings are the authoritative values.
        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        info!("Creating pvmemcontrol device: id = {}", id);
        let (pvmemcontrol_pci_device, pvmemcontrol_bus_device) =
            devices::pvmemcontrol::PvmemcontrolDevice::make_device(
                id.clone(),
                self.memory_manager.lock().unwrap().guest_memory(),
            );

        let pvmemcontrol_pci_device = Arc::new(Mutex::new(pvmemcontrol_pci_device));
        let pvmemcontrol_bus_device = Arc::new(pvmemcontrol_bus_device);

        let new_resources = self.add_pci_device(
            pvmemcontrol_bus_device.clone(),
            pvmemcontrol_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let mut node = device_node!(id, pvmemcontrol_pci_device);

        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = None;

        self.device_tree.lock().unwrap().insert(id, node);

        Ok((pvmemcontrol_bus_device, pvmemcontrol_pci_device))
    }

    /// Add the (single, optional) virtio-balloon device from the VM
    /// configuration, keeping a handle in `self.balloon` for resize requests.
    fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
            let id = String::from(BALLOON_DEVICE_NAME);
            info!("Creating virtio-balloon device: id = {}", id);

            let virtio_balloon_device = Arc::new(Mutex::new(
                virtio_devices::Balloon::new(
                    id.clone(),
                    balloon_config.size,
                // NOTE(review): continuation of make_virtio_balloon_devices();
                // the opening of this function (and of the enclosing impl) is
                // above this chunk. These are the remaining Balloon::new()
                // arguments followed by device registration.
                balloon_config.deflate_on_oom,
                balloon_config.free_page_reporting,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                // Restore the device state from a snapshot when one exists.
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioBalloon)?,
        ));

        // Keep a direct handle on the balloon device so it can be driven
        // (e.g. resized) independently of the generic virtio device list.
        self.balloon = Some(virtio_balloon_device.clone());

        devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_balloon_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: false,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        // Record the device in the device tree for later lookup/restore.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_balloon_device));
    }

    Ok(devices)
}

/// Create the virtio-watchdog device when `watchdog` is enabled in the VM
/// config. Returns an empty list otherwise. The watchdog is always placed
/// on PCI segment 0 and is not attached to the vIOMMU.
fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
    let mut devices = Vec::new();

    // Nothing to do unless the watchdog was requested.
    if !self.config.lock().unwrap().watchdog {
        return Ok(devices);
    }

    let id = String::from(WATCHDOG_DEVICE_NAME);
    info!("Creating virtio-watchdog device: id = {}", id);

    let virtio_watchdog_device = Arc::new(Mutex::new(
        virtio_devices::Watchdog::new(
            id.clone(),
            // The watchdog fires the VM reset event on expiry.
            self.reset_evt.try_clone().unwrap(),
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
    ));
    devices.push(MetaVirtioDevice {
        virtio_device: Arc::clone(&virtio_watchdog_device)
            as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu: false,
        id: id.clone(),
        pci_segment: 0,
        dma_handler: None,
    });

    self.device_tree
        .lock()
        .unwrap()
        .insert(id.clone(), device_node!(id, virtio_watchdog_device));

    Ok(devices)
}

/// Create a single vDPA device from its configuration, assigning a name if
/// the config does not provide one (the generated name is written back into
/// `vdpa_cfg` so it persists in the VM config).
fn make_vdpa_device(
    &mut self,
    vdpa_cfg: &mut VdpaConfig,
) -> DeviceManagerResult<MetaVirtioDevice> {
    let id = if let Some(id) = &vdpa_cfg.id {
        id.clone()
    } else {
        let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
        vdpa_cfg.id = Some(id.clone());
        id
    };

    info!("Creating vDPA device: {:?}", vdpa_cfg);

    // The host device path must be valid UTF-8 to be handed to Vdpa::new().
    let device_path = vdpa_cfg
        .path
        .to_str()
        .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;

    let vdpa_device = Arc::new(Mutex::new(
        virtio_devices::Vdpa::new(
            id.clone(),
            device_path,
            self.memory_manager.lock().unwrap().guest_memory(),
            vdpa_cfg.num_queues as u16,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateVdpa)?,
    ));

    // Create the DMA handler that is required by the vDPA device
    let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
        Arc::clone(&vdpa_device),
        Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
    ));

    self.device_tree
        .lock()
        .unwrap()
        .insert(id.clone(), device_node!(id, vdpa_device));

    Ok(MetaVirtioDevice {
        virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu: vdpa_cfg.iommu,
        id,
        pci_segment: vdpa_cfg.pci_segment,
        dma_handler: Some(vdpa_mapping),
    })
}

/// Create every vDPA device listed in the VM config, writing generated ids
/// back into the config so they are preserved.
fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
    let mut devices = Vec::new();
    // Add vdpa if required
    // The config list is cloned, mutated (id assignment), then stored back,
    // so the config lock is not held across device creation.
    let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
    if let Some(vdpa_list_cfg) = &mut vdpa_devices {
        for vdpa_cfg in vdpa_list_cfg.iter_mut() {
            devices.push(self.make_vdpa_device(vdpa_cfg)?);
        }
    }
    self.config.lock().unwrap().vdpa = vdpa_devices;

    Ok(devices)
}

/// Generate the next unused device name for the given prefix.
// NOTE(review): the remainder of this signature and body continue in the
// following chunk of the file.
fn next_device_name(&mut
    // NOTE(review): continuation of next_device_name(), whose `fn` keyword and
    // receiver appear at the end of the previous chunk.
    self, prefix: &str) -> DeviceManagerResult<String> {
        // Remember where we started so we can detect a full wrap-around of the
        // Wrapping counter, which means every candidate name is taken.
        let start_id = self.device_id_cnt;
        loop {
            // Generate the temporary name.
            let name = format!("{}{}", prefix, self.device_id_cnt);
            // Increment the counter.
            self.device_id_cnt += Wrapping(1);
            // Check if the name is already in use.
            if !self.boot_id_list.contains(&name)
                && !self.device_tree.lock().unwrap().contains_key(&name)
            {
                return Ok(name);
            }

            if self.device_id_cnt == start_id {
                // We went through a full loop and there's nothing else we can
                // do.
                break;
            }
        }
        Err(DeviceManagerError::NoAvailableDeviceName)
    }

    /// Add a VFIO passthrough device, lazily creating the hypervisor-level
    /// passthrough device (shared by all VFIO devices) on first use.
    fn add_passthrough_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // If the passthrough device has not been created yet, it is created
        // here and stored in the DeviceManager structure for future needs.
        if self.passthrough_device.is_none() {
            self.passthrough_device = Some(
                self.address_manager
                    .vm
                    .create_passthrough_device()
                    .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
            );
        }

        self.add_vfio_device(device_cfg)
    }

    /// Build a new VFIO container backed by a duplicate of the passthrough
    /// device file descriptor. Fails if passthrough is not supported.
    fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
        let passthrough_device = self
            .passthrough_device
            .as_ref()
            .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

        // Duplicate the fd so the container owns its own handle.
        let dup = passthrough_device
            .try_clone()
            .map_err(DeviceManagerError::VfioCreate)?;

        Ok(Arc::new(
            VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
        ))
    }

    /// Create a VFIO PCI device from `device_cfg`, wire up its DMA mappings
    /// (either via the vIOMMU or by pre-mapping guest RAM), place it on the
    /// PCI bus and record it in the device tree.
    /// Returns the device's BDF and its name.
    fn add_vfio_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // Use the configured id, or generate one and persist it in the config.
        let vfio_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't let all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device. That's simply because the map/unmap operations happen at the
        // VFIO container level.
        let vfio_container = if device_cfg.iommu {
            let vfio_container = self.create_vfio_container()?;

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            // vIOMMU-attached devices require the virtual IOMMU to exist;
            // the guest drives map/unmap through it.
            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }

            vfio_container
        } else if let Some(vfio_container) = &self.vfio_container {
            // Reuse the shared container for non-vIOMMU devices.
            Arc::clone(vfio_container)
        } else {
            // First non-vIOMMU VFIO device: create the shared container and
            // remember that guest RAM must be mapped into it below.
            let vfio_container = self.create_vfio_container()?;
            needs_dma_mapping = true;
            self.vfio_container = Some(Arc::clone(&vfio_container));

            vfio_container
        };

        let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
            .map_err(DeviceManagerError::VfioCreate)?;

        if needs_dma_mapping {
            // Register DMA mapping in IOMMU.
            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem device itself.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    vfio_container
                        .vfio_dma_map(
                            region.start_addr().raw_value(),
                            region.len(),
                            region.as_ptr() as u64,
                        )
                        .map_err(DeviceManagerError::VfioDmaMap)?;
                }
            }

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            // Let virtio-mem devices keep the container's mappings in sync
            // with memory hot(un)plug.
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Container,
                        vfio_mapping.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }
        }

        // INTx support: allocate a legacy IRQ group when a legacy interrupt
        // manager is available (the slot is keyed by PCI device number).
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        // Clone moved into the closure below so the device can allocate
        // KVM memory slots on demand.
        let memory_manager = self.memory_manager.clone();

        let vfio_pci_device = VfioPciDevice::new(
            vfio_name.clone(),
            &self.address_manager.vm,
            vfio_device,
            vfio_container,
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            device_cfg.iommu,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
            device_cfg.x_nv_gpudirect_clique,
        )
        .map_err(DeviceManagerError::VfioPciCreate)?;

        let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

        let new_resources = self.add_pci_device(
            vfio_pci_device.clone(),
            vfio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // BARs are allocated by add_pci_device() above; now map the device's
        // MMIO regions into the guest.
        vfio_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioMapRegion)?;

        for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
            self.mmio_regions.lock().unwrap().push(mmio_region);
        }

        let mut node = device_node!(vfio_name, vfio_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_name.clone(), node);

        Ok((pci_device_bdf, vfio_name))
    }

    /// Common plumbing to place any PCI device on a segment's bus: allocate
    /// its BARs (reusing `resources` on restore), register it with the bus,
    /// and map its BARs onto the IO/MMIO buses. Returns the BAR resources.
    fn add_pci_device(
        &mut self,
        bus_device: Arc<dyn BusDeviceSync>,
        pci_device: Arc<Mutex<dyn PciDevice>>,
        segment_id: u16,
        bdf: PciBdf,
        resources: Option<Vec<Resource>>,
    ) -> DeviceManagerResult<Vec<Resource>> {
        let bars = pci_device
            .lock()
            .unwrap()
            .allocate_bars(
                &self.address_manager.allocator,
                &mut self.pci_segments[segment_id as usize]
                    .mem32_allocator
                    .lock()
                    .unwrap(),
                &mut self.pci_segments[segment_id as usize]
                    .mem64_allocator
                    .lock()
                    .unwrap(),
                resources,
            )
            .map_err(DeviceManagerError::AllocateBars)?;

        let mut pci_bus = self.pci_segments[segment_id as usize]
            .pci_bus
            .lock()
            .unwrap();

        pci_bus
            .add_device(bdf.device() as u32, pci_device)
            .map_err(DeviceManagerError::AddPciDevice)?;

        self.bus_devices.push(Arc::clone(&bus_device));

        // The io_bus argument only exists on x86_64 (port IO).
        pci_bus
            .register_mapping(
                bus_device,
                #[cfg(target_arch = "x86_64")]
                self.address_manager.io_bus.as_ref(),
                self.address_manager.mmio_bus.as_ref(),
                bars.clone(),
            )
            .map_err(DeviceManagerError::AddPciDevice)?;
        // Translate the allocated BARs into generic resources for the device
        // tree, so they can be restored on snapshot/restore.
        let mut new_resources = Vec::new();
        for bar in bars {
            new_resources.push(Resource::PciBar {
                index: bar.idx(),
                base: bar.addr(),
                size: bar.size(),
                type_: bar.region_type().into(),
                prefetchable: bar.prefetchable().into(),
            });
        }

        Ok(new_resources)
    }

    /// Create every VFIO passthrough device from the VM config and return the
    /// BDFs of those attached to the vIOMMU.
    fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut iommu_attached_device_ids = Vec::new();
        // Clone the list so the config lock is not held across device setup.
        let mut devices = self.config.lock().unwrap().devices.clone();

        if let Some(device_list_cfg) = &mut devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (device_id, _) = self.add_passthrough_device(device_cfg)?;
                if device_cfg.iommu && self.iommu_device.is_some() {
                    iommu_attached_device_ids.push(device_id);
                }
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().devices = devices;

        Ok(iommu_attached_device_ids)
    }

    /// Create a vfio-user (out-of-process) PCI device connected over the
    /// configured socket, map guest RAM for its DMA, place it on the PCI bus
    /// and record it in the device tree. Returns its BDF and name.
    fn add_vfio_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // Use the configured id, or generate one and persist it in the config.
        let vfio_user_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;

        // INTx support when a legacy interrupt manager is available.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let client = Arc::new(Mutex::new(
            vfio_user::Client::new(&device_cfg.socket)
                .map_err(DeviceManagerError::VfioUserCreateClient)?,
        ));

        // Clone moved into the memory-slot allocation closure below.
        let memory_manager = self.memory_manager.clone();

        let mut vfio_user_pci_device = VfioUserPciDevice::new(
            vfio_user_name.clone(),
            &self.address_manager.vm,
            client.clone(),
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
        )
        .map_err(DeviceManagerError::VfioUserCreate)?;

        let memory = self.memory_manager.lock().unwrap().guest_memory();
        let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
        // Let virtio-mem keep this device's DMA mappings in sync with memory
        // hot(un)plug, keyed per-device (unlike the shared VFIO container).
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Device(pci_device_bdf.into()),
                    vfio_user_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }

        // Map current guest RAM regions into the remote device for DMA.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_user_pci_device
                    .dma_map(region)
                    .map_err(DeviceManagerError::VfioUserDmaMap)?;
            }
        }

        let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

        let new_resources = self.add_pci_device(
            vfio_user_pci_device.clone(),
            vfio_user_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Note it is required to call 'add_pci_device()' in advance to have the list of
        // mmio regions provisioned correctly
        vfio_user_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioUserMapRegion)?;

        let mut node = device_node!(vfio_user_name, vfio_user_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_user_name.clone(), node);

        Ok((pci_device_bdf, vfio_user_name))
    }

    /// Create every vfio-user device from the VM config.
    /// Always returns an empty list (vfio-user devices are never attached to
    /// the vIOMMU here, unlike add_vfio_devices()).
    fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut user_devices = self.config.lock().unwrap().user_devices.clone();

        if let Some(device_list_cfg) = &mut user_devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().user_devices = user_devices;

        Ok(vec![])
    }

    /// Wrap a virtio device into a virtio-pci transport device, set up its
    /// DMA/IOMMU plumbing and ioeventfds, place it on the PCI bus and record
    /// it in the device tree. Returns the transport device's BDF.
    fn add_virtio_pci_device(
        &mut self,
        virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu_mapping: &Option<Arc<IommuMapping>>,
        virtio_device_id: String,
        pci_segment_id: u16,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> DeviceManagerResult<PciBdf> {
        let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");

        // Add the new virtio-pci node to the device tree.
        let mut node = device_node!(id);
        node.children = vec![virtio_device_id.clone()];

        // pci_segment_id is shadowed: on restore it comes from the saved BDF.
        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        // Update the existing virtio node by setting the parent.
        if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
            node.parent = Some(id.clone());
        } else {
            return Err(DeviceManagerError::MissingNode);
        }

        // Allows support for one MSI-X vector per queue. It also adds 1
        // as we need to take into account the dedicated vector to notify
        // about a virtio config change.
        let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;

        // Create the AccessPlatform trait from the implementation IommuMapping.
        // This will provide address translation for any virtio device sitting
        // behind a vIOMMU.
        let mut access_platform: Option<Arc<dyn AccessPlatform>> = None;

        if let Some(mapping) = iommu_mapping {
            access_platform = Some(Arc::new(AccessPlatformMapping::new(
                pci_device_bdf.into(),
                mapping.clone(),
            )));
        }

        // If SEV-SNP is enabled create the AccessPlatform from SevSnpPageAccessProxy
        // NOTE(review): this overrides any vIOMMU-based AccessPlatform set above.
        #[cfg(feature = "sev_snp")]
        if self.config.lock().unwrap().is_sev_snp_enabled() {
            access_platform = Some(Arc::new(SevSnpPageAccessProxy::new(
                self.address_manager.vm.clone(),
            )));
        }

        let memory = self.memory_manager.lock().unwrap().guest_memory();

        // Map DMA ranges if a DMA handler is available and if the device is
        // not attached to a virtual IOMMU.
        if let Some(dma_handler) = &dma_handler {
            if iommu_mapping.is_some() {
                // Behind a vIOMMU: defer map/unmap to the guest via the IOMMU.
                if let Some(iommu) = &self.iommu_device {
                    iommu
                        .lock()
                        .unwrap()
                        .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
                } else {
                    return Err(DeviceManagerError::MissingVirtualIommu);
                }
            } else {
                // Let every virtio-mem device handle the DMA map/unmap through the
                // DMA handler provided.
                for virtio_mem_device in self.virtio_mem_devices.iter() {
                    virtio_mem_device
                        .lock()
                        .unwrap()
                        .add_dma_mapping_handler(
                            VirtioMemMappingSource::Device(pci_device_bdf.into()),
                            dma_handler.clone(),
                        )
                        .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
                }

                // Do not register virtio-mem regions, as they are handled directly by
                // virtio-mem devices.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        // Identity mapping: IOVA == GPA.
                        let gpa = region.start_addr().0;
                        let size = region.len();
                        dma_handler
                            .map(gpa, gpa, size)
                            .map_err(DeviceManagerError::VirtioDmaMap)?;
                    }
                }
            }
        }

        let device_type = virtio_device.lock().unwrap().device_type();
        let virtio_pci_device = Arc::new(Mutex::new(
            VirtioPciDevice::new(
                id.clone(),
                memory,
                virtio_device,
                msix_num,
                access_platform,
                &self.msi_interrupt_manager,
                pci_device_bdf.into(),
                self.activate_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                // All device types *except* virtio block devices should be allocated a 64-bit bar
                // The block devices should be given a 32-bit BAR so that they are easily accessible
                // to firmware without requiring excessive identity mapping.
                // The exception being if not on the default PCI segment.
                pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
                dma_handler,
                self.pending_activations.clone(),
                vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
            )
            .map_err(DeviceManagerError::VirtioDevice)?,
        ));

        let new_resources = self.add_pci_device(
            virtio_pci_device.clone(),
            virtio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Register one ioeventfd per queue notification address so guest
        // kicks bypass a full VM exit handling path.
        let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
        for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
            let io_addr = IoEventAddress::Mmio(addr);
            self.address_manager
                .vm
                .register_ioevent(event, &io_addr, None)
                .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
        }

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
        self.device_tree.lock().unwrap().insert(id, node);

        Ok(pci_device_bdf)
    }

    /// Create the pvpanic device on PCI segment 0, registering it on the bus
    /// and in the device tree. Returns a handle to the created device.
    fn add_pvpanic_device(
        &mut self,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
        let id = String::from(PVPANIC_DEVICE_NAME);
        // Always placed on the default segment; shadowed below on restore.
        let pci_segment_id = 0x0_u16;

        info!("Creating pvpanic device {}", id);

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
            .map_err(DeviceManagerError::PvPanicCreate)?;

        let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));

        let new_resources = self.add_pci_device(
            pvpanic_device.clone(),
            pvpanic_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let mut node = device_node!(id, pvpanic_device);

        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        // No PciDeviceHandle variant for pvpanic; it is not hot-removable.
        node.pci_device_handle = None;

        self.device_tree.lock().unwrap().insert(id, node);

        Ok(Some(pvpanic_device))
    }

    /// Resolve (segment, BDF, saved resources) for a device `id`: reuse the
    /// saved BDF/resources when restoring, otherwise allocate a fresh BDF on
    /// the requested segment.
    fn pci_resources(
        &self,
        id: &str,
        pci_segment_id: u16,
    ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        Ok(
            if let Some(node) = self.device_tree.lock().unwrap().get(id) {
                info!("Restoring virtio-pci {} resources", id);
                let pci_device_bdf: PciBdf = node
                    .pci_bdf
                    .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
                // The segment comes from the saved BDF, not the caller.
                let pci_segment_id = pci_device_bdf.segment();

                // Reserve the saved device slot on the bus so new devices
                // cannot claim it.
                self.pci_segments[pci_segment_id as usize]
                    .pci_bus
                    .lock()
                    .unwrap()
                    .get_device_id(pci_device_bdf.device() as usize)
                    .map_err(DeviceManagerError::GetPciDeviceId)?;

                (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
            } else {
                // Fresh device: allocate the next free BDF on the requested
                // segment; no saved resources.
                let pci_device_bdf =
                    self.pci_segments[pci_segment_id as usize].next_device_bdf()?;

                (pci_segment_id, pci_device_bdf, None)
            },
        )
    }

    /// Port-IO bus accessor (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }

    /// MMIO bus accessor.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }

    /// System resource allocator accessor.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }

    /// Interrupt controller accessor, as a trait object clone when present.
    pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
        self.interrupt_controller
            .as_ref()
            .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
    }

    /// PCI segments accessor (crate-internal).
    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }

    /// Console accessor.
    pub fn console(&self) -> &Arc<Console> {
        &self.console
    }

    /// Extra kernel command-line fragments collected while creating devices
    /// (aarch64 only).
    #[cfg(target_arch = "aarch64")]
    pub fn cmdline_additions(&self) -> &[String] {
        self.cmdline_additions.as_slice()
    }

    /// Propagate a newly hotplugged memory region to every device that needs
    /// to know about guest memory: virtio devices, their DMA handlers, the
    /// shared VFIO container, and all vfio-user devices.
    pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
        for handle in self.virtio_devices.iter() {
            handle
                .virtio_device
                .lock()
                .unwrap()
                .add_memory_region(new_region)
                .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;

            // Devices behind the vIOMMU are skipped: the guest maps DMA
            // through the IOMMU instead.
            if let Some(dma_handler) = &handle.dma_handler {
                if !handle.iommu {
                    // Identity mapping: IOVA == GPA.
                    let gpa = new_region.start_addr().0;
                    let size = new_region.len();
                    dma_handler
                        .map(gpa, gpa, size)
                        .map_err(DeviceManagerError::VirtioDmaMap)?;
                }
            }
        }

        // Take care of updating the memory for VFIO PCI devices.
        if let Some(vfio_container) = &self.vfio_container {
            vfio_container
                .vfio_dma_map(
                    new_region.start_addr().raw_value(),
                    new_region.len(),
                    new_region.as_ptr() as u64,
                )
                .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
        }

        // Take care of updating the memory for vfio-user devices.
        {
            let device_tree = self.device_tree.lock().unwrap();
            for pci_device_node in device_tree.pci_devices() {
                if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
                    .pci_device_handle
                    .as_ref()
                    .ok_or(DeviceManagerError::MissingPciDevice)?
                {
                    vfio_user_pci_device
                        .lock()
                        .unwrap()
                        .dma_map(new_region)
                        .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
                }
            }
        }

        Ok(())
    }

    /// Activate every virtio device queued for activation, draining the
    /// pending list.
    pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
        for mut activator in self.pending_activations.lock().unwrap().drain(..)
{ 4010 activator 4011 .activate() 4012 .map_err(DeviceManagerError::VirtioActivate)?; 4013 } 4014 Ok(()) 4015 } 4016 4017 pub fn notify_hotplug( 4018 &self, 4019 _notification_type: AcpiNotificationFlags, 4020 ) -> DeviceManagerResult<()> { 4021 return self 4022 .ged_notification_device 4023 .as_ref() 4024 .unwrap() 4025 .lock() 4026 .unwrap() 4027 .notify(_notification_type) 4028 .map_err(DeviceManagerError::HotPlugNotification); 4029 } 4030 4031 pub fn add_device( 4032 &mut self, 4033 device_cfg: &mut DeviceConfig, 4034 ) -> DeviceManagerResult<PciDeviceInfo> { 4035 self.validate_identifier(&device_cfg.id)?; 4036 4037 if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) { 4038 return Err(DeviceManagerError::InvalidIommuHotplug); 4039 } 4040 4041 let (bdf, device_name) = self.add_passthrough_device(device_cfg)?; 4042 4043 // Update the PCIU bitmap 4044 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 4045 4046 Ok(PciDeviceInfo { 4047 id: device_name, 4048 bdf, 4049 }) 4050 } 4051 4052 pub fn add_user_device( 4053 &mut self, 4054 device_cfg: &mut UserDeviceConfig, 4055 ) -> DeviceManagerResult<PciDeviceInfo> { 4056 self.validate_identifier(&device_cfg.id)?; 4057 4058 let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?; 4059 4060 // Update the PCIU bitmap 4061 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 4062 4063 Ok(PciDeviceInfo { 4064 id: device_name, 4065 bdf, 4066 }) 4067 } 4068 4069 pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> { 4070 // The node can be directly a PCI node in case the 'id' refers to a 4071 // VFIO device or a virtio-pci one. 4072 // In case the 'id' refers to a virtio device, we must find the PCI 4073 // node by looking at the parent. 
4074 let device_tree = self.device_tree.lock().unwrap(); 4075 let node = device_tree 4076 .get(&id) 4077 .ok_or(DeviceManagerError::UnknownDeviceId(id))?; 4078 4079 let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() { 4080 node 4081 } else { 4082 let parent = node 4083 .parent 4084 .as_ref() 4085 .ok_or(DeviceManagerError::MissingNode)?; 4086 device_tree 4087 .get(parent) 4088 .ok_or(DeviceManagerError::MissingNode)? 4089 }; 4090 4091 let pci_device_bdf: PciBdf = pci_device_node 4092 .pci_bdf 4093 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 4094 let pci_segment_id = pci_device_bdf.segment(); 4095 4096 let pci_device_handle = pci_device_node 4097 .pci_device_handle 4098 .as_ref() 4099 .ok_or(DeviceManagerError::MissingPciDevice)?; 4100 #[allow(irrefutable_let_patterns)] 4101 if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle { 4102 let device_type = VirtioDeviceType::from( 4103 virtio_pci_device 4104 .lock() 4105 .unwrap() 4106 .virtio_device() 4107 .lock() 4108 .unwrap() 4109 .device_type(), 4110 ); 4111 match device_type { 4112 VirtioDeviceType::Net 4113 | VirtioDeviceType::Block 4114 | VirtioDeviceType::Pmem 4115 | VirtioDeviceType::Fs 4116 | VirtioDeviceType::Vsock => {} 4117 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)), 4118 } 4119 } 4120 4121 // Update the PCID bitmap 4122 self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device(); 4123 4124 Ok(()) 4125 } 4126 4127 pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> { 4128 info!( 4129 "Ejecting device_id = {} on segment_id={}", 4130 device_id, pci_segment_id 4131 ); 4132 4133 // Convert the device ID into the corresponding b/d/f. 4134 let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0); 4135 4136 // Give the PCI device ID back to the PCI bus. 
4137 self.pci_segments[pci_segment_id as usize] 4138 .pci_bus 4139 .lock() 4140 .unwrap() 4141 .put_device_id(device_id as usize) 4142 .map_err(DeviceManagerError::PutPciDeviceId)?; 4143 4144 // Remove the device from the device tree along with its children. 4145 let mut device_tree = self.device_tree.lock().unwrap(); 4146 let pci_device_node = device_tree 4147 .remove_node_by_pci_bdf(pci_device_bdf) 4148 .ok_or(DeviceManagerError::MissingPciDevice)?; 4149 4150 // For VFIO and vfio-user the PCI device id is the id. 4151 // For virtio we overwrite it later as we want the id of the 4152 // underlying device. 4153 let mut id = pci_device_node.id; 4154 let pci_device_handle = pci_device_node 4155 .pci_device_handle 4156 .ok_or(DeviceManagerError::MissingPciDevice)?; 4157 if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) { 4158 // The virtio-pci device has a single child 4159 if !pci_device_node.children.is_empty() { 4160 assert_eq!(pci_device_node.children.len(), 1); 4161 let child_id = &pci_device_node.children[0]; 4162 id.clone_from(child_id); 4163 } 4164 } 4165 for child in pci_device_node.children.iter() { 4166 device_tree.remove(child); 4167 } 4168 4169 let mut iommu_attached = false; 4170 if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices { 4171 if iommu_attached_devices.contains(&pci_device_bdf) { 4172 iommu_attached = true; 4173 } 4174 } 4175 4176 let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { 4177 // No need to remove any virtio-mem mapping here as the container outlives all devices 4178 PciDeviceHandle::Vfio(vfio_pci_device) => { 4179 for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() { 4180 self.mmio_regions 4181 .lock() 4182 .unwrap() 4183 .retain(|x| x.start != mmio_region.start) 4184 } 4185 4186 ( 4187 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4188 Arc::clone(&vfio_pci_device) as Arc<dyn BusDeviceSync>, 4189 None as Option<Arc<Mutex<dyn 
virtio_devices::VirtioDevice>>>, 4190 false, 4191 ) 4192 } 4193 PciDeviceHandle::Virtio(virtio_pci_device) => { 4194 let dev = virtio_pci_device.lock().unwrap(); 4195 let bar_addr = dev.config_bar_addr(); 4196 for (event, addr) in dev.ioeventfds(bar_addr) { 4197 let io_addr = IoEventAddress::Mmio(addr); 4198 self.address_manager 4199 .vm 4200 .unregister_ioevent(event, &io_addr) 4201 .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; 4202 } 4203 4204 if let Some(dma_handler) = dev.dma_handler() { 4205 if !iommu_attached { 4206 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4207 for region in zone.regions() { 4208 let iova = region.start_addr().0; 4209 let size = region.len(); 4210 dma_handler 4211 .unmap(iova, size) 4212 .map_err(DeviceManagerError::VirtioDmaUnmap)?; 4213 } 4214 } 4215 } 4216 } 4217 4218 ( 4219 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4220 Arc::clone(&virtio_pci_device) as Arc<dyn BusDeviceSync>, 4221 Some(dev.virtio_device()), 4222 dev.dma_handler().is_some() && !iommu_attached, 4223 ) 4224 } 4225 PciDeviceHandle::VfioUser(vfio_user_pci_device) => { 4226 let mut dev = vfio_user_pci_device.lock().unwrap(); 4227 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4228 for region in zone.regions() { 4229 dev.dma_unmap(region) 4230 .map_err(DeviceManagerError::VfioUserDmaUnmap)?; 4231 } 4232 } 4233 4234 ( 4235 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>, 4236 Arc::clone(&vfio_user_pci_device) as Arc<dyn BusDeviceSync>, 4237 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 4238 true, 4239 ) 4240 } 4241 }; 4242 4243 if remove_dma_handler { 4244 for virtio_mem_device in self.virtio_mem_devices.iter() { 4245 virtio_mem_device 4246 .lock() 4247 .unwrap() 4248 .remove_dma_mapping_handler(VirtioMemMappingSource::Device( 4249 pci_device_bdf.into(), 4250 )) 4251 .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?; 4252 } 4253 } 
4254 4255 // Free the allocated BARs 4256 pci_device 4257 .lock() 4258 .unwrap() 4259 .free_bars( 4260 &mut self.address_manager.allocator.lock().unwrap(), 4261 &mut self.pci_segments[pci_segment_id as usize] 4262 .mem32_allocator 4263 .lock() 4264 .unwrap(), 4265 &mut self.pci_segments[pci_segment_id as usize] 4266 .mem64_allocator 4267 .lock() 4268 .unwrap(), 4269 ) 4270 .map_err(DeviceManagerError::FreePciBars)?; 4271 4272 // Remove the device from the PCI bus 4273 self.pci_segments[pci_segment_id as usize] 4274 .pci_bus 4275 .lock() 4276 .unwrap() 4277 .remove_by_device(&pci_device) 4278 .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?; 4279 4280 #[cfg(target_arch = "x86_64")] 4281 // Remove the device from the IO bus 4282 self.io_bus() 4283 .remove_by_device(&bus_device) 4284 .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?; 4285 4286 // Remove the device from the MMIO bus 4287 self.mmio_bus() 4288 .remove_by_device(&bus_device) 4289 .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?; 4290 4291 // Remove the device from the list of BusDevice held by the 4292 // DeviceManager. 
        // Drop the DeviceManager's own strong reference so the BusDevice can
        // actually be freed once the remaining Arc clones go away.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            // Tear down any userspace mappings the device registered with
            // the memory manager before shutting it down.
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the list and
        // buses where it was stored. At the end of this function, after
        // any_device, bus_device and pci_device are released, the actual
        // device will be dropped.
        Ok(())
    }

    /// Plugs an already-created virtio device onto its PCI segment and flags
    /// it in that segment's PCIU ("devices up") bitmap so the guest notices
    /// the hotplug on its next scan.
    fn hotplug_virtio_pci_device(
        &mut self,
        handle: MetaVirtioDevice,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        // Add the virtio device to the device manager list. This is important
        // as the list is used to notify virtio devices about memory updates
        // for instance.
        self.virtio_devices.push(handle.clone());

        // Only wire the device through the virtual IOMMU when requested.
        let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
            self.iommu_mapping.clone()
        } else {
            None
        };

        let bdf = self.add_virtio_pci_device(
            handle.virtio_device,
            &mapping,
            handle.id.clone(),
            handle.pci_segment,
            handle.dma_handler,
        )?;

        // Update the PCIU bitmap
        self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo { id: handle.id, bdf })
    }

    /// Returns true when `pci_segment_id` is listed in the platform
    /// configuration's `iommu_segments`, i.e. devices placed on that segment
    /// sit behind the virtual IOMMU. Missing platform config or missing
    /// `iommu_segments` both yield false.
    fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
        self.config
            .lock()
            .as_ref()
            .unwrap()
            .platform
            .as_ref()
            .map(|pc| {
                pc.iommu_segments
                    .as_ref()
                    .map(|v| v.contains(&pci_segment_id))
                    .unwrap_or_default()
            })
            .unwrap_or_default()
    }

    /// Hotplugs a virtio-block device described by `disk_cfg`.
    pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&disk_cfg.id)?;

        // An IOMMU-attached device may only be hotplugged on a segment that
        // is itself behind the virtual IOMMU.
        if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_block_device(disk_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-fs device described by `fs_cfg`.
    pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&fs_cfg.id)?;

        let device = self.make_virtio_fs_device(fs_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-pmem device described by `pmem_cfg`.
    pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&pmem_cfg.id)?;

        if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_pmem_device(pmem_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-net device described by `net_cfg`.
    pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&net_cfg.id)?;

        if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_net_device(net_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a vDPA device described by `vdpa_cfg`.
    pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vdpa_cfg.id)?;

        if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_vdpa_device(vdpa_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-vsock device described by `vsock_cfg`.
    pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vsock_cfg.id)?;

        if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_vsock_device(vsock_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Collects the counters of every virtio device, keyed by device id.
    /// Devices that expose no counters are skipped.
    pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
        let mut counters = HashMap::new();

        for handle in &self.virtio_devices {
            let virtio_device = handle.virtio_device.lock().unwrap();
            if let Some(device_counters) = virtio_device.counters() {
                counters.insert(handle.id.clone(), device_counters.clone());
            }
        }

        counters
    }

    /// Asks the virtio-balloon device to resize to `size`.
    ///
    /// Returns `MissingVirtioBalloon` when no balloon device was configured.
    pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
        if let Some(balloon) = &self.balloon {
            return balloon
                .lock()
                .unwrap()
                .resize(size)
                .map_err(DeviceManagerError::VirtioBalloonResize);
        }

        warn!("No balloon setup: Can't resize the balloon");
        Err(DeviceManagerError::MissingVirtioBalloon)
    }

    /// Returns the actual size reported by the balloon device, or 0 when no
    /// balloon device is present.
    pub fn balloon_size(&self) -> u64 {
        if let Some(balloon) = &self.balloon {
            return balloon.lock().unwrap().get_actual();
        }

        0
    }

    /// Returns a shared handle to the device tree.
    pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
        self.device_tree.clone()
    }

    /// Notifies the guest of a power-button press through the ACPI GED
    /// notification device.
    #[cfg(target_arch = "x86_64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    /// Notifies the guest of a power-button press, covering both device-tree
    /// (GPIO) and ACPI+UEFI (GED) boot flows.
    #[cfg(target_arch = "aarch64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // There are two use cases:
        // 1. Users will use direct kernel boot with device tree.
        // 2. Users will use ACPI+UEFI boot.

        // Trigger a GPIO pin 3 event to satisfy use case 1.
        self.gpio_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .trigger_key(3)
            .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
        // Trigger a GED power button event to satisfy use case 2.
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification);
    }

    /// Returns the IOMMU device's BDF together with the list of BDFs
    /// attached to it, if a virtual IOMMU was created.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }

    /// Rejects reserved ("__"-prefixed) and already-used device identifiers.
    /// A `None` id is always accepted.
    fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
        if let Some(id) = id {
            // Identifiers starting with "__" are reserved for internal use.
            if id.starts_with("__") {
                return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
            }

            // The device tree holds one node per existing device id, so a
            // hit here means the id is already taken.
            if self.device_tree.lock().unwrap().contains_key(id) {
                return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
            }
        }

        Ok(())
    }

    /// Returns the ACPI platform addresses collected during device creation.
    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
}

/// Returns the NUMA node owning the memory zone `memory_zone_id`, if any.
fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
            return Some(*numa_node_id);
        }
    }

    None
}

/// Returns the NUMA node owning PCI segment `pci_segment_id`, defaulting to
/// node 0 when the segment is not assigned to any node.
fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.pci_segments.contains(&pci_segment_id) {
            return *numa_node_id;
        }
    }

    0
}

/// Marker type whose only purpose is emitting the ACPI (AML) description of
/// the TPM device.
struct TpmDevice {}

impl Aml for TpmDevice {
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        // TPM 2.0 device (_HID "MSFT0101") with a fixed MMIO window at
        // TPM_START/TPM_SIZE; _STA 0xF marks it present and enabled.
        aml::Device::new(
            "TPM2".into(),
            vec![
                &aml::Name::new("_HID".into(), &"MSFT0101"),
                &aml::Name::new("_STA".into(), &(0xF_usize)),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                        true,
                        layout::TPM_START.0 as u32,
                        layout::TPM_SIZE as u32,
                    )]),
                ),
            ],
        )
        .to_aml_bytes(sink)
    }
}

impl Aml for DeviceManager {
    /// Emits the DeviceManager's portion of the ACPI DSDT: the PCI hotplug
    /// controller (PHPR), the per-segment PCI devices, the motherboard
    /// resources (MBRD), the serial port (COM1) when enabled, the S5 sleep
    /// state, the power button (PWRB), the TPM when configured, and the GED
    /// notification device.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // Build one PCNT method call per PCI segment; PSCN below invokes
        // them all so a single scan notification covers every segment.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                // The controller's register block lives at the ACPI address
                // the DeviceManager registered on the MMIO bus.
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                // These four 32-bit fields mirror the BusDevice read/write
                // handlers implemented on DeviceManager.
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                // PCEJ(slot, segment): eject a device by writing its slot
                // bit into B0EJ after selecting the segment via PSEG.
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write PCI bus number (in first argument) to I/O port via field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        // Per-segment PCI host bridge descriptions.
        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        // Reserve each segment's MMIO config space as motherboard resources.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    // x86_64 uses the legacy 0x3f8 I/O port range; aarch64
                    // uses a fixed MMIO window instead.
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        // S5 (soft-off) sleep state.
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        // Power button device.
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}

impl Pausable for DeviceManager {
    /// Pauses every migratable device registered in the device tree.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    /// Resumes every migratable device registered in the device tree.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    /// Snapshots the DeviceManager state and nests inside it the snapshot of
    /// every migratable device found in the device tree.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let mut migratable = migratable.lock().unwrap();
                snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
            }
        }

        Ok(snapshot)
    }
}

impl Transportable for DeviceManager {}

impl Migratable for DeviceManager {
    /// Starts dirty-page logging on every migratable device.
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    /// Stops dirty-page logging on every migratable device.
    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    /// Merges the dirty-memory ranges reported by every migratable device
    /// into a single table.
    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    /// Propagates start-of-migration to every migratable device.
    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_migration()?;
            }
        }
        Ok(())
    }

    /// Propagates end-of-migration to every migratable device.
    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}

// Layout of the PCI hotplug register block exposed to the guest via the
// "PCST" ACPI operation region: four consecutive little-endian 32-bit
// fields (PCIU, PCID, B0EJ, PSEG).
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;

impl BusDevice for DeviceManager {
    /// Handles guest reads of the PCI hotplug register block
    /// (PCIU/PCID/B0EJ/PSEG). PCIU and PCID are read-and-clear.
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_up
                        .to_le_bytes(),
                );
                // Clear the PCIU bitmap
                self.pci_segments[self.selected_segment].pci_devices_up = 0;
            }
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_down
                        .to_le_bytes(),
                );
                // Clear the PCID bitmap
                self.pci_segments[self.selected_segment].pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty bitmap since the eject is always
                // taken care of right away during a write access.
                data.fill(0);
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        )
    }

    /// Handles guest writes: B0EJ ejects every device whose slot bit is set
    /// on the currently selected segment; PSEG selects the segment targeted
    /// by subsequent accesses.
    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
        match offset {
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let mut slot_bitmap = u32::from_le_bytes(data_array);

                // Eject each requested slot, lowest bit first; a failed
                // eject is logged but does not stop the remaining ejects.
                while slot_bitmap > 0 {
                    let slot_id = slot_bitmap.trailing_zeros();
                    if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
                        error!("Failed ejecting device {}: {:?}", slot_id, e);
                    }
                    slot_bitmap &= !(1 << slot_id);
                }
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let selected_segment = u32::from_le_bytes(data_array) as usize;
                // Out-of-range selections are rejected with a log and leave
                // the current selection unchanged.
                if selected_segment >= self.pci_segments.len() {
                    error!(
                        "Segment selection out of range: {} >= {}",
                        selected_segment,
                        self.pci_segments.len()
                    );
                    return None;
                }
                self.selected_segment = selected_segment;
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        );

        None
    }
}

impl Drop for DeviceManager {
    fn drop(&mut self) {
        // Wake up the DeviceManager threads (mainly virtio device workers),
        // to avoid deadlock on waiting for paused/parked worker threads.
        if let Err(e) = self.resume() {
            error!("Error resuming DeviceManager: {:?}", e);
        }

        // Shut down every virtio device worker.
        for handle in self.virtio_devices.drain(..) {
            handle.virtio_device.lock().unwrap().shutdown();
        }

        // Restore the original terminal settings if they were saved earlier.
        if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
            // SAFETY: FFI call
            let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Checks that the MMIO aperture is split between segments proportionally
    // to the supplied per-segment weights.
    #[test]
    fn test_create_mmio_allocators() {
        // Single segment gets the whole range.
        let res = create_mmio_allocators(0x100000, 0x400000, 1, vec![1], 4 << 10);
        assert_eq!(res.len(), 1);
        assert_eq!(
            res[0].lock().unwrap().base(),
            vm_memory::GuestAddress(0x100000)
        );
        assert_eq!(
            res[0].lock().unwrap().end(),
            vm_memory::GuestAddress(0x3fffff)
        );

        // Two equally-weighted segments split the range in half.
        let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![1, 1], 4 << 10);
        assert_eq!(res.len(), 2);
        assert_eq!(
            res[0].lock().unwrap().base(),
            vm_memory::GuestAddress(0x100000)
        );
        assert_eq!(
            res[0].lock().unwrap().end(),
            vm_memory::GuestAddress(0x27ffff)
        );
        assert_eq!(
            res[1].lock().unwrap().base(),
            vm_memory::GuestAddress(0x280000)
        );
        assert_eq!(
            res[1].lock().unwrap().end(),
            vm_memory::GuestAddress(0x3fffff)
        );

        // A 2:1 weighting gives the first segment two thirds of the range.
        let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![2, 1], 4 << 10);
        assert_eq!(res.len(), 2);
        assert_eq!(
            res[0].lock().unwrap().base(),
            vm_memory::GuestAddress(0x100000)
        );
        assert_eq!(
            res[0].lock().unwrap().end(),
            vm_memory::GuestAddress(0x2fffff)
        );
        assert_eq!(
            res[1].lock().unwrap().base(),
            vm_memory::GuestAddress(0x300000)
        );
        assert_eq!(
            res[1].lock().unwrap().end(),
            vm_memory::GuestAddress(0x3fffff)
        );
    }
}