1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use std::collections::{BTreeMap, BTreeSet, HashMap}; 13 use std::fs::{File, OpenOptions}; 14 use std::io::{self, stdout, IsTerminal, Seek, SeekFrom}; 15 use std::num::Wrapping; 16 use std::os::unix::fs::OpenOptionsExt; 17 use std::os::unix::io::{AsRawFd, FromRawFd}; 18 use std::path::PathBuf; 19 use std::result; 20 use std::sync::{Arc, Mutex}; 21 use std::time::Instant; 22 23 use acpi_tables::sdt::GenericAddress; 24 use acpi_tables::{aml, Aml}; 25 use anyhow::anyhow; 26 #[cfg(target_arch = "x86_64")] 27 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 28 use arch::{layout, NumaNodes}; 29 #[cfg(target_arch = "aarch64")] 30 use arch::{DeviceType, MmioDeviceInfo}; 31 use block::async_io::DiskFile; 32 use block::fixed_vhd_sync::FixedVhdDiskSync; 33 use block::qcow_sync::QcowDiskSync; 34 use block::raw_async_aio::RawFileDiskAio; 35 use block::raw_sync::RawFileDiskSync; 36 use block::vhdx_sync::VhdxDiskSync; 37 use block::{ 38 block_aio_is_supported, block_io_uring_is_supported, detect_image_type, qcow, vhdx, ImageType, 39 }; 40 #[cfg(feature = "io_uring")] 41 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; 42 #[cfg(target_arch = "x86_64")] 43 use devices::debug_console::DebugConsole; 44 #[cfg(target_arch = "aarch64")] 45 use devices::gic; 46 use devices::interrupt_controller::InterruptController; 47 #[cfg(target_arch = "x86_64")] 48 use devices::ioapic; 49 #[cfg(target_arch = "aarch64")] 50 use devices::legacy::Pl011; 51 #[cfg(feature = "pvmemcontrol")] 52 use devices::pvmemcontrol::{PvmemcontrolBusDevice, PvmemcontrolPciDevice}; 53 use 
devices::{interrupt_controller, AcpiNotificationFlags}; 54 use hypervisor::IoEventAddress; 55 use libc::{ 56 tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE, 57 TCSANOW, 58 }; 59 use pci::{ 60 DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping, 61 VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError, 62 }; 63 use rate_limiter::group::RateLimiterGroup; 64 use seccompiler::SeccompAction; 65 use serde::{Deserialize, Serialize}; 66 use tracer::trace_scoped; 67 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 68 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator, VirtioTransport}; 69 use virtio_devices::vhost_user::VhostUserConfig; 70 use virtio_devices::{ 71 AccessPlatformMapping, ActivateError, Endpoint, IommuMapping, VdpaDmaMapping, 72 VirtioMemMappingSource, 73 }; 74 use vm_allocator::{AddressAllocator, SystemAllocator}; 75 use vm_device::dma_mapping::ExternalDmaMapping; 76 use vm_device::interrupt::{ 77 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 78 }; 79 use vm_device::{Bus, BusDevice, BusDeviceSync, Resource}; 80 use vm_memory::guest_memory::FileOffset; 81 use vm_memory::{Address, GuestAddress, GuestMemoryRegion, GuestUsize, MmapRegion}; 82 #[cfg(target_arch = "x86_64")] 83 use vm_memory::{GuestAddressSpace, GuestMemory}; 84 use vm_migration::protocol::MemoryRangeTable; 85 use vm_migration::{ 86 snapshot_from_id, state_from_id, Migratable, MigratableError, Pausable, Snapshot, SnapshotData, 87 Snapshottable, Transportable, 88 }; 89 use vm_virtio::{AccessPlatform, VirtioDeviceType}; 90 use vmm_sys_util::eventfd::EventFd; 91 #[cfg(target_arch = "x86_64")] 92 use {devices::debug_console, devices::legacy::Serial}; 93 94 use crate::console_devices::{ConsoleDeviceError, ConsoleInfo, ConsoleOutput}; 95 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; 96 use crate::device_tree::{DeviceNode, DeviceTree}; 97 
use crate::interrupt::{LegacyUserspaceInterruptManager, MsiInterruptManager};
use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
use crate::pci_segment::PciSegment;
use crate::serial_manager::{Error as SerialManagerError, SerialManager};
use crate::vm_config::{
    ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
    VdpaConfig, VhostMode, VmConfig, VsockConfig, DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT,
};
use crate::{device_node, GuestRegionMmap, PciDeviceInfo, DEVICE_MANAGER_SNAPSHOT_ID};

// Size of the MMIO window assigned to each MMIO device on AArch64
// (0x1000 = one 4 KiB page).
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name.
// The double-underscore prefix keeps these fixed identifiers out of the
// namespace of user-provided device names.
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "x86_64")]
const DEBUGCON_DEVICE_NAME: &str = "__debug_console";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
#[cfg(feature = "pvmemcontrol")]
const PVMEMCONTROL_DEVICE_NAME: &str = "__pvmemcontrol";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
const PVPANIC_DEVICE_NAME: &str = "__pvpanic";

// Devices that the user may name and for which we generate
// identifiers if the user doesn't give one.
// Single-underscore prefixes are combined with a counter to mint
// unique IDs (e.g. "_disk0", "_disk1", ...).
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
// NOTE(review): despite sitting in the "user may name" section, this is a
// fixed singleton name (double underscore, no _PREFIX suffix).
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const
VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";

/// Errors associated with device manager
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Cannot create tpm device
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create pvmemcontrol device
    #[cfg(feature = "pvmemcontrol")]
    CreatePvmemcontrol(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    #[cfg(target_arch = "x86_64")]
    /// Error creating debug-console output file
    DebugconOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error creating debug-console pty
    DebugconPtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(ConsoleDeviceError),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// No socket option support for console device
    NoSocketOptionSupportForConsoleDevice,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
479 VirtioDmaUnmap(std::io::Error), 480 481 /// Cannot hotplug device behind vIOMMU 482 InvalidIommuHotplug, 483 484 /// Invalid identifier as it is not unique. 485 IdentifierNotUnique(String), 486 487 /// Invalid identifier 488 InvalidIdentifier(String), 489 490 /// Error activating virtio device 491 VirtioActivate(ActivateError), 492 493 /// Failed retrieving device state from snapshot 494 RestoreGetState(MigratableError), 495 496 /// Cannot create a PvPanic device 497 PvPanicCreate(devices::pvpanic::PvPanicError), 498 499 /// Cannot create a RateLimiterGroup 500 RateLimiterGroupCreate(rate_limiter::group::Error), 501 502 /// Cannot start sigwinch listener 503 StartSigwinchListener(std::io::Error), 504 505 // Invalid console info 506 InvalidConsoleInfo, 507 508 // Invalid console fd 509 InvalidConsoleFd, 510 } 511 512 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>; 513 514 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10; 515 516 #[derive(Default)] 517 pub struct Console { 518 console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>, 519 } 520 521 impl Console { 522 pub fn need_resize(&self) -> bool { 523 if let Some(_resizer) = self.console_resizer.as_ref() { 524 return true; 525 } 526 527 false 528 } 529 530 pub fn update_console_size(&self) { 531 if let Some(resizer) = self.console_resizer.as_ref() { 532 resizer.update_console_size() 533 } 534 } 535 } 536 537 pub(crate) struct AddressManager { 538 pub(crate) allocator: Arc<Mutex<SystemAllocator>>, 539 pub(crate) io_bus: Arc<Bus>, 540 pub(crate) mmio_bus: Arc<Bus>, 541 pub(crate) vm: Arc<dyn hypervisor::Vm>, 542 device_tree: Arc<Mutex<DeviceTree>>, 543 pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 544 pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 545 } 546 547 impl DeviceRelocation for AddressManager { 548 fn move_bar( 549 &self, 550 old_base: u64, 551 new_base: u64, 552 len: u64, 553 pci_dev: &mut dyn PciDevice, 554 region_type: PciBarRegionType, 555 ) -> 
std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                // Update system allocator: release the old PIO range first.
                // NOTE(review): the result of free_io_addresses() is ignored;
                // a failed free is silently tolerated.
                self.allocator
                    .lock()
                    .unwrap()
                    .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                // Reserve the new PIO range at the exact requested base.
                self.allocator
                    .lock()
                    .unwrap()
                    .allocate_io_addresses(Some(GuestAddress(new_base)), len as GuestUsize, None)
                    .ok_or_else(|| {
                        io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                    })?;

                // Update PIO bus so accesses are routed to the new range.
                self.io_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                // Pick the per-segment allocator list matching the BAR width.
                let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
                    &self.pci_mmio32_allocators
                } else {
                    &self.pci_mmio64_allocators
                };

                // Find the specific allocator that this BAR was allocated from and use it for new one
                for allocator in allocators {
                    let allocator_base = allocator.lock().unwrap().base();
                    let allocator_end = allocator.lock().unwrap().end();

                    if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                        allocator
                            .lock()
                            .unwrap()
                            .free(GuestAddress(old_base), len as GuestUsize);

                        // Re-allocate at the new base, aligned to the BAR size.
                        allocator
                            .lock()
                            .unwrap()
                            .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
                            .ok_or_else(|| {
                                io::Error::new(
                                    io::ErrorKind::Other,
                                    "failed allocating new MMIO range",
                                )
                            })?;

                        break;
                    }
                }

                // Update MMIO bus so accesses are routed to the new range.
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device:
        // rewrite the PciBar resource whose base matches the moved BAR.
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                // A named device without a matching BAR resource is an
                // inconsistency — fail the relocation rather than continue.
                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {id} from device tree"),
                ));
            }
        }

        // virtio-pci devices need extra fixups beyond the bus/allocator moves.
        let any_dev = pci_dev.as_any_mut();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The moved BAR is the config BAR: re-register every ioeventfd
                // at its new address (unregister old, then register new).
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {e:?}"),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {e:?}"),
                            )
                        })?;
                }
            } else {
                // Otherwise the moved BAR may be the shared-memory BAR: remap
                // the user memory region backing it at the new address.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        // Remove the mapping at the old guest address.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {e:?}"),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {e:?}"),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {e:?}"),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the device itself update its internal BAR bookkeeping.
        pci_dev.move_bar(old_base, new_base)
    }
}

/// Serializable subset of the device manager used for snapshot/restore.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

/// A pty main endpoint together with the filesystem path it was exposed at.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    // Duplicates the underlying file descriptor so both clones refer to the
    // same pty.
    // NOTE(review): try_clone() failure panics via unwrap() — presumably
    // acceptable because fd duplication only fails on fd exhaustion; confirm.
    fn clone(&self) -> Self {
        PtyPair {
            main: self.main.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

/// The concrete kind of a PCI device managed by the device manager.
#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}

/// A virtio device plus the placement metadata needed to expose it on PCI.
#[derive(Clone)]
struct MetaVirtioDevice {
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // Whether the device sits behind the paravirtualized IOMMU.
    iommu: bool,
    id: String,
    pci_segment: u16,
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}

/// Register addresses of ACPI platform devices, collected while devices are
/// created and later used when building the ACPI tables.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

// Proxy that requests page access from the hypervisor when translating
// addresses on SEV-SNP guests (mshv only).
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
struct SevSnpPageAccessProxy {
    vm: Arc<dyn hypervisor::Vm>,
}
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl std::fmt::Debug for SevSnpPageAccessProxy {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "SNP Page access proxy")
    }
}

#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl SevSnpPageAccessProxy {
    // Builds a proxy around the given VM handle.
    fn new(vm: Arc<dyn hypervisor::Vm>) -> SevSnpPageAccessProxy {
        SevSnpPageAccessProxy { vm }
    }
}

#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl AccessPlatform for SevSnpPageAccessProxy {
    // GPA translation is the identity on SEV-SNP; no page access is gained.
    fn translate_gpa(&self, base: u64, _size: u64) -> std::result::Result<u64, std::io::Error> {
        Ok(base)
    }

    // Identity translation, but first asks the hypervisor to grant access to
    // the pages covering [base, base + size).
    fn translate_gva(&self, base: u64, size: u64) -> std::result::Result<u64, std::io::Error> {
        self.vm
            .gain_page_access(base, size as u32)
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
        Ok(base)
    }
}

pub struct DeviceManager {
    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // Write side used to signal console resizes — presumably fed by the
    // sigwinch listener (see StartSigwinchListener error); confirm at use site.
    console_resize_pipe: Option<Arc<File>>,

    // Original terminal settings, restored on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller (IOAPIC on x86_64, GIC on aarch64)
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<dyn BusDeviceSync>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    // One entry per PCI segment; segment 0 is the default segment.
    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit and reset events, signalled towards the VMM thread.
    exit_evt: EventFd,
    reset_evt: EventFd,

    // Map from (device type, name) to MMIO placement info, used to build the
    // aarch64 FDT.
    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    // MMIO address of the device manager's own ACPI device.
    acpi_address: GuestAddress,

    // Index of the PCI segment currently targeted for device placement.
    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // Bus-device and PCI-device halves of the pvmemcontrol device.
    #[cfg(feature = "pvmemcontrol")]
    pvmemcontrol_devices: Option<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected; None until the first probe.
    io_uring_supported: Option<bool>,

    // aio availability if detected; None until the first probe.
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    // ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    // Snapshot this manager was restored from, if any (drives restore paths below).
    snapshot: Option<Snapshot>,

    // Rate limiter groups shared between block devices, keyed by group id.
    rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,

    mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
}

/// Splits the MMIO range [start, end] into one weighted, alignment-aligned
/// aperture per PCI segment and returns an `AddressAllocator` for each.
///
/// Each segment `s` receives `weights[s]` units of the per-unit chunk size,
/// where the per-unit chunk is `(end - start + 1) / (alignment * total_weight)`
/// rounded down to a multiple of `alignment`, so every aperture starts on an
/// aligned boundary.
///
/// NOTE(review): assumes `weights.len() >= num_pci_segments` and a non-zero
/// total weight; `AddressAllocator::new(..).unwrap()` panics if the computed
/// range is invalid — presumably guaranteed by config validation upstream.
fn create_mmio_allocators(
    start: u64,
    end: u64,
    num_pci_segments: u16,
    weights: Vec<u32>,
    alignment: u64,
) -> Vec<Arc<Mutex<AddressAllocator>>> {
    let total_weight: u32 = weights.iter().sum();

    // Start each PCI segment mmio range on an aligned boundary
    let pci_segment_mmio_size = (end - start + 1) / (alignment * total_weight as u64) * alignment;

    let mut mmio_allocators = vec![];
    // `i` accumulates the weight units already assigned, so each segment's
    // aperture begins where the previous one ended.
    let mut i = 0;
    for segment_id in 0..num_pci_segments as u64 {
        let weight = weights[segment_id as usize] as u64;
        let mmio_start = start + i * pci_segment_mmio_size;
        let mmio_size = pci_segment_mmio_size * weight;
        let allocator = Arc::new(Mutex::new(
            AddressAllocator::new(GuestAddress(mmio_start), mmio_size).unwrap(),
        ));
        mmio_allocators.push(allocator);
        i += weight;
    }

    mmio_allocators
}

impl DeviceManager {
    /// Builds the `DeviceManager`: restores the device tree from `snapshot`
    /// when present, carves up the 32-bit and 64-bit PCI MMIO apertures,
    /// creates the MSI interrupt manager and all PCI segments, optionally
    /// wires the CPU manager onto the MMIO bus (when `dynamic`), starts the
    /// configured rate limiter groups, and finally registers the manager
    /// itself on the MMIO bus at its ACPI address.
    ///
    /// Returns the manager wrapped in `Arc<Mutex<..>>` (it is inserted on the
    /// bus as a `BusDeviceSync`), or a `DeviceManagerError` on any allocation,
    /// bus or restore failure.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        io_bus: Arc<Bus>,
        mmio_bus: Arc<Bus>,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        cpu_manager: Arc<Mutex<CpuManager>>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
        snapshot: Option<Snapshot>,
        dynamic: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        // On restore, resume the device tree and the device-name counter from
        // the snapshot so newly created devices do not collide with saved ids.
        let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
            let state: DeviceManagerState = snapshot.to_state().unwrap();
            (
                Arc::new(Mutex::new(state.device_tree.clone())),
                state.device_id_cnt,
            )
        } else {
            (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
        };

        let num_pci_segments =
            if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
                platform_config.num_pci_segments
            } else {
                1
            };

        // Per-segment 32-bit aperture weights: default for every segment,
        // then overridden by any explicit per-segment configuration.
        let mut mmio32_aperture_weights: Vec<u32> =
            std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
                .take(num_pci_segments.into())
                .collect();
        if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
            for pci_segment in pci_segments.iter() {
                mmio32_aperture_weights[pci_segment.pci_segment as usize] =
                    pci_segment.mmio32_aperture_weight
            }
        }

        let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0;
        let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE;
        // 32-bit apertures are aligned to 4 KiB.
        let pci_mmio32_allocators = create_mmio_allocators(
            start_of_mmio32_area,
            end_of_mmio32_area,
            num_pci_segments,
            mmio32_aperture_weights,
            4 << 10,
        );

        let mut mmio64_aperture_weights: Vec<u32> =
            std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
                .take(num_pci_segments.into())
                .collect();
        if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
            for pci_segment in pci_segments.iter() {
                mmio64_aperture_weights[pci_segment.pci_segment as usize] =
                    pci_segment.mmio64_aperture_weight
            }
        }

        let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0;
        let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0;
        // 64-bit apertures are aligned to 4 GiB.
        let pci_mmio64_allocators = create_mmio_allocators(
            start_of_mmio64_area,
            end_of_mmio64_area,
            num_pci_segments,
            mmio64_aperture_weights,
            4 << 30,
        );

        let address_manager = Arc::new(AddressManager {
            allocator: memory_manager.lock().unwrap().allocator(),
            io_bus,
            mmio_bus,
            vm: vm.clone(),
            device_tree: Arc::clone(&device_tree),
            pci_mmio32_allocators,
            pci_mmio64_allocators,
        });

        // First we create the MSI interrupt manager, the legacy one is created
        // later, after the IOAPIC device creation.
        // The reason we create the MSI one first is because the IOAPIC needs it,
        // and then the legacy interrupt manager needs an IOAPIC. So we're
        // handling a linear dependency chain:
        // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
        let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
            Arc::new(MsiInterruptManager::new(
                Arc::clone(&address_manager.allocator),
                vm,
            ));

        // Reserve the MMIO window the DeviceManager itself will be mapped at
        // (the actual bus insertion happens at the end of this function).
        // NOTE(review): an MMIO allocation failure is mapped to
        // `AllocateIoPort` here — looks like it should be
        // `AllocateMmioAddress`; confirm before changing.
        let acpi_address = address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        // One legacy IRQ slot per PCI device slot (32 slots per bus).
        let mut pci_irq_slots = [0; 32];
        PciSegment::reserve_legacy_interrupts_for_pci_devices(
            &address_manager,
            &mut pci_irq_slots,
        )?;

        // Segment 0 is the default segment; the remaining segments get their
        // own apertures and NUMA node association.
        let mut pci_segments = vec![PciSegment::new_default_segment(
            &address_manager,
            Arc::clone(&address_manager.pci_mmio32_allocators[0]),
            Arc::clone(&address_manager.pci_mmio64_allocators[0]),
            &pci_irq_slots,
        )?];

        for i in 1..num_pci_segments as usize {
            pci_segments.push(PciSegment::new(
                i as u16,
                numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
                &address_manager,
                Arc::clone(&address_manager.pci_mmio32_allocators[i]),
                Arc::clone(&address_manager.pci_mmio64_allocators[i]),
                &pci_irq_slots,
            )?);
        }

        // With dynamic CPU management, the CPU manager is itself an MMIO
        // device reachable through ACPI.
        if dynamic {
            let acpi_address = address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
                .ok_or(DeviceManagerError::AllocateMmioAddress)?;

            address_manager
                .mmio_bus
                .insert(
                    cpu_manager.clone(),
                    acpi_address.0,
                    CPU_MANAGER_ACPI_SIZE as u64,
                )
                .map_err(DeviceManagerError::BusError)?;

            cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
        }

        // Create and start every configured rate limiter group up front so
        // block devices can attach to them later by id.
        let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new();
        if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() {
            for rate_limit_group_cfg in rate_limit_groups_cfg {
                let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config;
                let bw = rate_limit_cfg.bandwidth.unwrap_or_default();
                let ops = rate_limit_cfg.ops.unwrap_or_default();
                let mut rate_limit_group = RateLimiterGroup::new(
                    &rate_limit_group_cfg.id,
                    bw.size,
                    bw.one_time_burst.unwrap_or(0),
                    bw.refill_time,
                    ops.size,
                    ops.one_time_burst.unwrap_or(0),
                    ops.refill_time,
                )
                .map_err(DeviceManagerError::RateLimiterGroupCreate)?;

                let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?;

                rate_limit_group.start_thread(exit_evt).unwrap();
                rate_limit_groups
                    .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group));
            }
        }

        let device_manager = DeviceManager {
            address_manager: Arc::clone(&address_manager),
            console: Arc::new(Console::default()),
            interrupt_controller: None,
            #[cfg(target_arch = "aarch64")]
            cmdline_additions: Vec::new(),
            ged_notification_device: None,
            config,
            memory_manager,
            cpu_manager,
            virtio_devices: Vec::new(),
            bus_devices: Vec::new(),
            device_id_cnt,
            msi_interrupt_manager,
            legacy_interrupt_manager: None,
            passthrough_device: None,
            vfio_container: None,
            iommu_device: None,
            iommu_mapping: None,
            iommu_attached_devices: None,
            pci_segments,
            device_tree,
            exit_evt,
            reset_evt,
            #[cfg(target_arch = "aarch64")]
            id_to_dev_info: HashMap::new(),
            seccomp_action,
            numa_nodes,
            balloon: None,
            activate_evt: activate_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            acpi_address,
            selected_segment: 0,
            serial_manager: None,
            console_resize_pipe: None,
            original_termios_opt: Arc::new(Mutex::new(None)),
            virtio_mem_devices: Vec::new(),
            #[cfg(target_arch = "aarch64")]
            gpio_device: None,
            #[cfg(feature = "pvmemcontrol")]
            pvmemcontrol_devices: None,
            pvpanic_device: None,
            force_iommu,
            io_uring_supported: None,
            aio_supported: None,
            boot_id_list,
            timestamp,
            pending_activations: Arc::new(Mutex::new(Vec::default())),
            acpi_platform_addresses: AcpiPlatformAddresses::default(),
            snapshot,
            rate_limit_groups,
            mmio_regions: Arc::new(Mutex::new(Vec::new())),
        };

        let device_manager = Arc::new(Mutex::new(device_manager));

        // Expose the DeviceManager itself on the MMIO bus at the address
        // reserved earlier.
        address_manager
            .mmio_bus
            .insert(
                Arc::clone(&device_manager) as Arc<dyn BusDeviceSync>,
                acpi_address.0,
                DEVICE_MANAGER_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(device_manager)
    }

    /// Returns a clone of the pipe used to signal console resize events, if
    /// one was set up (see `add_virtio_console_device`).
    pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
        self.console_resize_pipe.clone()
    }

    /// Creates all the guest devices: the interrupt controller, the legacy
    /// interrupt manager, legacy/ACPI platform devices, console devices,
    /// the TPM (if configured), every virtio device, and the PCI topology
    /// (plus pvmemcontrol / pvpanic when enabled). Must be called once after
    /// `new`.
    pub fn create_devices(
        &mut self,
        console_info: Option<ConsoleInfo>,
        console_resize_pipe: Option<Arc<File>>,
        original_termios_opt: Arc<Mutex<Option<termios>>>,
    ) -> DeviceManagerResult<()> {
        trace_scoped!("create_devices");

        let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();

        let interrupt_controller = self.add_interrupt_controller()?;

        self.cpu_manager
            .lock()
            .unwrap()
            .set_interrupt_controller(interrupt_controller.clone());

        // Now we can create the legacy interrupt manager, which needs the freshly
        // formed IOAPIC device.
        let legacy_interrupt_manager: Arc<
            dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
        > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
            &interrupt_controller,
        )));

        {
            // The memory manager may also be ACPI-addressable (hotplug); if
            // so, expose it on the MMIO bus.
            if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
                self.address_manager
                    .mmio_bus
                    .insert(
                        Arc::clone(&self.memory_manager) as Arc<dyn BusDeviceSync>,
                        acpi_address.0,
                        MEMORY_MANAGER_ACPI_SIZE as u64,
                    )
                    .map_err(DeviceManagerError::BusError)?;
            }
        }

        #[cfg(target_arch = "x86_64")]
        self.add_legacy_devices(
            self.reset_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )?;

        #[cfg(target_arch = "aarch64")]
        self.add_legacy_devices(&legacy_interrupt_manager)?;

        {
            self.ged_notification_device = self.add_acpi_devices(
                &legacy_interrupt_manager,
                self.reset_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )?;
        }

        self.original_termios_opt = original_termios_opt;

        self.console = self.add_console_devices(
            &legacy_interrupt_manager,
            &mut virtio_devices,
            console_info,
            console_resize_pipe,
        )?;

        if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
            let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
            self.bus_devices
                .push(Arc::clone(&tpm_dev) as Arc<dyn BusDeviceSync>)
        }
        self.legacy_interrupt_manager = Some(legacy_interrupt_manager);

        virtio_devices.append(&mut self.make_virtio_devices()?);

        self.add_pci_devices(virtio_devices.clone())?;

        self.virtio_devices = virtio_devices;

        // Add pvmemcontrol if required
        #[cfg(feature = "pvmemcontrol")]
        {
            if self.config.lock().unwrap().pvmemcontrol.is_some() {
                let (pvmemcontrol_bus_device, pvmemcontrol_pci_device) =
                    self.make_pvmemcontrol_device()?;
                self.pvmemcontrol_devices =
                    Some((pvmemcontrol_bus_device, pvmemcontrol_pci_device));
            }
        }

        if self.config.clone().lock().unwrap().pvpanic {
            self.pvpanic_device = self.add_pvpanic_device()?;
        }

        Ok(())
    }

    /// Captures the snapshottable state of this manager: the device tree and
    /// the device-name counter.
    fn state(&self) -> DeviceManagerState {
        DeviceManagerState {
            device_tree: self.device_tree.lock().unwrap().clone(),
            device_id_cnt: self.device_id_cnt,
        }
    }

    /// Returns the (start, end) guest-physical range the IOMMU should treat
    /// as the MSI doorbell window: derived from the vGIC ITS configuration on
    /// aarch64, the fixed APIC range on x86_64.
    fn get_msi_iova_space(&mut self) -> (u64, u64) {
        #[cfg(target_arch = "aarch64")]
        {
            let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
            let vgic_config = gic::Gic::create_default_config(vcpus.into());
            (
                vgic_config.msi_addr,
                vgic_config.msi_addr + vgic_config.msi_size - 1,
            )
        }
        #[cfg(target_arch = "x86_64")]
        (0xfee0_0000, 0xfeef_ffff)
    }

    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }

    /// Attaches every device to the PCI bus: the optional virtio-iommu, all
    /// virtio devices (recording which ones sit behind the IOMMU), VFIO and
    /// vfio-user devices, plus any whole segments forced behind the IOMMU by
    /// platform config. Finally registers each segment's config-space
    /// accessors as bus devices.
    #[allow(unused_variables)]
    fn add_pci_devices(
        &mut self,
        virtio_devices: Vec<MetaVirtioDevice>,
    ) -> DeviceManagerResult<()> {
        let iommu_id = String::from(IOMMU_DEVICE_NAME);

        let iommu_device = if self.config.lock().unwrap().iommu {
            let (device, mapping) = virtio_devices::Iommu::new(
                iommu_id.clone(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.get_msi_iova_space(),
                state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioIommu)?;
            let device = Arc::new(Mutex::new(device));
            self.iommu_device = Some(Arc::clone(&device));
            self.iommu_mapping = Some(mapping);

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(iommu_id.clone(), device_node!(iommu_id, device));

            Some(device)
        } else {
            None
        };

        let mut iommu_attached_devices = Vec::new();
        {
            for handle in virtio_devices {
                // Only hand the IOMMU mapping to devices that asked to be
                // placed behind the IOMMU.
                let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
                    self.iommu_mapping.clone()
                } else {
                    None
                };

                let dev_id = self.add_virtio_pci_device(
                    handle.virtio_device,
                    &mapping,
                    handle.id,
                    handle.pci_segment,
                    handle.dma_handler,
                )?;

                if handle.iommu {
                    iommu_attached_devices.push(dev_id);
                }
            }

            let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
            iommu_attached_devices.append(&mut vfio_iommu_device_ids);

            let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
            iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);

            // Add all devices from forced iommu segments
            if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
                if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
                    for segment in iommu_segments {
                        // 32 device slots per bus; attach every possible BDF
                        // on the segment (bus 0, function 0).
                        for device in 0..32 {
                            let bdf = PciBdf::new(*segment, 0, device, 0);
                            if !iommu_attached_devices.contains(&bdf) {
                                iommu_attached_devices.push(bdf);
                            }
                        }
                    }
                }
            }

            // The IOMMU device itself always lives on segment 0 and is never
            // behind itself.
            if let Some(iommu_device) = iommu_device {
                let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
                self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
            }
        }

        for segment in &self.pci_segments {
            #[cfg(target_arch = "x86_64")]
            if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
                self.bus_devices
                    .push(Arc::clone(pci_config_io) as Arc<dyn BusDeviceSync>);
            }

            self.bus_devices
                .push(Arc::clone(&segment.pci_config_mmio) as Arc<dyn BusDeviceSync>);
        }

        Ok(())
    }

#[cfg(target_arch = "aarch64")] 1460 fn add_interrupt_controller( 1461 &mut self, 1462 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1463 let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new( 1464 gic::Gic::new( 1465 self.config.lock().unwrap().cpus.boot_vcpus, 1466 Arc::clone(&self.msi_interrupt_manager), 1467 self.address_manager.vm.clone(), 1468 ) 1469 .map_err(DeviceManagerError::CreateInterruptController)?, 1470 )); 1471 1472 self.interrupt_controller = Some(interrupt_controller.clone()); 1473 1474 // Restore the vGic if this is in the process of restoration 1475 let id = String::from(gic::GIC_SNAPSHOT_ID); 1476 if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) { 1477 // PMU support is optional. Nothing should be impacted if the PMU initialization failed. 1478 if self 1479 .cpu_manager 1480 .lock() 1481 .unwrap() 1482 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16) 1483 .is_err() 1484 { 1485 info!("Failed to initialize PMU"); 1486 } 1487 1488 let vgic_state = vgic_snapshot 1489 .to_state() 1490 .map_err(DeviceManagerError::RestoreGetState)?; 1491 let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states(); 1492 interrupt_controller 1493 .lock() 1494 .unwrap() 1495 .restore_vgic(vgic_state, &saved_vcpu_states) 1496 .unwrap(); 1497 } 1498 1499 self.device_tree 1500 .lock() 1501 .unwrap() 1502 .insert(id.clone(), device_node!(id, interrupt_controller)); 1503 1504 Ok(interrupt_controller) 1505 } 1506 1507 #[cfg(target_arch = "aarch64")] 1508 pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> { 1509 self.interrupt_controller.as_ref() 1510 } 1511 1512 #[cfg(target_arch = "x86_64")] 1513 fn add_interrupt_controller( 1514 &mut self, 1515 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1516 let id = String::from(IOAPIC_DEVICE_NAME); 1517 1518 // Create IOAPIC 1519 let interrupt_controller = Arc::new(Mutex::new( 1520 ioapic::Ioapic::new( 1521 
id.clone(), 1522 APIC_START, 1523 Arc::clone(&self.msi_interrupt_manager), 1524 state_from_id(self.snapshot.as_ref(), id.as_str()) 1525 .map_err(DeviceManagerError::RestoreGetState)?, 1526 ) 1527 .map_err(DeviceManagerError::CreateInterruptController)?, 1528 )); 1529 1530 self.interrupt_controller = Some(interrupt_controller.clone()); 1531 1532 self.address_manager 1533 .mmio_bus 1534 .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE) 1535 .map_err(DeviceManagerError::BusError)?; 1536 1537 self.bus_devices 1538 .push(Arc::clone(&interrupt_controller) as Arc<dyn BusDeviceSync>); 1539 1540 // Fill the device tree with a new node. In case of restore, we 1541 // know there is nothing to do, so we can simply override the 1542 // existing entry. 1543 self.device_tree 1544 .lock() 1545 .unwrap() 1546 .insert(id.clone(), device_node!(id, interrupt_controller)); 1547 1548 Ok(interrupt_controller) 1549 } 1550 1551 fn add_acpi_devices( 1552 &mut self, 1553 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1554 reset_evt: EventFd, 1555 exit_evt: EventFd, 1556 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> { 1557 let vcpus_kill_signalled = self 1558 .cpu_manager 1559 .lock() 1560 .unwrap() 1561 .vcpus_kill_signalled() 1562 .clone(); 1563 let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new( 1564 exit_evt, 1565 reset_evt, 1566 vcpus_kill_signalled, 1567 ))); 1568 1569 self.bus_devices 1570 .push(Arc::clone(&shutdown_device) as Arc<dyn BusDeviceSync>); 1571 1572 #[cfg(target_arch = "x86_64")] 1573 { 1574 let shutdown_pio_address: u16 = 0x600; 1575 1576 self.address_manager 1577 .allocator 1578 .lock() 1579 .unwrap() 1580 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None) 1581 .ok_or(DeviceManagerError::AllocateIoPort)?; 1582 1583 self.address_manager 1584 .io_bus 1585 .insert(shutdown_device, shutdown_pio_address.into(), 0x4) 1586 
.map_err(DeviceManagerError::BusError)?; 1587 1588 self.acpi_platform_addresses.sleep_control_reg_address = 1589 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1590 self.acpi_platform_addresses.sleep_status_reg_address = 1591 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1592 self.acpi_platform_addresses.reset_reg_address = 1593 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1594 } 1595 1596 let ged_irq = self 1597 .address_manager 1598 .allocator 1599 .lock() 1600 .unwrap() 1601 .allocate_irq() 1602 .unwrap(); 1603 let interrupt_group = interrupt_manager 1604 .create_group(LegacyIrqGroupConfig { 1605 irq: ged_irq as InterruptIndex, 1606 }) 1607 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1608 let ged_address = self 1609 .address_manager 1610 .allocator 1611 .lock() 1612 .unwrap() 1613 .allocate_platform_mmio_addresses( 1614 None, 1615 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1616 None, 1617 ) 1618 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 1619 let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new( 1620 interrupt_group, 1621 ged_irq, 1622 ged_address, 1623 ))); 1624 self.address_manager 1625 .mmio_bus 1626 .insert( 1627 ged_device.clone(), 1628 ged_address.0, 1629 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1630 ) 1631 .map_err(DeviceManagerError::BusError)?; 1632 self.bus_devices 1633 .push(Arc::clone(&ged_device) as Arc<dyn BusDeviceSync>); 1634 1635 let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new())); 1636 1637 self.bus_devices 1638 .push(Arc::clone(&pm_timer_device) as Arc<dyn BusDeviceSync>); 1639 1640 #[cfg(target_arch = "x86_64")] 1641 { 1642 let pm_timer_pio_address: u16 = 0x608; 1643 1644 self.address_manager 1645 .allocator 1646 .lock() 1647 .unwrap() 1648 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None) 1649 .ok_or(DeviceManagerError::AllocateIoPort)?; 1650 1651 self.address_manager 1652 .io_bus 1653 
.insert(pm_timer_device, pm_timer_pio_address.into(), 0x4) 1654 .map_err(DeviceManagerError::BusError)?; 1655 1656 self.acpi_platform_addresses.pm_timer_address = 1657 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address)); 1658 } 1659 1660 Ok(Some(ged_device)) 1661 } 1662 1663 #[cfg(target_arch = "x86_64")] 1664 fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> { 1665 let vcpus_kill_signalled = self 1666 .cpu_manager 1667 .lock() 1668 .unwrap() 1669 .vcpus_kill_signalled() 1670 .clone(); 1671 // Add a shutdown device (i8042) 1672 let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new( 1673 reset_evt.try_clone().unwrap(), 1674 vcpus_kill_signalled.clone(), 1675 ))); 1676 1677 self.bus_devices 1678 .push(Arc::clone(&i8042) as Arc<dyn BusDeviceSync>); 1679 1680 self.address_manager 1681 .io_bus 1682 .insert(i8042, 0x61, 0x4) 1683 .map_err(DeviceManagerError::BusError)?; 1684 { 1685 // Add a CMOS emulated device 1686 let mem_size = self 1687 .memory_manager 1688 .lock() 1689 .unwrap() 1690 .guest_memory() 1691 .memory() 1692 .last_addr() 1693 .0 1694 + 1; 1695 let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size); 1696 let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0); 1697 1698 let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new( 1699 mem_below_4g, 1700 mem_above_4g, 1701 reset_evt, 1702 Some(vcpus_kill_signalled), 1703 ))); 1704 1705 self.bus_devices 1706 .push(Arc::clone(&cmos) as Arc<dyn BusDeviceSync>); 1707 1708 self.address_manager 1709 .io_bus 1710 .insert(cmos, 0x70, 0x2) 1711 .map_err(DeviceManagerError::BusError)?; 1712 1713 let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new())); 1714 1715 self.bus_devices 1716 .push(Arc::clone(&fwdebug) as Arc<dyn BusDeviceSync>); 1717 1718 self.address_manager 1719 .io_bus 1720 .insert(fwdebug, 0x402, 0x1) 1721 .map_err(DeviceManagerError::BusError)?; 1722 } 1723 1724 // 0x80 debug port 
1725 let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp))); 1726 self.bus_devices 1727 .push(Arc::clone(&debug_port) as Arc<dyn BusDeviceSync>); 1728 self.address_manager 1729 .io_bus 1730 .insert(debug_port, 0x80, 0x1) 1731 .map_err(DeviceManagerError::BusError)?; 1732 1733 Ok(()) 1734 } 1735 1736 #[cfg(target_arch = "aarch64")] 1737 fn add_legacy_devices( 1738 &mut self, 1739 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1740 ) -> DeviceManagerResult<()> { 1741 // Add a RTC device 1742 let rtc_irq = self 1743 .address_manager 1744 .allocator 1745 .lock() 1746 .unwrap() 1747 .allocate_irq() 1748 .unwrap(); 1749 1750 let interrupt_group = interrupt_manager 1751 .create_group(LegacyIrqGroupConfig { 1752 irq: rtc_irq as InterruptIndex, 1753 }) 1754 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1755 1756 let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group))); 1757 1758 self.bus_devices 1759 .push(Arc::clone(&rtc_device) as Arc<dyn BusDeviceSync>); 1760 1761 let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START; 1762 1763 self.address_manager 1764 .mmio_bus 1765 .insert(rtc_device, addr.0, MMIO_LEN) 1766 .map_err(DeviceManagerError::BusError)?; 1767 1768 self.id_to_dev_info.insert( 1769 (DeviceType::Rtc, "rtc".to_string()), 1770 MmioDeviceInfo { 1771 addr: addr.0, 1772 len: MMIO_LEN, 1773 irq: rtc_irq, 1774 }, 1775 ); 1776 1777 // Add a GPIO device 1778 let id = String::from(GPIO_DEVICE_NAME); 1779 let gpio_irq = self 1780 .address_manager 1781 .allocator 1782 .lock() 1783 .unwrap() 1784 .allocate_irq() 1785 .unwrap(); 1786 1787 let interrupt_group = interrupt_manager 1788 .create_group(LegacyIrqGroupConfig { 1789 irq: gpio_irq as InterruptIndex, 1790 }) 1791 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1792 1793 let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new( 1794 id.clone(), 1795 interrupt_group, 1796 state_from_id(self.snapshot.as_ref(), 
id.as_str()) 1797 .map_err(DeviceManagerError::RestoreGetState)?, 1798 ))); 1799 1800 self.bus_devices 1801 .push(Arc::clone(&gpio_device) as Arc<dyn BusDeviceSync>); 1802 1803 let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START; 1804 1805 self.address_manager 1806 .mmio_bus 1807 .insert(gpio_device.clone(), addr.0, MMIO_LEN) 1808 .map_err(DeviceManagerError::BusError)?; 1809 1810 self.gpio_device = Some(gpio_device.clone()); 1811 1812 self.id_to_dev_info.insert( 1813 (DeviceType::Gpio, "gpio".to_string()), 1814 MmioDeviceInfo { 1815 addr: addr.0, 1816 len: MMIO_LEN, 1817 irq: gpio_irq, 1818 }, 1819 ); 1820 1821 self.device_tree 1822 .lock() 1823 .unwrap() 1824 .insert(id.clone(), device_node!(id, gpio_device)); 1825 1826 Ok(()) 1827 } 1828 1829 #[cfg(target_arch = "x86_64")] 1830 fn add_debug_console_device( 1831 &mut self, 1832 debug_console_writer: Box<dyn io::Write + Send>, 1833 ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> { 1834 let id = String::from(DEBUGCON_DEVICE_NAME); 1835 let debug_console = Arc::new(Mutex::new(DebugConsole::new( 1836 id.clone(), 1837 debug_console_writer, 1838 ))); 1839 1840 let port = self 1841 .config 1842 .lock() 1843 .unwrap() 1844 .debug_console 1845 .clone() 1846 .iobase 1847 .map(|port| port as u64) 1848 .unwrap_or(debug_console::DEFAULT_PORT); 1849 1850 self.bus_devices 1851 .push(Arc::clone(&debug_console) as Arc<dyn BusDeviceSync>); 1852 1853 self.address_manager 1854 .allocator 1855 .lock() 1856 .unwrap() 1857 .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None) 1858 .ok_or(DeviceManagerError::AllocateIoPort)?; 1859 1860 self.address_manager 1861 .io_bus 1862 .insert(debug_console.clone(), port, 0x1) 1863 .map_err(DeviceManagerError::BusError)?; 1864 1865 // Fill the device tree with a new node. In case of restore, we 1866 // know there is nothing to do, so we can simply override the 1867 // existing entry. 
1868 self.device_tree 1869 .lock() 1870 .unwrap() 1871 .insert(id.clone(), device_node!(id, debug_console)); 1872 1873 Ok(debug_console) 1874 } 1875 1876 #[cfg(target_arch = "x86_64")] 1877 fn add_serial_device( 1878 &mut self, 1879 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1880 serial_writer: Option<Box<dyn io::Write + Send>>, 1881 ) -> DeviceManagerResult<Arc<Mutex<Serial>>> { 1882 // Serial is tied to IRQ #4 1883 let serial_irq = 4; 1884 1885 let id = String::from(SERIAL_DEVICE_NAME); 1886 1887 let interrupt_group = interrupt_manager 1888 .create_group(LegacyIrqGroupConfig { 1889 irq: serial_irq as InterruptIndex, 1890 }) 1891 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1892 1893 let serial = Arc::new(Mutex::new(Serial::new( 1894 id.clone(), 1895 interrupt_group, 1896 serial_writer, 1897 state_from_id(self.snapshot.as_ref(), id.as_str()) 1898 .map_err(DeviceManagerError::RestoreGetState)?, 1899 ))); 1900 1901 self.bus_devices 1902 .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>); 1903 1904 self.address_manager 1905 .allocator 1906 .lock() 1907 .unwrap() 1908 .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None) 1909 .ok_or(DeviceManagerError::AllocateIoPort)?; 1910 1911 self.address_manager 1912 .io_bus 1913 .insert(serial.clone(), 0x3f8, 0x8) 1914 .map_err(DeviceManagerError::BusError)?; 1915 1916 // Fill the device tree with a new node. In case of restore, we 1917 // know there is nothing to do, so we can simply override the 1918 // existing entry. 
1919 self.device_tree 1920 .lock() 1921 .unwrap() 1922 .insert(id.clone(), device_node!(id, serial)); 1923 1924 Ok(serial) 1925 } 1926 1927 #[cfg(target_arch = "aarch64")] 1928 fn add_serial_device( 1929 &mut self, 1930 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1931 serial_writer: Option<Box<dyn io::Write + Send>>, 1932 ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> { 1933 let id = String::from(SERIAL_DEVICE_NAME); 1934 1935 let serial_irq = self 1936 .address_manager 1937 .allocator 1938 .lock() 1939 .unwrap() 1940 .allocate_irq() 1941 .unwrap(); 1942 1943 let interrupt_group = interrupt_manager 1944 .create_group(LegacyIrqGroupConfig { 1945 irq: serial_irq as InterruptIndex, 1946 }) 1947 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1948 1949 let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new( 1950 id.clone(), 1951 interrupt_group, 1952 serial_writer, 1953 self.timestamp, 1954 state_from_id(self.snapshot.as_ref(), id.as_str()) 1955 .map_err(DeviceManagerError::RestoreGetState)?, 1956 ))); 1957 1958 self.bus_devices 1959 .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>); 1960 1961 let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START; 1962 1963 self.address_manager 1964 .mmio_bus 1965 .insert(serial.clone(), addr.0, MMIO_LEN) 1966 .map_err(DeviceManagerError::BusError)?; 1967 1968 self.id_to_dev_info.insert( 1969 (DeviceType::Serial, DeviceType::Serial.to_string()), 1970 MmioDeviceInfo { 1971 addr: addr.0, 1972 len: MMIO_LEN, 1973 irq: serial_irq, 1974 }, 1975 ); 1976 1977 self.cmdline_additions 1978 .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0)); 1979 1980 // Fill the device tree with a new node. In case of restore, we 1981 // know there is nothing to do, so we can simply override the 1982 // existing entry. 
1983 self.device_tree 1984 .lock() 1985 .unwrap() 1986 .insert(id.clone(), device_node!(id, serial)); 1987 1988 Ok(serial) 1989 } 1990 1991 fn add_virtio_console_device( 1992 &mut self, 1993 virtio_devices: &mut Vec<MetaVirtioDevice>, 1994 console_fd: ConsoleOutput, 1995 resize_pipe: Option<Arc<File>>, 1996 ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> { 1997 let console_config = self.config.lock().unwrap().console.clone(); 1998 let endpoint = match console_fd { 1999 ConsoleOutput::File(file) => Endpoint::File(file), 2000 ConsoleOutput::Pty(file) => { 2001 self.console_resize_pipe = resize_pipe; 2002 Endpoint::PtyPair(Arc::new(file.try_clone().unwrap()), file) 2003 } 2004 ConsoleOutput::Tty(stdout) => { 2005 if stdout.is_terminal() { 2006 self.console_resize_pipe = resize_pipe; 2007 } 2008 2009 // If an interactive TTY then we can accept input 2010 // SAFETY: FFI call. Trivially safe. 2011 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } { 2012 // SAFETY: FFI call to dup. Trivially safe. 2013 let stdin = unsafe { libc::dup(libc::STDIN_FILENO) }; 2014 if stdin == -1 { 2015 return vmm_sys_util::errno::errno_result() 2016 .map_err(DeviceManagerError::DupFd); 2017 } 2018 // SAFETY: stdin is valid and owned solely by us. 
2019 let stdin = unsafe { File::from_raw_fd(stdin) }; 2020 Endpoint::FilePair(stdout, Arc::new(stdin)) 2021 } else { 2022 Endpoint::File(stdout) 2023 } 2024 } 2025 ConsoleOutput::Socket(_) => { 2026 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice); 2027 } 2028 ConsoleOutput::Null => Endpoint::Null, 2029 ConsoleOutput::Off => return Ok(None), 2030 }; 2031 let id = String::from(CONSOLE_DEVICE_NAME); 2032 2033 let (virtio_console_device, console_resizer) = virtio_devices::Console::new( 2034 id.clone(), 2035 endpoint, 2036 self.console_resize_pipe 2037 .as_ref() 2038 .map(|p| p.try_clone().unwrap()), 2039 self.force_iommu | console_config.iommu, 2040 self.seccomp_action.clone(), 2041 self.exit_evt 2042 .try_clone() 2043 .map_err(DeviceManagerError::EventFd)?, 2044 state_from_id(self.snapshot.as_ref(), id.as_str()) 2045 .map_err(DeviceManagerError::RestoreGetState)?, 2046 ) 2047 .map_err(DeviceManagerError::CreateVirtioConsole)?; 2048 let virtio_console_device = Arc::new(Mutex::new(virtio_console_device)); 2049 virtio_devices.push(MetaVirtioDevice { 2050 virtio_device: Arc::clone(&virtio_console_device) 2051 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2052 iommu: console_config.iommu, 2053 id: id.clone(), 2054 pci_segment: 0, 2055 dma_handler: None, 2056 }); 2057 2058 // Fill the device tree with a new node. In case of restore, we 2059 // know there is nothing to do, so we can simply override the 2060 // existing entry. 2061 self.device_tree 2062 .lock() 2063 .unwrap() 2064 .insert(id.clone(), device_node!(id, virtio_console_device)); 2065 2066 // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY 2067 Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) { 2068 Some(console_resizer) 2069 } else { 2070 None 2071 }) 2072 } 2073 2074 /// Adds all devices that behave like a console with respect to the VM 2075 /// configuration. 
    /// This includes:
    /// - debug-console
    /// - serial-console
    /// - virtio-console
    fn add_console_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_info: Option<ConsoleInfo>,
        console_resize_pipe: Option<Arc<File>>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        // Console information must have been prepared by the caller.
        if console_info.is_none() {
            return Err(DeviceManagerError::InvalidConsoleInfo);
        }

        // SAFETY: console_info is Some, so it's safe to unwrap.
        let console_info = console_info.unwrap();

        // Only file-backed and TTY-backed outputs get a direct writer for the
        // serial device; the remaining modes either discard output or deliver
        // it through the SerialManager created below.
        let serial_writer: Option<Box<dyn io::Write + Send>> = match console_info.serial_main_fd {
            ConsoleOutput::File(ref file) | ConsoleOutput::Tty(ref file) => {
                Some(Box::new(Arc::clone(file)))
            }
            ConsoleOutput::Off
            | ConsoleOutput::Null
            | ConsoleOutput::Pty(_)
            | ConsoleOutput::Socket(_) => None,
        };

        if !matches!(console_info.serial_main_fd, ConsoleOutput::Off) {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            // A SerialManager thread is only spawned for interactive
            // endpoints (PTY, TTY, socket); plain file/null output does not
            // need input handling.
            self.serial_manager = match console_info.serial_main_fd {
                ConsoleOutput::Pty(_) | ConsoleOutput::Tty(_) | ConsoleOutput::Socket(_) => {
                    let serial_manager = SerialManager::new(
                        serial,
                        console_info.serial_main_fd,
                        serial_config.socket,
                    )
                    .map_err(DeviceManagerError::CreateSerialManager)?;
                    if let Some(mut serial_manager) = serial_manager {
                        serial_manager
                            .start_thread(
                                self.exit_evt
                                    .try_clone()
                                    .map_err(DeviceManagerError::EventFd)?,
                            )
                            .map_err(DeviceManagerError::SpawnSerialManager)?;
                        Some(Arc::new(serial_manager))
                    } else {
                        None
                    }
                }
                _ => None,
            };
        }

        // The debug console device is only built on x86_64.
        #[cfg(target_arch = "x86_64")]
        {
            let debug_console_writer: Option<Box<dyn io::Write + Send>> =
                match console_info.debug_main_fd {
                    ConsoleOutput::File(file) | ConsoleOutput::Tty(file) => Some(Box::new(file)),
                    ConsoleOutput::Off
                    | ConsoleOutput::Null
                    | ConsoleOutput::Pty(_)
                    | ConsoleOutput::Socket(_) => None,
                };
            if let Some(writer) = debug_console_writer {
                let _ = self.add_debug_console_device(writer)?;
            }
        }

        let console_resizer = self.add_virtio_console_device(
            virtio_devices,
            console_info.console_main_fd,
            console_resize_pipe,
        )?;

        Ok(Arc::new(Console { console_resizer }))
    }
virtio-balloon if required 2198 devices.append(&mut self.make_virtio_balloon_devices()?); 2199 2200 // Add virtio-watchdog device 2201 devices.append(&mut self.make_virtio_watchdog_devices()?); 2202 2203 // Add vDPA devices if required 2204 devices.append(&mut self.make_vdpa_devices()?); 2205 2206 Ok(devices) 2207 } 2208 2209 // Cache whether aio is supported to avoid checking for very block device 2210 fn aio_is_supported(&mut self) -> bool { 2211 if let Some(supported) = self.aio_supported { 2212 return supported; 2213 } 2214 2215 let supported = block_aio_is_supported(); 2216 self.aio_supported = Some(supported); 2217 supported 2218 } 2219 2220 // Cache whether io_uring is supported to avoid probing for very block device 2221 fn io_uring_is_supported(&mut self) -> bool { 2222 if let Some(supported) = self.io_uring_supported { 2223 return supported; 2224 } 2225 2226 let supported = block_io_uring_is_supported(); 2227 self.io_uring_supported = Some(supported); 2228 supported 2229 } 2230 2231 fn make_virtio_block_device( 2232 &mut self, 2233 disk_cfg: &mut DiskConfig, 2234 ) -> DeviceManagerResult<MetaVirtioDevice> { 2235 let id = if let Some(id) = &disk_cfg.id { 2236 id.clone() 2237 } else { 2238 let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?; 2239 disk_cfg.id = Some(id.clone()); 2240 id 2241 }; 2242 2243 info!("Creating virtio-block device: {:?}", disk_cfg); 2244 2245 let (virtio_device, migratable_device) = if disk_cfg.vhost_user { 2246 let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone(); 2247 let vu_cfg = VhostUserConfig { 2248 socket, 2249 num_queues: disk_cfg.num_queues, 2250 queue_size: disk_cfg.queue_size, 2251 }; 2252 let vhost_user_block = Arc::new(Mutex::new( 2253 match virtio_devices::vhost_user::Blk::new( 2254 id.clone(), 2255 vu_cfg, 2256 self.seccomp_action.clone(), 2257 self.exit_evt 2258 .try_clone() 2259 .map_err(DeviceManagerError::EventFd)?, 2260 self.force_iommu, 2261 state_from_id(self.snapshot.as_ref(), id.as_str()) 2262 
.map_err(DeviceManagerError::RestoreGetState)?, 2263 ) { 2264 Ok(vub_device) => vub_device, 2265 Err(e) => { 2266 return Err(DeviceManagerError::CreateVhostUserBlk(e)); 2267 } 2268 }, 2269 )); 2270 2271 ( 2272 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2273 vhost_user_block as Arc<Mutex<dyn Migratable>>, 2274 ) 2275 } else { 2276 let mut options = OpenOptions::new(); 2277 options.read(true); 2278 options.write(!disk_cfg.readonly); 2279 if disk_cfg.direct { 2280 options.custom_flags(libc::O_DIRECT); 2281 } 2282 // Open block device path 2283 let mut file: File = options 2284 .open( 2285 disk_cfg 2286 .path 2287 .as_ref() 2288 .ok_or(DeviceManagerError::NoDiskPath)? 2289 .clone(), 2290 ) 2291 .map_err(DeviceManagerError::Disk)?; 2292 let image_type = 2293 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?; 2294 2295 let image = match image_type { 2296 ImageType::FixedVhd => { 2297 // Use asynchronous backend relying on io_uring if the 2298 // syscalls are supported. 2299 if cfg!(feature = "io_uring") 2300 && !disk_cfg.disable_io_uring 2301 && self.io_uring_is_supported() 2302 { 2303 info!("Using asynchronous fixed VHD disk file (io_uring)"); 2304 2305 #[cfg(not(feature = "io_uring"))] 2306 unreachable!("Checked in if statement above"); 2307 #[cfg(feature = "io_uring")] 2308 { 2309 Box::new( 2310 FixedVhdDiskAsync::new(file) 2311 .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?, 2312 ) as Box<dyn DiskFile> 2313 } 2314 } else { 2315 info!("Using synchronous fixed VHD disk file"); 2316 Box::new( 2317 FixedVhdDiskSync::new(file) 2318 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?, 2319 ) as Box<dyn DiskFile> 2320 } 2321 } 2322 ImageType::Raw => { 2323 // Use asynchronous backend relying on io_uring if the 2324 // syscalls are supported. 
2325 if cfg!(feature = "io_uring") 2326 && !disk_cfg.disable_io_uring 2327 && self.io_uring_is_supported() 2328 { 2329 info!("Using asynchronous RAW disk file (io_uring)"); 2330 2331 #[cfg(not(feature = "io_uring"))] 2332 unreachable!("Checked in if statement above"); 2333 #[cfg(feature = "io_uring")] 2334 { 2335 Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile> 2336 } 2337 } else if !disk_cfg.disable_aio && self.aio_is_supported() { 2338 info!("Using asynchronous RAW disk file (aio)"); 2339 Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile> 2340 } else { 2341 info!("Using synchronous RAW disk file"); 2342 Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile> 2343 } 2344 } 2345 ImageType::Qcow2 => { 2346 info!("Using synchronous QCOW disk file"); 2347 Box::new( 2348 QcowDiskSync::new(file, disk_cfg.direct) 2349 .map_err(DeviceManagerError::CreateQcowDiskSync)?, 2350 ) as Box<dyn DiskFile> 2351 } 2352 ImageType::Vhdx => { 2353 info!("Using synchronous VHDX disk file"); 2354 Box::new( 2355 VhdxDiskSync::new(file) 2356 .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?, 2357 ) as Box<dyn DiskFile> 2358 } 2359 }; 2360 2361 let rate_limit_group = 2362 if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() { 2363 // Create an anonymous RateLimiterGroup that is dropped when the Disk 2364 // is dropped. 
2365 let bw = rate_limiter_cfg.bandwidth.unwrap_or_default(); 2366 let ops = rate_limiter_cfg.ops.unwrap_or_default(); 2367 let mut rate_limit_group = RateLimiterGroup::new( 2368 disk_cfg.id.as_ref().unwrap(), 2369 bw.size, 2370 bw.one_time_burst.unwrap_or(0), 2371 bw.refill_time, 2372 ops.size, 2373 ops.one_time_burst.unwrap_or(0), 2374 ops.refill_time, 2375 ) 2376 .map_err(DeviceManagerError::RateLimiterGroupCreate)?; 2377 2378 rate_limit_group 2379 .start_thread( 2380 self.exit_evt 2381 .try_clone() 2382 .map_err(DeviceManagerError::EventFd)?, 2383 ) 2384 .unwrap(); 2385 2386 Some(Arc::new(rate_limit_group)) 2387 } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() { 2388 self.rate_limit_groups.get(rate_limit_group).cloned() 2389 } else { 2390 None 2391 }; 2392 2393 let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() { 2394 queue_affinity 2395 .iter() 2396 .map(|a| (a.queue_index, a.host_cpus.clone())) 2397 .collect() 2398 } else { 2399 BTreeMap::new() 2400 }; 2401 2402 let virtio_block = Arc::new(Mutex::new( 2403 virtio_devices::Block::new( 2404 id.clone(), 2405 image, 2406 disk_cfg 2407 .path 2408 .as_ref() 2409 .ok_or(DeviceManagerError::NoDiskPath)? 2410 .clone(), 2411 disk_cfg.readonly, 2412 self.force_iommu | disk_cfg.iommu, 2413 disk_cfg.num_queues, 2414 disk_cfg.queue_size, 2415 disk_cfg.serial.clone(), 2416 self.seccomp_action.clone(), 2417 rate_limit_group, 2418 self.exit_evt 2419 .try_clone() 2420 .map_err(DeviceManagerError::EventFd)?, 2421 state_from_id(self.snapshot.as_ref(), id.as_str()) 2422 .map_err(DeviceManagerError::RestoreGetState)?, 2423 queue_affinity, 2424 ) 2425 .map_err(DeviceManagerError::CreateVirtioBlock)?, 2426 )); 2427 2428 ( 2429 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2430 virtio_block as Arc<Mutex<dyn Migratable>>, 2431 ) 2432 }; 2433 2434 // Fill the device tree with a new node. 
In case of restore, we 2435 // know there is nothing to do, so we can simply override the 2436 // existing entry. 2437 self.device_tree 2438 .lock() 2439 .unwrap() 2440 .insert(id.clone(), device_node!(id, migratable_device)); 2441 2442 Ok(MetaVirtioDevice { 2443 virtio_device, 2444 iommu: disk_cfg.iommu, 2445 id, 2446 pci_segment: disk_cfg.pci_segment, 2447 dma_handler: None, 2448 }) 2449 } 2450 2451 fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2452 let mut devices = Vec::new(); 2453 2454 let mut block_devices = self.config.lock().unwrap().disks.clone(); 2455 if let Some(disk_list_cfg) = &mut block_devices { 2456 for disk_cfg in disk_list_cfg.iter_mut() { 2457 devices.push(self.make_virtio_block_device(disk_cfg)?); 2458 } 2459 } 2460 self.config.lock().unwrap().disks = block_devices; 2461 2462 Ok(devices) 2463 } 2464 2465 fn make_virtio_net_device( 2466 &mut self, 2467 net_cfg: &mut NetConfig, 2468 ) -> DeviceManagerResult<MetaVirtioDevice> { 2469 let id = if let Some(id) = &net_cfg.id { 2470 id.clone() 2471 } else { 2472 let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?; 2473 net_cfg.id = Some(id.clone()); 2474 id 2475 }; 2476 info!("Creating virtio-net device: {:?}", net_cfg); 2477 2478 let (virtio_device, migratable_device) = if net_cfg.vhost_user { 2479 let socket = net_cfg.vhost_socket.as_ref().unwrap().clone(); 2480 let vu_cfg = VhostUserConfig { 2481 socket, 2482 num_queues: net_cfg.num_queues, 2483 queue_size: net_cfg.queue_size, 2484 }; 2485 let server = match net_cfg.vhost_mode { 2486 VhostMode::Client => false, 2487 VhostMode::Server => true, 2488 }; 2489 let vhost_user_net = Arc::new(Mutex::new( 2490 match virtio_devices::vhost_user::Net::new( 2491 id.clone(), 2492 net_cfg.mac, 2493 net_cfg.mtu, 2494 vu_cfg, 2495 server, 2496 self.seccomp_action.clone(), 2497 self.exit_evt 2498 .try_clone() 2499 .map_err(DeviceManagerError::EventFd)?, 2500 self.force_iommu, 2501 state_from_id(self.snapshot.as_ref(), 
id.as_str()) 2502 .map_err(DeviceManagerError::RestoreGetState)?, 2503 net_cfg.offload_tso, 2504 net_cfg.offload_ufo, 2505 net_cfg.offload_csum, 2506 ) { 2507 Ok(vun_device) => vun_device, 2508 Err(e) => { 2509 return Err(DeviceManagerError::CreateVhostUserNet(e)); 2510 } 2511 }, 2512 )); 2513 2514 ( 2515 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2516 vhost_user_net as Arc<Mutex<dyn Migratable>>, 2517 ) 2518 } else { 2519 let state = state_from_id(self.snapshot.as_ref(), id.as_str()) 2520 .map_err(DeviceManagerError::RestoreGetState)?; 2521 let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap { 2522 Arc::new(Mutex::new( 2523 virtio_devices::Net::new( 2524 id.clone(), 2525 Some(tap_if_name), 2526 Some(net_cfg.ip), 2527 Some(net_cfg.mask), 2528 Some(net_cfg.mac), 2529 &mut net_cfg.host_mac, 2530 net_cfg.mtu, 2531 self.force_iommu | net_cfg.iommu, 2532 net_cfg.num_queues, 2533 net_cfg.queue_size, 2534 self.seccomp_action.clone(), 2535 net_cfg.rate_limiter_config, 2536 self.exit_evt 2537 .try_clone() 2538 .map_err(DeviceManagerError::EventFd)?, 2539 state, 2540 net_cfg.offload_tso, 2541 net_cfg.offload_ufo, 2542 net_cfg.offload_csum, 2543 ) 2544 .map_err(DeviceManagerError::CreateVirtioNet)?, 2545 )) 2546 } else if let Some(fds) = &net_cfg.fds { 2547 let net = virtio_devices::Net::from_tap_fds( 2548 id.clone(), 2549 fds, 2550 Some(net_cfg.mac), 2551 net_cfg.mtu, 2552 self.force_iommu | net_cfg.iommu, 2553 net_cfg.queue_size, 2554 self.seccomp_action.clone(), 2555 net_cfg.rate_limiter_config, 2556 self.exit_evt 2557 .try_clone() 2558 .map_err(DeviceManagerError::EventFd)?, 2559 state, 2560 net_cfg.offload_tso, 2561 net_cfg.offload_ufo, 2562 net_cfg.offload_csum, 2563 ) 2564 .map_err(DeviceManagerError::CreateVirtioNet)?; 2565 2566 // SAFETY: 'fds' are valid because TAP devices are created successfully 2567 unsafe { 2568 self.config.lock().unwrap().add_preserved_fds(fds.clone()); 2569 } 2570 2571 Arc::new(Mutex::new(net)) 2572 } 
else { 2573 Arc::new(Mutex::new( 2574 virtio_devices::Net::new( 2575 id.clone(), 2576 None, 2577 Some(net_cfg.ip), 2578 Some(net_cfg.mask), 2579 Some(net_cfg.mac), 2580 &mut net_cfg.host_mac, 2581 net_cfg.mtu, 2582 self.force_iommu | net_cfg.iommu, 2583 net_cfg.num_queues, 2584 net_cfg.queue_size, 2585 self.seccomp_action.clone(), 2586 net_cfg.rate_limiter_config, 2587 self.exit_evt 2588 .try_clone() 2589 .map_err(DeviceManagerError::EventFd)?, 2590 state, 2591 net_cfg.offload_tso, 2592 net_cfg.offload_ufo, 2593 net_cfg.offload_csum, 2594 ) 2595 .map_err(DeviceManagerError::CreateVirtioNet)?, 2596 )) 2597 }; 2598 2599 ( 2600 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2601 virtio_net as Arc<Mutex<dyn Migratable>>, 2602 ) 2603 }; 2604 2605 // Fill the device tree with a new node. In case of restore, we 2606 // know there is nothing to do, so we can simply override the 2607 // existing entry. 2608 self.device_tree 2609 .lock() 2610 .unwrap() 2611 .insert(id.clone(), device_node!(id, migratable_device)); 2612 2613 Ok(MetaVirtioDevice { 2614 virtio_device, 2615 iommu: net_cfg.iommu, 2616 id, 2617 pci_segment: net_cfg.pci_segment, 2618 dma_handler: None, 2619 }) 2620 } 2621 2622 /// Add virto-net and vhost-user-net devices 2623 fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2624 let mut devices = Vec::new(); 2625 let mut net_devices = self.config.lock().unwrap().net.clone(); 2626 if let Some(net_list_cfg) = &mut net_devices { 2627 for net_cfg in net_list_cfg.iter_mut() { 2628 devices.push(self.make_virtio_net_device(net_cfg)?); 2629 } 2630 } 2631 self.config.lock().unwrap().net = net_devices; 2632 2633 Ok(devices) 2634 } 2635 2636 fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2637 let mut devices = Vec::new(); 2638 2639 // Add virtio-rng if required 2640 let rng_config = self.config.lock().unwrap().rng.clone(); 2641 if let Some(rng_path) = rng_config.src.to_str() 
{ 2642 info!("Creating virtio-rng device: {:?}", rng_config); 2643 let id = String::from(RNG_DEVICE_NAME); 2644 2645 let virtio_rng_device = Arc::new(Mutex::new( 2646 virtio_devices::Rng::new( 2647 id.clone(), 2648 rng_path, 2649 self.force_iommu | rng_config.iommu, 2650 self.seccomp_action.clone(), 2651 self.exit_evt 2652 .try_clone() 2653 .map_err(DeviceManagerError::EventFd)?, 2654 state_from_id(self.snapshot.as_ref(), id.as_str()) 2655 .map_err(DeviceManagerError::RestoreGetState)?, 2656 ) 2657 .map_err(DeviceManagerError::CreateVirtioRng)?, 2658 )); 2659 devices.push(MetaVirtioDevice { 2660 virtio_device: Arc::clone(&virtio_rng_device) 2661 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2662 iommu: rng_config.iommu, 2663 id: id.clone(), 2664 pci_segment: 0, 2665 dma_handler: None, 2666 }); 2667 2668 // Fill the device tree with a new node. In case of restore, we 2669 // know there is nothing to do, so we can simply override the 2670 // existing entry. 2671 self.device_tree 2672 .lock() 2673 .unwrap() 2674 .insert(id.clone(), device_node!(id, virtio_rng_device)); 2675 } 2676 2677 Ok(devices) 2678 } 2679 2680 fn make_virtio_fs_device( 2681 &mut self, 2682 fs_cfg: &mut FsConfig, 2683 ) -> DeviceManagerResult<MetaVirtioDevice> { 2684 let id = if let Some(id) = &fs_cfg.id { 2685 id.clone() 2686 } else { 2687 let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?; 2688 fs_cfg.id = Some(id.clone()); 2689 id 2690 }; 2691 2692 info!("Creating virtio-fs device: {:?}", fs_cfg); 2693 2694 let mut node = device_node!(id); 2695 2696 if let Some(fs_socket) = fs_cfg.socket.to_str() { 2697 let virtio_fs_device = Arc::new(Mutex::new( 2698 virtio_devices::vhost_user::Fs::new( 2699 id.clone(), 2700 fs_socket, 2701 &fs_cfg.tag, 2702 fs_cfg.num_queues, 2703 fs_cfg.queue_size, 2704 None, 2705 self.seccomp_action.clone(), 2706 self.exit_evt 2707 .try_clone() 2708 .map_err(DeviceManagerError::EventFd)?, 2709 self.force_iommu, 2710 state_from_id(self.snapshot.as_ref(), id.as_str()) 
2711 .map_err(DeviceManagerError::RestoreGetState)?, 2712 ) 2713 .map_err(DeviceManagerError::CreateVirtioFs)?, 2714 )); 2715 2716 // Update the device tree with the migratable device. 2717 node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>); 2718 self.device_tree.lock().unwrap().insert(id.clone(), node); 2719 2720 Ok(MetaVirtioDevice { 2721 virtio_device: Arc::clone(&virtio_fs_device) 2722 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2723 iommu: false, 2724 id, 2725 pci_segment: fs_cfg.pci_segment, 2726 dma_handler: None, 2727 }) 2728 } else { 2729 Err(DeviceManagerError::NoVirtioFsSock) 2730 } 2731 } 2732 2733 fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2734 let mut devices = Vec::new(); 2735 2736 let mut fs_devices = self.config.lock().unwrap().fs.clone(); 2737 if let Some(fs_list_cfg) = &mut fs_devices { 2738 for fs_cfg in fs_list_cfg.iter_mut() { 2739 devices.push(self.make_virtio_fs_device(fs_cfg)?); 2740 } 2741 } 2742 self.config.lock().unwrap().fs = fs_devices; 2743 2744 Ok(devices) 2745 } 2746 2747 fn make_virtio_pmem_device( 2748 &mut self, 2749 pmem_cfg: &mut PmemConfig, 2750 ) -> DeviceManagerResult<MetaVirtioDevice> { 2751 let id = if let Some(id) = &pmem_cfg.id { 2752 id.clone() 2753 } else { 2754 let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?; 2755 pmem_cfg.id = Some(id.clone()); 2756 id 2757 }; 2758 2759 info!("Creating virtio-pmem device: {:?}", pmem_cfg); 2760 2761 let mut node = device_node!(id); 2762 2763 // Look for the id in the device tree. If it can be found, that means 2764 // the device is being restored, otherwise it's created from scratch. 
2765 let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) { 2766 info!("Restoring virtio-pmem {} resources", id); 2767 2768 let mut region_range: Option<(u64, u64)> = None; 2769 for resource in node.resources.iter() { 2770 match resource { 2771 Resource::MmioAddressRange { base, size } => { 2772 if region_range.is_some() { 2773 return Err(DeviceManagerError::ResourceAlreadyExists); 2774 } 2775 2776 region_range = Some((*base, *size)); 2777 } 2778 _ => { 2779 error!("Unexpected resource {:?} for {}", resource, id); 2780 } 2781 } 2782 } 2783 2784 if region_range.is_none() { 2785 return Err(DeviceManagerError::MissingVirtioPmemResources); 2786 } 2787 2788 region_range 2789 } else { 2790 None 2791 }; 2792 2793 let (custom_flags, set_len) = if pmem_cfg.file.is_dir() { 2794 if pmem_cfg.size.is_none() { 2795 return Err(DeviceManagerError::PmemWithDirectorySizeMissing); 2796 } 2797 (O_TMPFILE, true) 2798 } else { 2799 (0, false) 2800 }; 2801 2802 let mut file = OpenOptions::new() 2803 .read(true) 2804 .write(!pmem_cfg.discard_writes) 2805 .custom_flags(custom_flags) 2806 .open(&pmem_cfg.file) 2807 .map_err(DeviceManagerError::PmemFileOpen)?; 2808 2809 let size = if let Some(size) = pmem_cfg.size { 2810 if set_len { 2811 file.set_len(size) 2812 .map_err(DeviceManagerError::PmemFileSetLen)?; 2813 } 2814 size 2815 } else { 2816 file.seek(SeekFrom::End(0)) 2817 .map_err(DeviceManagerError::PmemFileSetLen)? 2818 }; 2819 2820 if size % 0x20_0000 != 0 { 2821 return Err(DeviceManagerError::PmemSizeNotAligned); 2822 } 2823 2824 let (region_base, region_size) = if let Some((base, size)) = region_range { 2825 // The memory needs to be 2MiB aligned in order to support 2826 // hugepages. 
2827 self.pci_segments[pmem_cfg.pci_segment as usize] 2828 .mem64_allocator 2829 .lock() 2830 .unwrap() 2831 .allocate( 2832 Some(GuestAddress(base)), 2833 size as GuestUsize, 2834 Some(0x0020_0000), 2835 ) 2836 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2837 2838 (base, size) 2839 } else { 2840 // The memory needs to be 2MiB aligned in order to support 2841 // hugepages. 2842 let base = self.pci_segments[pmem_cfg.pci_segment as usize] 2843 .mem64_allocator 2844 .lock() 2845 .unwrap() 2846 .allocate(None, size as GuestUsize, Some(0x0020_0000)) 2847 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2848 2849 (base.raw_value(), size) 2850 }; 2851 2852 let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?; 2853 let mmap_region = MmapRegion::build( 2854 Some(FileOffset::new(cloned_file, 0)), 2855 region_size as usize, 2856 PROT_READ | PROT_WRITE, 2857 MAP_NORESERVE 2858 | if pmem_cfg.discard_writes { 2859 MAP_PRIVATE 2860 } else { 2861 MAP_SHARED 2862 }, 2863 ) 2864 .map_err(DeviceManagerError::NewMmapRegion)?; 2865 let host_addr: u64 = mmap_region.as_ptr() as u64; 2866 2867 let mem_slot = self 2868 .memory_manager 2869 .lock() 2870 .unwrap() 2871 .create_userspace_mapping(region_base, region_size, host_addr, false, false, false) 2872 .map_err(DeviceManagerError::MemoryManager)?; 2873 2874 let mapping = virtio_devices::UserspaceMapping { 2875 host_addr, 2876 mem_slot, 2877 addr: GuestAddress(region_base), 2878 len: region_size, 2879 mergeable: false, 2880 }; 2881 2882 let virtio_pmem_device = Arc::new(Mutex::new( 2883 virtio_devices::Pmem::new( 2884 id.clone(), 2885 file, 2886 GuestAddress(region_base), 2887 mapping, 2888 mmap_region, 2889 self.force_iommu | pmem_cfg.iommu, 2890 self.seccomp_action.clone(), 2891 self.exit_evt 2892 .try_clone() 2893 .map_err(DeviceManagerError::EventFd)?, 2894 state_from_id(self.snapshot.as_ref(), id.as_str()) 2895 .map_err(DeviceManagerError::RestoreGetState)?, 2896 ) 2897 
.map_err(DeviceManagerError::CreateVirtioPmem)?, 2898 )); 2899 2900 // Update the device tree with correct resource information and with 2901 // the migratable device. 2902 node.resources.push(Resource::MmioAddressRange { 2903 base: region_base, 2904 size: region_size, 2905 }); 2906 node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>); 2907 self.device_tree.lock().unwrap().insert(id.clone(), node); 2908 2909 Ok(MetaVirtioDevice { 2910 virtio_device: Arc::clone(&virtio_pmem_device) 2911 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2912 iommu: pmem_cfg.iommu, 2913 id, 2914 pci_segment: pmem_cfg.pci_segment, 2915 dma_handler: None, 2916 }) 2917 } 2918 2919 fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2920 let mut devices = Vec::new(); 2921 // Add virtio-pmem if required 2922 let mut pmem_devices = self.config.lock().unwrap().pmem.clone(); 2923 if let Some(pmem_list_cfg) = &mut pmem_devices { 2924 for pmem_cfg in pmem_list_cfg.iter_mut() { 2925 devices.push(self.make_virtio_pmem_device(pmem_cfg)?); 2926 } 2927 } 2928 self.config.lock().unwrap().pmem = pmem_devices; 2929 2930 Ok(devices) 2931 } 2932 2933 fn make_virtio_vsock_device( 2934 &mut self, 2935 vsock_cfg: &mut VsockConfig, 2936 ) -> DeviceManagerResult<MetaVirtioDevice> { 2937 let id = if let Some(id) = &vsock_cfg.id { 2938 id.clone() 2939 } else { 2940 let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?; 2941 vsock_cfg.id = Some(id.clone()); 2942 id 2943 }; 2944 2945 info!("Creating virtio-vsock device: {:?}", vsock_cfg); 2946 2947 let socket_path = vsock_cfg 2948 .socket 2949 .to_str() 2950 .ok_or(DeviceManagerError::CreateVsockConvertPath)?; 2951 let backend = 2952 virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string()) 2953 .map_err(DeviceManagerError::CreateVsockBackend)?; 2954 2955 let vsock_device = Arc::new(Mutex::new( 2956 virtio_devices::Vsock::new( 2957 id.clone(), 2958 vsock_cfg.cid, 
2959 vsock_cfg.socket.clone(), 2960 backend, 2961 self.force_iommu | vsock_cfg.iommu, 2962 self.seccomp_action.clone(), 2963 self.exit_evt 2964 .try_clone() 2965 .map_err(DeviceManagerError::EventFd)?, 2966 state_from_id(self.snapshot.as_ref(), id.as_str()) 2967 .map_err(DeviceManagerError::RestoreGetState)?, 2968 ) 2969 .map_err(DeviceManagerError::CreateVirtioVsock)?, 2970 )); 2971 2972 // Fill the device tree with a new node. In case of restore, we 2973 // know there is nothing to do, so we can simply override the 2974 // existing entry. 2975 self.device_tree 2976 .lock() 2977 .unwrap() 2978 .insert(id.clone(), device_node!(id, vsock_device)); 2979 2980 Ok(MetaVirtioDevice { 2981 virtio_device: Arc::clone(&vsock_device) 2982 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2983 iommu: vsock_cfg.iommu, 2984 id, 2985 pci_segment: vsock_cfg.pci_segment, 2986 dma_handler: None, 2987 }) 2988 } 2989 2990 fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2991 let mut devices = Vec::new(); 2992 2993 let mut vsock = self.config.lock().unwrap().vsock.clone(); 2994 if let Some(ref mut vsock_cfg) = &mut vsock { 2995 devices.push(self.make_virtio_vsock_device(vsock_cfg)?); 2996 } 2997 self.config.lock().unwrap().vsock = vsock; 2998 2999 Ok(devices) 3000 } 3001 3002 fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3003 let mut devices = Vec::new(); 3004 3005 let mm = self.memory_manager.clone(); 3006 let mut mm = mm.lock().unwrap(); 3007 for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() { 3008 if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() { 3009 info!("Creating virtio-mem device: id = {}", memory_zone_id); 3010 3011 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id) 3012 .map(|i| i as u16); 3013 3014 let virtio_mem_device = Arc::new(Mutex::new( 3015 virtio_devices::Mem::new( 3016 memory_zone_id.clone(), 3017 virtio_mem_zone.region(), 
3018 self.seccomp_action.clone(), 3019 node_id, 3020 virtio_mem_zone.hotplugged_size(), 3021 virtio_mem_zone.hugepages(), 3022 self.exit_evt 3023 .try_clone() 3024 .map_err(DeviceManagerError::EventFd)?, 3025 virtio_mem_zone.blocks_state().clone(), 3026 state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str()) 3027 .map_err(DeviceManagerError::RestoreGetState)?, 3028 ) 3029 .map_err(DeviceManagerError::CreateVirtioMem)?, 3030 )); 3031 3032 // Update the virtio-mem zone so that it has a handle onto the 3033 // virtio-mem device, which will be used for triggering a resize 3034 // if needed. 3035 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device)); 3036 3037 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device)); 3038 3039 devices.push(MetaVirtioDevice { 3040 virtio_device: Arc::clone(&virtio_mem_device) 3041 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3042 iommu: false, 3043 id: memory_zone_id.clone(), 3044 pci_segment: 0, 3045 dma_handler: None, 3046 }); 3047 3048 // Fill the device tree with a new node. In case of restore, we 3049 // know there is nothing to do, so we can simply override the 3050 // existing entry. 
    #[cfg(feature = "pvmemcontrol")]
    /// Creates the pvmemcontrol device pair (PCI config-space device plus its
    /// MMIO bus device), plugs it on PCI segment 0 and records it in the
    /// device tree.
    fn make_pvmemcontrol_device(
        &mut self,
    ) -> DeviceManagerResult<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )> {
        let id = String::from(PVMEMCONTROL_DEVICE_NAME);
        // Always placed on PCI segment 0; pci_resources() may refine the
        // segment (e.g. on restore) via the shadowed binding below.
        let pci_segment_id = 0x0_u16;

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        info!("Creating pvmemcontrol device: id = {}", id);
        let (pvmemcontrol_pci_device, pvmemcontrol_bus_device) =
            devices::pvmemcontrol::PvmemcontrolDevice::make_device(
                id.clone(),
                self.memory_manager.lock().unwrap().guest_memory(),
            );

        let pvmemcontrol_pci_device = Arc::new(Mutex::new(pvmemcontrol_pci_device));
        let pvmemcontrol_bus_device = Arc::new(pvmemcontrol_bus_device);

        // Plugging the device on the bus may (re)allocate BARs; keep the
        // resulting resources for the device-tree node.
        let new_resources = self.add_pci_device(
            pvmemcontrol_bus_device.clone(),
            pvmemcontrol_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let mut node = device_node!(id, pvmemcontrol_pci_device);

        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = None;

        self.device_tree.lock().unwrap().insert(id, node);

        Ok((pvmemcontrol_bus_device, pvmemcontrol_pci_device))
    }
balloon_config.deflate_on_oom, 3115 balloon_config.free_page_reporting, 3116 self.seccomp_action.clone(), 3117 self.exit_evt 3118 .try_clone() 3119 .map_err(DeviceManagerError::EventFd)?, 3120 state_from_id(self.snapshot.as_ref(), id.as_str()) 3121 .map_err(DeviceManagerError::RestoreGetState)?, 3122 ) 3123 .map_err(DeviceManagerError::CreateVirtioBalloon)?, 3124 )); 3125 3126 self.balloon = Some(virtio_balloon_device.clone()); 3127 3128 devices.push(MetaVirtioDevice { 3129 virtio_device: Arc::clone(&virtio_balloon_device) 3130 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3131 iommu: false, 3132 id: id.clone(), 3133 pci_segment: 0, 3134 dma_handler: None, 3135 }); 3136 3137 self.device_tree 3138 .lock() 3139 .unwrap() 3140 .insert(id.clone(), device_node!(id, virtio_balloon_device)); 3141 } 3142 3143 Ok(devices) 3144 } 3145 3146 fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3147 let mut devices = Vec::new(); 3148 3149 if !self.config.lock().unwrap().watchdog { 3150 return Ok(devices); 3151 } 3152 3153 let id = String::from(WATCHDOG_DEVICE_NAME); 3154 info!("Creating virtio-watchdog device: id = {}", id); 3155 3156 let virtio_watchdog_device = Arc::new(Mutex::new( 3157 virtio_devices::Watchdog::new( 3158 id.clone(), 3159 self.reset_evt.try_clone().unwrap(), 3160 self.seccomp_action.clone(), 3161 self.exit_evt 3162 .try_clone() 3163 .map_err(DeviceManagerError::EventFd)?, 3164 state_from_id(self.snapshot.as_ref(), id.as_str()) 3165 .map_err(DeviceManagerError::RestoreGetState)?, 3166 ) 3167 .map_err(DeviceManagerError::CreateVirtioWatchdog)?, 3168 )); 3169 devices.push(MetaVirtioDevice { 3170 virtio_device: Arc::clone(&virtio_watchdog_device) 3171 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3172 iommu: false, 3173 id: id.clone(), 3174 pci_segment: 0, 3175 dma_handler: None, 3176 }); 3177 3178 self.device_tree 3179 .lock() 3180 .unwrap() 3181 .insert(id.clone(), device_node!(id, virtio_watchdog_device)); 3182 3183 
    /// Creates a vDPA device from `vdpa_cfg`, registers it in the device
    /// tree and returns the metadata needed to plug it on the PCI bus.
    fn make_vdpa_device(
        &mut self,
        vdpa_cfg: &mut VdpaConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate a fresh one and write it back
        // into the config so it is persisted.
        let id = if let Some(id) = &vdpa_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
            vdpa_cfg.id = Some(id.clone());
            id
        };

        info!("Creating vDPA device: {:?}", vdpa_cfg);

        let device_path = vdpa_cfg
            .path
            .to_str()
            .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;

        let vdpa_device = Arc::new(Mutex::new(
            virtio_devices::Vdpa::new(
                id.clone(),
                device_path,
                self.memory_manager.lock().unwrap().guest_memory(),
                // NOTE(review): `as u16` silently truncates; assumes
                // num_queues was validated to fit in u16 — TODO confirm.
                vdpa_cfg.num_queues as u16,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVdpa)?,
        ));

        // Create the DMA handler that is required by the vDPA device
        let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
            Arc::clone(&vdpa_device),
            Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
        ));

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vdpa_device));

        Ok(MetaVirtioDevice {
            virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vdpa_cfg.iommu,
            id,
            pci_segment: vdpa_cfg.pci_segment,
            dma_handler: Some(vdpa_mapping),
        })
    }
self, prefix: &str) -> DeviceManagerResult<String> { 3252 let start_id = self.device_id_cnt; 3253 loop { 3254 // Generate the temporary name. 3255 let name = format!("{}{}", prefix, self.device_id_cnt); 3256 // Increment the counter. 3257 self.device_id_cnt += Wrapping(1); 3258 // Check if the name is already in use. 3259 if !self.boot_id_list.contains(&name) 3260 && !self.device_tree.lock().unwrap().contains_key(&name) 3261 { 3262 return Ok(name); 3263 } 3264 3265 if self.device_id_cnt == start_id { 3266 // We went through a full loop and there's nothing else we can 3267 // do. 3268 break; 3269 } 3270 } 3271 Err(DeviceManagerError::NoAvailableDeviceName) 3272 } 3273 3274 fn add_passthrough_device( 3275 &mut self, 3276 device_cfg: &mut DeviceConfig, 3277 ) -> DeviceManagerResult<(PciBdf, String)> { 3278 // If the passthrough device has not been created yet, it is created 3279 // here and stored in the DeviceManager structure for future needs. 3280 if self.passthrough_device.is_none() { 3281 self.passthrough_device = Some( 3282 self.address_manager 3283 .vm 3284 .create_passthrough_device() 3285 .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?, 3286 ); 3287 } 3288 3289 self.add_vfio_device(device_cfg) 3290 } 3291 3292 fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> { 3293 let passthrough_device = self 3294 .passthrough_device 3295 .as_ref() 3296 .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?; 3297 3298 let dup = passthrough_device 3299 .try_clone() 3300 .map_err(DeviceManagerError::VfioCreate)?; 3301 3302 Ok(Arc::new( 3303 VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?, 3304 )) 3305 } 3306 3307 fn add_vfio_device( 3308 &mut self, 3309 device_cfg: &mut DeviceConfig, 3310 ) -> DeviceManagerResult<(PciBdf, String)> { 3311 let vfio_name = if let Some(id) = &device_cfg.id { 3312 id.clone() 3313 } else { 3314 let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?; 3315 
device_cfg.id = Some(id.clone()); 3316 id 3317 }; 3318 3319 let (pci_segment_id, pci_device_bdf, resources) = 3320 self.pci_resources(&vfio_name, device_cfg.pci_segment)?; 3321 3322 let mut needs_dma_mapping = false; 3323 3324 // Here we create a new VFIO container for two reasons. Either this is 3325 // the first VFIO device, meaning we need a new VFIO container, which 3326 // will be shared with other VFIO devices. Or the new VFIO device is 3327 // attached to a vIOMMU, meaning we must create a dedicated VFIO 3328 // container. In the vIOMMU use case, we can't let all devices under 3329 // the same VFIO container since we couldn't map/unmap memory for each 3330 // device. That's simply because the map/unmap operations happen at the 3331 // VFIO container level. 3332 let vfio_container = if device_cfg.iommu { 3333 let vfio_container = self.create_vfio_container()?; 3334 3335 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3336 Arc::clone(&vfio_container), 3337 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3338 Arc::clone(&self.mmio_regions), 3339 )); 3340 3341 if let Some(iommu) = &self.iommu_device { 3342 iommu 3343 .lock() 3344 .unwrap() 3345 .add_external_mapping(pci_device_bdf.into(), vfio_mapping); 3346 } else { 3347 return Err(DeviceManagerError::MissingVirtualIommu); 3348 } 3349 3350 vfio_container 3351 } else if let Some(vfio_container) = &self.vfio_container { 3352 Arc::clone(vfio_container) 3353 } else { 3354 let vfio_container = self.create_vfio_container()?; 3355 needs_dma_mapping = true; 3356 self.vfio_container = Some(Arc::clone(&vfio_container)); 3357 3358 vfio_container 3359 }; 3360 3361 let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container)) 3362 .map_err(DeviceManagerError::VfioCreate)?; 3363 3364 if needs_dma_mapping { 3365 // Register DMA mapping in IOMMU. 3366 // Do not register virtio-mem regions, as they are handled directly by 3367 // virtio-mem device itself. 
3368 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3369 for region in zone.regions() { 3370 vfio_container 3371 .vfio_dma_map( 3372 region.start_addr().raw_value(), 3373 region.len(), 3374 region.as_ptr() as u64, 3375 ) 3376 .map_err(DeviceManagerError::VfioDmaMap)?; 3377 } 3378 } 3379 3380 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3381 Arc::clone(&vfio_container), 3382 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3383 Arc::clone(&self.mmio_regions), 3384 )); 3385 3386 for virtio_mem_device in self.virtio_mem_devices.iter() { 3387 virtio_mem_device 3388 .lock() 3389 .unwrap() 3390 .add_dma_mapping_handler( 3391 VirtioMemMappingSource::Container, 3392 vfio_mapping.clone(), 3393 ) 3394 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3395 } 3396 } 3397 3398 let legacy_interrupt_group = 3399 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3400 Some( 3401 legacy_interrupt_manager 3402 .create_group(LegacyIrqGroupConfig { 3403 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3404 [pci_device_bdf.device() as usize] 3405 as InterruptIndex, 3406 }) 3407 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3408 ) 3409 } else { 3410 None 3411 }; 3412 3413 let memory_manager = self.memory_manager.clone(); 3414 3415 let vfio_pci_device = VfioPciDevice::new( 3416 vfio_name.clone(), 3417 &self.address_manager.vm, 3418 vfio_device, 3419 vfio_container, 3420 self.msi_interrupt_manager.clone(), 3421 legacy_interrupt_group, 3422 device_cfg.iommu, 3423 pci_device_bdf, 3424 memory_manager.lock().unwrap().memory_slot_allocator(), 3425 vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()), 3426 device_cfg.x_nv_gpudirect_clique, 3427 ) 3428 .map_err(DeviceManagerError::VfioPciCreate)?; 3429 3430 let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device)); 3431 3432 let new_resources = self.add_pci_device( 3433 vfio_pci_device.clone(), 3434 
vfio_pci_device.clone(), 3435 pci_segment_id, 3436 pci_device_bdf, 3437 resources, 3438 )?; 3439 3440 vfio_pci_device 3441 .lock() 3442 .unwrap() 3443 .map_mmio_regions() 3444 .map_err(DeviceManagerError::VfioMapRegion)?; 3445 3446 for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() { 3447 self.mmio_regions.lock().unwrap().push(mmio_region); 3448 } 3449 3450 let mut node = device_node!(vfio_name, vfio_pci_device); 3451 3452 // Update the device tree with correct resource information. 3453 node.resources = new_resources; 3454 node.pci_bdf = Some(pci_device_bdf); 3455 node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device)); 3456 3457 self.device_tree 3458 .lock() 3459 .unwrap() 3460 .insert(vfio_name.clone(), node); 3461 3462 Ok((pci_device_bdf, vfio_name)) 3463 } 3464 3465 fn add_pci_device( 3466 &mut self, 3467 bus_device: Arc<dyn BusDeviceSync>, 3468 pci_device: Arc<Mutex<dyn PciDevice>>, 3469 segment_id: u16, 3470 bdf: PciBdf, 3471 resources: Option<Vec<Resource>>, 3472 ) -> DeviceManagerResult<Vec<Resource>> { 3473 let bars = pci_device 3474 .lock() 3475 .unwrap() 3476 .allocate_bars( 3477 &self.address_manager.allocator, 3478 &mut self.pci_segments[segment_id as usize] 3479 .mem32_allocator 3480 .lock() 3481 .unwrap(), 3482 &mut self.pci_segments[segment_id as usize] 3483 .mem64_allocator 3484 .lock() 3485 .unwrap(), 3486 resources, 3487 ) 3488 .map_err(DeviceManagerError::AllocateBars)?; 3489 3490 let mut pci_bus = self.pci_segments[segment_id as usize] 3491 .pci_bus 3492 .lock() 3493 .unwrap(); 3494 3495 pci_bus 3496 .add_device(bdf.device() as u32, pci_device) 3497 .map_err(DeviceManagerError::AddPciDevice)?; 3498 3499 self.bus_devices.push(Arc::clone(&bus_device)); 3500 3501 pci_bus 3502 .register_mapping( 3503 bus_device, 3504 self.address_manager.io_bus.as_ref(), 3505 self.address_manager.mmio_bus.as_ref(), 3506 bars.clone(), 3507 ) 3508 .map_err(DeviceManagerError::AddPciDevice)?; 3509 3510 let mut new_resources = Vec::new(); 
3511 for bar in bars { 3512 new_resources.push(Resource::PciBar { 3513 index: bar.idx(), 3514 base: bar.addr(), 3515 size: bar.size(), 3516 type_: bar.region_type().into(), 3517 prefetchable: bar.prefetchable().into(), 3518 }); 3519 } 3520 3521 Ok(new_resources) 3522 } 3523 3524 fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3525 let mut iommu_attached_device_ids = Vec::new(); 3526 let mut devices = self.config.lock().unwrap().devices.clone(); 3527 3528 if let Some(device_list_cfg) = &mut devices { 3529 for device_cfg in device_list_cfg.iter_mut() { 3530 let (device_id, _) = self.add_passthrough_device(device_cfg)?; 3531 if device_cfg.iommu && self.iommu_device.is_some() { 3532 iommu_attached_device_ids.push(device_id); 3533 } 3534 } 3535 } 3536 3537 // Update the list of devices 3538 self.config.lock().unwrap().devices = devices; 3539 3540 Ok(iommu_attached_device_ids) 3541 } 3542 3543 fn add_vfio_user_device( 3544 &mut self, 3545 device_cfg: &mut UserDeviceConfig, 3546 ) -> DeviceManagerResult<(PciBdf, String)> { 3547 let vfio_user_name = if let Some(id) = &device_cfg.id { 3548 id.clone() 3549 } else { 3550 let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?; 3551 device_cfg.id = Some(id.clone()); 3552 id 3553 }; 3554 3555 let (pci_segment_id, pci_device_bdf, resources) = 3556 self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?; 3557 3558 let legacy_interrupt_group = 3559 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3560 Some( 3561 legacy_interrupt_manager 3562 .create_group(LegacyIrqGroupConfig { 3563 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3564 [pci_device_bdf.device() as usize] 3565 as InterruptIndex, 3566 }) 3567 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3568 ) 3569 } else { 3570 None 3571 }; 3572 3573 let client = Arc::new(Mutex::new( 3574 vfio_user::Client::new(&device_cfg.socket) 3575 .map_err(DeviceManagerError::VfioUserCreateClient)?, 3576 )); 3577 3578 
let memory_manager = self.memory_manager.clone(); 3579 3580 let mut vfio_user_pci_device = VfioUserPciDevice::new( 3581 vfio_user_name.clone(), 3582 &self.address_manager.vm, 3583 client.clone(), 3584 self.msi_interrupt_manager.clone(), 3585 legacy_interrupt_group, 3586 pci_device_bdf, 3587 memory_manager.lock().unwrap().memory_slot_allocator(), 3588 vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()), 3589 ) 3590 .map_err(DeviceManagerError::VfioUserCreate)?; 3591 3592 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3593 let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory))); 3594 for virtio_mem_device in self.virtio_mem_devices.iter() { 3595 virtio_mem_device 3596 .lock() 3597 .unwrap() 3598 .add_dma_mapping_handler( 3599 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3600 vfio_user_mapping.clone(), 3601 ) 3602 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3603 } 3604 3605 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3606 for region in zone.regions() { 3607 vfio_user_pci_device 3608 .dma_map(region) 3609 .map_err(DeviceManagerError::VfioUserDmaMap)?; 3610 } 3611 } 3612 3613 let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device)); 3614 3615 let new_resources = self.add_pci_device( 3616 vfio_user_pci_device.clone(), 3617 vfio_user_pci_device.clone(), 3618 pci_segment_id, 3619 pci_device_bdf, 3620 resources, 3621 )?; 3622 3623 // Note it is required to call 'add_pci_device()' in advance to have the list of 3624 // mmio regions provisioned correctly 3625 vfio_user_pci_device 3626 .lock() 3627 .unwrap() 3628 .map_mmio_regions() 3629 .map_err(DeviceManagerError::VfioUserMapRegion)?; 3630 3631 let mut node = device_node!(vfio_user_name, vfio_user_pci_device); 3632 3633 // Update the device tree with correct resource information. 
3634 node.resources = new_resources; 3635 node.pci_bdf = Some(pci_device_bdf); 3636 node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device)); 3637 3638 self.device_tree 3639 .lock() 3640 .unwrap() 3641 .insert(vfio_user_name.clone(), node); 3642 3643 Ok((pci_device_bdf, vfio_user_name)) 3644 } 3645 3646 fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3647 let mut user_devices = self.config.lock().unwrap().user_devices.clone(); 3648 3649 if let Some(device_list_cfg) = &mut user_devices { 3650 for device_cfg in device_list_cfg.iter_mut() { 3651 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?; 3652 } 3653 } 3654 3655 // Update the list of devices 3656 self.config.lock().unwrap().user_devices = user_devices; 3657 3658 Ok(vec![]) 3659 } 3660 3661 fn add_virtio_pci_device( 3662 &mut self, 3663 virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3664 iommu_mapping: &Option<Arc<IommuMapping>>, 3665 virtio_device_id: String, 3666 pci_segment_id: u16, 3667 dma_handler: Option<Arc<dyn ExternalDmaMapping>>, 3668 ) -> DeviceManagerResult<PciBdf> { 3669 let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}"); 3670 3671 // Add the new virtio-pci node to the device tree. 3672 let mut node = device_node!(id); 3673 node.children = vec![virtio_device_id.clone()]; 3674 3675 let (pci_segment_id, pci_device_bdf, resources) = 3676 self.pci_resources(&id, pci_segment_id)?; 3677 3678 // Update the existing virtio node by setting the parent. 3679 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) { 3680 node.parent = Some(id.clone()); 3681 } else { 3682 return Err(DeviceManagerError::MissingNode); 3683 } 3684 3685 // Allows support for one MSI-X vector per queue. It also adds 1 3686 // as we need to take into account the dedicated vector to notify 3687 // about a virtio config change. 
3688 let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16; 3689 3690 // Create the AccessPlatform trait from the implementation IommuMapping. 3691 // This will provide address translation for any virtio device sitting 3692 // behind a vIOMMU. 3693 let mut access_platform: Option<Arc<dyn AccessPlatform>> = None; 3694 3695 if let Some(mapping) = iommu_mapping { 3696 access_platform = Some(Arc::new(AccessPlatformMapping::new( 3697 pci_device_bdf.into(), 3698 mapping.clone(), 3699 ))); 3700 } 3701 3702 // If SEV-SNP is enabled create the AccessPlatform from SevSnpPageAccessProxy 3703 #[cfg(feature = "sev_snp")] 3704 if self.config.lock().unwrap().is_sev_snp_enabled() { 3705 access_platform = Some(Arc::new(SevSnpPageAccessProxy::new( 3706 self.address_manager.vm.clone(), 3707 ))); 3708 } 3709 3710 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3711 3712 // Map DMA ranges if a DMA handler is available and if the device is 3713 // not attached to a virtual IOMMU. 3714 if let Some(dma_handler) = &dma_handler { 3715 if iommu_mapping.is_some() { 3716 if let Some(iommu) = &self.iommu_device { 3717 iommu 3718 .lock() 3719 .unwrap() 3720 .add_external_mapping(pci_device_bdf.into(), dma_handler.clone()); 3721 } else { 3722 return Err(DeviceManagerError::MissingVirtualIommu); 3723 } 3724 } else { 3725 // Let every virtio-mem device handle the DMA map/unmap through the 3726 // DMA handler provided. 3727 for virtio_mem_device in self.virtio_mem_devices.iter() { 3728 virtio_mem_device 3729 .lock() 3730 .unwrap() 3731 .add_dma_mapping_handler( 3732 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3733 dma_handler.clone(), 3734 ) 3735 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3736 } 3737 3738 // Do not register virtio-mem regions, as they are handled directly by 3739 // virtio-mem devices. 
3740 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3741 for region in zone.regions() { 3742 let gpa = region.start_addr().0; 3743 let size = region.len(); 3744 dma_handler 3745 .map(gpa, gpa, size) 3746 .map_err(DeviceManagerError::VirtioDmaMap)?; 3747 } 3748 } 3749 } 3750 } 3751 3752 let device_type = virtio_device.lock().unwrap().device_type(); 3753 let virtio_pci_device = Arc::new(Mutex::new( 3754 VirtioPciDevice::new( 3755 id.clone(), 3756 memory, 3757 virtio_device, 3758 msix_num, 3759 access_platform, 3760 &self.msi_interrupt_manager, 3761 pci_device_bdf.into(), 3762 self.activate_evt 3763 .try_clone() 3764 .map_err(DeviceManagerError::EventFd)?, 3765 // All device types *except* virtio block devices should be allocated a 64-bit bar 3766 // The block devices should be given a 32-bit BAR so that they are easily accessible 3767 // to firmware without requiring excessive identity mapping. 3768 // The exception being if not on the default PCI segment. 3769 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32, 3770 dma_handler, 3771 self.pending_activations.clone(), 3772 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()), 3773 ) 3774 .map_err(DeviceManagerError::VirtioDevice)?, 3775 )); 3776 3777 let new_resources = self.add_pci_device( 3778 virtio_pci_device.clone(), 3779 virtio_pci_device.clone(), 3780 pci_segment_id, 3781 pci_device_bdf, 3782 resources, 3783 )?; 3784 3785 let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr(); 3786 for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) { 3787 let io_addr = IoEventAddress::Mmio(addr); 3788 self.address_manager 3789 .vm 3790 .register_ioevent(event, &io_addr, None) 3791 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?; 3792 } 3793 3794 // Update the device tree with correct resource information. 
3795 node.resources = new_resources; 3796 node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>); 3797 node.pci_bdf = Some(pci_device_bdf); 3798 node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device)); 3799 self.device_tree.lock().unwrap().insert(id, node); 3800 3801 Ok(pci_device_bdf) 3802 } 3803 3804 fn add_pvpanic_device( 3805 &mut self, 3806 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> { 3807 let id = String::from(PVPANIC_DEVICE_NAME); 3808 let pci_segment_id = 0x0_u16; 3809 3810 info!("Creating pvpanic device {}", id); 3811 3812 let (pci_segment_id, pci_device_bdf, resources) = 3813 self.pci_resources(&id, pci_segment_id)?; 3814 3815 let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); 3816 3817 let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot) 3818 .map_err(DeviceManagerError::PvPanicCreate)?; 3819 3820 let pvpanic_device = Arc::new(Mutex::new(pvpanic_device)); 3821 3822 let new_resources = self.add_pci_device( 3823 pvpanic_device.clone(), 3824 pvpanic_device.clone(), 3825 pci_segment_id, 3826 pci_device_bdf, 3827 resources, 3828 )?; 3829 3830 let mut node = device_node!(id, pvpanic_device); 3831 3832 node.resources = new_resources; 3833 node.pci_bdf = Some(pci_device_bdf); 3834 node.pci_device_handle = None; 3835 3836 self.device_tree.lock().unwrap().insert(id, node); 3837 3838 Ok(Some(pvpanic_device)) 3839 } 3840 3841 fn pci_resources( 3842 &self, 3843 id: &str, 3844 pci_segment_id: u16, 3845 ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> { 3846 // Look for the id in the device tree. If it can be found, that means 3847 // the device is being restored, otherwise it's created from scratch. 
3848 let (pci_device_bdf, resources) = 3849 if let Some(node) = self.device_tree.lock().unwrap().get(id) { 3850 info!("Restoring virtio-pci {} resources", id); 3851 let pci_device_bdf: PciBdf = node 3852 .pci_bdf 3853 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3854 (Some(pci_device_bdf), Some(node.resources.clone())) 3855 } else { 3856 (None, None) 3857 }; 3858 3859 Ok(if let Some(pci_device_bdf) = pci_device_bdf { 3860 let pci_segment_id = pci_device_bdf.segment(); 3861 3862 self.pci_segments[pci_segment_id as usize] 3863 .pci_bus 3864 .lock() 3865 .unwrap() 3866 .get_device_id(pci_device_bdf.device() as usize) 3867 .map_err(DeviceManagerError::GetPciDeviceId)?; 3868 3869 (pci_segment_id, pci_device_bdf, resources) 3870 } else { 3871 let pci_device_bdf = self.pci_segments[pci_segment_id as usize].next_device_bdf()?; 3872 3873 (pci_segment_id, pci_device_bdf, None) 3874 }) 3875 } 3876 3877 #[cfg(target_arch = "x86_64")] 3878 pub fn io_bus(&self) -> &Arc<Bus> { 3879 &self.address_manager.io_bus 3880 } 3881 3882 pub fn mmio_bus(&self) -> &Arc<Bus> { 3883 &self.address_manager.mmio_bus 3884 } 3885 3886 pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> { 3887 &self.address_manager.allocator 3888 } 3889 3890 pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> { 3891 self.interrupt_controller 3892 .as_ref() 3893 .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>) 3894 } 3895 3896 pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> { 3897 &self.pci_segments 3898 } 3899 3900 #[cfg(target_arch = "aarch64")] 3901 pub fn cmdline_additions(&self) -> &[String] { 3902 self.cmdline_additions.as_slice() 3903 } 3904 3905 pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> { 3906 for handle in self.virtio_devices.iter() { 3907 handle 3908 .virtio_device 3909 .lock() 3910 .unwrap() 3911 .add_memory_region(new_region) 3912 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?; 3913 
3914 if let Some(dma_handler) = &handle.dma_handler { 3915 if !handle.iommu { 3916 let gpa = new_region.start_addr().0; 3917 let size = new_region.len(); 3918 dma_handler 3919 .map(gpa, gpa, size) 3920 .map_err(DeviceManagerError::VirtioDmaMap)?; 3921 } 3922 } 3923 } 3924 3925 // Take care of updating the memory for VFIO PCI devices. 3926 if let Some(vfio_container) = &self.vfio_container { 3927 vfio_container 3928 .vfio_dma_map( 3929 new_region.start_addr().raw_value(), 3930 new_region.len(), 3931 new_region.as_ptr() as u64, 3932 ) 3933 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?; 3934 } 3935 3936 // Take care of updating the memory for vfio-user devices. 3937 { 3938 let device_tree = self.device_tree.lock().unwrap(); 3939 for pci_device_node in device_tree.pci_devices() { 3940 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node 3941 .pci_device_handle 3942 .as_ref() 3943 .ok_or(DeviceManagerError::MissingPciDevice)? 3944 { 3945 vfio_user_pci_device 3946 .lock() 3947 .unwrap() 3948 .dma_map(new_region) 3949 .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?; 3950 } 3951 } 3952 } 3953 3954 Ok(()) 3955 } 3956 3957 pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> { 3958 for mut activator in self.pending_activations.lock().unwrap().drain(..) 
{ 3959 activator 3960 .activate() 3961 .map_err(DeviceManagerError::VirtioActivate)?; 3962 } 3963 Ok(()) 3964 } 3965 3966 pub fn notify_hotplug( 3967 &self, 3968 _notification_type: AcpiNotificationFlags, 3969 ) -> DeviceManagerResult<()> { 3970 return self 3971 .ged_notification_device 3972 .as_ref() 3973 .unwrap() 3974 .lock() 3975 .unwrap() 3976 .notify(_notification_type) 3977 .map_err(DeviceManagerError::HotPlugNotification); 3978 } 3979 3980 pub fn add_device( 3981 &mut self, 3982 device_cfg: &mut DeviceConfig, 3983 ) -> DeviceManagerResult<PciDeviceInfo> { 3984 self.validate_identifier(&device_cfg.id)?; 3985 3986 if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) { 3987 return Err(DeviceManagerError::InvalidIommuHotplug); 3988 } 3989 3990 let (bdf, device_name) = self.add_passthrough_device(device_cfg)?; 3991 3992 // Update the PCIU bitmap 3993 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3994 3995 Ok(PciDeviceInfo { 3996 id: device_name, 3997 bdf, 3998 }) 3999 } 4000 4001 pub fn add_user_device( 4002 &mut self, 4003 device_cfg: &mut UserDeviceConfig, 4004 ) -> DeviceManagerResult<PciDeviceInfo> { 4005 self.validate_identifier(&device_cfg.id)?; 4006 4007 let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?; 4008 4009 // Update the PCIU bitmap 4010 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 4011 4012 Ok(PciDeviceInfo { 4013 id: device_name, 4014 bdf, 4015 }) 4016 } 4017 4018 pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> { 4019 // The node can be directly a PCI node in case the 'id' refers to a 4020 // VFIO device or a virtio-pci one. 4021 // In case the 'id' refers to a virtio device, we must find the PCI 4022 // node by looking at the parent. 
4023 let device_tree = self.device_tree.lock().unwrap(); 4024 let node = device_tree 4025 .get(&id) 4026 .ok_or(DeviceManagerError::UnknownDeviceId(id))?; 4027 4028 let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() { 4029 node 4030 } else { 4031 let parent = node 4032 .parent 4033 .as_ref() 4034 .ok_or(DeviceManagerError::MissingNode)?; 4035 device_tree 4036 .get(parent) 4037 .ok_or(DeviceManagerError::MissingNode)? 4038 }; 4039 4040 let pci_device_bdf: PciBdf = pci_device_node 4041 .pci_bdf 4042 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 4043 let pci_segment_id = pci_device_bdf.segment(); 4044 4045 let pci_device_handle = pci_device_node 4046 .pci_device_handle 4047 .as_ref() 4048 .ok_or(DeviceManagerError::MissingPciDevice)?; 4049 #[allow(irrefutable_let_patterns)] 4050 if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle { 4051 let device_type = VirtioDeviceType::from( 4052 virtio_pci_device 4053 .lock() 4054 .unwrap() 4055 .virtio_device() 4056 .lock() 4057 .unwrap() 4058 .device_type(), 4059 ); 4060 match device_type { 4061 VirtioDeviceType::Net 4062 | VirtioDeviceType::Block 4063 | VirtioDeviceType::Pmem 4064 | VirtioDeviceType::Fs 4065 | VirtioDeviceType::Vsock => {} 4066 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)), 4067 } 4068 } 4069 4070 // Update the PCID bitmap 4071 self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device(); 4072 4073 Ok(()) 4074 } 4075 4076 pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> { 4077 info!( 4078 "Ejecting device_id = {} on segment_id={}", 4079 device_id, pci_segment_id 4080 ); 4081 4082 // Convert the device ID into the corresponding b/d/f. 4083 let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0); 4084 4085 // Give the PCI device ID back to the PCI bus. 
4086 self.pci_segments[pci_segment_id as usize] 4087 .pci_bus 4088 .lock() 4089 .unwrap() 4090 .put_device_id(device_id as usize) 4091 .map_err(DeviceManagerError::PutPciDeviceId)?; 4092 4093 let (pci_device_handle, id) = { 4094 // Remove the device from the device tree along with its children. 4095 let mut device_tree = self.device_tree.lock().unwrap(); 4096 let pci_device_node = device_tree 4097 .remove_node_by_pci_bdf(pci_device_bdf) 4098 .ok_or(DeviceManagerError::MissingPciDevice)?; 4099 4100 // For VFIO and vfio-user the PCI device id is the id. 4101 // For virtio we overwrite it later as we want the id of the 4102 // underlying device. 4103 let mut id = pci_device_node.id; 4104 let pci_device_handle = pci_device_node 4105 .pci_device_handle 4106 .ok_or(DeviceManagerError::MissingPciDevice)?; 4107 if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) { 4108 // The virtio-pci device has a single child 4109 if !pci_device_node.children.is_empty() { 4110 assert_eq!(pci_device_node.children.len(), 1); 4111 let child_id = &pci_device_node.children[0]; 4112 id.clone_from(child_id); 4113 } 4114 } 4115 for child in pci_device_node.children.iter() { 4116 device_tree.remove(child); 4117 } 4118 4119 (pci_device_handle, id) 4120 }; 4121 4122 let mut iommu_attached = false; 4123 if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices { 4124 if iommu_attached_devices.contains(&pci_device_bdf) { 4125 iommu_attached = true; 4126 } 4127 } 4128 4129 let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { 4130 // No need to remove any virtio-mem mapping here as the container outlives all devices 4131 PciDeviceHandle::Vfio(vfio_pci_device) => { 4132 for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() { 4133 self.mmio_regions 4134 .lock() 4135 .unwrap() 4136 .retain(|x| x.start != mmio_region.start) 4137 } 4138 4139 ( 4140 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4141 
Arc::clone(&vfio_pci_device) as Arc<dyn BusDeviceSync>, 4142 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 4143 false, 4144 ) 4145 } 4146 PciDeviceHandle::Virtio(virtio_pci_device) => { 4147 let dev = virtio_pci_device.lock().unwrap(); 4148 let bar_addr = dev.config_bar_addr(); 4149 for (event, addr) in dev.ioeventfds(bar_addr) { 4150 let io_addr = IoEventAddress::Mmio(addr); 4151 self.address_manager 4152 .vm 4153 .unregister_ioevent(event, &io_addr) 4154 .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; 4155 } 4156 4157 if let Some(dma_handler) = dev.dma_handler() { 4158 if !iommu_attached { 4159 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4160 for region in zone.regions() { 4161 let iova = region.start_addr().0; 4162 let size = region.len(); 4163 dma_handler 4164 .unmap(iova, size) 4165 .map_err(DeviceManagerError::VirtioDmaUnmap)?; 4166 } 4167 } 4168 } 4169 } 4170 4171 ( 4172 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4173 Arc::clone(&virtio_pci_device) as Arc<dyn BusDeviceSync>, 4174 Some(dev.virtio_device()), 4175 dev.dma_handler().is_some() && !iommu_attached, 4176 ) 4177 } 4178 PciDeviceHandle::VfioUser(vfio_user_pci_device) => { 4179 let mut dev = vfio_user_pci_device.lock().unwrap(); 4180 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4181 for region in zone.regions() { 4182 dev.dma_unmap(region) 4183 .map_err(DeviceManagerError::VfioUserDmaUnmap)?; 4184 } 4185 } 4186 4187 ( 4188 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>, 4189 Arc::clone(&vfio_user_pci_device) as Arc<dyn BusDeviceSync>, 4190 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 4191 true, 4192 ) 4193 } 4194 }; 4195 4196 if remove_dma_handler { 4197 for virtio_mem_device in self.virtio_mem_devices.iter() { 4198 virtio_mem_device 4199 .lock() 4200 .unwrap() 4201 .remove_dma_mapping_handler(VirtioMemMappingSource::Device( 4202 pci_device_bdf.into(), 4203 
)) 4204 .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?; 4205 } 4206 } 4207 4208 // Free the allocated BARs 4209 pci_device 4210 .lock() 4211 .unwrap() 4212 .free_bars( 4213 &mut self.address_manager.allocator.lock().unwrap(), 4214 &mut self.pci_segments[pci_segment_id as usize] 4215 .mem32_allocator 4216 .lock() 4217 .unwrap(), 4218 &mut self.pci_segments[pci_segment_id as usize] 4219 .mem64_allocator 4220 .lock() 4221 .unwrap(), 4222 ) 4223 .map_err(DeviceManagerError::FreePciBars)?; 4224 4225 // Remove the device from the PCI bus 4226 self.pci_segments[pci_segment_id as usize] 4227 .pci_bus 4228 .lock() 4229 .unwrap() 4230 .remove_by_device(&pci_device) 4231 .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?; 4232 4233 #[cfg(target_arch = "x86_64")] 4234 // Remove the device from the IO bus 4235 self.io_bus() 4236 .remove_by_device(&bus_device) 4237 .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?; 4238 4239 // Remove the device from the MMIO bus 4240 self.mmio_bus() 4241 .remove_by_device(&bus_device) 4242 .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?; 4243 4244 // Remove the device from the list of BusDevice held by the 4245 // DeviceManager. 
4246 self.bus_devices 4247 .retain(|dev| !Arc::ptr_eq(dev, &bus_device)); 4248 4249 // Shutdown and remove the underlying virtio-device if present 4250 if let Some(virtio_device) = virtio_device { 4251 for mapping in virtio_device.lock().unwrap().userspace_mappings() { 4252 self.memory_manager 4253 .lock() 4254 .unwrap() 4255 .remove_userspace_mapping( 4256 mapping.addr.raw_value(), 4257 mapping.len, 4258 mapping.host_addr, 4259 mapping.mergeable, 4260 mapping.mem_slot, 4261 ) 4262 .map_err(DeviceManagerError::MemoryManager)?; 4263 } 4264 4265 virtio_device.lock().unwrap().shutdown(); 4266 4267 self.virtio_devices 4268 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device)); 4269 } 4270 4271 event!( 4272 "vm", 4273 "device-removed", 4274 "id", 4275 &id, 4276 "bdf", 4277 pci_device_bdf.to_string() 4278 ); 4279 4280 // At this point, the device has been removed from all the list and 4281 // buses where it was stored. At the end of this function, after 4282 // any_device, bus_device and pci_device are released, the actual 4283 // device will be dropped. 4284 Ok(()) 4285 } 4286 4287 fn hotplug_virtio_pci_device( 4288 &mut self, 4289 handle: MetaVirtioDevice, 4290 ) -> DeviceManagerResult<PciDeviceInfo> { 4291 // Add the virtio device to the device manager list. This is important 4292 // as the list is used to notify virtio devices about memory updates 4293 // for instance. 
4294 self.virtio_devices.push(handle.clone()); 4295 4296 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 4297 self.iommu_mapping.clone() 4298 } else { 4299 None 4300 }; 4301 4302 let bdf = self.add_virtio_pci_device( 4303 handle.virtio_device, 4304 &mapping, 4305 handle.id.clone(), 4306 handle.pci_segment, 4307 handle.dma_handler, 4308 )?; 4309 4310 // Update the PCIU bitmap 4311 self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 4312 4313 Ok(PciDeviceInfo { id: handle.id, bdf }) 4314 } 4315 4316 fn is_iommu_segment(&self, pci_segment_id: u16) -> bool { 4317 self.config 4318 .lock() 4319 .as_ref() 4320 .unwrap() 4321 .platform 4322 .as_ref() 4323 .map(|pc| { 4324 pc.iommu_segments 4325 .as_ref() 4326 .map(|v| v.contains(&pci_segment_id)) 4327 .unwrap_or_default() 4328 }) 4329 .unwrap_or_default() 4330 } 4331 4332 pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> { 4333 self.validate_identifier(&disk_cfg.id)?; 4334 4335 if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) { 4336 return Err(DeviceManagerError::InvalidIommuHotplug); 4337 } 4338 4339 let device = self.make_virtio_block_device(disk_cfg)?; 4340 self.hotplug_virtio_pci_device(device) 4341 } 4342 4343 pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> { 4344 self.validate_identifier(&fs_cfg.id)?; 4345 4346 let device = self.make_virtio_fs_device(fs_cfg)?; 4347 self.hotplug_virtio_pci_device(device) 4348 } 4349 4350 pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> { 4351 self.validate_identifier(&pmem_cfg.id)?; 4352 4353 if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) { 4354 return Err(DeviceManagerError::InvalidIommuHotplug); 4355 } 4356 4357 let device = self.make_virtio_pmem_device(pmem_cfg)?; 4358 self.hotplug_virtio_pci_device(device) 4359 } 4360 4361 pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> 
DeviceManagerResult<PciDeviceInfo> { 4362 self.validate_identifier(&net_cfg.id)?; 4363 4364 if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) { 4365 return Err(DeviceManagerError::InvalidIommuHotplug); 4366 } 4367 4368 let device = self.make_virtio_net_device(net_cfg)?; 4369 self.hotplug_virtio_pci_device(device) 4370 } 4371 4372 pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> { 4373 self.validate_identifier(&vdpa_cfg.id)?; 4374 4375 if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) { 4376 return Err(DeviceManagerError::InvalidIommuHotplug); 4377 } 4378 4379 let device = self.make_vdpa_device(vdpa_cfg)?; 4380 self.hotplug_virtio_pci_device(device) 4381 } 4382 4383 pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> { 4384 self.validate_identifier(&vsock_cfg.id)?; 4385 4386 if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) { 4387 return Err(DeviceManagerError::InvalidIommuHotplug); 4388 } 4389 4390 let device = self.make_virtio_vsock_device(vsock_cfg)?; 4391 self.hotplug_virtio_pci_device(device) 4392 } 4393 4394 pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> { 4395 let mut counters = HashMap::new(); 4396 4397 for handle in &self.virtio_devices { 4398 let virtio_device = handle.virtio_device.lock().unwrap(); 4399 if let Some(device_counters) = virtio_device.counters() { 4400 counters.insert(handle.id.clone(), device_counters.clone()); 4401 } 4402 } 4403 4404 counters 4405 } 4406 4407 pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> { 4408 if let Some(balloon) = &self.balloon { 4409 return balloon 4410 .lock() 4411 .unwrap() 4412 .resize(size) 4413 .map_err(DeviceManagerError::VirtioBalloonResize); 4414 } 4415 4416 warn!("No balloon setup: Can't resize the balloon"); 4417 Err(DeviceManagerError::MissingVirtioBalloon) 4418 } 4419 4420 pub fn balloon_size(&self) -> u64 { 4421 if let 
Some(balloon) = &self.balloon { 4422 return balloon.lock().unwrap().get_actual(); 4423 } 4424 4425 0 4426 } 4427 4428 pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> { 4429 self.device_tree.clone() 4430 } 4431 4432 #[cfg(target_arch = "x86_64")] 4433 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 4434 self.ged_notification_device 4435 .as_ref() 4436 .unwrap() 4437 .lock() 4438 .unwrap() 4439 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4440 .map_err(DeviceManagerError::PowerButtonNotification) 4441 } 4442 4443 #[cfg(target_arch = "aarch64")] 4444 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 4445 // There are two use cases: 4446 // 1. Users will use direct kernel boot with device tree. 4447 // 2. Users will use ACPI+UEFI boot. 4448 4449 // Trigger a GPIO pin 3 event to satisfy use case 1. 4450 self.gpio_device 4451 .as_ref() 4452 .unwrap() 4453 .lock() 4454 .unwrap() 4455 .trigger_key(3) 4456 .map_err(DeviceManagerError::AArch64PowerButtonNotification)?; 4457 // Trigger a GED power button event to satisfy use case 2. 
4458 return self 4459 .ged_notification_device 4460 .as_ref() 4461 .unwrap() 4462 .lock() 4463 .unwrap() 4464 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4465 .map_err(DeviceManagerError::PowerButtonNotification); 4466 } 4467 4468 pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> { 4469 &self.iommu_attached_devices 4470 } 4471 4472 fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> { 4473 if let Some(id) = id { 4474 if id.starts_with("__") { 4475 return Err(DeviceManagerError::InvalidIdentifier(id.clone())); 4476 } 4477 4478 if self.device_tree.lock().unwrap().contains_key(id) { 4479 return Err(DeviceManagerError::IdentifierNotUnique(id.clone())); 4480 } 4481 } 4482 4483 Ok(()) 4484 } 4485 4486 pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses { 4487 &self.acpi_platform_addresses 4488 } 4489 } 4490 4491 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> { 4492 for (numa_node_id, numa_node) in numa_nodes.iter() { 4493 if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) { 4494 return Some(*numa_node_id); 4495 } 4496 } 4497 4498 None 4499 } 4500 4501 fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 { 4502 for (numa_node_id, numa_node) in numa_nodes.iter() { 4503 if numa_node.pci_segments.contains(&pci_segment_id) { 4504 return *numa_node_id; 4505 } 4506 } 4507 4508 0 4509 } 4510 4511 struct TpmDevice {} 4512 4513 impl Aml for TpmDevice { 4514 fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) { 4515 aml::Device::new( 4516 "TPM2".into(), 4517 vec![ 4518 &aml::Name::new("_HID".into(), &"MSFT0101"), 4519 &aml::Name::new("_STA".into(), &(0xF_usize)), 4520 &aml::Name::new( 4521 "_CRS".into(), 4522 &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new( 4523 true, 4524 layout::TPM_START.0 as u32, 4525 layout::TPM_SIZE as u32, 4526 )]), 4527 ), 4528 ], 4529 ) 4530 .to_aml_bytes(sink) 4531 } 
}

impl Aml for DeviceManager {
    /// Emits the ACPI description of the device manager: the PCI hotplug
    /// controller (PHPR), every PCI segment, the motherboard resources,
    /// the serial port (COM1), the S5 sleep state, the power button, the
    /// optional TPM, and finally the GED notification device.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // One PCNT method call per PCI segment; PSCN below invokes them all
        // to rescan every segment.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write PCI bus number (in first argument) to I/O port via field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            // Fix: read the mode directly instead of cloning the whole serial
            // config (and the device-info map below), matching the x86_64-path
            // access a few lines down.
            if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}

impl Pausable for DeviceManager {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        // Resume every migratable device registered in the device tree.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        // The DeviceManager snapshot starts from its own serialized state...
        let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let mut migratable = migratable.lock().unwrap();
                snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
            }
        }

        Ok(snapshot)
    }
}

impl Transportable for DeviceManager {}

impl Migratable for DeviceManager {
    // Each Migratable hook simply fans out to every migratable device
    // recorded in the device tree.
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        // Merge the per-device dirty range tables into a single table.
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_migration()?;
            }
        }
        Ok(())
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}

// Layout of the PCI hotplug controller MMIO register block. The offsets and
// sizes mirror the 32-bit PCIU/PCID/B0EJ/PSEG entries declared in the ACPI
// "PCST" field above.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;

impl BusDevice for DeviceManager {
    // Guest reads of the hotplug register block. PCIU and PCID are
    // read-to-clear: reading returns the pending bitmap for the currently
    // selected segment and resets it.
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_up
                        .to_le_bytes(),
                );
                // Clear the PCIU bitmap
                self.pci_segments[self.selected_segment].pci_devices_up = 0;
            }
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_down
                        .to_le_bytes(),
                );
                // Clear the PCID bitmap
                self.pci_segments[self.selected_segment].pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty
bitmap since the eject is always 4849 // taken care of right away during a write access. 4850 data.fill(0); 4851 } 4852 PSEG_FIELD_OFFSET => { 4853 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4854 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes()); 4855 } 4856 _ => error!( 4857 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4858 base, offset 4859 ), 4860 } 4861 4862 debug!( 4863 "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}", 4864 base, offset, data 4865 ) 4866 } 4867 4868 fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> { 4869 match offset { 4870 B0EJ_FIELD_OFFSET => { 4871 assert!(data.len() == B0EJ_FIELD_SIZE); 4872 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4873 data_array.copy_from_slice(data); 4874 let mut slot_bitmap = u32::from_le_bytes(data_array); 4875 4876 while slot_bitmap > 0 { 4877 let slot_id = slot_bitmap.trailing_zeros(); 4878 if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) { 4879 error!("Failed ejecting device {}: {:?}", slot_id, e); 4880 } 4881 slot_bitmap &= !(1 << slot_id); 4882 } 4883 } 4884 PSEG_FIELD_OFFSET => { 4885 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4886 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4887 data_array.copy_from_slice(data); 4888 let selected_segment = u32::from_le_bytes(data_array) as usize; 4889 if selected_segment >= self.pci_segments.len() { 4890 error!( 4891 "Segment selection out of range: {} >= {}", 4892 selected_segment, 4893 self.pci_segments.len() 4894 ); 4895 return None; 4896 } 4897 self.selected_segment = selected_segment; 4898 } 4899 _ => error!( 4900 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4901 base, offset 4902 ), 4903 } 4904 4905 debug!( 4906 "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}", 4907 base, offset, data 4908 ); 4909 4910 None 4911 } 4912 } 4913 4914 impl Drop for DeviceManager { 4915 fn drop(&mut self) { 4916 // Wake up the DeviceManager threads (mainly virtio 
device workers), 4917 // to avoid deadlock on waiting for paused/parked worker threads. 4918 if let Err(e) = self.resume() { 4919 error!("Error resuming DeviceManager: {:?}", e); 4920 } 4921 4922 for handle in self.virtio_devices.drain(..) { 4923 handle.virtio_device.lock().unwrap().shutdown(); 4924 } 4925 4926 if let Some(termios) = *self.original_termios_opt.lock().unwrap() { 4927 // SAFETY: FFI call 4928 let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) }; 4929 } 4930 } 4931 } 4932 4933 #[cfg(test)] 4934 mod tests { 4935 use super::*; 4936 4937 #[test] 4938 fn test_create_mmio_allocators() { 4939 let res = create_mmio_allocators(0x100000, 0x400000, 1, vec![1], 4 << 10); 4940 assert_eq!(res.len(), 1); 4941 assert_eq!( 4942 res[0].lock().unwrap().base(), 4943 vm_memory::GuestAddress(0x100000) 4944 ); 4945 assert_eq!( 4946 res[0].lock().unwrap().end(), 4947 vm_memory::GuestAddress(0x3fffff) 4948 ); 4949 4950 let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![1, 1], 4 << 10); 4951 assert_eq!(res.len(), 2); 4952 assert_eq!( 4953 res[0].lock().unwrap().base(), 4954 vm_memory::GuestAddress(0x100000) 4955 ); 4956 assert_eq!( 4957 res[0].lock().unwrap().end(), 4958 vm_memory::GuestAddress(0x27ffff) 4959 ); 4960 assert_eq!( 4961 res[1].lock().unwrap().base(), 4962 vm_memory::GuestAddress(0x280000) 4963 ); 4964 assert_eq!( 4965 res[1].lock().unwrap().end(), 4966 vm_memory::GuestAddress(0x3fffff) 4967 ); 4968 4969 let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![2, 1], 4 << 10); 4970 assert_eq!(res.len(), 2); 4971 assert_eq!( 4972 res[0].lock().unwrap().base(), 4973 vm_memory::GuestAddress(0x100000) 4974 ); 4975 assert_eq!( 4976 res[0].lock().unwrap().end(), 4977 vm_memory::GuestAddress(0x2fffff) 4978 ); 4979 assert_eq!( 4980 res[1].lock().unwrap().base(), 4981 vm_memory::GuestAddress(0x300000) 4982 ); 4983 assert_eq!( 4984 res[1].lock().unwrap().end(), 4985 vm_memory::GuestAddress(0x3fffff) 4986 ); 4987 } 4988 } 4989