1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use crate::config::{ 13 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 14 VdpaConfig, VhostMode, VmConfig, VsockConfig, 15 }; 16 use crate::console_devices::{ConsoleDeviceError, ConsoleInfo, ConsoleOutput}; 17 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; 18 use crate::device_tree::{DeviceNode, DeviceTree}; 19 use crate::interrupt::LegacyUserspaceInterruptManager; 20 use crate::interrupt::MsiInterruptManager; 21 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 22 use crate::pci_segment::PciSegment; 23 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 24 use crate::vm_config::DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT; 25 use crate::GuestRegionMmap; 26 use crate::PciDeviceInfo; 27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; 28 use acpi_tables::sdt::GenericAddress; 29 use acpi_tables::{aml, Aml}; 30 use anyhow::anyhow; 31 use arch::layout; 32 #[cfg(target_arch = "x86_64")] 33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 34 use arch::NumaNodes; 35 #[cfg(target_arch = "aarch64")] 36 use arch::{DeviceType, MmioDeviceInfo}; 37 use block::{ 38 async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type, 39 fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio, 40 raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType, 41 }; 42 #[cfg(feature = "io_uring")] 43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; 44 #[cfg(target_arch = 
"x86_64")] 45 use devices::debug_console::DebugConsole; 46 #[cfg(target_arch = "aarch64")] 47 use devices::gic; 48 #[cfg(target_arch = "x86_64")] 49 use devices::ioapic; 50 #[cfg(target_arch = "aarch64")] 51 use devices::legacy::Pl011; 52 #[cfg(feature = "pvmemcontrol")] 53 use devices::pvmemcontrol::{PvmemcontrolBusDevice, PvmemcontrolPciDevice}; 54 use devices::{ 55 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 56 }; 57 use hypervisor::IoEventAddress; 58 use libc::{ 59 tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE, 60 TCSANOW, 61 }; 62 use pci::{ 63 DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping, 64 VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError, 65 }; 66 use rate_limiter::group::RateLimiterGroup; 67 use seccompiler::SeccompAction; 68 use serde::{Deserialize, Serialize}; 69 use std::collections::{BTreeMap, BTreeSet, HashMap}; 70 use std::fs::{File, OpenOptions}; 71 use std::io::{self, stdout, IsTerminal, Seek, SeekFrom}; 72 use std::num::Wrapping; 73 use std::os::unix::fs::OpenOptionsExt; 74 use std::os::unix::io::{AsRawFd, FromRawFd}; 75 use std::path::PathBuf; 76 use std::result; 77 use std::sync::{Arc, Mutex}; 78 use std::time::Instant; 79 use tracer::trace_scoped; 80 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 81 use virtio_devices::transport::VirtioTransport; 82 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 83 use virtio_devices::vhost_user::VhostUserConfig; 84 use virtio_devices::{ 85 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 86 }; 87 use virtio_devices::{Endpoint, IommuMapping}; 88 use vm_allocator::{AddressAllocator, SystemAllocator}; 89 use vm_device::dma_mapping::ExternalDmaMapping; 90 use vm_device::interrupt::{ 91 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 92 }; 93 use vm_device::{Bus, 
BusDevice, BusDeviceSync, Resource};
use vm_memory::guest_memory::FileOffset;
use vm_memory::GuestMemoryRegion;
use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
#[cfg(target_arch = "x86_64")]
use vm_memory::{GuestAddressSpace, GuestMemory};
use vm_migration::{
    protocol::MemoryRangeTable, snapshot_from_id, state_from_id, Migratable, MigratableError,
    Pausable, Snapshot, SnapshotData, Snapshottable, Transportable,
};
use vm_virtio::AccessPlatform;
use vm_virtio::VirtioDeviceType;
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
use {devices::debug_console, devices::legacy::Serial};

// 4 KiB window reserved per AArch64 MMIO platform device.
// NOTE(review): usage sites are outside this chunk — confirm before changing.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name.
// The double underscore prefix keeps these from colliding with the
// single-underscore prefixes generated for user-nameable devices below.
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "x86_64")]
const DEBUGCON_DEVICE_NAME: &str = "__debug_console";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
#[cfg(feature = "pvmemcontrol")]
const PVMEMCONTROL_DEVICE_NAME: &str = "__pvmemcontrol";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
const PVPANIC_DEVICE_NAME: &str = "__pvpanic";

// Devices that the user may name and for which we generate
// identifiers if the user doesn't give one
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const
VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user"; 139 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci"; 140 141 /// Errors associated with device manager 142 #[derive(Debug)] 143 pub enum DeviceManagerError { 144 /// Cannot create EventFd. 145 EventFd(io::Error), 146 147 /// Cannot open disk path 148 Disk(io::Error), 149 150 /// Cannot create vhost-user-net device 151 CreateVhostUserNet(virtio_devices::vhost_user::Error), 152 153 /// Cannot create virtio-blk device 154 CreateVirtioBlock(io::Error), 155 156 /// Cannot create virtio-net device 157 CreateVirtioNet(virtio_devices::net::Error), 158 159 /// Cannot create virtio-console device 160 CreateVirtioConsole(io::Error), 161 162 /// Cannot create virtio-rng device 163 CreateVirtioRng(io::Error), 164 165 /// Cannot create virtio-fs device 166 CreateVirtioFs(virtio_devices::vhost_user::Error), 167 168 /// Virtio-fs device was created without a socket. 169 NoVirtioFsSock, 170 171 /// Cannot create vhost-user-blk device 172 CreateVhostUserBlk(virtio_devices::vhost_user::Error), 173 174 /// Cannot create virtio-pmem device 175 CreateVirtioPmem(io::Error), 176 177 /// Cannot create vDPA device 178 CreateVdpa(virtio_devices::vdpa::Error), 179 180 /// Cannot create virtio-vsock device 181 CreateVirtioVsock(io::Error), 182 183 /// Cannot create tpm device 184 CreateTpmDevice(anyhow::Error), 185 186 /// Failed to convert Path to &str for the vDPA device. 187 CreateVdpaConvertPath, 188 189 /// Failed to convert Path to &str for the virtio-vsock device. 
190 CreateVsockConvertPath, 191 192 /// Cannot create virtio-vsock backend 193 CreateVsockBackend(virtio_devices::vsock::VsockUnixError), 194 195 /// Cannot create virtio-iommu device 196 CreateVirtioIommu(io::Error), 197 198 /// Cannot create virtio-balloon device 199 CreateVirtioBalloon(io::Error), 200 201 /// Cannot create pvmemcontrol device 202 #[cfg(feature = "pvmemcontrol")] 203 CreatePvmemcontrol(io::Error), 204 205 /// Cannot create virtio-watchdog device 206 CreateVirtioWatchdog(io::Error), 207 208 /// Failed to parse disk image format 209 DetectImageType(io::Error), 210 211 /// Cannot open qcow disk path 212 QcowDeviceCreate(qcow::Error), 213 214 /// Cannot create serial manager 215 CreateSerialManager(SerialManagerError), 216 217 /// Cannot spawn the serial manager thread 218 SpawnSerialManager(SerialManagerError), 219 220 /// Cannot open tap interface 221 OpenTap(net_util::TapError), 222 223 /// Cannot allocate IRQ. 224 AllocateIrq, 225 226 /// Cannot configure the IRQ. 227 Irq(vmm_sys_util::errno::Error), 228 229 /// Cannot allocate PCI BARs 230 AllocateBars(pci::PciDeviceError), 231 232 /// Could not free the BARs associated with a PCI device. 233 FreePciBars(pci::PciDeviceError), 234 235 /// Cannot register ioevent. 236 RegisterIoevent(anyhow::Error), 237 238 /// Cannot unregister ioevent. 
239 UnRegisterIoevent(anyhow::Error), 240 241 /// Cannot create virtio device 242 VirtioDevice(virtio_devices::transport::VirtioPciDeviceError), 243 244 /// Cannot add PCI device 245 AddPciDevice(pci::PciRootError), 246 247 /// Cannot open persistent memory file 248 PmemFileOpen(io::Error), 249 250 /// Cannot set persistent memory file size 251 PmemFileSetLen(io::Error), 252 253 /// Cannot find a memory range for persistent memory 254 PmemRangeAllocation, 255 256 /// Cannot find a memory range for virtio-fs 257 FsRangeAllocation, 258 259 /// Error creating serial output file 260 SerialOutputFileOpen(io::Error), 261 262 #[cfg(target_arch = "x86_64")] 263 /// Error creating debug-console output file 264 DebugconOutputFileOpen(io::Error), 265 266 /// Error creating console output file 267 ConsoleOutputFileOpen(io::Error), 268 269 /// Error creating serial pty 270 SerialPtyOpen(io::Error), 271 272 /// Error creating console pty 273 ConsolePtyOpen(io::Error), 274 275 /// Error creating console pty 276 DebugconPtyOpen(io::Error), 277 278 /// Error setting pty raw mode 279 SetPtyRaw(ConsoleDeviceError), 280 281 /// Error getting pty peer 282 GetPtyPeer(vmm_sys_util::errno::Error), 283 284 /// Cannot create a VFIO device 285 VfioCreate(vfio_ioctls::VfioError), 286 287 /// Cannot create a VFIO PCI device 288 VfioPciCreate(pci::VfioPciError), 289 290 /// Failed to map VFIO MMIO region. 291 VfioMapRegion(pci::VfioPciError), 292 293 /// Failed to DMA map VFIO device. 294 VfioDmaMap(vfio_ioctls::VfioError), 295 296 /// Failed to DMA unmap VFIO device. 297 VfioDmaUnmap(pci::VfioPciError), 298 299 /// Failed to create the passthrough device. 300 CreatePassthroughDevice(anyhow::Error), 301 302 /// Failed to memory map. 303 Mmap(io::Error), 304 305 /// Cannot add legacy device to Bus. 
306 BusError(vm_device::BusError), 307 308 /// Failed to allocate IO port 309 AllocateIoPort, 310 311 /// Failed to allocate MMIO address 312 AllocateMmioAddress, 313 314 /// Failed to make hotplug notification 315 HotPlugNotification(io::Error), 316 317 /// Error from a memory manager operation 318 MemoryManager(MemoryManagerError), 319 320 /// Failed to create new interrupt source group. 321 CreateInterruptGroup(io::Error), 322 323 /// Failed to update interrupt source group. 324 UpdateInterruptGroup(io::Error), 325 326 /// Failed to create interrupt controller. 327 CreateInterruptController(interrupt_controller::Error), 328 329 /// Failed to create a new MmapRegion instance. 330 NewMmapRegion(vm_memory::mmap::MmapRegionError), 331 332 /// Failed to clone a File. 333 CloneFile(io::Error), 334 335 /// Failed to create socket file 336 CreateSocketFile(io::Error), 337 338 /// Failed to spawn the network backend 339 SpawnNetBackend(io::Error), 340 341 /// Failed to spawn the block backend 342 SpawnBlockBackend(io::Error), 343 344 /// Missing PCI bus. 345 NoPciBus, 346 347 /// Could not find an available device name. 348 NoAvailableDeviceName, 349 350 /// Missing PCI device. 351 MissingPciDevice, 352 353 /// Failed to remove a PCI device from the PCI bus. 354 RemoveDeviceFromPciBus(pci::PciRootError), 355 356 /// Failed to remove a bus device from the IO bus. 357 RemoveDeviceFromIoBus(vm_device::BusError), 358 359 /// Failed to remove a bus device from the MMIO bus. 360 RemoveDeviceFromMmioBus(vm_device::BusError), 361 362 /// Failed to find the device corresponding to a specific PCI b/d/f. 363 UnknownPciBdf(u32), 364 365 /// Not allowed to remove this type of device from the VM. 366 RemovalNotAllowed(vm_virtio::VirtioDeviceType), 367 368 /// Failed to find device corresponding to the given identifier. 369 UnknownDeviceId(String), 370 371 /// Failed to find an available PCI device ID. 
372 NextPciDeviceId(pci::PciRootError), 373 374 /// Could not reserve the PCI device ID. 375 GetPciDeviceId(pci::PciRootError), 376 377 /// Could not give the PCI device ID back. 378 PutPciDeviceId(pci::PciRootError), 379 380 /// No disk path was specified when one was expected 381 NoDiskPath, 382 383 /// Failed to update guest memory for virtio device. 384 UpdateMemoryForVirtioDevice(virtio_devices::Error), 385 386 /// Cannot create virtio-mem device 387 CreateVirtioMem(io::Error), 388 389 /// Cannot find a memory range for virtio-mem memory 390 VirtioMemRangeAllocation, 391 392 /// Failed to update guest memory for VFIO PCI device. 393 UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError), 394 395 /// Trying to use a directory for pmem but no size specified 396 PmemWithDirectorySizeMissing, 397 398 /// Trying to use a size that is not multiple of 2MiB 399 PmemSizeNotAligned, 400 401 /// Could not find the node in the device tree. 402 MissingNode, 403 404 /// Resource was already found. 405 ResourceAlreadyExists, 406 407 /// Expected resources for virtio-pmem could not be found. 408 MissingVirtioPmemResources, 409 410 /// Missing PCI b/d/f from the DeviceNode. 411 MissingDeviceNodePciBdf, 412 413 /// No support for device passthrough 414 NoDevicePassthroughSupport, 415 416 /// No socket option support for console device 417 NoSocketOptionSupportForConsoleDevice, 418 419 /// Failed to resize virtio-balloon 420 VirtioBalloonResize(virtio_devices::balloon::Error), 421 422 /// Missing virtio-balloon, can't proceed as expected. 
423 MissingVirtioBalloon, 424 425 /// Missing virtual IOMMU device 426 MissingVirtualIommu, 427 428 /// Failed to do power button notification 429 PowerButtonNotification(io::Error), 430 431 /// Failed to do AArch64 GPIO power button notification 432 #[cfg(target_arch = "aarch64")] 433 AArch64PowerButtonNotification(devices::legacy::GpioDeviceError), 434 435 /// Failed to set O_DIRECT flag to file descriptor 436 SetDirectIo, 437 438 /// Failed to create FixedVhdDiskAsync 439 CreateFixedVhdDiskAsync(io::Error), 440 441 /// Failed to create FixedVhdDiskSync 442 CreateFixedVhdDiskSync(io::Error), 443 444 /// Failed to create QcowDiskSync 445 CreateQcowDiskSync(qcow::Error), 446 447 /// Failed to create FixedVhdxDiskSync 448 CreateFixedVhdxDiskSync(vhdx::VhdxError), 449 450 /// Failed to add DMA mapping handler to virtio-mem device. 451 AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error), 452 453 /// Failed to remove DMA mapping handler from virtio-mem device. 454 RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error), 455 456 /// Failed to create vfio-user client 457 VfioUserCreateClient(vfio_user::Error), 458 459 /// Failed to create VFIO user device 460 VfioUserCreate(VfioUserPciDeviceError), 461 462 /// Failed to map region from VFIO user device into guest 463 VfioUserMapRegion(VfioUserPciDeviceError), 464 465 /// Failed to DMA map VFIO user device. 466 VfioUserDmaMap(VfioUserPciDeviceError), 467 468 /// Failed to DMA unmap VFIO user device. 469 VfioUserDmaUnmap(VfioUserPciDeviceError), 470 471 /// Failed to update memory mappings for VFIO user device 472 UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError), 473 474 /// Cannot duplicate file descriptor 475 DupFd(vmm_sys_util::errno::Error), 476 477 /// Failed to DMA map virtio device. 478 VirtioDmaMap(std::io::Error), 479 480 /// Failed to DMA unmap virtio device. 
481 VirtioDmaUnmap(std::io::Error), 482 483 /// Cannot hotplug device behind vIOMMU 484 InvalidIommuHotplug, 485 486 /// Invalid identifier as it is not unique. 487 IdentifierNotUnique(String), 488 489 /// Invalid identifier 490 InvalidIdentifier(String), 491 492 /// Error activating virtio device 493 VirtioActivate(ActivateError), 494 495 /// Failed retrieving device state from snapshot 496 RestoreGetState(MigratableError), 497 498 /// Cannot create a PvPanic device 499 PvPanicCreate(devices::pvpanic::PvPanicError), 500 501 /// Cannot create a RateLimiterGroup 502 RateLimiterGroupCreate(rate_limiter::group::Error), 503 504 /// Cannot start sigwinch listener 505 StartSigwinchListener(std::io::Error), 506 507 // Invalid console info 508 InvalidConsoleInfo, 509 510 // Invalid console fd 511 InvalidConsoleFd, 512 } 513 514 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>; 515 516 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10; 517 518 #[derive(Default)] 519 pub struct Console { 520 console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>, 521 } 522 523 impl Console { 524 pub fn need_resize(&self) -> bool { 525 if let Some(_resizer) = self.console_resizer.as_ref() { 526 return true; 527 } 528 529 false 530 } 531 532 pub fn update_console_size(&self) { 533 if let Some(resizer) = self.console_resizer.as_ref() { 534 resizer.update_console_size() 535 } 536 } 537 } 538 539 pub(crate) struct AddressManager { 540 pub(crate) allocator: Arc<Mutex<SystemAllocator>>, 541 #[cfg(target_arch = "x86_64")] 542 pub(crate) io_bus: Arc<Bus>, 543 pub(crate) mmio_bus: Arc<Bus>, 544 pub(crate) vm: Arc<dyn hypervisor::Vm>, 545 device_tree: Arc<Mutex<DeviceTree>>, 546 pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 547 pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 548 } 549 550 impl DeviceRelocation for AddressManager { 551 fn move_bar( 552 &self, 553 old_base: u64, 554 new_base: u64, 555 len: u64, 556 pci_dev: &mut dyn PciDevice, 557 
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        // Relocate a PCI BAR: release the old window from the relevant
        // allocator/bus, claim the new one, then patch up any bookkeeping
        // (device tree, ioeventfds, shared-memory mappings) that recorded
        // the old address. Order matters throughout — free before allocate,
        // unregister before register.
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
                    &self.pci_mmio32_allocators
                } else {
                    &self.pci_mmio64_allocators
                };

                // Find the specific allocator that this BAR was allocated from and use it for new one
                for allocator in allocators {
                    let allocator_base = allocator.lock().unwrap().base();
                    let allocator_end = allocator.lock().unwrap().end();

                    if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                        allocator
                            .lock()
                            .unwrap()
                            .free(GuestAddress(old_base), len as GuestUsize);

                        allocator
                            .lock()
                            .unwrap()
                            .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
                            .ok_or_else(|| {
                                io::Error::new(
                                    io::ErrorKind::Other,
                                    "failed allocating new MMIO range",
                                )
                            })?;

                        break;
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    // Only rewrite the one PciBar resource whose type and old
                    // base both match the BAR being moved.
                    if let Resource::PciBar { base, type_, .. } = resource {
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {id} from device tree"),
                ));
            }
        }

        // Virtio-pci devices need extra care: their config BAR carries
        // ioeventfds and their shared-memory BAR is mapped into the guest.
        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The moved BAR is the config BAR: re-register every ioeventfd
                // at its new address (unregister old, then register new).
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {e:?}"),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {e:?}"),
                            )
                        })?;
                }
            } else {
                // Otherwise, if the moved BAR backs the device's shared memory
                // regions, remap the user memory region at the new address.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {e:?}"),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {e:?}"),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {e:?}"),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the device itself record its new BAR base.
        pci_dev.move_bar(old_base, new_base)
    }
}

/// Serializable part of the DeviceManager captured in a snapshot: the device
/// tree plus the device-ID counter needed to keep generated names unique
/// across restore.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

/// A pty's controlling end (`main`) together with its filesystem path.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    // Duplicates the underlying fd via try_clone.
    // NOTE(review): the unwrap panics if fd duplication fails (e.g. fd
    // exhaustion) — confirm callers accept that.
    fn clone(&self) -> Self {
        PtyPair {
            main: self.main.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

/// Handle to a hot-pluggable PCI device, one variant per supported backend.
#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}

/// A virtio device plus the placement/IOMMU metadata DeviceManager tracks for it.
#[derive(Clone)]
struct MetaVirtioDevice {
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // Whether the device sits behind the paravirtualized IOMMU.
    iommu: bool,
    id: String,
    pci_segment: u16,
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}

/// Register addresses of ACPI platform devices, used when building ACPI tables.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub
    sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

/// Proxy that requests page access from the hypervisor before GVA
/// translation on SEV-SNP guests (mshv only).
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
struct SevSnpPageAccessProxy {
    vm: Arc<dyn hypervisor::Vm>,
}

#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl std::fmt::Debug for SevSnpPageAccessProxy {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "SNP Page access proxy")
    }
}

#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl SevSnpPageAccessProxy {
    fn new(vm: Arc<dyn hypervisor::Vm>) -> SevSnpPageAccessProxy {
        SevSnpPageAccessProxy { vm }
    }
}

#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl AccessPlatform for SevSnpPageAccessProxy {
    // GPA translation is the identity here; no page-access request is made.
    fn translate_gpa(&self, base: u64, _size: u64) -> std::result::Result<u64, std::io::Error> {
        Ok(base)
    }

    // Asks the hypervisor to grant access to the pages before returning the
    // address unchanged.
    fn translate_gva(&self, base: u64, size: u64) -> std::result::Result<u64, std::io::Error> {
        self.vm
            .gain_page_access(base, size as u32)
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
        Ok(base)
    }
}

pub struct DeviceManager {
    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // pty foreground status,
    console_resize_pipe: Option<Arc<File>>,

    // To restore on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<dyn BusDeviceSync>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    exit_evt: EventFd,
    reset_evt: EventFd,

    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    acpi_address: GuestAddress,

    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    #[cfg(feature = "pvmemcontrol")]
    pvmemcontrol_devices: Option<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // aio availability if detected
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    snapshot: Option<Snapshot>,

    rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,

    mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
}

/// Splits the MMIO range [start, end] into one aligned allocator per PCI
/// segment, sized proportionally to each segment's weight. Segment i's
/// window starts at `start + (sum of previous weights) * unit` where `unit`
/// is the largest alignment-multiple such that all weighted windows fit.
fn create_mmio_allocators(
    start: u64,
    end: u64,
    num_pci_segments: u16,
    weights: Vec<u32>,
    alignment: u64,
) -> Vec<Arc<Mutex<AddressAllocator>>> {
    let total_weight: u32 = weights.iter().sum();

    // Start each PCI segment mmio range on an aligned boundary
    let pci_segment_mmio_size = (end - start + 1) / (alignment * total_weight as u64) * alignment;

    let mut mmio_allocators = vec![];
    let mut i = 0;
    for segment_id in 0..num_pci_segments as u64 {
        let weight = weights[segment_id as usize] as u64;
        let mmio_start = start + i * pci_segment_mmio_size;
        let mmio_size = pci_segment_mmio_size * weight;
        let allocator = Arc::new(Mutex::new(
            AddressAllocator::new(GuestAddress(mmio_start), mmio_size).unwrap(),
        ));
        mmio_allocators.push(allocator);
        i += weight;
    }

    mmio_allocators
}

impl DeviceManager {
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
        mmio_bus: Arc<Bus>,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        cpu_manager: Arc<Mutex<CpuManager>>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
        snapshot: Option<Snapshot>,
        dynamic: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        // On restore, rehydrate the device tree and ID counter from the
        // snapshot; otherwise start empty.
        let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
            let state: DeviceManagerState = snapshot.to_state().unwrap();
            (
Arc::new(Mutex::new(state.device_tree.clone())), 1020 state.device_id_cnt, 1021 ) 1022 } else { 1023 (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0)) 1024 }; 1025 1026 let num_pci_segments = 1027 if let Some(platform_config) = config.lock().unwrap().platform.as_ref() { 1028 platform_config.num_pci_segments 1029 } else { 1030 1 1031 }; 1032 1033 let mut mmio32_aperture_weights: Vec<u32> = 1034 std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT) 1035 .take(num_pci_segments.into()) 1036 .collect(); 1037 if let Some(pci_segments) = &config.lock().unwrap().pci_segments { 1038 for pci_segment in pci_segments.iter() { 1039 mmio32_aperture_weights[pci_segment.pci_segment as usize] = 1040 pci_segment.mmio32_aperture_weight 1041 } 1042 } 1043 1044 let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0; 1045 let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE; 1046 let pci_mmio32_allocators = create_mmio_allocators( 1047 start_of_mmio32_area, 1048 end_of_mmio32_area, 1049 num_pci_segments, 1050 mmio32_aperture_weights, 1051 4 << 10, 1052 ); 1053 1054 let mut mmio64_aperture_weights: Vec<u32> = 1055 std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT) 1056 .take(num_pci_segments.into()) 1057 .collect(); 1058 if let Some(pci_segments) = &config.lock().unwrap().pci_segments { 1059 for pci_segment in pci_segments.iter() { 1060 mmio64_aperture_weights[pci_segment.pci_segment as usize] = 1061 pci_segment.mmio64_aperture_weight 1062 } 1063 } 1064 1065 let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0; 1066 let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0; 1067 let pci_mmio64_allocators = create_mmio_allocators( 1068 start_of_mmio64_area, 1069 end_of_mmio64_area, 1070 num_pci_segments, 1071 mmio64_aperture_weights, 1072 4 << 30, 1073 ); 1074 1075 let address_manager = Arc::new(AddressManager { 1076 allocator: memory_manager.lock().unwrap().allocator(), 1077 
#[cfg(target_arch = "x86_64")] 1078 io_bus, 1079 mmio_bus, 1080 vm: vm.clone(), 1081 device_tree: Arc::clone(&device_tree), 1082 pci_mmio32_allocators, 1083 pci_mmio64_allocators, 1084 }); 1085 1086 // First we create the MSI interrupt manager, the legacy one is created 1087 // later, after the IOAPIC device creation. 1088 // The reason we create the MSI one first is because the IOAPIC needs it, 1089 // and then the legacy interrupt manager needs an IOAPIC. So we're 1090 // handling a linear dependency chain: 1091 // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager. 1092 let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> = 1093 Arc::new(MsiInterruptManager::new( 1094 Arc::clone(&address_manager.allocator), 1095 vm, 1096 )); 1097 1098 let acpi_address = address_manager 1099 .allocator 1100 .lock() 1101 .unwrap() 1102 .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None) 1103 .ok_or(DeviceManagerError::AllocateIoPort)?; 1104 1105 let mut pci_irq_slots = [0; 32]; 1106 PciSegment::reserve_legacy_interrupts_for_pci_devices( 1107 &address_manager, 1108 &mut pci_irq_slots, 1109 )?; 1110 1111 let mut pci_segments = vec![PciSegment::new_default_segment( 1112 &address_manager, 1113 Arc::clone(&address_manager.pci_mmio32_allocators[0]), 1114 Arc::clone(&address_manager.pci_mmio64_allocators[0]), 1115 &pci_irq_slots, 1116 )?]; 1117 1118 for i in 1..num_pci_segments as usize { 1119 pci_segments.push(PciSegment::new( 1120 i as u16, 1121 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16), 1122 &address_manager, 1123 Arc::clone(&address_manager.pci_mmio32_allocators[i]), 1124 Arc::clone(&address_manager.pci_mmio64_allocators[i]), 1125 &pci_irq_slots, 1126 )?); 1127 } 1128 1129 if dynamic { 1130 let acpi_address = address_manager 1131 .allocator 1132 .lock() 1133 .unwrap() 1134 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None) 1135 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 
1136 1137 address_manager 1138 .mmio_bus 1139 .insert( 1140 cpu_manager.clone(), 1141 acpi_address.0, 1142 CPU_MANAGER_ACPI_SIZE as u64, 1143 ) 1144 .map_err(DeviceManagerError::BusError)?; 1145 1146 cpu_manager.lock().unwrap().set_acpi_address(acpi_address); 1147 } 1148 1149 let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new(); 1150 if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() { 1151 for rate_limit_group_cfg in rate_limit_groups_cfg { 1152 let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config; 1153 let bw = rate_limit_cfg.bandwidth.unwrap_or_default(); 1154 let ops = rate_limit_cfg.ops.unwrap_or_default(); 1155 let mut rate_limit_group = RateLimiterGroup::new( 1156 &rate_limit_group_cfg.id, 1157 bw.size, 1158 bw.one_time_burst.unwrap_or(0), 1159 bw.refill_time, 1160 ops.size, 1161 ops.one_time_burst.unwrap_or(0), 1162 ops.refill_time, 1163 ) 1164 .map_err(DeviceManagerError::RateLimiterGroupCreate)?; 1165 1166 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?; 1167 1168 rate_limit_group.start_thread(exit_evt).unwrap(); 1169 rate_limit_groups 1170 .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group)); 1171 } 1172 } 1173 1174 let device_manager = DeviceManager { 1175 address_manager: Arc::clone(&address_manager), 1176 console: Arc::new(Console::default()), 1177 interrupt_controller: None, 1178 #[cfg(target_arch = "aarch64")] 1179 cmdline_additions: Vec::new(), 1180 ged_notification_device: None, 1181 config, 1182 memory_manager, 1183 cpu_manager, 1184 virtio_devices: Vec::new(), 1185 bus_devices: Vec::new(), 1186 device_id_cnt, 1187 msi_interrupt_manager, 1188 legacy_interrupt_manager: None, 1189 passthrough_device: None, 1190 vfio_container: None, 1191 iommu_device: None, 1192 iommu_mapping: None, 1193 iommu_attached_devices: None, 1194 pci_segments, 1195 device_tree, 1196 exit_evt, 1197 reset_evt, 1198 #[cfg(target_arch = "aarch64")] 1199 id_to_dev_info: 
HashMap::new(), 1200 seccomp_action, 1201 numa_nodes, 1202 balloon: None, 1203 activate_evt: activate_evt 1204 .try_clone() 1205 .map_err(DeviceManagerError::EventFd)?, 1206 acpi_address, 1207 selected_segment: 0, 1208 serial_manager: None, 1209 console_resize_pipe: None, 1210 original_termios_opt: Arc::new(Mutex::new(None)), 1211 virtio_mem_devices: Vec::new(), 1212 #[cfg(target_arch = "aarch64")] 1213 gpio_device: None, 1214 #[cfg(feature = "pvmemcontrol")] 1215 pvmemcontrol_devices: None, 1216 pvpanic_device: None, 1217 force_iommu, 1218 io_uring_supported: None, 1219 aio_supported: None, 1220 boot_id_list, 1221 timestamp, 1222 pending_activations: Arc::new(Mutex::new(Vec::default())), 1223 acpi_platform_addresses: AcpiPlatformAddresses::default(), 1224 snapshot, 1225 rate_limit_groups, 1226 mmio_regions: Arc::new(Mutex::new(Vec::new())), 1227 }; 1228 1229 let device_manager = Arc::new(Mutex::new(device_manager)); 1230 1231 address_manager 1232 .mmio_bus 1233 .insert( 1234 Arc::clone(&device_manager) as Arc<dyn BusDeviceSync>, 1235 acpi_address.0, 1236 DEVICE_MANAGER_ACPI_SIZE as u64, 1237 ) 1238 .map_err(DeviceManagerError::BusError)?; 1239 1240 Ok(device_manager) 1241 } 1242 1243 pub fn console_resize_pipe(&self) -> Option<Arc<File>> { 1244 self.console_resize_pipe.clone() 1245 } 1246 1247 pub fn create_devices( 1248 &mut self, 1249 console_info: Option<ConsoleInfo>, 1250 console_resize_pipe: Option<Arc<File>>, 1251 original_termios_opt: Arc<Mutex<Option<termios>>>, 1252 ) -> DeviceManagerResult<()> { 1253 trace_scoped!("create_devices"); 1254 1255 let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new(); 1256 1257 let interrupt_controller = self.add_interrupt_controller()?; 1258 1259 self.cpu_manager 1260 .lock() 1261 .unwrap() 1262 .set_interrupt_controller(interrupt_controller.clone()); 1263 1264 // Now we can create the legacy interrupt manager, which needs the freshly 1265 // formed IOAPIC device. 
1266 let legacy_interrupt_manager: Arc< 1267 dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>, 1268 > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone( 1269 &interrupt_controller, 1270 ))); 1271 1272 { 1273 if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() { 1274 self.address_manager 1275 .mmio_bus 1276 .insert( 1277 Arc::clone(&self.memory_manager) as Arc<dyn BusDeviceSync>, 1278 acpi_address.0, 1279 MEMORY_MANAGER_ACPI_SIZE as u64, 1280 ) 1281 .map_err(DeviceManagerError::BusError)?; 1282 } 1283 } 1284 1285 #[cfg(target_arch = "x86_64")] 1286 self.add_legacy_devices( 1287 self.reset_evt 1288 .try_clone() 1289 .map_err(DeviceManagerError::EventFd)?, 1290 )?; 1291 1292 #[cfg(target_arch = "aarch64")] 1293 self.add_legacy_devices(&legacy_interrupt_manager)?; 1294 1295 { 1296 self.ged_notification_device = self.add_acpi_devices( 1297 &legacy_interrupt_manager, 1298 self.reset_evt 1299 .try_clone() 1300 .map_err(DeviceManagerError::EventFd)?, 1301 self.exit_evt 1302 .try_clone() 1303 .map_err(DeviceManagerError::EventFd)?, 1304 )?; 1305 } 1306 1307 self.original_termios_opt = original_termios_opt; 1308 1309 self.console = self.add_console_devices( 1310 &legacy_interrupt_manager, 1311 &mut virtio_devices, 1312 console_info, 1313 console_resize_pipe, 1314 )?; 1315 1316 if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() { 1317 let tpm_dev = self.add_tpm_device(tpm.socket.clone())?; 1318 self.bus_devices 1319 .push(Arc::clone(&tpm_dev) as Arc<dyn BusDeviceSync>) 1320 } 1321 self.legacy_interrupt_manager = Some(legacy_interrupt_manager); 1322 1323 virtio_devices.append(&mut self.make_virtio_devices()?); 1324 1325 self.add_pci_devices(virtio_devices.clone())?; 1326 1327 self.virtio_devices = virtio_devices; 1328 1329 // Add pvmemcontrol if required 1330 #[cfg(feature = "pvmemcontrol")] 1331 { 1332 if self.config.lock().unwrap().pvmemcontrol.is_some() { 1333 let (pvmemcontrol_bus_device, pvmemcontrol_pci_device) 
= 1334 self.make_pvmemcontrol_device()?; 1335 self.pvmemcontrol_devices = 1336 Some((pvmemcontrol_bus_device, pvmemcontrol_pci_device)); 1337 } 1338 } 1339 1340 if self.config.clone().lock().unwrap().pvpanic { 1341 self.pvpanic_device = self.add_pvpanic_device()?; 1342 } 1343 1344 Ok(()) 1345 } 1346 1347 fn state(&self) -> DeviceManagerState { 1348 DeviceManagerState { 1349 device_tree: self.device_tree.lock().unwrap().clone(), 1350 device_id_cnt: self.device_id_cnt, 1351 } 1352 } 1353 1354 fn get_msi_iova_space(&mut self) -> (u64, u64) { 1355 #[cfg(target_arch = "aarch64")] 1356 { 1357 let vcpus = self.config.lock().unwrap().cpus.boot_vcpus; 1358 let vgic_config = gic::Gic::create_default_config(vcpus.into()); 1359 ( 1360 vgic_config.msi_addr, 1361 vgic_config.msi_addr + vgic_config.msi_size - 1, 1362 ) 1363 } 1364 #[cfg(target_arch = "x86_64")] 1365 (0xfee0_0000, 0xfeef_ffff) 1366 } 1367 1368 #[cfg(target_arch = "aarch64")] 1369 /// Gets the information of the devices registered up to some point in time. 
1370 pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> { 1371 &self.id_to_dev_info 1372 } 1373 1374 #[allow(unused_variables)] 1375 fn add_pci_devices( 1376 &mut self, 1377 virtio_devices: Vec<MetaVirtioDevice>, 1378 ) -> DeviceManagerResult<()> { 1379 let iommu_id = String::from(IOMMU_DEVICE_NAME); 1380 1381 let iommu_device = if self.config.lock().unwrap().iommu { 1382 let (device, mapping) = virtio_devices::Iommu::new( 1383 iommu_id.clone(), 1384 self.seccomp_action.clone(), 1385 self.exit_evt 1386 .try_clone() 1387 .map_err(DeviceManagerError::EventFd)?, 1388 self.get_msi_iova_space(), 1389 state_from_id(self.snapshot.as_ref(), iommu_id.as_str()) 1390 .map_err(DeviceManagerError::RestoreGetState)?, 1391 ) 1392 .map_err(DeviceManagerError::CreateVirtioIommu)?; 1393 let device = Arc::new(Mutex::new(device)); 1394 self.iommu_device = Some(Arc::clone(&device)); 1395 self.iommu_mapping = Some(mapping); 1396 1397 // Fill the device tree with a new node. In case of restore, we 1398 // know there is nothing to do, so we can simply override the 1399 // existing entry. 
1400 self.device_tree 1401 .lock() 1402 .unwrap() 1403 .insert(iommu_id.clone(), device_node!(iommu_id, device)); 1404 1405 Some(device) 1406 } else { 1407 None 1408 }; 1409 1410 let mut iommu_attached_devices = Vec::new(); 1411 { 1412 for handle in virtio_devices { 1413 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 1414 self.iommu_mapping.clone() 1415 } else { 1416 None 1417 }; 1418 1419 let dev_id = self.add_virtio_pci_device( 1420 handle.virtio_device, 1421 &mapping, 1422 handle.id, 1423 handle.pci_segment, 1424 handle.dma_handler, 1425 )?; 1426 1427 if handle.iommu { 1428 iommu_attached_devices.push(dev_id); 1429 } 1430 } 1431 1432 let mut vfio_iommu_device_ids = self.add_vfio_devices()?; 1433 iommu_attached_devices.append(&mut vfio_iommu_device_ids); 1434 1435 let mut vfio_user_iommu_device_ids = self.add_user_devices()?; 1436 iommu_attached_devices.append(&mut vfio_user_iommu_device_ids); 1437 1438 // Add all devices from forced iommu segments 1439 if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() { 1440 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { 1441 for segment in iommu_segments { 1442 for device in 0..32 { 1443 let bdf = PciBdf::new(*segment, 0, device, 0); 1444 if !iommu_attached_devices.contains(&bdf) { 1445 iommu_attached_devices.push(bdf); 1446 } 1447 } 1448 } 1449 } 1450 } 1451 1452 if let Some(iommu_device) = iommu_device { 1453 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?; 1454 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices)); 1455 } 1456 } 1457 1458 for segment in &self.pci_segments { 1459 #[cfg(target_arch = "x86_64")] 1460 if let Some(pci_config_io) = segment.pci_config_io.as_ref() { 1461 self.bus_devices 1462 .push(Arc::clone(pci_config_io) as Arc<dyn BusDeviceSync>); 1463 } 1464 1465 self.bus_devices 1466 .push(Arc::clone(&segment.pci_config_mmio) as Arc<dyn BusDeviceSync>); 1467 } 1468 1469 Ok(()) 1470 } 1471 1472 
#[cfg(target_arch = "aarch64")] 1473 fn add_interrupt_controller( 1474 &mut self, 1475 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1476 let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new( 1477 gic::Gic::new( 1478 self.config.lock().unwrap().cpus.boot_vcpus, 1479 Arc::clone(&self.msi_interrupt_manager), 1480 self.address_manager.vm.clone(), 1481 ) 1482 .map_err(DeviceManagerError::CreateInterruptController)?, 1483 )); 1484 1485 self.interrupt_controller = Some(interrupt_controller.clone()); 1486 1487 // Restore the vGic if this is in the process of restoration 1488 let id = String::from(gic::GIC_SNAPSHOT_ID); 1489 if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) { 1490 // PMU support is optional. Nothing should be impacted if the PMU initialization failed. 1491 if self 1492 .cpu_manager 1493 .lock() 1494 .unwrap() 1495 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16) 1496 .is_err() 1497 { 1498 info!("Failed to initialize PMU"); 1499 } 1500 1501 let vgic_state = vgic_snapshot 1502 .to_state() 1503 .map_err(DeviceManagerError::RestoreGetState)?; 1504 let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states(); 1505 interrupt_controller 1506 .lock() 1507 .unwrap() 1508 .restore_vgic(vgic_state, &saved_vcpu_states) 1509 .unwrap(); 1510 } 1511 1512 self.device_tree 1513 .lock() 1514 .unwrap() 1515 .insert(id.clone(), device_node!(id, interrupt_controller)); 1516 1517 Ok(interrupt_controller) 1518 } 1519 1520 #[cfg(target_arch = "aarch64")] 1521 pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> { 1522 self.interrupt_controller.as_ref() 1523 } 1524 1525 #[cfg(target_arch = "x86_64")] 1526 fn add_interrupt_controller( 1527 &mut self, 1528 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1529 let id = String::from(IOAPIC_DEVICE_NAME); 1530 1531 // Create IOAPIC 1532 let interrupt_controller = Arc::new(Mutex::new( 1533 ioapic::Ioapic::new( 1534 
id.clone(), 1535 APIC_START, 1536 Arc::clone(&self.msi_interrupt_manager), 1537 state_from_id(self.snapshot.as_ref(), id.as_str()) 1538 .map_err(DeviceManagerError::RestoreGetState)?, 1539 ) 1540 .map_err(DeviceManagerError::CreateInterruptController)?, 1541 )); 1542 1543 self.interrupt_controller = Some(interrupt_controller.clone()); 1544 1545 self.address_manager 1546 .mmio_bus 1547 .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE) 1548 .map_err(DeviceManagerError::BusError)?; 1549 1550 self.bus_devices 1551 .push(Arc::clone(&interrupt_controller) as Arc<dyn BusDeviceSync>); 1552 1553 // Fill the device tree with a new node. In case of restore, we 1554 // know there is nothing to do, so we can simply override the 1555 // existing entry. 1556 self.device_tree 1557 .lock() 1558 .unwrap() 1559 .insert(id.clone(), device_node!(id, interrupt_controller)); 1560 1561 Ok(interrupt_controller) 1562 } 1563 1564 fn add_acpi_devices( 1565 &mut self, 1566 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1567 reset_evt: EventFd, 1568 exit_evt: EventFd, 1569 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> { 1570 let vcpus_kill_signalled = self 1571 .cpu_manager 1572 .lock() 1573 .unwrap() 1574 .vcpus_kill_signalled() 1575 .clone(); 1576 let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new( 1577 exit_evt, 1578 reset_evt, 1579 vcpus_kill_signalled, 1580 ))); 1581 1582 self.bus_devices 1583 .push(Arc::clone(&shutdown_device) as Arc<dyn BusDeviceSync>); 1584 1585 #[cfg(target_arch = "x86_64")] 1586 { 1587 let shutdown_pio_address: u16 = 0x600; 1588 1589 self.address_manager 1590 .allocator 1591 .lock() 1592 .unwrap() 1593 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None) 1594 .ok_or(DeviceManagerError::AllocateIoPort)?; 1595 1596 self.address_manager 1597 .io_bus 1598 .insert(shutdown_device, shutdown_pio_address.into(), 0x4) 1599 
.map_err(DeviceManagerError::BusError)?; 1600 1601 self.acpi_platform_addresses.sleep_control_reg_address = 1602 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1603 self.acpi_platform_addresses.sleep_status_reg_address = 1604 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1605 self.acpi_platform_addresses.reset_reg_address = 1606 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1607 } 1608 1609 let ged_irq = self 1610 .address_manager 1611 .allocator 1612 .lock() 1613 .unwrap() 1614 .allocate_irq() 1615 .unwrap(); 1616 let interrupt_group = interrupt_manager 1617 .create_group(LegacyIrqGroupConfig { 1618 irq: ged_irq as InterruptIndex, 1619 }) 1620 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1621 let ged_address = self 1622 .address_manager 1623 .allocator 1624 .lock() 1625 .unwrap() 1626 .allocate_platform_mmio_addresses( 1627 None, 1628 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1629 None, 1630 ) 1631 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 1632 let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new( 1633 interrupt_group, 1634 ged_irq, 1635 ged_address, 1636 ))); 1637 self.address_manager 1638 .mmio_bus 1639 .insert( 1640 ged_device.clone(), 1641 ged_address.0, 1642 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1643 ) 1644 .map_err(DeviceManagerError::BusError)?; 1645 self.bus_devices 1646 .push(Arc::clone(&ged_device) as Arc<dyn BusDeviceSync>); 1647 1648 let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new())); 1649 1650 self.bus_devices 1651 .push(Arc::clone(&pm_timer_device) as Arc<dyn BusDeviceSync>); 1652 1653 #[cfg(target_arch = "x86_64")] 1654 { 1655 let pm_timer_pio_address: u16 = 0x608; 1656 1657 self.address_manager 1658 .allocator 1659 .lock() 1660 .unwrap() 1661 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None) 1662 .ok_or(DeviceManagerError::AllocateIoPort)?; 1663 1664 self.address_manager 1665 .io_bus 1666 
.insert(pm_timer_device, pm_timer_pio_address.into(), 0x4) 1667 .map_err(DeviceManagerError::BusError)?; 1668 1669 self.acpi_platform_addresses.pm_timer_address = 1670 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address)); 1671 } 1672 1673 Ok(Some(ged_device)) 1674 } 1675 1676 #[cfg(target_arch = "x86_64")] 1677 fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> { 1678 let vcpus_kill_signalled = self 1679 .cpu_manager 1680 .lock() 1681 .unwrap() 1682 .vcpus_kill_signalled() 1683 .clone(); 1684 // Add a shutdown device (i8042) 1685 let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new( 1686 reset_evt.try_clone().unwrap(), 1687 vcpus_kill_signalled.clone(), 1688 ))); 1689 1690 self.bus_devices 1691 .push(Arc::clone(&i8042) as Arc<dyn BusDeviceSync>); 1692 1693 self.address_manager 1694 .io_bus 1695 .insert(i8042, 0x61, 0x4) 1696 .map_err(DeviceManagerError::BusError)?; 1697 { 1698 // Add a CMOS emulated device 1699 let mem_size = self 1700 .memory_manager 1701 .lock() 1702 .unwrap() 1703 .guest_memory() 1704 .memory() 1705 .last_addr() 1706 .0 1707 + 1; 1708 let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size); 1709 let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0); 1710 1711 let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new( 1712 mem_below_4g, 1713 mem_above_4g, 1714 reset_evt, 1715 Some(vcpus_kill_signalled), 1716 ))); 1717 1718 self.bus_devices 1719 .push(Arc::clone(&cmos) as Arc<dyn BusDeviceSync>); 1720 1721 self.address_manager 1722 .io_bus 1723 .insert(cmos, 0x70, 0x2) 1724 .map_err(DeviceManagerError::BusError)?; 1725 1726 let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new())); 1727 1728 self.bus_devices 1729 .push(Arc::clone(&fwdebug) as Arc<dyn BusDeviceSync>); 1730 1731 self.address_manager 1732 .io_bus 1733 .insert(fwdebug, 0x402, 0x1) 1734 .map_err(DeviceManagerError::BusError)?; 1735 } 1736 1737 // 0x80 debug port 
1738 let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp))); 1739 self.bus_devices 1740 .push(Arc::clone(&debug_port) as Arc<dyn BusDeviceSync>); 1741 self.address_manager 1742 .io_bus 1743 .insert(debug_port, 0x80, 0x1) 1744 .map_err(DeviceManagerError::BusError)?; 1745 1746 Ok(()) 1747 } 1748 1749 #[cfg(target_arch = "aarch64")] 1750 fn add_legacy_devices( 1751 &mut self, 1752 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1753 ) -> DeviceManagerResult<()> { 1754 // Add a RTC device 1755 let rtc_irq = self 1756 .address_manager 1757 .allocator 1758 .lock() 1759 .unwrap() 1760 .allocate_irq() 1761 .unwrap(); 1762 1763 let interrupt_group = interrupt_manager 1764 .create_group(LegacyIrqGroupConfig { 1765 irq: rtc_irq as InterruptIndex, 1766 }) 1767 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1768 1769 let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group))); 1770 1771 self.bus_devices 1772 .push(Arc::clone(&rtc_device) as Arc<dyn BusDeviceSync>); 1773 1774 let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START; 1775 1776 self.address_manager 1777 .mmio_bus 1778 .insert(rtc_device, addr.0, MMIO_LEN) 1779 .map_err(DeviceManagerError::BusError)?; 1780 1781 self.id_to_dev_info.insert( 1782 (DeviceType::Rtc, "rtc".to_string()), 1783 MmioDeviceInfo { 1784 addr: addr.0, 1785 len: MMIO_LEN, 1786 irq: rtc_irq, 1787 }, 1788 ); 1789 1790 // Add a GPIO device 1791 let id = String::from(GPIO_DEVICE_NAME); 1792 let gpio_irq = self 1793 .address_manager 1794 .allocator 1795 .lock() 1796 .unwrap() 1797 .allocate_irq() 1798 .unwrap(); 1799 1800 let interrupt_group = interrupt_manager 1801 .create_group(LegacyIrqGroupConfig { 1802 irq: gpio_irq as InterruptIndex, 1803 }) 1804 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1805 1806 let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new( 1807 id.clone(), 1808 interrupt_group, 1809 state_from_id(self.snapshot.as_ref(), 
id.as_str()) 1810 .map_err(DeviceManagerError::RestoreGetState)?, 1811 ))); 1812 1813 self.bus_devices 1814 .push(Arc::clone(&gpio_device) as Arc<dyn BusDeviceSync>); 1815 1816 let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START; 1817 1818 self.address_manager 1819 .mmio_bus 1820 .insert(gpio_device.clone(), addr.0, MMIO_LEN) 1821 .map_err(DeviceManagerError::BusError)?; 1822 1823 self.gpio_device = Some(gpio_device.clone()); 1824 1825 self.id_to_dev_info.insert( 1826 (DeviceType::Gpio, "gpio".to_string()), 1827 MmioDeviceInfo { 1828 addr: addr.0, 1829 len: MMIO_LEN, 1830 irq: gpio_irq, 1831 }, 1832 ); 1833 1834 self.device_tree 1835 .lock() 1836 .unwrap() 1837 .insert(id.clone(), device_node!(id, gpio_device)); 1838 1839 Ok(()) 1840 } 1841 1842 #[cfg(target_arch = "x86_64")] 1843 fn add_debug_console_device( 1844 &mut self, 1845 debug_console_writer: Box<dyn io::Write + Send>, 1846 ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> { 1847 let id = String::from(DEBUGCON_DEVICE_NAME); 1848 let debug_console = Arc::new(Mutex::new(DebugConsole::new( 1849 id.clone(), 1850 debug_console_writer, 1851 ))); 1852 1853 let port = self 1854 .config 1855 .lock() 1856 .unwrap() 1857 .debug_console 1858 .clone() 1859 .iobase 1860 .map(|port| port as u64) 1861 .unwrap_or(debug_console::DEFAULT_PORT); 1862 1863 self.bus_devices 1864 .push(Arc::clone(&debug_console) as Arc<dyn BusDeviceSync>); 1865 1866 self.address_manager 1867 .allocator 1868 .lock() 1869 .unwrap() 1870 .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None) 1871 .ok_or(DeviceManagerError::AllocateIoPort)?; 1872 1873 self.address_manager 1874 .io_bus 1875 .insert(debug_console.clone(), port, 0x1) 1876 .map_err(DeviceManagerError::BusError)?; 1877 1878 // Fill the device tree with a new node. In case of restore, we 1879 // know there is nothing to do, so we can simply override the 1880 // existing entry. 
1881 self.device_tree 1882 .lock() 1883 .unwrap() 1884 .insert(id.clone(), device_node!(id, debug_console)); 1885 1886 Ok(debug_console) 1887 } 1888 1889 #[cfg(target_arch = "x86_64")] 1890 fn add_serial_device( 1891 &mut self, 1892 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1893 serial_writer: Option<Box<dyn io::Write + Send>>, 1894 ) -> DeviceManagerResult<Arc<Mutex<Serial>>> { 1895 // Serial is tied to IRQ #4 1896 let serial_irq = 4; 1897 1898 let id = String::from(SERIAL_DEVICE_NAME); 1899 1900 let interrupt_group = interrupt_manager 1901 .create_group(LegacyIrqGroupConfig { 1902 irq: serial_irq as InterruptIndex, 1903 }) 1904 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1905 1906 let serial = Arc::new(Mutex::new(Serial::new( 1907 id.clone(), 1908 interrupt_group, 1909 serial_writer, 1910 state_from_id(self.snapshot.as_ref(), id.as_str()) 1911 .map_err(DeviceManagerError::RestoreGetState)?, 1912 ))); 1913 1914 self.bus_devices 1915 .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>); 1916 1917 self.address_manager 1918 .allocator 1919 .lock() 1920 .unwrap() 1921 .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None) 1922 .ok_or(DeviceManagerError::AllocateIoPort)?; 1923 1924 self.address_manager 1925 .io_bus 1926 .insert(serial.clone(), 0x3f8, 0x8) 1927 .map_err(DeviceManagerError::BusError)?; 1928 1929 // Fill the device tree with a new node. In case of restore, we 1930 // know there is nothing to do, so we can simply override the 1931 // existing entry. 
1932 self.device_tree 1933 .lock() 1934 .unwrap() 1935 .insert(id.clone(), device_node!(id, serial)); 1936 1937 Ok(serial) 1938 } 1939 1940 #[cfg(target_arch = "aarch64")] 1941 fn add_serial_device( 1942 &mut self, 1943 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1944 serial_writer: Option<Box<dyn io::Write + Send>>, 1945 ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> { 1946 let id = String::from(SERIAL_DEVICE_NAME); 1947 1948 let serial_irq = self 1949 .address_manager 1950 .allocator 1951 .lock() 1952 .unwrap() 1953 .allocate_irq() 1954 .unwrap(); 1955 1956 let interrupt_group = interrupt_manager 1957 .create_group(LegacyIrqGroupConfig { 1958 irq: serial_irq as InterruptIndex, 1959 }) 1960 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1961 1962 let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new( 1963 id.clone(), 1964 interrupt_group, 1965 serial_writer, 1966 self.timestamp, 1967 state_from_id(self.snapshot.as_ref(), id.as_str()) 1968 .map_err(DeviceManagerError::RestoreGetState)?, 1969 ))); 1970 1971 self.bus_devices 1972 .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>); 1973 1974 let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START; 1975 1976 self.address_manager 1977 .mmio_bus 1978 .insert(serial.clone(), addr.0, MMIO_LEN) 1979 .map_err(DeviceManagerError::BusError)?; 1980 1981 self.id_to_dev_info.insert( 1982 (DeviceType::Serial, DeviceType::Serial.to_string()), 1983 MmioDeviceInfo { 1984 addr: addr.0, 1985 len: MMIO_LEN, 1986 irq: serial_irq, 1987 }, 1988 ); 1989 1990 self.cmdline_additions 1991 .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0)); 1992 1993 // Fill the device tree with a new node. In case of restore, we 1994 // know there is nothing to do, so we can simply override the 1995 // existing entry. 
1996 self.device_tree 1997 .lock() 1998 .unwrap() 1999 .insert(id.clone(), device_node!(id, serial)); 2000 2001 Ok(serial) 2002 } 2003 2004 fn add_virtio_console_device( 2005 &mut self, 2006 virtio_devices: &mut Vec<MetaVirtioDevice>, 2007 console_fd: ConsoleOutput, 2008 resize_pipe: Option<Arc<File>>, 2009 ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> { 2010 let console_config = self.config.lock().unwrap().console.clone(); 2011 let endpoint = match console_fd { 2012 ConsoleOutput::File(file) => Endpoint::File(file), 2013 ConsoleOutput::Pty(file) => { 2014 self.console_resize_pipe = resize_pipe; 2015 Endpoint::PtyPair(Arc::new(file.try_clone().unwrap()), file) 2016 } 2017 ConsoleOutput::Tty(stdout) => { 2018 if stdout.is_terminal() { 2019 self.console_resize_pipe = resize_pipe; 2020 } 2021 2022 // If an interactive TTY then we can accept input 2023 // SAFETY: FFI call. Trivially safe. 2024 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } { 2025 // SAFETY: FFI call to dup. Trivially safe. 2026 let stdin = unsafe { libc::dup(libc::STDIN_FILENO) }; 2027 if stdin == -1 { 2028 return vmm_sys_util::errno::errno_result() 2029 .map_err(DeviceManagerError::DupFd); 2030 } 2031 // SAFETY: stdin is valid and owned solely by us. 
2032 let stdin = unsafe { File::from_raw_fd(stdin) }; 2033 Endpoint::FilePair(stdout, Arc::new(stdin)) 2034 } else { 2035 Endpoint::File(stdout) 2036 } 2037 } 2038 ConsoleOutput::Socket(_) => { 2039 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice); 2040 } 2041 ConsoleOutput::Null => Endpoint::Null, 2042 ConsoleOutput::Off => return Ok(None), 2043 }; 2044 let id = String::from(CONSOLE_DEVICE_NAME); 2045 2046 let (virtio_console_device, console_resizer) = virtio_devices::Console::new( 2047 id.clone(), 2048 endpoint, 2049 self.console_resize_pipe 2050 .as_ref() 2051 .map(|p| p.try_clone().unwrap()), 2052 self.force_iommu | console_config.iommu, 2053 self.seccomp_action.clone(), 2054 self.exit_evt 2055 .try_clone() 2056 .map_err(DeviceManagerError::EventFd)?, 2057 state_from_id(self.snapshot.as_ref(), id.as_str()) 2058 .map_err(DeviceManagerError::RestoreGetState)?, 2059 ) 2060 .map_err(DeviceManagerError::CreateVirtioConsole)?; 2061 let virtio_console_device = Arc::new(Mutex::new(virtio_console_device)); 2062 virtio_devices.push(MetaVirtioDevice { 2063 virtio_device: Arc::clone(&virtio_console_device) 2064 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2065 iommu: console_config.iommu, 2066 id: id.clone(), 2067 pci_segment: 0, 2068 dma_handler: None, 2069 }); 2070 2071 // Fill the device tree with a new node. In case of restore, we 2072 // know there is nothing to do, so we can simply override the 2073 // existing entry. 2074 self.device_tree 2075 .lock() 2076 .unwrap() 2077 .insert(id.clone(), device_node!(id, virtio_console_device)); 2078 2079 // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY 2080 Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) { 2081 Some(console_resizer) 2082 } else { 2083 None 2084 }) 2085 } 2086 2087 /// Adds all devices that behave like a console with respect to the VM 2088 /// configuration. 
This includes:
    /// - debug-console
    /// - serial-console
    /// - virtio-console
    fn add_console_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_info: Option<ConsoleInfo>,
        console_resize_pipe: Option<Arc<File>>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        if console_info.is_none() {
            return Err(DeviceManagerError::InvalidConsoleInfo);
        }

        // SAFETY: console_info is Some, so it's safe to unwrap.
        let console_info = console_info.unwrap();

        // Only File/Tty outputs hand a writable FD directly to the serial
        // device; Pty and Socket outputs are serviced by the SerialManager
        // created below instead.
        let serial_writer: Option<Box<dyn io::Write + Send>> = match console_info.serial_main_fd {
            ConsoleOutput::File(ref file) | ConsoleOutput::Tty(ref file) => {
                Some(Box::new(Arc::clone(file)))
            }
            ConsoleOutput::Off
            | ConsoleOutput::Null
            | ConsoleOutput::Pty(_)
            | ConsoleOutput::Socket(_) => None,
        };

        if !matches!(console_info.serial_main_fd, ConsoleOutput::Off) {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            self.serial_manager = match console_info.serial_main_fd {
                ConsoleOutput::Pty(_) | ConsoleOutput::Tty(_) | ConsoleOutput::Socket(_) => {
                    let serial_manager = SerialManager::new(
                        serial,
                        console_info.serial_main_fd,
                        serial_config.socket,
                    )
                    .map_err(DeviceManagerError::CreateSerialManager)?;
                    if let Some(mut serial_manager) = serial_manager {
                        serial_manager
                            .start_thread(
                                self.exit_evt
                                    .try_clone()
                                    .map_err(DeviceManagerError::EventFd)?,
                            )
                            .map_err(DeviceManagerError::SpawnSerialManager)?;
                        Some(Arc::new(serial_manager))
                    } else {
                        None
                    }
                }
                _ => None,
            };
        }

        // The debug console only exists on x86_64 builds.
        #[cfg(target_arch = "x86_64")]
        {
            let debug_console_writer: Option<Box<dyn io::Write + Send>> =
                match console_info.debug_main_fd {
                    ConsoleOutput::File(file) | ConsoleOutput::Tty(file) => Some(Box::new(file)),
                    ConsoleOutput::Off
                    | ConsoleOutput::Null
                    | ConsoleOutput::Pty(_)
                    | ConsoleOutput::Socket(_) => None,
                };
            if let Some(writer) = debug_console_writer {
                let _ = self.add_debug_console_device(writer)?;
            }
        }

        let console_resizer = self.add_virtio_console_device(
            virtio_devices,
            console_info.console_main_fd,
            console_resize_pipe,
        )?;

        Ok(Arc::new(Console { console_resizer }))
    }

    /// Creates the TPM device backed by `tpm_path` and registers it on the
    /// MMIO bus at the architecture-defined TPM_START/TPM_SIZE window.
    fn add_tpm_device(
        &mut self,
        tpm_path: PathBuf,
    ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
        // Create TPM Device
        let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
            DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
        })?;
        let tpm = Arc::new(Mutex::new(tpm));

        // Add TPM Device to mmio
        self.address_manager
            .mmio_bus
            .insert(
                tpm.clone(),
                arch::layout::TPM_START.0,
                arch::layout::TPM_SIZE,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(tpm)
    }

    /// Instantiates every virtio device requested by the VM configuration.
    /// NOTE(review): the order of the append calls below determines the
    /// order devices are later placed on the bus — confirm before reordering.
    fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices: Vec<MetaVirtioDevice> = Vec::new();

        // Create "standard" virtio devices (net/block/rng)
        devices.append(&mut self.make_virtio_block_devices()?);
        devices.append(&mut self.make_virtio_net_devices()?);
        devices.append(&mut self.make_virtio_rng_devices()?);

        // Add virtio-fs if required
        devices.append(&mut self.make_virtio_fs_devices()?);

        // Add virtio-pmem if required
        devices.append(&mut self.make_virtio_pmem_devices()?);

        // Add virtio-vsock if required
        devices.append(&mut self.make_virtio_vsock_devices()?);

        devices.append(&mut self.make_virtio_mem_devices()?);

        // Add
virtio-balloon if required
        devices.append(&mut self.make_virtio_balloon_devices()?);

        // Add virtio-watchdog device
        devices.append(&mut self.make_virtio_watchdog_devices()?);

        // Add vDPA devices if required
        devices.append(&mut self.make_vdpa_devices()?);

        Ok(devices)
    }

    // Cache whether aio is supported to avoid checking for every block device
    fn aio_is_supported(&mut self) -> bool {
        if let Some(supported) = self.aio_supported {
            return supported;
        }

        let supported = block_aio_is_supported();
        self.aio_supported = Some(supported);
        supported
    }

    // Cache whether io_uring is supported to avoid probing for every block device
    fn io_uring_is_supported(&mut self) -> bool {
        if let Some(supported) = self.io_uring_supported {
            return supported;
        }

        let supported = block_io_uring_is_supported();
        self.io_uring_supported = Some(supported);
        supported
    }

    /// Creates a single virtio-block (or vhost-user-block) device from
    /// `disk_cfg`, assigning and writing back a generated id when none is set.
    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        // vhost-user disks are served by an external backend over a socket;
        // anything else is opened and served in-process below.
        let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
            // NOTE(review): assumes vhost_socket was validated as Some by
            // config parsing — confirm before relying on this unwrap.
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let vhost_user_block = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(
                    id.clone(),
                    vu_cfg,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            // Pick the disk backend from the detected image format and the
            // async I/O capabilities of the host.
            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(
                                FixedVhdDiskAsync::new(file)
                                    .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                            ) as Box<dyn DiskFile>
                        }
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous RAW disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                        }
                    } else if !disk_cfg.disable_aio && self.aio_is_supported() {
                        info!("Using asynchronous RAW disk file (aio)");
                        Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            let rate_limit_group =
                if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
                    // Create an anonymous RateLimiterGroup that is dropped when the Disk
                    // is dropped.
                    let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
                    let ops = rate_limiter_cfg.ops.unwrap_or_default();
                    let mut rate_limit_group = RateLimiterGroup::new(
                        disk_cfg.id.as_ref().unwrap(),
                        bw.size,
                        bw.one_time_burst.unwrap_or(0),
                        bw.refill_time,
                        ops.size,
                        ops.one_time_burst.unwrap_or(0),
                        ops.refill_time,
                    )
                    .map_err(DeviceManagerError::RateLimiterGroupCreate)?;

                    rate_limit_group
                        .start_thread(
                            self.exit_evt
                                .try_clone()
                                .map_err(DeviceManagerError::EventFd)?,
                        )
                        .unwrap();

                    Some(Arc::new(rate_limit_group))
                } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
                    // A named group is shared between disks; look it up in the
                    // groups the manager already holds. A missing name yields
                    // None (no limiting) rather than an error.
                    self.rate_limit_groups.get(rate_limit_group).cloned()
                } else {
                    None
                };

            // Optional per-queue host CPU affinity, keyed by queue index.
            let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() {
                queue_affinity
                    .iter()
                    .map(|a| (a.queue_index, a.host_cpus.clone()))
                    .collect()
            } else {
                BTreeMap::new()
            };

            let virtio_block = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    disk_cfg.serial.clone(),
                    self.seccomp_action.clone(),
                    rate_limit_group,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    queue_affinity,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node.
In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Creates all virtio-block devices listed in the config, writing back
    /// any ids generated for entries that had none.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }

    /// Creates a single virtio-net (or vhost-user-net) device from `net_cfg`,
    /// assigning and writing back a generated id when none is set.
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        // vhost-user NICs are served by an external backend over a socket;
        // otherwise the device is backed by a TAP interface in-process.
        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            // NOTE(review): assumes vhost_socket was validated as Some by
            // config parsing — confirm before relying on this unwrap.
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    net_cfg.mtu,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                ) {
                    Ok(vun_device) => vun_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserNet(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_net as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let state = state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?;
            // Three backends: a named TAP interface, pre-opened TAP fds, or
            // a TAP created from the ip/mask configuration.
            let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        Some(tap_if_name),
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else if let Some(fds) = &net_cfg.fds {
                let net = virtio_devices::Net::from_tap_fds(
                    id.clone(),
                    fds,
                    Some(net_cfg.mac),
                    net_cfg.mtu,
                    self.force_iommu | net_cfg.iommu,
                    net_cfg.queue_size,
                    self.seccomp_action.clone(),
                    net_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                )
                .map_err(DeviceManagerError::CreateVirtioNet)?;

                // SAFETY: 'fds' are valid because TAP devices are created successfully
                unsafe {
                    self.config.lock().unwrap().add_preserved_fds(fds.clone());
                }

                Arc::new(Mutex::new(net))
            }
else {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        None,
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            };

            (
                Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_net as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: net_cfg.iommu,
            id,
            pci_segment: net_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add virtio-net and vhost-user-net devices
    fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        let mut net_devices = self.config.lock().unwrap().net.clone();
        if let Some(net_list_cfg) = &mut net_devices {
            for net_cfg in net_list_cfg.iter_mut() {
                devices.push(self.make_virtio_net_device(net_cfg)?);
            }
        }
        self.config.lock().unwrap().net = net_devices;

        Ok(devices)
    }

    /// Creates the single virtio-rng device, sourcing entropy from the path
    /// given in the rng configuration.
    fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        if let Some(rng_path) = rng_config.src.to_str() {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_rng_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: rng_config.iommu,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_rng_device));
        }

        Ok(devices)
    }

    /// Creates a single vhost-user-fs device from `fs_cfg`; fails with
    /// NoVirtioFsSock if the socket path is not valid UTF-8.
    fn make_virtio_fs_device(
        &mut self,
        fs_cfg: &mut FsConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &fs_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
            fs_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-fs device: {:?}", fs_cfg);

        let mut node = device_node!(id);

        if let Some(fs_socket) = fs_cfg.socket.to_str() {
            let virtio_fs_device = Arc::new(Mutex::new(
                virtio_devices::vhost_user::Fs::new(
                    id.clone(),
                    fs_socket,
                    &fs_cfg.tag,
                    fs_cfg.num_queues,
                    fs_cfg.queue_size,
                    None,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioFs)?,
            ));

            // Update the device tree with the migratable device.
            node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_fs_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id,
                pci_segment: fs_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            Err(DeviceManagerError::NoVirtioFsSock)
        }
    }

    /// Creates all virtio-fs devices listed in the config, writing back any
    /// ids generated for entries that had none.
    fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut fs_devices = self.config.lock().unwrap().fs.clone();
        if let Some(fs_list_cfg) = &mut fs_devices {
            for fs_cfg in fs_list_cfg.iter_mut() {
                devices.push(self.make_virtio_fs_device(fs_cfg)?);
            }
        }
        self.config.lock().unwrap().fs = fs_devices;

        Ok(devices)
    }

    /// Creates a single virtio-pmem device from `pmem_cfg`: opens (or, for a
    /// directory, anonymously creates) the backing file, mmaps it, registers
    /// the mapping with the memory manager, and records the MMIO range in the
    /// device tree.
    fn make_virtio_pmem_device(
        &mut self,
        pmem_cfg: &mut PmemConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &pmem_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
            pmem_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-pmem device: {:?}", pmem_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
            info!("Restoring virtio-pmem {} resources", id);

            // Expect exactly one MmioAddressRange resource; more than one is
            // an error, other resource kinds are logged and ignored.
            let mut region_range: Option<(u64, u64)> = None;
            for resource in node.resources.iter() {
                match resource {
                    Resource::MmioAddressRange { base, size } => {
                        if region_range.is_some() {
                            return Err(DeviceManagerError::ResourceAlreadyExists);
                        }

                        region_range = Some((*base, *size));
                    }
                    _ => {
                        error!("Unexpected resource {:?} for {}", resource, id);
                    }
                }
            }

            if region_range.is_none() {
                return Err(DeviceManagerError::MissingVirtioPmemResources);
            }

            region_range
        } else {
            None
        };

        // A directory is backed by an anonymous file created inside it
        // (O_TMPFILE); in that case an explicit size is mandatory and the
        // file gets truncated to it below.
        let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
            if pmem_cfg.size.is_none() {
                return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
            }
            (O_TMPFILE, true)
        } else {
            (0, false)
        };

        let mut file = OpenOptions::new()
            .read(true)
            .write(!pmem_cfg.discard_writes)
            .custom_flags(custom_flags)
            .open(&pmem_cfg.file)
            .map_err(DeviceManagerError::PmemFileOpen)?;

        // Without an explicit size, use the current file length.
        let size = if let Some(size) = pmem_cfg.size {
            if set_len {
                file.set_len(size)
                    .map_err(DeviceManagerError::PmemFileSetLen)?;
            }
            size
        } else {
            file.seek(SeekFrom::End(0))
                .map_err(DeviceManagerError::PmemFileSetLen)?
        };

        // The region size must be a multiple of 2MiB.
        if size % 0x20_0000 != 0 {
            return Err(DeviceManagerError::PmemSizeNotAligned);
        }

        let (region_base, region_size) = if let Some((base, size)) = region_range {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(
                    Some(GuestAddress(base)),
                    size as GuestUsize,
                    Some(0x0020_0000),
                )
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base, size)
        } else {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            let base = self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(None, size as GuestUsize, Some(0x0020_0000))
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base.raw_value(), size)
        };

        let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
        // discard_writes maps the file MAP_PRIVATE so guest writes never
        // reach the backing file; otherwise MAP_SHARED persists them.
        let mmap_region = MmapRegion::build(
            Some(FileOffset::new(cloned_file, 0)),
            region_size as usize,
            PROT_READ | PROT_WRITE,
            MAP_NORESERVE
                | if pmem_cfg.discard_writes {
                    MAP_PRIVATE
                } else {
                    MAP_SHARED
                },
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;
        let host_addr: u64 = mmap_region.as_ptr() as u64;

        let mem_slot = self
            .memory_manager
            .lock()
            .unwrap()
            .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
            .map_err(DeviceManagerError::MemoryManager)?;

        let mapping = virtio_devices::UserspaceMapping {
            host_addr,
            mem_slot,
            addr: GuestAddress(region_base),
            len: region_size,
            mergeable: false,
        };

        let virtio_pmem_device = Arc::new(Mutex::new(
            virtio_devices::Pmem::new(
                id.clone(),
                file,
                GuestAddress(region_base),
                mapping,
                mmap_region,
                self.force_iommu | pmem_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioPmem)?,
        ));

        // Update the device tree with correct resource information and with
        // the migratable device.
        node.resources.push(Resource::MmioAddressRange {
            base: region_base,
            size: region_size,
        });
        node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_pmem_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: pmem_cfg.iommu,
            id,
            pci_segment: pmem_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Creates all virtio-pmem devices listed in the config, writing back any
    /// ids generated for entries that had none.
    fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add virtio-pmem if required
        let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
        if let Some(pmem_list_cfg) = &mut pmem_devices {
            for pmem_cfg in pmem_list_cfg.iter_mut() {
                devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
            }
        }
        self.config.lock().unwrap().pmem = pmem_devices;

        Ok(devices)
    }

    /// Creates a single virtio-vsock device from `vsock_cfg`, backed by a
    /// Unix-socket backend bound to the configured CID.
    fn make_virtio_vsock_device(
        &mut self,
        vsock_cfg: &mut VsockConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vsock_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
            vsock_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-vsock device: {:?}", vsock_cfg);

        let socket_path = vsock_cfg
            .socket
            .to_str()
            .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
        let backend =
            virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                .map_err(DeviceManagerError::CreateVsockBackend)?;

        let vsock_device = Arc::new(Mutex::new(
            virtio_devices::Vsock::new(
                id.clone(),
                vsock_cfg.cid,
                vsock_cfg.socket.clone(),
                backend,
                self.force_iommu | vsock_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioVsock)?,
        ));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vsock_device));

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&vsock_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vsock_cfg.iommu,
            id,
            pci_segment: vsock_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Creates the (at most one) virtio-vsock device from the config, writing
    /// back any generated id.
    fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut vsock = self.config.lock().unwrap().vsock.clone();
        if let Some(ref mut vsock_cfg) = &mut vsock {
            devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
        }
        self.config.lock().unwrap().vsock = vsock;

        Ok(devices)
    }

    /// Creates one virtio-mem device per memory zone that carries a
    /// virtio-mem zone, wiring each device back into its zone so later
    /// resize requests can reach it.
    fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mm = self.memory_manager.clone();
        let mut mm = mm.lock().unwrap();
        for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
            if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
                info!("Creating virtio-mem device: id = {}", memory_zone_id);

                // Map the zone to its NUMA node id, if any.
                let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
                    .map(|i| i as u16);

                let virtio_mem_device = Arc::new(Mutex::new(
                    virtio_devices::Mem::new(
                        memory_zone_id.clone(),
                        virtio_mem_zone.region(),
                        self.seccomp_action.clone(),
                        node_id,
                        virtio_mem_zone.hotplugged_size(),
                        virtio_mem_zone.hugepages(),
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        virtio_mem_zone.blocks_state().clone(),
                        state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
                            .map_err(DeviceManagerError::RestoreGetState)?,
                    )
                    .map_err(DeviceManagerError::CreateVirtioMem)?,
                ));

                // Update the virtio-mem zone so that it has a handle onto the
                // virtio-mem device, which will be used for triggering a resize
                // if needed.
                virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));

                self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));

                devices.push(MetaVirtioDevice {
                    virtio_device: Arc::clone(&virtio_mem_device)
                        as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                    iommu: false,
                    id: memory_zone_id.clone(),
                    pci_segment: 0,
                    dma_handler: None,
                });

                // Fill the device tree with a new node. In case of restore, we
                // know there is nothing to do, so we can simply override the
                // existing entry.
                self.device_tree.lock().unwrap().insert(
                    memory_zone_id.clone(),
                    device_node!(memory_zone_id, virtio_mem_device),
                );
            }
        }

        Ok(devices)
    }

    /// Creates the pvmemcontrol device (PCI device plus its bus-facing half)
    /// and registers it on PCI segment 0.
    #[cfg(feature = "pvmemcontrol")]
    fn make_pvmemcontrol_device(
        &mut self,
    ) -> DeviceManagerResult<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )> {
        let id = String::from(PVMEMCONTROL_DEVICE_NAME);
        let pci_segment_id = 0x0_u16;

        // pci_resources may return a different segment id on restore; the
        // shadowing below is deliberate.
        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        info!("Creating pvmemcontrol device: id = {}", id);
        let (pvmemcontrol_pci_device, pvmemcontrol_bus_device) =
            devices::pvmemcontrol::PvmemcontrolDevice::make_device(
                id.clone(),
                self.memory_manager.lock().unwrap().guest_memory(),
            );

        let pvmemcontrol_pci_device = Arc::new(Mutex::new(pvmemcontrol_pci_device));
        let pvmemcontrol_bus_device = Arc::new(pvmemcontrol_bus_device);

        let new_resources = self.add_pci_device(
            pvmemcontrol_bus_device.clone(),
            pvmemcontrol_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let mut node = device_node!(id, pvmemcontrol_pci_device);

        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = None;

        self.device_tree.lock().unwrap().insert(id, node);

        Ok((pvmemcontrol_bus_device, pvmemcontrol_pci_device))
    }

    /// Creates the (at most one) virtio-balloon device when the config asks
    /// for it, keeping a handle in `self.balloon` for later resizing.
    fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
            let id = String::from(BALLOON_DEVICE_NAME);
            info!("Creating virtio-balloon device: id = {}", id);

            let virtio_balloon_device = Arc::new(Mutex::new(
                virtio_devices::Balloon::new(
                    id.clone(),
                    balloon_config.size,
                    balloon_config.deflate_on_oom,
                    balloon_config.free_page_reporting,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBalloon)?,
            ));

            self.balloon = Some(virtio_balloon_device.clone());

            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_balloon_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_balloon_device));
        }

        Ok(devices)
    }

    /// Creates the (at most one) virtio-watchdog device when the config asks
    /// for it; the device resets the VM via `reset_evt` on expiry.
    fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if !self.config.lock().unwrap().watchdog {
            return Ok(devices);
        }

        let id = String::from(WATCHDOG_DEVICE_NAME);
        info!("Creating virtio-watchdog device: id = {}", id);

        let virtio_watchdog_device = Arc::new(Mutex::new(
            virtio_devices::Watchdog::new(
                id.clone(),
                self.reset_evt.try_clone().unwrap(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
        ));
        devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_watchdog_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: false,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_watchdog_device));

        Ok(devices)
    }

    /// Creates a single vDPA device from `vdpa_cfg`; unlike other virtio
    /// devices it carries a DMA handler so guest mappings reach the vDPA
    /// backend.
    fn make_vdpa_device(
        &mut self,
        vdpa_cfg: &mut VdpaConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vdpa_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
            vdpa_cfg.id = Some(id.clone());
            id
        };

        info!("Creating vDPA device: {:?}", vdpa_cfg);

        let device_path = vdpa_cfg
            .path
            .to_str()
            .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;

        let vdpa_device = Arc::new(Mutex::new(
            virtio_devices::Vdpa::new(
                id.clone(),
                device_path,
                self.memory_manager.lock().unwrap().guest_memory(),
                vdpa_cfg.num_queues as u16,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVdpa)?,
        ));

        // Create the DMA handler that is required by the vDPA device
        let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
            Arc::clone(&vdpa_device),
            Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
        ));

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vdpa_device));

        Ok(MetaVirtioDevice {
            virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vdpa_cfg.iommu,
            id,
            pci_segment: vdpa_cfg.pci_segment,
            dma_handler: Some(vdpa_mapping),
        })
    }

    /// Creates all vDPA devices listed in the config, writing back any ids
    /// generated for entries that had none.
    fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add vdpa if required
        let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
        if let Some(vdpa_list_cfg) = &mut vdpa_devices {
            for vdpa_cfg in vdpa_list_cfg.iter_mut() {
                devices.push(self.make_vdpa_device(vdpa_cfg)?);
            }
        }
        self.config.lock().unwrap().vdpa = vdpa_devices;

        Ok(devices)
    }

    /// Returns the next unused device name for `prefix`, walking a wrapping
    /// counter and skipping names already taken by boot-time devices or the
    /// device tree; errors once the counter has wrapped a full cycle.
    fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
        let start_id = self.device_id_cnt;
        loop {
            // Generate the temporary name.
            let name = format!("{}{}", prefix, self.device_id_cnt);
            // Increment the counter.
            self.device_id_cnt += Wrapping(1);
            // Check if the name is already in use.
            if !self.boot_id_list.contains(&name)
                && !self.device_tree.lock().unwrap().contains_key(&name)
            {
                return Ok(name);
            }

            if self.device_id_cnt == start_id {
                // We went through a full loop and there's nothing else we can
                // do.
                break;
            }
        }
        Err(DeviceManagerError::NoAvailableDeviceName)
    }

    /// Adds a passthrough (VFIO) device, lazily creating the hypervisor
    /// passthrough device on first use.
    fn add_passthrough_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // If the passthrough device has not been created yet, it is created
        // here and stored in the DeviceManager structure for future needs.
        if self.passthrough_device.is_none() {
            self.passthrough_device = Some(
                self.address_manager
                    .vm
                    .create_passthrough_device()
                    .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
            );
        }

        self.add_vfio_device(device_cfg)
    }

    /// Builds a new VFIO container from a duplicate of the hypervisor
    /// passthrough device FD; fails if passthrough was never set up.
    fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
        let passthrough_device = self
            .passthrough_device
            .as_ref()
            .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

        let dup = passthrough_device
            .try_clone()
            .map_err(DeviceManagerError::VfioCreate)?;

        Ok(Arc::new(
            VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
        ))
    }

    /// Attaches a VFIO device described by `device_cfg`, assigning and
    /// writing back a generated id when none is set.
    fn add_vfio_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

        // Set below only when a brand new (shared) container is created, in
        // which case guest memory must be DMA-mapped into it here.
        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't let all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device. That's simply because the map/unmap operations happen at the
        // VFIO container level.
        let vfio_container = if device_cfg.iommu {
            let vfio_container = self.create_vfio_container()?;

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            if let Some(iommu) = &self.iommu_device {
                // The vIOMMU drives map/unmap for this device's container.
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }

            vfio_container
        } else if let Some(vfio_container) = &self.vfio_container {
            // Reuse the container shared by all non-vIOMMU VFIO devices.
            Arc::clone(vfio_container)
        } else {
            let vfio_container = self.create_vfio_container()?;
            needs_dma_mapping = true;
            self.vfio_container = Some(Arc::clone(&vfio_container));

            vfio_container
        };

        let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
            .map_err(DeviceManagerError::VfioCreate)?;

        if needs_dma_mapping {
            // Register DMA mapping in IOMMU.
            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem device itself.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    vfio_container
                        .vfio_dma_map(
                            region.start_addr().raw_value(),
                            region.len(),
                            region.as_ptr() as u64,
                        )
                        .map_err(DeviceManagerError::VfioDmaMap)?;
                }
            }

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            // Let virtio-mem devices keep the container mappings in sync with
            // future plug/unplug operations.
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Container,
                        vfio_mapping.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }
        }

        // INTx support: an IRQ is picked per device slot on the segment.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        // Cloned so the closure below owns its own handle into the manager.
        let memory_manager = self.memory_manager.clone();

        let vfio_pci_device = VfioPciDevice::new(
            vfio_name.clone(),
            &self.address_manager.vm,
            vfio_device,
            vfio_container,
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            device_cfg.iommu,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
            device_cfg.x_nv_gpudirect_clique,
        )
        .map_err(DeviceManagerError::VfioPciCreate)?;

        let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

        let new_resources = self.add_pci_device(
            vfio_pci_device.clone(),
            vfio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // BARs must be mapped only after add_pci_device() has allocated them.
        vfio_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioMapRegion)?;

        for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
            self.mmio_regions.lock().unwrap().push(mmio_region);
        }

        let mut node = device_node!(vfio_name, vfio_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_name.clone(), node);

        Ok((pci_device_bdf, vfio_name))
    }

    /// Allocates BARs for `pci_device`, adds it to the PCI bus of
    /// `segment_id`, and registers its bus mappings. Returns the resulting
    /// BAR resources so callers can record them in the device tree.
    ///
    /// `resources` carries previously-allocated resources on restore so the
    /// device keeps the same addresses.
    fn add_pci_device(
        &mut self,
        bus_device: Arc<dyn BusDeviceSync>,
        pci_device: Arc<Mutex<dyn PciDevice>>,
        segment_id: u16,
        bdf: PciBdf,
        resources: Option<Vec<Resource>>,
    ) -> DeviceManagerResult<Vec<Resource>> {
        let bars = pci_device
            .lock()
            .unwrap()
            .allocate_bars(
                &self.address_manager.allocator,
                &mut self.pci_segments[segment_id as usize]
                    .mem32_allocator
                    .lock()
                    .unwrap(),
                &mut self.pci_segments[segment_id as usize]
                    .mem64_allocator
                    .lock()
                    .unwrap(),
                resources,
            )
            .map_err(DeviceManagerError::AllocateBars)?;

        let mut pci_bus = self.pci_segments[segment_id as usize]
            .pci_bus
            .lock()
            .unwrap();

        pci_bus
            .add_device(bdf.device() as u32, pci_device)
            .map_err(DeviceManagerError::AddPciDevice)?;

        self.bus_devices.push(Arc::clone(&bus_device));

        pci_bus
            .register_mapping(
                bus_device,
                #[cfg(target_arch = "x86_64")]
                self.address_manager.io_bus.as_ref(),
                self.address_manager.mmio_bus.as_ref(),
                bars.clone(),
            )
            .map_err(DeviceManagerError::AddPciDevice)?;

        // Translate the allocated BARs into snapshot-friendly resources.
        let mut new_resources = Vec::new();
        for bar in bars {
            new_resources.push(Resource::PciBar {
                index: bar.idx(),
                base: bar.addr(),
                size: bar.size(),
                type_: bar.region_type().into(),
                prefetchable: bar.prefetchable().into(),
            });
        }

        Ok(new_resources)
    }

    /// Adds every VFIO device from the VM config, returning the BDFs of
    /// those attached to the virtual IOMMU.
    fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut iommu_attached_device_ids = Vec::new();
        let mut devices = self.config.lock().unwrap().devices.clone();

        if let Some(device_list_cfg) = &mut devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (device_id, _) = self.add_passthrough_device(device_cfg)?;
                if device_cfg.iommu && self.iommu_device.is_some() {
                    iommu_attached_device_ids.push(device_id);
                }
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().devices = devices;

        Ok(iommu_attached_device_ids)
    }

    /// Creates a vfio-user device (PCI device backed by an out-of-process
    /// server reached over `device_cfg.socket`), maps guest memory for its
    /// DMA, and records it in the device tree.
    fn add_vfio_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_user_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;

        // INTx support, mirroring add_vfio_device().
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let client = Arc::new(Mutex::new(
            vfio_user::Client::new(&device_cfg.socket)
                .map_err(DeviceManagerError::VfioUserCreateClient)?,
        ));

        // Cloned so the closure below owns its own handle into the manager.
        let memory_manager = self.memory_manager.clone();

        let mut vfio_user_pci_device = VfioUserPciDevice::new(
            vfio_user_name.clone(),
            &self.address_manager.vm,
            client.clone(),
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
        )
        .map_err(DeviceManagerError::VfioUserCreate)?;

        let memory = self.memory_manager.lock().unwrap().guest_memory();
        let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
        // Keep this device's DMA view in sync with virtio-mem plug/unplug.
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Device(pci_device_bdf.into()),
                    vfio_user_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }

        // Map all current guest memory regions for the device's DMA.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_user_pci_device
                    .dma_map(region)
                    .map_err(DeviceManagerError::VfioUserDmaMap)?;
            }
        }

        let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

        let new_resources = self.add_pci_device(
            vfio_user_pci_device.clone(),
            vfio_user_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Note it is required to call 'add_pci_device()' in advance to have the list of
        // mmio regions provisioned correctly
        vfio_user_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioUserMapRegion)?;

        let mut node = device_node!(vfio_user_name, vfio_user_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_user_name.clone(), node);

        Ok((pci_device_bdf, vfio_user_name))
    }

    /// Creates every vfio-user device listed in the VM config. Always returns
    /// an empty list (vfio-user devices are never vIOMMU-attached here).
    fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut user_devices = self.config.lock().unwrap().user_devices.clone();

        if let Some(device_list_cfg) = &mut user_devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().user_devices = user_devices;

        Ok(vec![])
    }

    /// Wraps `virtio_device` in a virtio-pci transport device, allocates its
    /// PCI resources, registers ioeventfds, and links parent/child nodes in
    /// the device tree. Returns the BDF assigned to the transport device.
    fn add_virtio_pci_device(
        &mut self,
        virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu_mapping: &Option<Arc<IommuMapping>>,
        virtio_device_id: String,
        pci_segment_id: u16,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> DeviceManagerResult<PciBdf> {
        let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");

        // Add the new virtio-pci node to the device tree.
        let mut node = device_node!(id);
        node.children = vec![virtio_device_id.clone()];

        // pci_segment_id is shadowed: on restore it is re-derived from the
        // saved BDF inside pci_resources().
        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        // Update the existing virtio node by setting the parent.
        if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
            node.parent = Some(id.clone());
        } else {
            return Err(DeviceManagerError::MissingNode);
        }

        // Allows support for one MSI-X vector per queue. It also adds 1
        // as we need to take into account the dedicated vector to notify
        // about a virtio config change.
        let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;

        // Create the AccessPlatform trait from the implementation IommuMapping.
        // This will provide address translation for any virtio device sitting
        // behind a vIOMMU.
        let mut access_platform: Option<Arc<dyn AccessPlatform>> = None;

        if let Some(mapping) = iommu_mapping {
            access_platform = Some(Arc::new(AccessPlatformMapping::new(
                pci_device_bdf.into(),
                mapping.clone(),
            )));
        }

        // If SEV-SNP is enabled create the AccessPlatform from SevSnpPageAccessProxy
        #[cfg(feature = "sev_snp")]
        if self.config.lock().unwrap().is_sev_snp_enabled() {
            access_platform = Some(Arc::new(SevSnpPageAccessProxy::new(
                self.address_manager.vm.clone(),
            )));
        }

        let memory = self.memory_manager.lock().unwrap().guest_memory();

        // Map DMA ranges if a DMA handler is available and if the device is
        // not attached to a virtual IOMMU.
        if let Some(dma_handler) = &dma_handler {
            if iommu_mapping.is_some() {
                // vIOMMU case: the IOMMU owns the map/unmap lifecycle.
                if let Some(iommu) = &self.iommu_device {
                    iommu
                        .lock()
                        .unwrap()
                        .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
                } else {
                    return Err(DeviceManagerError::MissingVirtualIommu);
                }
            } else {
                // Let every virtio-mem device handle the DMA map/unmap through the
                // DMA handler provided.
                for virtio_mem_device in self.virtio_mem_devices.iter() {
                    virtio_mem_device
                        .lock()
                        .unwrap()
                        .add_dma_mapping_handler(
                            VirtioMemMappingSource::Device(pci_device_bdf.into()),
                            dma_handler.clone(),
                        )
                        .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
                }

                // Do not register virtio-mem regions, as they are handled directly by
                // virtio-mem devices.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        let gpa = region.start_addr().0;
                        let size = region.len();
                        // Identity mapping: IOVA == GPA without a vIOMMU.
                        dma_handler
                            .map(gpa, gpa, size)
                            .map_err(DeviceManagerError::VirtioDmaMap)?;
                    }
                }
            }
        }

        let device_type = virtio_device.lock().unwrap().device_type();
        let virtio_pci_device = Arc::new(Mutex::new(
            VirtioPciDevice::new(
                id.clone(),
                memory,
                virtio_device,
                msix_num,
                access_platform,
                &self.msi_interrupt_manager,
                pci_device_bdf.into(),
                self.activate_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                // All device types *except* virtio block devices should be allocated a 64-bit bar
                // The block devices should be given a 32-bit BAR so that they are easily accessible
                // to firmware without requiring excessive identity mapping.
                // The exception being if not on the default PCI segment.
                pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
                dma_handler,
                self.pending_activations.clone(),
                vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
            )
            .map_err(DeviceManagerError::VirtioDevice)?,
        ));

        let new_resources = self.add_pci_device(
            virtio_pci_device.clone(),
            virtio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Register one ioeventfd per queue-notify address so guest kicks
        // bypass a full VM exit handler path.
        let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
        for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
            let io_addr = IoEventAddress::Mmio(addr);
            self.address_manager
                .vm
                .register_ioevent(event, &io_addr, None)
                .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
        }

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
        self.device_tree.lock().unwrap().insert(id, node);

        Ok(pci_device_bdf)
    }

    /// Creates the pvpanic PCI device on segment 0 and records it in the
    /// device tree (with no device handle, as it is not hot-removable via
    /// the handle mechanism).
    fn add_pvpanic_device(
        &mut self,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
        let id = String::from(PVPANIC_DEVICE_NAME);
        let pci_segment_id = 0x0_u16;

        info!("Creating pvpanic device {}", id);

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
            .map_err(DeviceManagerError::PvPanicCreate)?;

        let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));

        let new_resources = self.add_pci_device(
            pvpanic_device.clone(),
            pvpanic_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let mut node = device_node!(id, pvpanic_device);

        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = None;

        self.device_tree.lock().unwrap().insert(id, node);

        Ok(Some(pvpanic_device))
    }

    /// Resolves (segment, BDF, saved resources) for a device id: reuses the
    /// device-tree entry on restore, otherwise allocates the next free BDF
    /// on the requested segment.
    fn pci_resources(
        &self,
        id: &str,
        pci_segment_id: u16,
    ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        Ok(
            if let Some(node) = self.device_tree.lock().unwrap().get(id) {
                info!("Restoring virtio-pci {} resources", id);
                let pci_device_bdf: PciBdf = node
                    .pci_bdf
                    .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
                // The saved BDF wins over the caller-provided segment id.
                let pci_segment_id = pci_device_bdf.segment();

                // Reserve the same device slot on the bus again.
                self.pci_segments[pci_segment_id as usize]
                    .pci_bus
                    .lock()
                    .unwrap()
                    .get_device_id(pci_device_bdf.device() as usize)
                    .map_err(DeviceManagerError::GetPciDeviceId)?;

                (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
            } else {
                let pci_device_bdf =
                    self.pci_segments[pci_segment_id as usize].next_device_bdf()?;

                (pci_segment_id, pci_device_bdf, None)
            },
        )
    }

    /// Returns the port I/O bus (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }

    /// Returns the MMIO bus.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }

    /// Returns the system resource allocator.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }

    /// Returns the interrupt controller, if one was created.
    pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
        self.interrupt_controller
            .as_ref()
            .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
    }

    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }

    /// Extra kernel command-line entries collected while creating devices
    /// (aarch64 only).
    #[cfg(target_arch = "aarch64")]
    pub fn cmdline_additions(&self) -> &[String] {
        self.cmdline_additions.as_slice()
    }

    /// Propagates a newly added guest memory region to every virtio device,
    /// the shared VFIO container, and all vfio-user devices, so DMA into the
    /// new region keeps working.
    pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
        for handle in self.virtio_devices.iter() {
            handle
                .virtio_device
                .lock()
                .unwrap()
                .add_memory_region(new_region)
                .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;

            // Non-vIOMMU devices with a DMA handler need the new region
            // identity-mapped (IOVA == GPA), matching add_virtio_pci_device().
            if let Some(dma_handler) = &handle.dma_handler {
                if !handle.iommu {
                    let gpa = new_region.start_addr().0;
                    let size = new_region.len();
                    dma_handler
                        .map(gpa, gpa, size)
                        .map_err(DeviceManagerError::VirtioDmaMap)?;
                }
            }
        }

        // Take care of updating the memory for VFIO PCI devices.
        if let Some(vfio_container) = &self.vfio_container {
            vfio_container
                .vfio_dma_map(
                    new_region.start_addr().raw_value(),
                    new_region.len(),
                    new_region.as_ptr() as u64,
                )
                .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
        }

        // Take care of updating the memory for vfio-user devices.
        {
            let device_tree = self.device_tree.lock().unwrap();
            for pci_device_node in device_tree.pci_devices() {
                if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
                    .pci_device_handle
                    .as_ref()
                    .ok_or(DeviceManagerError::MissingPciDevice)?
                {
                    vfio_user_pci_device
                        .lock()
                        .unwrap()
                        .dma_map(new_region)
                        .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
                }
            }
        }

        Ok(())
    }

    /// Activates every virtio device queued in `pending_activations`,
    /// draining the list.
    pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
        for mut activator in self.pending_activations.lock().unwrap().drain(..) {
            activator
                .activate()
                .map_err(DeviceManagerError::VirtioActivate)?;
        }
        Ok(())
    }

    /// Forwards a hotplug notification to the guest through the GED device.
    ///
    /// NOTE(review): the `unwrap()` assumes the GED notification device was
    /// created during boot; this panics if it is absent — confirm callers
    /// only invoke this on configurations that create the GED device.
    pub fn notify_hotplug(
        &self,
        _notification_type: AcpiNotificationFlags,
    ) -> DeviceManagerResult<()> {
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(_notification_type)
            .map_err(DeviceManagerError::HotPlugNotification);
    }

    /// Hotplugs a VFIO passthrough device and flags it as plugged-in via the
    /// segment's PCIU (plug-up) bitmap.
    pub fn add_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&device_cfg.id)?;

        // vIOMMU attachment is only valid on segments declared as such.
        if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;

        // Update the PCIU bitmap
        self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo {
            id: device_name,
            bdf,
        })
    }

    /// Hotplugs a vfio-user device and flags it in the segment's PCIU bitmap.
    pub fn add_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&device_cfg.id)?;

        let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;

        // Update the PCIU bitmap
        self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo {
            id: device_name,
            bdf,
        })
    }

    /// Requests removal of the device identified by `id` by setting the
    /// segment's PCID (plug-down) bit; actual teardown happens later in
    /// eject_device() once the guest has released the device.
    pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
        // The node can be directly a PCI node in case the 'id' refers to a
        // VFIO device or a virtio-pci one.
        // In case the 'id' refers to a virtio device, we must find the PCI
        // node by looking at the parent.
        let device_tree = self.device_tree.lock().unwrap();
        let node = device_tree
            .get(&id)
            .ok_or(DeviceManagerError::UnknownDeviceId(id))?;

        let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
            node
        } else {
            // 'id' names a virtio device; its parent is the virtio-pci node.
            let parent = node
                .parent
                .as_ref()
                .ok_or(DeviceManagerError::MissingNode)?;
            device_tree
                .get(parent)
                .ok_or(DeviceManagerError::MissingNode)?
        };

        let pci_device_bdf: PciBdf = pci_device_node
            .pci_bdf
            .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
        let pci_segment_id = pci_device_bdf.segment();

        let pci_device_handle = pci_device_node
            .pci_device_handle
            .as_ref()
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        // Only a subset of virtio device types supports removal.
        #[allow(irrefutable_let_patterns)]
        if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
            let device_type = VirtioDeviceType::from(
                virtio_pci_device
                    .lock()
                    .unwrap()
                    .virtio_device()
                    .lock()
                    .unwrap()
                    .device_type(),
            );
            match device_type {
                VirtioDeviceType::Net
                | VirtioDeviceType::Block
                | VirtioDeviceType::Pmem
                | VirtioDeviceType::Fs
                | VirtioDeviceType::Vsock => {}
                _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
            }
        }

        // Update the PCID bitmap
        self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();

        Ok(())
    }

    /// Tears down a device the guest has released: returns its bus slot,
    /// removes it from the device tree, unmaps DMA, frees BARs, and detaches
    /// it from every bus and internal list.
    pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
        info!(
            "Ejecting device_id = {} on segment_id={}",
            device_id, pci_segment_id
        );

        // Convert the device ID into the corresponding b/d/f.
        let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);

        // Give the PCI device ID back to the PCI bus.
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .put_device_id(device_id as usize)
            .map_err(DeviceManagerError::PutPciDeviceId)?;

        // Remove the device from the device tree along with its children.
        let mut device_tree = self.device_tree.lock().unwrap();
        let pci_device_node = device_tree
            .remove_node_by_pci_bdf(pci_device_bdf)
            .ok_or(DeviceManagerError::MissingPciDevice)?;

        // For VFIO and vfio-user the PCI device id is the id.
        // For virtio we overwrite it later as we want the id of the
        // underlying device.
        let mut id = pci_device_node.id;
        let pci_device_handle = pci_device_node
            .pci_device_handle
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
            // The virtio-pci device has a single child
            if !pci_device_node.children.is_empty() {
                assert_eq!(pci_device_node.children.len(), 1);
                let child_id = &pci_device_node.children[0];
                id.clone_from(child_id);
            }
        }
        for child in pci_device_node.children.iter() {
            device_tree.remove(child);
        }

        let mut iommu_attached = false;
        if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
            if iommu_attached_devices.contains(&pci_device_bdf) {
                iommu_attached = true;
            }
        }

        // Per-transport teardown; yields the handles needed for the common
        // teardown below plus whether a virtio-mem DMA handler must be
        // unregistered.
        let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
            // No need to remove any virtio-mem mapping here as the container outlives all devices
            PciDeviceHandle::Vfio(vfio_pci_device) => {
                for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
                    self.mmio_regions
                        .lock()
                        .unwrap()
                        .retain(|x| x.start != mmio_region.start)
                }

                (
                    Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_pci_device) as Arc<dyn BusDeviceSync>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    false,
                )
            }
            PciDeviceHandle::Virtio(virtio_pci_device) => {
                let dev = virtio_pci_device.lock().unwrap();
                // Unregister the ioeventfds registered in add_virtio_pci_device().
                let bar_addr = dev.config_bar_addr();
                for (event, addr) in dev.ioeventfds(bar_addr) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.address_manager
                        .vm
                        .unregister_ioevent(event, &io_addr)
                        .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
                }

                // Undo the identity DMA mappings (vIOMMU-attached devices are
                // handled by the IOMMU instead).
                if let Some(dma_handler) = dev.dma_handler() {
                    if !iommu_attached {
                        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                            for region in zone.regions() {
                                let iova = region.start_addr().0;
                                let size = region.len();
                                dma_handler
                                    .unmap(iova, size)
                                    .map_err(DeviceManagerError::VirtioDmaUnmap)?;
                            }
                        }
                    }
                }

                (
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&virtio_pci_device) as Arc<dyn BusDeviceSync>,
                    Some(dev.virtio_device()),
                    dev.dma_handler().is_some() && !iommu_attached,
                )
            }
            PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
                let mut dev = vfio_user_pci_device.lock().unwrap();
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        dev.dma_unmap(region)
                            .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
                    }
                }

                (
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_user_pci_device) as Arc<dyn BusDeviceSync>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    true,
                )
            }
        };

        if remove_dma_handler {
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
                        pci_device_bdf.into(),
                    ))
                    .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
            }
        }

        // Free the allocated BARs
        pci_device
            .lock()
            .unwrap()
            .free_bars(
                &mut self.address_manager.allocator.lock().unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .mem32_allocator
                    .lock()
                    .unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .mem64_allocator
                    .lock()
                    .unwrap(),
            )
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the list and
        // buses where it was stored. At the end of this function, after
        // any_device, bus_device and pci_device are released, the actual
        // device will be dropped.
        Ok(())
    }

    /// Hotplugs an already-built virtio device behind a virtio-pci transport
    /// and marks it plugged-in via the segment's PCIU bitmap.
    fn hotplug_virtio_pci_device(
        &mut self,
        handle: MetaVirtioDevice,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        // Add the virtio device to the device manager list. This is important
        // as the list is used to notify virtio devices about memory updates
        // for instance.
        self.virtio_devices.push(handle.clone());

        // Only wire the device through the virtual IOMMU when the handle
        // asked for it; otherwise it is directly assigned.
        let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
            self.iommu_mapping.clone()
        } else {
            None
        };

        let bdf = self.add_virtio_pci_device(
            handle.virtio_device,
            &mapping,
            handle.id.clone(),
            handle.pci_segment,
            handle.dma_handler,
        )?;

        // Update the PCIU bitmap so the ACPI hotplug controller reports the
        // newly inserted slot to the guest.
        self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo { id: handle.id, bdf })
    }

    /// Returns true iff the platform configuration lists `pci_segment_id`
    /// among its `iommu_segments`; false when no platform config or no
    /// iommu segment list is present.
    fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
        self.config
            .lock()
            .as_ref()
            .unwrap()
            .platform
            .as_ref()
            .map(|pc| {
                pc.iommu_segments
                    .as_ref()
                    .map(|v| v.contains(&pci_segment_id))
                    .unwrap_or_default()
            })
            .unwrap_or_default()
    }

    /// Hotplugs a disk built from `disk_cfg` (via `make_virtio_block_device`).
    /// Rejects an `iommu` request when the target segment is not an IOMMU
    /// segment.
    pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&disk_cfg.id)?;

        if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_block_device(disk_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a filesystem device built from `fs_cfg`
    /// (via `make_virtio_fs_device`). No IOMMU check here, unlike the other
    /// add_* methods.
    pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&fs_cfg.id)?;

        let device = self.make_virtio_fs_device(fs_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a pmem device built from `pmem_cfg`
    /// (via `make_virtio_pmem_device`).
    pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&pmem_cfg.id)?;

        if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_pmem_device(pmem_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a network device built from `net_cfg`
    /// (via `make_virtio_net_device`).
    pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&net_cfg.id)?;

        if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_net_device(net_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a vDPA device built from `vdpa_cfg` (via `make_vdpa_device`).
    pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vdpa_cfg.id)?;

        if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_vdpa_device(vdpa_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a vsock device built from `vsock_cfg`
    /// (via `make_virtio_vsock_device`).
    pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vsock_cfg.id)?;

        if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_vsock_device(vsock_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Collects per-device counters, keyed by device id, from every virtio
    /// device that reports them. Devices without counters are omitted.
    pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
        let mut counters = HashMap::new();

        for handle in &self.virtio_devices {
            let virtio_device = handle.virtio_device.lock().unwrap();
            if let Some(device_counters) = virtio_device.counters() {
                counters.insert(handle.id.clone(), device_counters.clone());
            }
        }

        counters
    }

    /// Resizes the virtio-balloon to `size` bytes; errors with
    /// `MissingVirtioBalloon` when no balloon device was configured.
    pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
        if let Some(balloon) = &self.balloon {
            return balloon
                .lock()
                .unwrap()
                .resize(size)
                .map_err(DeviceManagerError::VirtioBalloonResize);
        }

        warn!("No balloon setup: Can't resize the balloon");
        Err(DeviceManagerError::MissingVirtioBalloon)
    }

    /// Returns the balloon's actual size, or 0 when no balloon is configured.
    pub fn balloon_size(&self) -> u64 {
        if let Some(balloon) = &self.balloon {
            return balloon.lock().unwrap().get_actual();
        }

        0
    }

    /// Shared handle to the device tree tracking all managed devices.
    pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
        self.device_tree.clone()
    }

    /// Signals a power-button press to the guest through the ACPI GED
    /// notification device.
    #[cfg(target_arch = "x86_64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    /// Signals a power-button press to the guest, covering both device-tree
    /// and ACPI boot flows.
    #[cfg(target_arch = "aarch64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // There are two use cases:
        // 1. Users will use direct kernel boot with device tree.
        // 2. Users will use ACPI+UEFI boot.

        // Trigger a GPIO pin 3 event to satisfy use case 1.
        self.gpio_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .trigger_key(3)
            .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
        // Trigger a GED power button event to satisfy use case 2.
4464 return self 4465 .ged_notification_device 4466 .as_ref() 4467 .unwrap() 4468 .lock() 4469 .unwrap() 4470 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4471 .map_err(DeviceManagerError::PowerButtonNotification); 4472 } 4473 4474 pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> { 4475 &self.iommu_attached_devices 4476 } 4477 4478 fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> { 4479 if let Some(id) = id { 4480 if id.starts_with("__") { 4481 return Err(DeviceManagerError::InvalidIdentifier(id.clone())); 4482 } 4483 4484 if self.device_tree.lock().unwrap().contains_key(id) { 4485 return Err(DeviceManagerError::IdentifierNotUnique(id.clone())); 4486 } 4487 } 4488 4489 Ok(()) 4490 } 4491 4492 pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses { 4493 &self.acpi_platform_addresses 4494 } 4495 } 4496 4497 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> { 4498 for (numa_node_id, numa_node) in numa_nodes.iter() { 4499 if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) { 4500 return Some(*numa_node_id); 4501 } 4502 } 4503 4504 None 4505 } 4506 4507 fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 { 4508 for (numa_node_id, numa_node) in numa_nodes.iter() { 4509 if numa_node.pci_segments.contains(&pci_segment_id) { 4510 return *numa_node_id; 4511 } 4512 } 4513 4514 0 4515 } 4516 4517 struct TpmDevice {} 4518 4519 impl Aml for TpmDevice { 4520 fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) { 4521 aml::Device::new( 4522 "TPM2".into(), 4523 vec![ 4524 &aml::Name::new("_HID".into(), &"MSFT0101"), 4525 &aml::Name::new("_STA".into(), &(0xF_usize)), 4526 &aml::Name::new( 4527 "_CRS".into(), 4528 &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new( 4529 true, 4530 layout::TPM_START.0 as u32, 4531 layout::TPM_SIZE as u32, 4532 )]), 4533 ), 4534 ], 4535 ) 4536 .to_aml_bytes(sink) 4537 } 
}

impl Aml for DeviceManager {
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // One PCNT (re-enumeration) method call per PCI segment; PSCN below
        // invokes all of them on a hotplug scan.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller. The PCST OpRegion fields (PCIU/PCID/B0EJ/
        // PSEG) are backed by the BusDevice read/write impl on DeviceManager.
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                // PCEJ(slot, segment): eject a device by writing its slot
                // bitmap to B0EJ after selecting the segment via PSEG.
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write PCI bus number (in first argument) to I/O port via field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        // Reserve each segment's MMCONFIG window under the motherboard
        // resource device (PNP0C02) so the guest OS does not reassign it.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            // COM1: ISA-style 16550 on x86_64 (PNP0501, port 0x3f8),
            // PL011 MMIO on aarch64 (ARMH0011).
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        // S5 (soft-off) sleep state package.
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        // ACPI power button device.
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}

impl Pausable for DeviceManager {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        // Pause every migratable device tracked in the device tree; the
        // first failure aborts, leaving earlier devices paused.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let mut migratable = migratable.lock().unwrap();
                snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
            }
        }

        Ok(snapshot)
    }
}

impl Transportable for DeviceManager {}

impl Migratable for DeviceManager {
    // Each hook below simply fans out to every migratable device node.
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_migration()?;
            }
        }
        Ok(())
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}

// Byte offsets/sizes of the hotplug registers inside the DeviceManager's
// ACPI MMIO window; they mirror the PCST field layout emitted in the AML.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;

impl BusDevice for DeviceManager {
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            // PCIU: read-and-clear bitmap of slots plugged in since last read.
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_up
                        .to_le_bytes(),
                );
                // Clear the PCIU bitmap
                self.pci_segments[self.selected_segment].pci_devices_up = 0;
            }
            // PCID: read-and-clear bitmap of slots removed since last read.
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_down
                        .to_le_bytes(),
                );
                // Clear the PCID bitmap
                self.pci_segments[self.selected_segment].pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty bitmap since the eject is always
                // taken care of right away during a write access.
                data.fill(0);
            }
            // PSEG: currently selected PCI segment.
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        )
    }

    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
        match offset {
            // B0EJ: each set bit is a slot to eject on the selected segment.
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let mut slot_bitmap = u32::from_le_bytes(data_array);

                while slot_bitmap > 0 {
                    let slot_id = slot_bitmap.trailing_zeros();
                    if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
                        error!("Failed ejecting device {}: {:?}", slot_id, e);
                    }
                    slot_bitmap &= !(1 << slot_id);
                }
            }
            // PSEG: select the PCI segment targeted by subsequent accesses;
            // out-of-range selections are rejected with an error log.
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let selected_segment = u32::from_le_bytes(data_array) as usize;
                if selected_segment >= self.pci_segments.len() {
                    error!(
                        "Segment selection out of range: {} >= {}",
                        selected_segment,
                        self.pci_segments.len()
                    );
                    return None;
                }
                self.selected_segment = selected_segment;
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        );

        None
    }
}

impl Drop for DeviceManager {
    fn drop(&mut self) {
        // Wake up the DeviceManager threads (mainly virtio device workers),
        // to avoid deadlock on waiting for paused/parked worker threads.
        if let Err(e) = self.resume() {
            error!("Error resuming DeviceManager: {:?}", e);
        }

        for handle in self.virtio_devices.drain(..) {
            handle.virtio_device.lock().unwrap().shutdown();
        }

        // Restore the host terminal settings saved before the VM took over
        // stdout.
        if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
            // SAFETY: FFI call
            let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Checks that the MMIO aperture is split across segments proportionally
    // to the configured weights, with DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT
    // granularity.
    #[test]
    fn test_create_mmio_allocators() {
        let res = create_mmio_allocators(0x100000, 0x400000, 1, vec![1], 4 << 10);
        assert_eq!(res.len(), 1);
        assert_eq!(
            res[0].lock().unwrap().base(),
            vm_memory::GuestAddress(0x100000)
        );
        assert_eq!(
            res[0].lock().unwrap().end(),
            vm_memory::GuestAddress(0x3fffff)
        );

        let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![1, 1], 4 << 10);
        assert_eq!(res.len(), 2);
        assert_eq!(
            res[0].lock().unwrap().base(),
            vm_memory::GuestAddress(0x100000)
        );
        assert_eq!(
            res[0].lock().unwrap().end(),
            vm_memory::GuestAddress(0x27ffff)
        );
        assert_eq!(
            res[1].lock().unwrap().base(),
            vm_memory::GuestAddress(0x280000)
        );
        assert_eq!(
            res[1].lock().unwrap().end(),
            vm_memory::GuestAddress(0x3fffff)
        );

        let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![2, 1], 4 << 10);
        assert_eq!(res.len(), 2);
        assert_eq!(
            res[0].lock().unwrap().base(),
            vm_memory::GuestAddress(0x100000)
        );
        assert_eq!(
            res[0].lock().unwrap().end(),
            vm_memory::GuestAddress(0x2fffff)
        );
        assert_eq!(
            res[1].lock().unwrap().base(),
            vm_memory::GuestAddress(0x300000)
        );
        assert_eq!(
            res[1].lock().unwrap().end(),
            vm_memory::GuestAddress(0x3fffff)
        );
    }
}