1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use crate::config::{ 13 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 14 VdpaConfig, VhostMode, VmConfig, VsockConfig, 15 }; 16 use crate::console_devices::{ConsoleDeviceError, ConsoleInfo}; 17 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; 18 use crate::device_tree::{DeviceNode, DeviceTree}; 19 use crate::interrupt::LegacyUserspaceInterruptManager; 20 use crate::interrupt::MsiInterruptManager; 21 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 22 use crate::pci_segment::PciSegment; 23 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 24 use crate::vm_config::DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT; 25 use crate::GuestRegionMmap; 26 use crate::PciDeviceInfo; 27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; 28 use acpi_tables::sdt::GenericAddress; 29 use acpi_tables::{aml, Aml}; 30 use anyhow::anyhow; 31 use arch::layout; 32 #[cfg(target_arch = "x86_64")] 33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 34 use arch::NumaNodes; 35 #[cfg(target_arch = "aarch64")] 36 use arch::{DeviceType, MmioDeviceInfo}; 37 use block::{ 38 async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type, 39 fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio, 40 raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType, 41 }; 42 #[cfg(feature = "io_uring")] 43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; 44 #[cfg(target_arch = "x86_64")] 45 use 
devices::debug_console::DebugConsole; 46 #[cfg(target_arch = "aarch64")] 47 use devices::gic; 48 #[cfg(target_arch = "x86_64")] 49 use devices::ioapic; 50 #[cfg(target_arch = "aarch64")] 51 use devices::legacy::Pl011; 52 use devices::{ 53 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 54 }; 55 use hypervisor::IoEventAddress; 56 use libc::{ 57 tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE, 58 TCSANOW, 59 }; 60 use pci::{ 61 DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping, 62 VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError, 63 }; 64 use rate_limiter::group::RateLimiterGroup; 65 use seccompiler::SeccompAction; 66 use serde::{Deserialize, Serialize}; 67 use std::collections::{BTreeMap, BTreeSet, HashMap}; 68 use std::fs::{File, OpenOptions}; 69 use std::io::{self, stdout, Seek, SeekFrom}; 70 use std::num::Wrapping; 71 use std::os::fd::RawFd; 72 use std::os::unix::fs::OpenOptionsExt; 73 use std::os::unix::io::{AsRawFd, FromRawFd}; 74 use std::path::PathBuf; 75 use std::result; 76 use std::sync::{Arc, Mutex}; 77 use std::time::Instant; 78 use tracer::trace_scoped; 79 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 80 use virtio_devices::transport::VirtioTransport; 81 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 82 use virtio_devices::vhost_user::VhostUserConfig; 83 use virtio_devices::{ 84 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 85 }; 86 use virtio_devices::{Endpoint, IommuMapping}; 87 use vm_allocator::{AddressAllocator, SystemAllocator}; 88 use vm_device::dma_mapping::ExternalDmaMapping; 89 use vm_device::interrupt::{ 90 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 91 }; 92 use vm_device::{Bus, BusDevice, Resource}; 93 use vm_memory::guest_memory::FileOffset; 94 use vm_memory::GuestMemoryRegion; 95 use 
vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion}; 96 #[cfg(target_arch = "x86_64")] 97 use vm_memory::{GuestAddressSpace, GuestMemory}; 98 use vm_migration::{ 99 protocol::MemoryRangeTable, snapshot_from_id, state_from_id, Migratable, MigratableError, 100 Pausable, Snapshot, SnapshotData, Snapshottable, Transportable, 101 }; 102 use vm_virtio::AccessPlatform; 103 use vm_virtio::VirtioDeviceType; 104 use vmm_sys_util::eventfd::EventFd; 105 #[cfg(target_arch = "x86_64")] 106 use {devices::debug_console, devices::legacy::Serial}; 107 108 #[cfg(target_arch = "aarch64")] 109 const MMIO_LEN: u64 = 0x1000; 110 111 // Singleton devices / devices the user cannot name 112 #[cfg(target_arch = "x86_64")] 113 const IOAPIC_DEVICE_NAME: &str = "__ioapic"; 114 const SERIAL_DEVICE_NAME: &str = "__serial"; 115 #[cfg(target_arch = "x86_64")] 116 const DEBUGCON_DEVICE_NAME: &str = "__debug_console"; 117 #[cfg(target_arch = "aarch64")] 118 const GPIO_DEVICE_NAME: &str = "__gpio"; 119 const RNG_DEVICE_NAME: &str = "__rng"; 120 const IOMMU_DEVICE_NAME: &str = "__iommu"; 121 const BALLOON_DEVICE_NAME: &str = "__balloon"; 122 const CONSOLE_DEVICE_NAME: &str = "__console"; 123 const PVPANIC_DEVICE_NAME: &str = "__pvpanic"; 124 125 // Devices that the user may name and for which we generate 126 // identifiers if the user doesn't give one 127 const DISK_DEVICE_NAME_PREFIX: &str = "_disk"; 128 const FS_DEVICE_NAME_PREFIX: &str = "_fs"; 129 const NET_DEVICE_NAME_PREFIX: &str = "_net"; 130 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem"; 131 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa"; 132 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock"; 133 const WATCHDOG_DEVICE_NAME: &str = "__watchdog"; 134 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio"; 135 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user"; 136 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci"; 137 138 /// Errors associated with device manager 139 #[derive(Debug)] 140 pub enum DeviceManagerError { 141 /// Cannot create 
EventFd. 142 EventFd(io::Error), 143 144 /// Cannot open disk path 145 Disk(io::Error), 146 147 /// Cannot create vhost-user-net device 148 CreateVhostUserNet(virtio_devices::vhost_user::Error), 149 150 /// Cannot create virtio-blk device 151 CreateVirtioBlock(io::Error), 152 153 /// Cannot create virtio-net device 154 CreateVirtioNet(virtio_devices::net::Error), 155 156 /// Cannot create virtio-console device 157 CreateVirtioConsole(io::Error), 158 159 /// Cannot create virtio-rng device 160 CreateVirtioRng(io::Error), 161 162 /// Cannot create virtio-fs device 163 CreateVirtioFs(virtio_devices::vhost_user::Error), 164 165 /// Virtio-fs device was created without a socket. 166 NoVirtioFsSock, 167 168 /// Cannot create vhost-user-blk device 169 CreateVhostUserBlk(virtio_devices::vhost_user::Error), 170 171 /// Cannot create virtio-pmem device 172 CreateVirtioPmem(io::Error), 173 174 /// Cannot create vDPA device 175 CreateVdpa(virtio_devices::vdpa::Error), 176 177 /// Cannot create virtio-vsock device 178 CreateVirtioVsock(io::Error), 179 180 /// Cannot create tpm device 181 CreateTpmDevice(anyhow::Error), 182 183 /// Failed to convert Path to &str for the vDPA device. 184 CreateVdpaConvertPath, 185 186 /// Failed to convert Path to &str for the virtio-vsock device. 
187 CreateVsockConvertPath, 188 189 /// Cannot create virtio-vsock backend 190 CreateVsockBackend(virtio_devices::vsock::VsockUnixError), 191 192 /// Cannot create virtio-iommu device 193 CreateVirtioIommu(io::Error), 194 195 /// Cannot create virtio-balloon device 196 CreateVirtioBalloon(io::Error), 197 198 /// Cannot create virtio-watchdog device 199 CreateVirtioWatchdog(io::Error), 200 201 /// Failed to parse disk image format 202 DetectImageType(io::Error), 203 204 /// Cannot open qcow disk path 205 QcowDeviceCreate(qcow::Error), 206 207 /// Cannot create serial manager 208 CreateSerialManager(SerialManagerError), 209 210 /// Cannot spawn the serial manager thread 211 SpawnSerialManager(SerialManagerError), 212 213 /// Cannot open tap interface 214 OpenTap(net_util::TapError), 215 216 /// Cannot allocate IRQ. 217 AllocateIrq, 218 219 /// Cannot configure the IRQ. 220 Irq(vmm_sys_util::errno::Error), 221 222 /// Cannot allocate PCI BARs 223 AllocateBars(pci::PciDeviceError), 224 225 /// Could not free the BARs associated with a PCI device. 226 FreePciBars(pci::PciDeviceError), 227 228 /// Cannot register ioevent. 229 RegisterIoevent(anyhow::Error), 230 231 /// Cannot unregister ioevent. 
232 UnRegisterIoevent(anyhow::Error), 233 234 /// Cannot create virtio device 235 VirtioDevice(virtio_devices::transport::VirtioPciDeviceError), 236 237 /// Cannot add PCI device 238 AddPciDevice(pci::PciRootError), 239 240 /// Cannot open persistent memory file 241 PmemFileOpen(io::Error), 242 243 /// Cannot set persistent memory file size 244 PmemFileSetLen(io::Error), 245 246 /// Cannot find a memory range for persistent memory 247 PmemRangeAllocation, 248 249 /// Cannot find a memory range for virtio-fs 250 FsRangeAllocation, 251 252 /// Error creating serial output file 253 SerialOutputFileOpen(io::Error), 254 255 #[cfg(target_arch = "x86_64")] 256 /// Error creating debug-console output file 257 DebugconOutputFileOpen(io::Error), 258 259 /// Error creating console output file 260 ConsoleOutputFileOpen(io::Error), 261 262 /// Error creating serial pty 263 SerialPtyOpen(io::Error), 264 265 /// Error creating console pty 266 ConsolePtyOpen(io::Error), 267 268 /// Error creating console pty 269 DebugconPtyOpen(io::Error), 270 271 /// Error setting pty raw mode 272 SetPtyRaw(ConsoleDeviceError), 273 274 /// Error getting pty peer 275 GetPtyPeer(vmm_sys_util::errno::Error), 276 277 /// Cannot create a VFIO device 278 VfioCreate(vfio_ioctls::VfioError), 279 280 /// Cannot create a VFIO PCI device 281 VfioPciCreate(pci::VfioPciError), 282 283 /// Failed to map VFIO MMIO region. 284 VfioMapRegion(pci::VfioPciError), 285 286 /// Failed to DMA map VFIO device. 287 VfioDmaMap(vfio_ioctls::VfioError), 288 289 /// Failed to DMA unmap VFIO device. 290 VfioDmaUnmap(pci::VfioPciError), 291 292 /// Failed to create the passthrough device. 293 CreatePassthroughDevice(anyhow::Error), 294 295 /// Failed to memory map. 296 Mmap(io::Error), 297 298 /// Cannot add legacy device to Bus. 
299 BusError(vm_device::BusError), 300 301 /// Failed to allocate IO port 302 AllocateIoPort, 303 304 /// Failed to allocate MMIO address 305 AllocateMmioAddress, 306 307 /// Failed to make hotplug notification 308 HotPlugNotification(io::Error), 309 310 /// Error from a memory manager operation 311 MemoryManager(MemoryManagerError), 312 313 /// Failed to create new interrupt source group. 314 CreateInterruptGroup(io::Error), 315 316 /// Failed to update interrupt source group. 317 UpdateInterruptGroup(io::Error), 318 319 /// Failed to create interrupt controller. 320 CreateInterruptController(interrupt_controller::Error), 321 322 /// Failed to create a new MmapRegion instance. 323 NewMmapRegion(vm_memory::mmap::MmapRegionError), 324 325 /// Failed to clone a File. 326 CloneFile(io::Error), 327 328 /// Failed to create socket file 329 CreateSocketFile(io::Error), 330 331 /// Failed to spawn the network backend 332 SpawnNetBackend(io::Error), 333 334 /// Failed to spawn the block backend 335 SpawnBlockBackend(io::Error), 336 337 /// Missing PCI bus. 338 NoPciBus, 339 340 /// Could not find an available device name. 341 NoAvailableDeviceName, 342 343 /// Missing PCI device. 344 MissingPciDevice, 345 346 /// Failed to remove a PCI device from the PCI bus. 347 RemoveDeviceFromPciBus(pci::PciRootError), 348 349 /// Failed to remove a bus device from the IO bus. 350 RemoveDeviceFromIoBus(vm_device::BusError), 351 352 /// Failed to remove a bus device from the MMIO bus. 353 RemoveDeviceFromMmioBus(vm_device::BusError), 354 355 /// Failed to find the device corresponding to a specific PCI b/d/f. 356 UnknownPciBdf(u32), 357 358 /// Not allowed to remove this type of device from the VM. 359 RemovalNotAllowed(vm_virtio::VirtioDeviceType), 360 361 /// Failed to find device corresponding to the given identifier. 362 UnknownDeviceId(String), 363 364 /// Failed to find an available PCI device ID. 
365 NextPciDeviceId(pci::PciRootError), 366 367 /// Could not reserve the PCI device ID. 368 GetPciDeviceId(pci::PciRootError), 369 370 /// Could not give the PCI device ID back. 371 PutPciDeviceId(pci::PciRootError), 372 373 /// No disk path was specified when one was expected 374 NoDiskPath, 375 376 /// Failed to update guest memory for virtio device. 377 UpdateMemoryForVirtioDevice(virtio_devices::Error), 378 379 /// Cannot create virtio-mem device 380 CreateVirtioMem(io::Error), 381 382 /// Cannot find a memory range for virtio-mem memory 383 VirtioMemRangeAllocation, 384 385 /// Failed to update guest memory for VFIO PCI device. 386 UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError), 387 388 /// Trying to use a directory for pmem but no size specified 389 PmemWithDirectorySizeMissing, 390 391 /// Trying to use a size that is not multiple of 2MiB 392 PmemSizeNotAligned, 393 394 /// Could not find the node in the device tree. 395 MissingNode, 396 397 /// Resource was already found. 398 ResourceAlreadyExists, 399 400 /// Expected resources for virtio-pmem could not be found. 401 MissingVirtioPmemResources, 402 403 /// Missing PCI b/d/f from the DeviceNode. 404 MissingDeviceNodePciBdf, 405 406 /// No support for device passthrough 407 NoDevicePassthroughSupport, 408 409 /// No socket option support for console device 410 NoSocketOptionSupportForConsoleDevice, 411 412 /// Failed to resize virtio-balloon 413 VirtioBalloonResize(virtio_devices::balloon::Error), 414 415 /// Missing virtio-balloon, can't proceed as expected. 
416 MissingVirtioBalloon, 417 418 /// Missing virtual IOMMU device 419 MissingVirtualIommu, 420 421 /// Failed to do power button notification 422 PowerButtonNotification(io::Error), 423 424 /// Failed to do AArch64 GPIO power button notification 425 #[cfg(target_arch = "aarch64")] 426 AArch64PowerButtonNotification(devices::legacy::GpioDeviceError), 427 428 /// Failed to set O_DIRECT flag to file descriptor 429 SetDirectIo, 430 431 /// Failed to create FixedVhdDiskAsync 432 CreateFixedVhdDiskAsync(io::Error), 433 434 /// Failed to create FixedVhdDiskSync 435 CreateFixedVhdDiskSync(io::Error), 436 437 /// Failed to create QcowDiskSync 438 CreateQcowDiskSync(qcow::Error), 439 440 /// Failed to create FixedVhdxDiskSync 441 CreateFixedVhdxDiskSync(vhdx::VhdxError), 442 443 /// Failed to add DMA mapping handler to virtio-mem device. 444 AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error), 445 446 /// Failed to remove DMA mapping handler from virtio-mem device. 447 RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error), 448 449 /// Failed to create vfio-user client 450 VfioUserCreateClient(vfio_user::Error), 451 452 /// Failed to create VFIO user device 453 VfioUserCreate(VfioUserPciDeviceError), 454 455 /// Failed to map region from VFIO user device into guest 456 VfioUserMapRegion(VfioUserPciDeviceError), 457 458 /// Failed to DMA map VFIO user device. 459 VfioUserDmaMap(VfioUserPciDeviceError), 460 461 /// Failed to DMA unmap VFIO user device. 462 VfioUserDmaUnmap(VfioUserPciDeviceError), 463 464 /// Failed to update memory mappings for VFIO user device 465 UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError), 466 467 /// Cannot duplicate file descriptor 468 DupFd(vmm_sys_util::errno::Error), 469 470 /// Failed to DMA map virtio device. 471 VirtioDmaMap(std::io::Error), 472 473 /// Failed to DMA unmap virtio device. 
474 VirtioDmaUnmap(std::io::Error), 475 476 /// Cannot hotplug device behind vIOMMU 477 InvalidIommuHotplug, 478 479 /// Invalid identifier as it is not unique. 480 IdentifierNotUnique(String), 481 482 /// Invalid identifier 483 InvalidIdentifier(String), 484 485 /// Error activating virtio device 486 VirtioActivate(ActivateError), 487 488 /// Failed retrieving device state from snapshot 489 RestoreGetState(MigratableError), 490 491 /// Cannot create a PvPanic device 492 PvPanicCreate(devices::pvpanic::PvPanicError), 493 494 /// Cannot create a RateLimiterGroup 495 RateLimiterGroupCreate(rate_limiter::group::Error), 496 497 /// Cannot start sigwinch listener 498 StartSigwinchListener(std::io::Error), 499 500 // Invalid console info 501 InvalidConsoleInfo, 502 503 // Invalid console fd 504 InvalidConsoleFd, 505 } 506 507 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>; 508 509 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10; 510 511 #[derive(Default)] 512 pub struct Console { 513 console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>, 514 } 515 516 impl Console { 517 pub fn need_resize(&self) -> bool { 518 if let Some(_resizer) = self.console_resizer.as_ref() { 519 return true; 520 } 521 522 false 523 } 524 525 pub fn update_console_size(&self) { 526 if let Some(resizer) = self.console_resizer.as_ref() { 527 resizer.update_console_size() 528 } 529 } 530 } 531 532 pub(crate) struct AddressManager { 533 pub(crate) allocator: Arc<Mutex<SystemAllocator>>, 534 #[cfg(target_arch = "x86_64")] 535 pub(crate) io_bus: Arc<Bus>, 536 pub(crate) mmio_bus: Arc<Bus>, 537 pub(crate) vm: Arc<dyn hypervisor::Vm>, 538 device_tree: Arc<Mutex<DeviceTree>>, 539 pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 540 pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 541 } 542 543 impl DeviceRelocation for AddressManager { 544 fn move_bar( 545 &self, 546 old_base: u64, 547 new_base: u64, 548 len: u64, 549 pci_dev: &mut dyn PciDevice, 550 
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator: release the old PIO range first,
                    // then claim the new one at the exact requested base.
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
                    &self.pci_mmio32_allocators
                } else {
                    &self.pci_mmio64_allocators
                };

                // Find the specific allocator that this BAR was allocated from and use it for new one
                for allocator in allocators {
                    let allocator_base = allocator.lock().unwrap().base();
                    let allocator_end = allocator.lock().unwrap().end();

                    if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                        // Free-then-allocate mirrors the PIO path above; the
                        // `Some(len)` alignment keeps the new range naturally
                        // aligned to the BAR size.
                        allocator
                            .lock()
                            .unwrap()
                            .free(GuestAddress(old_base), len as GuestUsize);

                        allocator
                            .lock()
                            .unwrap()
                            .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
                            .ok_or_else(|| {
                                io::Error::new(
                                    io::ErrorKind::Other,
                                    "failed allocating new MMIO range",
                                )
                            })?;

                        break;
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device:
        // rewrite the PciBar resource whose base matches the old address.
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {id} from device tree"),
                ));
            }
        }

        // virtio-pci devices need extra fixups beyond the BAR move itself.
        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The moved BAR is the virtio config BAR: re-register every
                // queue-notify ioeventfd at its new MMIO address.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {e:?}"),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {e:?}"),
                            )
                        })?;
                }
            } else {
                // Otherwise the moved BAR may back the device's shared memory
                // regions (e.g. virtio-fs DAX window): remap the user memory
                // region at the new base.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        // Remove the old mapping before inserting the new one;
                        // the hypervisor does not allow overlapping slots.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {e:?}"),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {e:?}"),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {e:?}"),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the device itself record the new BAR address.
        pci_dev.move_bar(old_base, new_base)
    }
}

/// Serialized state of the DeviceManager used for snapshot/restore.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

/// Main side of a pseudo-terminal pair plus its filesystem path.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    // NOTE(review): try_clone().unwrap() panics if the fd cannot be
    // duplicated; callers appear to rely on this never failing.
    fn clone(&self) -> Self {
        PtyPair {
            main: self.main.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

/// Handle to a hot-pluggable PCI device, by passthrough/virtio flavor.
#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}

/// A virtio device plus the placement metadata the DeviceManager
/// needs when attaching it to a PCI segment / IOMMU.
#[derive(Clone)]
struct MetaVirtioDevice {
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // Whether the device is to be placed behind the virtual IOMMU.
    iommu: bool,
    id: String,
    pci_segment: u16,
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}

/// Register addresses of ACPI platform devices, used to fill ACPI tables.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub
 sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

/// Owns every device attached to the VM and the address/interrupt plumbing
/// that connects them: PCI segments, buses, interrupt managers, the device
/// tree used for introspection and snapshot/restore, and the hot-plug state.
pub struct DeviceManager {
    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // pty foreground status,
    console_resize_pipe: Option<Arc<File>>,

    // To restore on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller (IOAPIC on x86_64, GIC on aarch64)
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    exit_evt: EventFd,
    reset_evt: EventFd,

    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    acpi_address: GuestAddress,

    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // aio availability if detected
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    snapshot: Option<Snapshot>,

    rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,

    mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
}

/// Splits the MMIO hole [start, end] into one allocator per PCI segment,
/// sized proportionally to each segment's weight and starting on an
/// `alignment`-aligned boundary.
fn create_mmio_allocators(
    start: u64,
    end: u64,
    num_pci_segments: u16,
    weights: Vec<u32>,
    alignment: u64,
) -> Vec<Arc<Mutex<AddressAllocator>>> {
    let total_weight: u32 = weights.iter().sum();

    // Start each PCI segment mmio range on an aligned boundary
    let pci_segment_mmio_size = (end - start + 1) / (alignment * total_weight as u64) * alignment;

    let mut mmio_allocators = vec![];
    // `i` accumulates the weights already consumed so each segment starts
    // right after the previous one.
    let mut i = 0;
    for segment_id in 0..num_pci_segments as u64 {
        let weight = weights[segment_id as usize] as u64;
        let mmio_start = start + i * pci_segment_mmio_size;
        let mmio_size = pci_segment_mmio_size * weight;
        let allocator = Arc::new(Mutex::new(
            AddressAllocator::new(GuestAddress(mmio_start), mmio_size).unwrap(),
        ));
        mmio_allocators.push(allocator);
        i += weight;
    }

    mmio_allocators
}

impl DeviceManager {
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
        mmio_bus: Arc<Bus>,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        cpu_manager: Arc<Mutex<CpuManager>>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
        snapshot: Option<Snapshot>,
        dynamic: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        // When restoring from a snapshot, rebuild the device tree and the
        // device ID counter from the saved state instead of starting fresh.
        let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
            let state: DeviceManagerState = snapshot.to_state().unwrap();
            (
Arc::new(Mutex::new(state.device_tree.clone())), 974 state.device_id_cnt, 975 ) 976 } else { 977 (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0)) 978 }; 979 980 let num_pci_segments = 981 if let Some(platform_config) = config.lock().unwrap().platform.as_ref() { 982 platform_config.num_pci_segments 983 } else { 984 1 985 }; 986 987 let mut mmio32_aperture_weights: Vec<u32> = 988 std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT) 989 .take(num_pci_segments.into()) 990 .collect(); 991 if let Some(pci_segments) = &config.lock().unwrap().pci_segments { 992 for pci_segment in pci_segments.iter() { 993 mmio32_aperture_weights[pci_segment.pci_segment as usize] = 994 pci_segment.mmio32_aperture_weight 995 } 996 } 997 998 let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0; 999 let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE; 1000 let pci_mmio32_allocators = create_mmio_allocators( 1001 start_of_mmio32_area, 1002 end_of_mmio32_area, 1003 num_pci_segments, 1004 mmio32_aperture_weights, 1005 4 << 10, 1006 ); 1007 1008 let mut mmio64_aperture_weights: Vec<u32> = 1009 std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT) 1010 .take(num_pci_segments.into()) 1011 .collect(); 1012 if let Some(pci_segments) = &config.lock().unwrap().pci_segments { 1013 for pci_segment in pci_segments.iter() { 1014 mmio64_aperture_weights[pci_segment.pci_segment as usize] = 1015 pci_segment.mmio64_aperture_weight 1016 } 1017 } 1018 1019 let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0; 1020 let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0; 1021 let pci_mmio64_allocators = create_mmio_allocators( 1022 start_of_mmio64_area, 1023 end_of_mmio64_area, 1024 num_pci_segments, 1025 mmio64_aperture_weights, 1026 4 << 30, 1027 ); 1028 1029 let address_manager = Arc::new(AddressManager { 1030 allocator: memory_manager.lock().unwrap().allocator(), 1031 #[cfg(target_arch = "x86_64")] 
1032 io_bus, 1033 mmio_bus, 1034 vm: vm.clone(), 1035 device_tree: Arc::clone(&device_tree), 1036 pci_mmio32_allocators, 1037 pci_mmio64_allocators, 1038 }); 1039 1040 // First we create the MSI interrupt manager, the legacy one is created 1041 // later, after the IOAPIC device creation. 1042 // The reason we create the MSI one first is because the IOAPIC needs it, 1043 // and then the legacy interrupt manager needs an IOAPIC. So we're 1044 // handling a linear dependency chain: 1045 // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager. 1046 let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> = 1047 Arc::new(MsiInterruptManager::new( 1048 Arc::clone(&address_manager.allocator), 1049 vm, 1050 )); 1051 1052 let acpi_address = address_manager 1053 .allocator 1054 .lock() 1055 .unwrap() 1056 .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None) 1057 .ok_or(DeviceManagerError::AllocateIoPort)?; 1058 1059 let mut pci_irq_slots = [0; 32]; 1060 PciSegment::reserve_legacy_interrupts_for_pci_devices( 1061 &address_manager, 1062 &mut pci_irq_slots, 1063 )?; 1064 1065 let mut pci_segments = vec![PciSegment::new_default_segment( 1066 &address_manager, 1067 Arc::clone(&address_manager.pci_mmio32_allocators[0]), 1068 Arc::clone(&address_manager.pci_mmio64_allocators[0]), 1069 &pci_irq_slots, 1070 )?]; 1071 1072 for i in 1..num_pci_segments as usize { 1073 pci_segments.push(PciSegment::new( 1074 i as u16, 1075 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16), 1076 &address_manager, 1077 Arc::clone(&address_manager.pci_mmio32_allocators[i]), 1078 Arc::clone(&address_manager.pci_mmio64_allocators[i]), 1079 &pci_irq_slots, 1080 )?); 1081 } 1082 1083 if dynamic { 1084 let acpi_address = address_manager 1085 .allocator 1086 .lock() 1087 .unwrap() 1088 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None) 1089 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 1090 1091 address_manager 1092 
.mmio_bus 1093 .insert( 1094 cpu_manager.clone(), 1095 acpi_address.0, 1096 CPU_MANAGER_ACPI_SIZE as u64, 1097 ) 1098 .map_err(DeviceManagerError::BusError)?; 1099 1100 cpu_manager.lock().unwrap().set_acpi_address(acpi_address); 1101 } 1102 1103 let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new(); 1104 if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() { 1105 for rate_limit_group_cfg in rate_limit_groups_cfg { 1106 let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config; 1107 let bw = rate_limit_cfg.bandwidth.unwrap_or_default(); 1108 let ops = rate_limit_cfg.ops.unwrap_or_default(); 1109 let mut rate_limit_group = RateLimiterGroup::new( 1110 &rate_limit_group_cfg.id, 1111 bw.size, 1112 bw.one_time_burst.unwrap_or(0), 1113 bw.refill_time, 1114 ops.size, 1115 ops.one_time_burst.unwrap_or(0), 1116 ops.refill_time, 1117 ) 1118 .map_err(DeviceManagerError::RateLimiterGroupCreate)?; 1119 1120 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?; 1121 1122 rate_limit_group.start_thread(exit_evt).unwrap(); 1123 rate_limit_groups 1124 .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group)); 1125 } 1126 } 1127 1128 let device_manager = DeviceManager { 1129 address_manager: Arc::clone(&address_manager), 1130 console: Arc::new(Console::default()), 1131 interrupt_controller: None, 1132 #[cfg(target_arch = "aarch64")] 1133 cmdline_additions: Vec::new(), 1134 ged_notification_device: None, 1135 config, 1136 memory_manager, 1137 cpu_manager, 1138 virtio_devices: Vec::new(), 1139 bus_devices: Vec::new(), 1140 device_id_cnt, 1141 msi_interrupt_manager, 1142 legacy_interrupt_manager: None, 1143 passthrough_device: None, 1144 vfio_container: None, 1145 iommu_device: None, 1146 iommu_mapping: None, 1147 iommu_attached_devices: None, 1148 pci_segments, 1149 device_tree, 1150 exit_evt, 1151 reset_evt, 1152 #[cfg(target_arch = "aarch64")] 1153 id_to_dev_info: HashMap::new(), 1154 
seccomp_action, 1155 numa_nodes, 1156 balloon: None, 1157 activate_evt: activate_evt 1158 .try_clone() 1159 .map_err(DeviceManagerError::EventFd)?, 1160 acpi_address, 1161 selected_segment: 0, 1162 serial_manager: None, 1163 console_resize_pipe: None, 1164 original_termios_opt: Arc::new(Mutex::new(None)), 1165 virtio_mem_devices: Vec::new(), 1166 #[cfg(target_arch = "aarch64")] 1167 gpio_device: None, 1168 pvpanic_device: None, 1169 force_iommu, 1170 io_uring_supported: None, 1171 aio_supported: None, 1172 boot_id_list, 1173 timestamp, 1174 pending_activations: Arc::new(Mutex::new(Vec::default())), 1175 acpi_platform_addresses: AcpiPlatformAddresses::default(), 1176 snapshot, 1177 rate_limit_groups, 1178 mmio_regions: Arc::new(Mutex::new(Vec::new())), 1179 }; 1180 1181 let device_manager = Arc::new(Mutex::new(device_manager)); 1182 1183 address_manager 1184 .mmio_bus 1185 .insert( 1186 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>, 1187 acpi_address.0, 1188 DEVICE_MANAGER_ACPI_SIZE as u64, 1189 ) 1190 .map_err(DeviceManagerError::BusError)?; 1191 1192 Ok(device_manager) 1193 } 1194 1195 pub fn console_resize_pipe(&self) -> Option<Arc<File>> { 1196 self.console_resize_pipe.clone() 1197 } 1198 1199 pub fn create_devices( 1200 &mut self, 1201 console_info: Option<ConsoleInfo>, 1202 console_resize_pipe: Option<File>, 1203 original_termios_opt: Arc<Mutex<Option<termios>>>, 1204 ) -> DeviceManagerResult<()> { 1205 trace_scoped!("create_devices"); 1206 1207 let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new(); 1208 1209 let interrupt_controller = self.add_interrupt_controller()?; 1210 1211 self.cpu_manager 1212 .lock() 1213 .unwrap() 1214 .set_interrupt_controller(interrupt_controller.clone()); 1215 1216 // Now we can create the legacy interrupt manager, which needs the freshly 1217 // formed IOAPIC device. 
        let legacy_interrupt_manager: Arc<
            dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
        > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
            &interrupt_controller,
        )));

        {
            // Expose the MemoryManager on the MMIO bus if it carries an ACPI
            // region (e.g. for hotplug notifications).
            if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
                self.address_manager
                    .mmio_bus
                    .insert(
                        Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
                        acpi_address.0,
                        MEMORY_MANAGER_ACPI_SIZE as u64,
                    )
                    .map_err(DeviceManagerError::BusError)?;
            }
        }

        #[cfg(target_arch = "x86_64")]
        self.add_legacy_devices(
            self.reset_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )?;

        #[cfg(target_arch = "aarch64")]
        self.add_legacy_devices(&legacy_interrupt_manager)?;

        {
            self.ged_notification_device = self.add_acpi_devices(
                &legacy_interrupt_manager,
                self.reset_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )?;
        }

        self.original_termios_opt = original_termios_opt;

        self.console = self.add_console_devices(
            &legacy_interrupt_manager,
            &mut virtio_devices,
            console_info,
            console_resize_pipe,
        )?;

        if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
            let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
            self.bus_devices
                .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>)
        }
        self.legacy_interrupt_manager = Some(legacy_interrupt_manager);

        virtio_devices.append(&mut self.make_virtio_devices()?);

        self.add_pci_devices(virtio_devices.clone())?;

        self.virtio_devices = virtio_devices;

        if self.config.clone().lock().unwrap().pvpanic {
            self.pvpanic_device = self.add_pvpanic_device()?;
        }

        Ok(())
    }

    /// Captures the snapshot-able state: the device tree and the device id
    /// counter used for naming new devices.
    fn state(&self) -> DeviceManagerState {
        DeviceManagerState {
            device_tree: self.device_tree.lock().unwrap().clone(),
            device_id_cnt: self.device_id_cnt,
        }
    }

    /// Returns the inclusive guest-address range used for MSI doorbells:
    /// derived from the vGIC config on aarch64, the fixed APIC range on x86.
    fn get_msi_iova_space(&mut self) -> (u64, u64) {
        #[cfg(target_arch = "aarch64")]
        {
            let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
            let vgic_config = gic::Gic::create_default_config(vcpus.into());
            (
                vgic_config.msi_addr,
                vgic_config.msi_addr + vgic_config.msi_size - 1,
            )
        }
        #[cfg(target_arch = "x86_64")]
        (0xfee0_0000, 0xfeef_ffff)
    }

    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }

    /// Places all virtio/VFIO/vfio-user devices on their PCI segments and,
    /// when an IOMMU is configured, records which devices sit behind it.
    /// The virtio-iommu device itself is always plugged on segment 0 and is
    /// never attached to itself.
    #[allow(unused_variables)]
    fn add_pci_devices(
        &mut self,
        virtio_devices: Vec<MetaVirtioDevice>,
    ) -> DeviceManagerResult<()> {
        let iommu_id = String::from(IOMMU_DEVICE_NAME);

        let iommu_device = if self.config.lock().unwrap().iommu {
            let (device, mapping) = virtio_devices::Iommu::new(
                iommu_id.clone(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.get_msi_iova_space(),
                state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioIommu)?;
            let device = Arc::new(Mutex::new(device));
            self.iommu_device = Some(Arc::clone(&device));
            self.iommu_mapping = Some(mapping);

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(iommu_id.clone(), device_node!(iommu_id, device));

            Some(device)
        } else {
            None
        };

        let mut iommu_attached_devices = Vec::new();
        {
            for handle in virtio_devices {
                // Only hand the IOMMU mapping to devices flagged for it.
                let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
                    self.iommu_mapping.clone()
                } else {
                    None
                };

                let dev_id = self.add_virtio_pci_device(
                    handle.virtio_device,
                    &mapping,
                    handle.id,
                    handle.pci_segment,
                    handle.dma_handler,
                )?;

                if handle.iommu {
                    iommu_attached_devices.push(dev_id);
                }
            }

            let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
            iommu_attached_devices.append(&mut vfio_iommu_device_ids);

            let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
            iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);

            // Add all devices from forced iommu segments
            if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
                if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
                    for segment in iommu_segments {
                        // 32 device slots per PCI bus (device 0..=31).
                        for device in 0..32 {
                            let bdf = PciBdf::new(*segment, 0, device, 0);
                            if !iommu_attached_devices.contains(&bdf) {
                                iommu_attached_devices.push(bdf);
                            }
                        }
                    }
                }
            }

            if let Some(iommu_device) = iommu_device {
                // The virtio-iommu device itself goes on segment 0, unmapped.
                let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
                self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
            }
        }

        // Keep references to each segment's config-space access devices.
        for segment in &self.pci_segments {
            #[cfg(target_arch = "x86_64")]
            if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
                self.bus_devices
                    .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
            }

            self.bus_devices
                .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
        }

        Ok(())
    }

    /// aarch64: creates the vGIC interrupt controller, restoring its state
    /// (and optionally the PMU) when resuming from a snapshot.
    #[cfg(target_arch = "aarch64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
            gic::Gic::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
                self.address_manager.vm.clone(),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        // Restore the vGic if this is in the process of restoration
        let id = String::from(gic::GIC_SNAPSHOT_ID);
        if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
            // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
            if self
                .cpu_manager
                .lock()
                .unwrap()
                .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
                .is_err()
            {
                info!("Failed to initialize PMU");
            }

            let vgic_state = vgic_snapshot
                .to_state()
                .map_err(DeviceManagerError::RestoreGetState)?;
            let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
            interrupt_controller
                .lock()
                .unwrap()
                .restore_vgic(vgic_state, &saved_vcpu_states)
                .unwrap();
        }

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// aarch64: returns the vGIC created by `add_interrupt_controller`, if any.
    #[cfg(target_arch = "aarch64")]
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }

    /// x86_64: creates the IOAPIC, registers it on the MMIO bus at the fixed
    /// IOAPIC address, and records it in the device tree.
    #[cfg(target_arch = "x86_64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let id = String::from(IOAPIC_DEVICE_NAME);

        // Create IOAPIC
        let interrupt_controller = Arc::new(Mutex::new(
            ioapic::Ioapic::new(
                id.clone(),
                APIC_START,
                Arc::clone(&self.msi_interrupt_manager),
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        self.address_manager
            .mmio_bus
            .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
            .map_err(DeviceManagerError::BusError)?;

        self.bus_devices
            .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// Creates the ACPI shutdown device, the GED notification device and the
    /// PM timer; on x86_64 the shutdown and PM timer devices are also wired
    /// to fixed PIO ports (0x600, 0x608) and recorded in the FADT addresses.
    /// Returns the GED device so callers can use it for notifications.
    fn add_acpi_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        reset_evt: EventFd,
        exit_evt: EventFd,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
            exit_evt,
            reset_evt,
            vcpus_kill_signalled,
        )));

        self.bus_devices
            .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            let shutdown_pio_address: u16 = 0x600;

            // NOTE(review): 0x8 ports are reserved in the allocator but only
            // 0x4 are inserted on the bus — confirm whether intentional.
            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            // Sleep control/status and reset registers all live on the same
            // PIO port; the device demultiplexes.
            self.acpi_platform_addresses.sleep_control_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.sleep_status_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.reset_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
        }

        let ged_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();
        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: ged_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;
        let ged_address = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(
                None,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
                None,
            )
            .ok_or(DeviceManagerError::AllocateMmioAddress)?;
        let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
            interrupt_group,
            ged_irq,
            ged_address,
        )));
        self.address_manager
            .mmio_bus
            .insert(
                ged_device.clone(),
                ged_address.0,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;
        self.bus_devices
            .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);

        let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));

        self.bus_devices
            .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            let pm_timer_pio_address: u16 = 0x608;

            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            self.acpi_platform_addresses.pm_timer_address =
                Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
        }

        Ok(Some(ged_device))
    }

    /// x86_64: adds the classic PC legacy devices — i8042 (reset), CMOS/RTC
    /// (with memory size registers), the firmware debug port (0x402) and the
    /// POST debug port (0x80).
    #[cfg(target_arch = "x86_64")]
    fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        // Add a shutdown device (i8042)
        let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
            reset_evt.try_clone().unwrap(),
            vcpus_kill_signalled.clone(),
        )));

        self.bus_devices
            .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .io_bus
            .insert(i8042, 0x61, 0x4)
            .map_err(DeviceManagerError::BusError)?;
        {
            // Add a CMOS emulated device
            let mem_size = self
                .memory_manager
                .lock()
                .unwrap()
                .guest_memory()
                .memory()
                .last_addr()
                .0
                + 1;
            let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
            let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);

            let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
                mem_below_4g,
                mem_above_4g,
                reset_evt,
                Some(vcpus_kill_signalled),
            )));

            self.bus_devices
                .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(cmos, 0x70, 0x2)
                .map_err(DeviceManagerError::BusError)?;

            let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));

            self.bus_devices
                .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(fwdebug, 0x402, 0x1)
                .map_err(DeviceManagerError::BusError)?;
        }

        // 0x80 debug port
        let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
        self.bus_devices
            .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
        self.address_manager
            .io_bus
            .insert(debug_port, 0x80, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        Ok(())
    }

    /// aarch64: adds the MMIO legacy devices — RTC and GPIO — each with a
    /// dynamically allocated IRQ, and records them for FDT generation via
    /// `id_to_dev_info`.
    #[cfg(target_arch = "aarch64")]
    fn add_legacy_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
    ) -> DeviceManagerResult<()> {
        // Add a RTC device
        let rtc_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: rtc_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));

        self.bus_devices
            .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(rtc_device, addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Rtc, "rtc".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: rtc_irq,
            },
        );

        // Add a GPIO device
        let id = String::from(GPIO_DEVICE_NAME);
        let gpio_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: gpio_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
            id.clone(),
            interrupt_group,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(gpio_device.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.gpio_device = Some(gpio_device.clone());

        self.id_to_dev_info.insert(
            (DeviceType::Gpio, "gpio".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: gpio_irq,
            },
        );

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, gpio_device));

        Ok(())
    }

    /// x86_64: adds the debug-console (debugcon) device on the configured
    /// PIO port (or the default one) and records it in the device tree.
    #[cfg(target_arch = "x86_64")]
    fn add_debug_console_device(
        &mut self,
        debug_console_writer: Box<dyn io::Write + Send>,
    ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> {
        let id = String::from(DEBUGCON_DEVICE_NAME);
        let debug_console = Arc::new(Mutex::new(DebugConsole::new(
            id.clone(),
            debug_console_writer,
        )));

        let port = self
            .config
            .lock()
            .unwrap()
            .debug_console
            .clone()
            .iobase
            .map(|port| port as u64)
            .unwrap_or(debug_console::DEFAULT_PORT);

        self.bus_devices
            .push(Arc::clone(&debug_console) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(debug_console.clone(), port, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, debug_console));

        Ok(debug_console)
    }

    /// x86_64: adds the 16550-compatible serial device on the standard COM1
    /// ports (0x3f8, IRQ 4) and records it in the device tree.
    #[cfg(target_arch = "x86_64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
        // Serial is tied to IRQ #4
        let serial_irq = 4;

        let id = String::from(SERIAL_DEVICE_NAME);

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(Serial::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(serial.clone(), 0x3f8, 0x8)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// aarch64: adds a PL011 UART at the legacy serial MMIO address with a
    /// dynamically allocated IRQ, records it for the FDT, and appends the
    /// matching `earlycon` kernel command line fragment.
    #[cfg(target_arch = "aarch64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
        let id = String::from(SERIAL_DEVICE_NAME);

        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            self.timestamp,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: serial_irq,
            },
        );

        self.cmdline_additions
            .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// Creates the virtio-console device, choosing the endpoint (file, PTY
    /// pair, TTY, null) from the console configuration. Returns the resizer
    /// only when the console is attached to a TTY; returns `Ok(None)` when
    /// the console mode is `Off`.
    fn add_virtio_console_device(
        &mut self,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_fd: Option<RawFd>,
        resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
        let console_config = self.config.lock().unwrap().console.clone();
        let endpoint = match console_config.mode {
            ConsoleOutputMode::File => {
                if let Some(file_fd) = console_fd {
                    // SAFETY: file_fd is guaranteed to be a valid fd from
                    // pre_create_console_devices() in vmm/src/console_devices.rs
                    Endpoint::File(unsafe { File::from_raw_fd(file_fd) })
                } else {
                    return Err(DeviceManagerError::InvalidConsoleFd);
                }
            }
            ConsoleOutputMode::Pty => {
                if let Some(pty_fd) = console_fd {
                    // SAFETY: pty_fd is guaranteed to be a valid fd from
                    // pre_create_console_devices() in vmm/src/console_devices.rs
                    let file = unsafe { File::from_raw_fd(pty_fd) };
                    self.console_resize_pipe = resize_pipe.map(Arc::new);
                    Endpoint::PtyPair(file.try_clone().unwrap(), file)
                } else {
                    return Err(DeviceManagerError::InvalidConsoleFd);
                }
            }
            ConsoleOutputMode::Tty => {
                if let Some(tty_fd) = console_fd {
                    // SAFETY: tty_fd is guaranteed to be a valid fd from
                    // pre_create_console_devices() in vmm/src/console_devices.rs
                    let stdout = unsafe { File::from_raw_fd(tty_fd) };
                    // If an interactive TTY then we can accept input
                    // SAFETY: FFI call. Trivially safe.
                    if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
                        // SAFETY: FFI call to dup. Trivially safe.
                        let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
                        if stdin == -1 {
                            return vmm_sys_util::errno::errno_result()
                                .map_err(DeviceManagerError::DupFd);
                        }
                        // SAFETY: stdin is valid and owned solely by us.
1988 let stdin = unsafe { File::from_raw_fd(stdin) }; 1989 Endpoint::FilePair(stdout, stdin) 1990 } else { 1991 Endpoint::File(stdout) 1992 } 1993 } else { 1994 return Err(DeviceManagerError::InvalidConsoleFd); 1995 } 1996 } 1997 ConsoleOutputMode::Socket => { 1998 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice); 1999 } 2000 ConsoleOutputMode::Null => Endpoint::Null, 2001 ConsoleOutputMode::Off => return Ok(None), 2002 }; 2003 let id = String::from(CONSOLE_DEVICE_NAME); 2004 2005 let (virtio_console_device, console_resizer) = virtio_devices::Console::new( 2006 id.clone(), 2007 endpoint, 2008 self.console_resize_pipe 2009 .as_ref() 2010 .map(|p| p.try_clone().unwrap()), 2011 self.force_iommu | console_config.iommu, 2012 self.seccomp_action.clone(), 2013 self.exit_evt 2014 .try_clone() 2015 .map_err(DeviceManagerError::EventFd)?, 2016 state_from_id(self.snapshot.as_ref(), id.as_str()) 2017 .map_err(DeviceManagerError::RestoreGetState)?, 2018 ) 2019 .map_err(DeviceManagerError::CreateVirtioConsole)?; 2020 let virtio_console_device = Arc::new(Mutex::new(virtio_console_device)); 2021 virtio_devices.push(MetaVirtioDevice { 2022 virtio_device: Arc::clone(&virtio_console_device) 2023 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2024 iommu: console_config.iommu, 2025 id: id.clone(), 2026 pci_segment: 0, 2027 dma_handler: None, 2028 }); 2029 2030 // Fill the device tree with a new node. In case of restore, we 2031 // know there is nothing to do, so we can simply override the 2032 // existing entry. 2033 self.device_tree 2034 .lock() 2035 .unwrap() 2036 .insert(id.clone(), device_node!(id, virtio_console_device)); 2037 2038 // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY 2039 Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) { 2040 Some(console_resizer) 2041 } else { 2042 None 2043 }) 2044 } 2045 2046 /// Adds all devices that behave like a console with respect to the VM 2047 /// configuration. 
    /// This includes:
    /// - debug-console
    /// - serial-console
    /// - virtio-console
    fn add_console_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_info: Option<ConsoleInfo>,
        console_resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        // Console information is mandatory; bail out early when missing.
        if console_info.is_none() {
            return Err(DeviceManagerError::InvalidConsoleInfo);
        }

        // SAFETY: console_info is Some, so it's safe to unwrap.
        let console_info = console_info.unwrap();

        // Only the File and Tty output modes are backed by an actual writer;
        // the remaining modes are either disabled or serviced through the
        // SerialManager below.
        let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
            ConsoleOutputMode::File | ConsoleOutputMode::Tty => {
                if console_info.serial_main_fd.is_none() {
                    return Err(DeviceManagerError::InvalidConsoleInfo);
                }
                // SAFETY: serial_main_fd is Some (checked just above) and is
                // guaranteed to be a valid fd from
                // pre_create_console_devices() in vmm/src/console_devices.rs
                Some(Box::new(unsafe {
                    File::from_raw_fd(console_info.serial_main_fd.unwrap())
                }))
            }
            ConsoleOutputMode::Off
            | ConsoleOutputMode::Null
            | ConsoleOutputMode::Pty
            | ConsoleOutputMode::Socket => None,
        };
        if serial_config.mode != ConsoleOutputMode::Off {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            // Pty/Tty/Socket modes additionally need a SerialManager thread to
            // pump input towards the guest; the other modes do not.
            self.serial_manager = match serial_config.mode {
                ConsoleOutputMode::Pty | ConsoleOutputMode::Tty | ConsoleOutputMode::Socket => {
                    let serial_manager = SerialManager::new(
                        serial,
                        console_info.serial_main_fd,
                        serial_config.mode,
                        serial_config.socket,
                    )
                    .map_err(DeviceManagerError::CreateSerialManager)?;
                    if let Some(mut serial_manager) = serial_manager {
                        serial_manager
                            .start_thread(
                                self.exit_evt
                                    .try_clone()
                                    .map_err(DeviceManagerError::EventFd)?,
                            )
                            .map_err(DeviceManagerError::SpawnSerialManager)?;
                        Some(Arc::new(serial_manager))
                    } else {
                        None
                    }
                }
                _ => None,
            };
        }

        // The debug console is only wired up on x86_64 builds.
        #[cfg(target_arch = "x86_64")]
        {
            let debug_console_config = self.config.lock().unwrap().debug_console.clone();
            let debug_console_writer: Option<Box<dyn io::Write + Send>> =
                match debug_console_config.mode {
                    ConsoleOutputMode::File | ConsoleOutputMode::Tty => {
                        if console_info.debug_main_fd.is_none() {
                            return Err(DeviceManagerError::InvalidConsoleInfo);
                        }
                        // SAFETY: debug_main_fd is Some (checked just above) and
                        // is guaranteed to be a valid fd from
                        // pre_create_console_devices() in vmm/src/console_devices.rs
                        Some(Box::new(unsafe {
                            File::from_raw_fd(console_info.debug_main_fd.unwrap())
                        }))
                    }
                    ConsoleOutputMode::Off
                    | ConsoleOutputMode::Null
                    | ConsoleOutputMode::Pty
                    | ConsoleOutputMode::Socket => None,
                };
            if let Some(writer) = debug_console_writer {
                let _ = self.add_debug_console_device(writer)?;
            }
        }

        let console_resizer = self.add_virtio_console_device(
            virtio_devices,
            console_info.console_main_fd,
            console_resize_pipe,
        )?;

        Ok(Arc::new(Console { console_resizer }))
    }

    /// Creates a TPM device backed by `tpm_path` and inserts it on the MMIO
    /// bus at the architecture-defined TPM address range.
    fn add_tpm_device(
        &mut self,
        tpm_path: PathBuf,
    ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
        // Create TPM Device
        let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
            DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
        })?;
        let tpm = Arc::new(Mutex::new(tpm));

        // Add TPM Device to mmio
        self.address_manager
            .mmio_bus
            .insert(
                tpm.clone(),
                arch::layout::TPM_START.0,
                arch::layout::TPM_SIZE,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(tpm)
    }

    /// Instantiates every virtio device requested by the VM configuration and
    /// returns them to the caller for PCI placement.
    fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices: Vec<MetaVirtioDevice> = Vec::new();

        // Create "standard" virtio devices (net/block/rng)
        devices.append(&mut self.make_virtio_block_devices()?);
        devices.append(&mut self.make_virtio_net_devices()?);
        devices.append(&mut self.make_virtio_rng_devices()?);

        // Add virtio-fs if required
        devices.append(&mut self.make_virtio_fs_devices()?);

        // Add virtio-pmem if required
        devices.append(&mut self.make_virtio_pmem_devices()?);

        // Add virtio-vsock if required
        devices.append(&mut self.make_virtio_vsock_devices()?);

        devices.append(&mut self.make_virtio_mem_devices()?);

        // Add virtio-balloon if required
        devices.append(&mut self.make_virtio_balloon_devices()?);

        // Add virtio-watchdog device
        devices.append(&mut self.make_virtio_watchdog_devices()?);

        // Add vDPA devices if required
        devices.append(&mut self.make_vdpa_devices()?);

        Ok(devices)
    }

    // Cache whether aio is supported to avoid checking for every block device
    fn aio_is_supported(&mut self) -> bool {
        if let Some(supported) = self.aio_supported {
            return supported;
        }

        // First call: probe the host once and memoize the result.
        let supported = block_aio_is_supported();
        self.aio_supported = Some(supported);
        supported
    }

    // Cache whether io_uring is supported to avoid probing for every block device
    fn io_uring_is_supported(&mut self) -> bool {
        if let Some(supported) = self.io_uring_supported {
            return supported;
        }

        // First call: probe the host once and memoize the result.
        let supported = block_io_uring_is_supported();
        self.io_uring_supported = Some(supported);
        supported
    }

    /// Builds a single virtio-block (or vhost-user-block) device from
    /// `disk_cfg`, generating and storing an id into the config when none was
    /// provided.
    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
            // vhost-user-block: the data path is served by an external backend
            // reachable through the configured socket.
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let vhost_user_block = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(
                    id.clone(),
                    vu_cfg,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            // Pick the disk backend matching the detected image format,
            // preferring asynchronous I/O (io_uring/aio) when supported.
            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(
                                FixedVhdDiskAsync::new(file)
                                    .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                            ) as Box<dyn DiskFile>
                        }
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous RAW disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                        }
                    } else if !disk_cfg.disable_aio && self.aio_is_supported() {
                        info!("Using asynchronous RAW disk file (aio)");
                        Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            let rate_limit_group =
                if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
                    // Create an anonymous RateLimiterGroup that is dropped when the Disk
                    // is dropped.
                    let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
                    let ops = rate_limiter_cfg.ops.unwrap_or_default();
                    let mut rate_limit_group = RateLimiterGroup::new(
                        disk_cfg.id.as_ref().unwrap(),
                        bw.size,
                        bw.one_time_burst.unwrap_or(0),
                        bw.refill_time,
                        ops.size,
                        ops.one_time_burst.unwrap_or(0),
                        ops.refill_time,
                    )
                    .map_err(DeviceManagerError::RateLimiterGroupCreate)?;

                    rate_limit_group
                        .start_thread(
                            self.exit_evt
                                .try_clone()
                                .map_err(DeviceManagerError::EventFd)?,
                        )
                        .unwrap();

                    Some(Arc::new(rate_limit_group))
                } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
                    // Reuse a named rate limiter group shared between disks.
                    self.rate_limit_groups.get(rate_limit_group).cloned()
                } else {
                    None
                };

            // Map of queue index -> host CPUs serving that queue.
            let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() {
                queue_affinity
                    .iter()
                    .map(|a| (a.queue_index, a.host_cpus.clone()))
                    .collect()
            } else {
                BTreeMap::new()
            };

            let virtio_block = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    disk_cfg.serial.clone(),
                    self.seccomp_action.clone(),
                    rate_limit_group,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    queue_affinity,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Builds one virtio-block device per disk entry in the VM configuration.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        // Store back any generated ids into the config.
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }

    /// Builds a single virtio-net (or vhost-user-net) device from `net_cfg`.
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    net_cfg.mtu,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(),
                        id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                ) {
                    Ok(vun_device) => vun_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserNet(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_net as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let state = state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?;
            // Three mutually exclusive backends: a named TAP interface,
            // pre-opened TAP fds, or a TAP derived from ip/mask/mac.
            let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        Some(tap_if_name),
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else if let Some(fds) = &net_cfg.fds {
                let net = virtio_devices::Net::from_tap_fds(
                    id.clone(),
                    fds,
                    Some(net_cfg.mac),
                    net_cfg.mtu,
                    self.force_iommu | net_cfg.iommu,
                    net_cfg.queue_size,
                    self.seccomp_action.clone(),
                    net_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                )
                .map_err(DeviceManagerError::CreateVirtioNet)?;

                // SAFETY: 'fds' are valid because TAP devices are created successfully
                unsafe {
                    self.config.lock().unwrap().add_preserved_fds(fds.clone());
                }

                Arc::new(Mutex::new(net))
            } else {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        None,
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            };

            (
                Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_net as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: net_cfg.iommu,
            id,
            pci_segment: net_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add virtio-net and vhost-user-net devices
    fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        let mut net_devices = self.config.lock().unwrap().net.clone();
        if let Some(net_list_cfg) = &mut net_devices {
            for net_cfg in net_list_cfg.iter_mut() {
                devices.push(self.make_virtio_net_device(net_cfg)?);
            }
        }
        // Store back any generated ids into the config.
        self.config.lock().unwrap().net = net_devices;

        Ok(devices)
    }

    /// Builds the virtio-rng device from the configured entropy source.
    fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        if let Some(rng_path) = rng_config.src.to_str()
        {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_rng_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: rng_config.iommu,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_rng_device));
        }

        Ok(devices)
    }

    /// Builds a single vhost-user-fs (virtio-fs) device from `fs_cfg`.
    /// Fails with NoVirtioFsSock when the socket path is not valid UTF-8.
    fn make_virtio_fs_device(
        &mut self,
        fs_cfg: &mut FsConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &fs_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
            fs_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-fs device: {:?}", fs_cfg);

        let mut node = device_node!(id);

        if let Some(fs_socket) = fs_cfg.socket.to_str() {
            let virtio_fs_device = Arc::new(Mutex::new(
                virtio_devices::vhost_user::Fs::new(
                    id.clone(),
                    fs_socket,
                    &fs_cfg.tag,
                    fs_cfg.num_queues,
                    fs_cfg.queue_size,
                    None,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioFs)?,
            ));

            // Update the device tree with the migratable device.
            node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_fs_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id,
                pci_segment: fs_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            Err(DeviceManagerError::NoVirtioFsSock)
        }
    }

    /// Builds one virtio-fs device per fs entry in the VM configuration.
    fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut fs_devices = self.config.lock().unwrap().fs.clone();
        if let Some(fs_list_cfg) = &mut fs_devices {
            for fs_cfg in fs_list_cfg.iter_mut() {
                devices.push(self.make_virtio_fs_device(fs_cfg)?);
            }
        }
        // Store back any generated ids into the config.
        self.config.lock().unwrap().fs = fs_devices;

        Ok(devices)
    }

    /// Builds a single virtio-pmem device from `pmem_cfg`, mapping the backing
    /// file into the guest address space.
    fn make_virtio_pmem_device(
        &mut self,
        pmem_cfg: &mut PmemConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &pmem_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
            pmem_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-pmem device: {:?}", pmem_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
            info!("Restoring virtio-pmem {} resources", id);

            let mut region_range: Option<(u64, u64)> = None;
            for resource in node.resources.iter() {
                match resource {
                    Resource::MmioAddressRange { base, size } => {
                        // A pmem device owns exactly one MMIO range; a second
                        // one in the snapshot is an error.
                        if region_range.is_some() {
                            return Err(DeviceManagerError::ResourceAlreadyExists);
                        }

                        region_range = Some((*base, *size));
                    }
                    _ => {
                        error!("Unexpected resource {:?} for {}", resource, id);
                    }
                }
            }

            if region_range.is_none() {
                return Err(DeviceManagerError::MissingVirtioPmemResources);
            }

            region_range
        } else {
            None
        };

        // Backing the device with a directory requires an explicit size and
        // opens an unnamed temporary file (O_TMPFILE) inside that directory.
        let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
            if pmem_cfg.size.is_none() {
                return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
            }
            (O_TMPFILE, true)
        } else {
            (0, false)
        };

        let mut file = OpenOptions::new()
            .read(true)
            .write(!pmem_cfg.discard_writes)
            .custom_flags(custom_flags)
            .open(&pmem_cfg.file)
            .map_err(DeviceManagerError::PmemFileOpen)?;

        let size = if let Some(size) = pmem_cfg.size {
            if set_len {
                file.set_len(size)
                    .map_err(DeviceManagerError::PmemFileSetLen)?;
            }
            size
        } else {
            // No explicit size configured: use the current file length.
            file.seek(SeekFrom::End(0))
                .map_err(DeviceManagerError::PmemFileSetLen)?
        };

        // The size must be a multiple of 2MiB.
        if size % 0x20_0000 != 0 {
            return Err(DeviceManagerError::PmemSizeNotAligned);
        }

        let (region_base, region_size) = if let Some((base, size)) = region_range {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(
                    Some(GuestAddress(base)),
                    size as GuestUsize,
                    Some(0x0020_0000),
                )
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base, size)
        } else {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            let base = self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(None, size as GuestUsize, Some(0x0020_0000))
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base.raw_value(), size)
        };

        let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
        // discard_writes maps the file privately so guest writes never reach
        // the backing file.
        let mmap_region = MmapRegion::build(
            Some(FileOffset::new(cloned_file, 0)),
            region_size as usize,
            PROT_READ | PROT_WRITE,
            MAP_NORESERVE
                | if pmem_cfg.discard_writes {
                    MAP_PRIVATE
                } else {
                    MAP_SHARED
                },
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;
        let host_addr: u64 = mmap_region.as_ptr() as u64;

        let mem_slot = self
            .memory_manager
            .lock()
            .unwrap()
            .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
            .map_err(DeviceManagerError::MemoryManager)?;

        let mapping = virtio_devices::UserspaceMapping {
            host_addr,
            mem_slot,
            addr: GuestAddress(region_base),
            len: region_size,
            mergeable: false,
        };

        let virtio_pmem_device = Arc::new(Mutex::new(
            virtio_devices::Pmem::new(
                id.clone(),
                file,
                GuestAddress(region_base),
                mapping,
                mmap_region,
                self.force_iommu | pmem_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioPmem)?,
        ));

        // Update the device tree with correct resource information and with
        // the migratable device.
        node.resources.push(Resource::MmioAddressRange {
            base: region_base,
            size: region_size,
        });
        node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_pmem_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: pmem_cfg.iommu,
            id,
            pci_segment: pmem_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Builds one virtio-pmem device per pmem entry in the VM configuration.
    fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add virtio-pmem if required
        let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
        if let Some(pmem_list_cfg) = &mut pmem_devices {
            for pmem_cfg in pmem_list_cfg.iter_mut() {
                devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
            }
        }
        // Store back any generated ids into the config.
        self.config.lock().unwrap().pmem = pmem_devices;

        Ok(devices)
    }

    /// Builds a single virtio-vsock device from `vsock_cfg`, backed by a Unix
    /// socket on the host side.
    fn make_virtio_vsock_device(
        &mut self,
        vsock_cfg: &mut VsockConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vsock_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
            vsock_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-vsock device: {:?}", vsock_cfg);

        let socket_path = vsock_cfg
            .socket
            .to_str()
            .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
        let backend =
            virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                .map_err(DeviceManagerError::CreateVsockBackend)?;

        let vsock_device = Arc::new(Mutex::new(
            virtio_devices::Vsock::new(
                id.clone(),
                vsock_cfg.cid,
                vsock_cfg.socket.clone(),
                backend,
                self.force_iommu | vsock_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioVsock)?,
        ));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vsock_device));

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&vsock_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vsock_cfg.iommu,
            id,
            pci_segment: vsock_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Builds the (at most one) virtio-vsock device from the VM configuration.
    fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut vsock = self.config.lock().unwrap().vsock.clone();
        if let Some(ref mut vsock_cfg) = &mut vsock {
            devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
        }
        // Store back any generated id into the config.
        self.config.lock().unwrap().vsock = vsock;

        Ok(devices)
    }

    /// Builds one virtio-mem device per memory zone carrying a virtio-mem
    /// zone, and hands each device back to its zone for resize handling.
    fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mm = self.memory_manager.clone();
        let mut mm = mm.lock().unwrap();
        for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
            if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
                info!("Creating virtio-mem device: id = {}", memory_zone_id);

                // NUMA node the zone belongs to, if any.
                let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
                    .map(|i| i as u16);

                let virtio_mem_device = Arc::new(Mutex::new(
                    virtio_devices::Mem::new(
                        memory_zone_id.clone(),
                        virtio_mem_zone.region(),
                        self.seccomp_action.clone(),
                        node_id,
                        virtio_mem_zone.hotplugged_size(),
                        virtio_mem_zone.hugepages(),
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        virtio_mem_zone.blocks_state().clone(),
                        state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
                            .map_err(DeviceManagerError::RestoreGetState)?,
                    )
                    .map_err(DeviceManagerError::CreateVirtioMem)?,
                ));

                // Update the virtio-mem zone so that it has a handle onto the
                // virtio-mem device, which will be used for triggering a resize
                // if needed.
                virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));

                self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));

                devices.push(MetaVirtioDevice {
                    virtio_device: Arc::clone(&virtio_mem_device)
                        as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                    iommu: false,
                    id: memory_zone_id.clone(),
                    pci_segment: 0,
                    dma_handler: None,
                });

                // Fill the device tree with a new node. In case of restore, we
                // know there is nothing to do, so we can simply override the
                // existing entry.
                self.device_tree.lock().unwrap().insert(
                    memory_zone_id.clone(),
                    device_node!(memory_zone_id, virtio_mem_device),
                );
            }
        }

        Ok(devices)
    }

    /// Builds the virtio-balloon device when balloon support is configured.
    fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
            let id = String::from(BALLOON_DEVICE_NAME);
            info!("Creating virtio-balloon device: id = {}", id);

            let virtio_balloon_device = Arc::new(Mutex::new(
                virtio_devices::Balloon::new(
                    id.clone(),
                    balloon_config.size,
                    balloon_config.deflate_on_oom,
                    balloon_config.free_page_reporting,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBalloon)?,
            ));

            // Keep a handle to the balloon device for later operations.
            self.balloon = Some(virtio_balloon_device.clone());

            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_balloon_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_balloon_device));
        }

        Ok(devices)
    }

    /// Builds the virtio-watchdog device when enabled in the configuration.
    fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if !self.config.lock().unwrap().watchdog {
            return Ok(devices);
        }

        let id = String::from(WATCHDOG_DEVICE_NAME);
        info!("Creating virtio-watchdog device: id = {}", id);

        let virtio_watchdog_device = Arc::new(Mutex::new(
            virtio_devices::Watchdog::new(
                id.clone(),
                // NOTE(review): this unwrap panics if duplicating the reset
                // eventfd fails — consider mapping to DeviceManagerError::EventFd.
                self.reset_evt.try_clone().unwrap(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
        ));
        devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_watchdog_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: false,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_watchdog_device));

        Ok(devices)
    }

    /// Builds a single vDPA device from `vdpa_cfg`, including the DMA handler
    /// the device requires.
    fn make_vdpa_device(
        &mut self,
        vdpa_cfg: &mut VdpaConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vdpa_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
            vdpa_cfg.id = Some(id.clone());
            id
        };

        info!("Creating vDPA device: {:?}", vdpa_cfg);

        let device_path = vdpa_cfg
            .path
            .to_str()
            .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;

        let vdpa_device = Arc::new(Mutex::new(
            virtio_devices::Vdpa::new(
                id.clone(),
                device_path,
                self.memory_manager.lock().unwrap().guest_memory(),
                vdpa_cfg.num_queues as u16,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVdpa)?,
        ));

        // Create the DMA handler that is required by the vDPA device
        let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
            Arc::clone(&vdpa_device),
            Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
        ));

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vdpa_device));

        Ok(MetaVirtioDevice {
            virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vdpa_cfg.iommu,
            id,
            pci_segment: vdpa_cfg.pci_segment,
            dma_handler: Some(vdpa_mapping),
        })
    }

    /// Builds one vDPA device per vdpa entry in the VM configuration.
    fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add vdpa if required
        let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
        if let Some(vdpa_list_cfg) = &mut vdpa_devices {
            for vdpa_cfg in vdpa_list_cfg.iter_mut() {
                devices.push(self.make_vdpa_device(vdpa_cfg)?);
            }
        }
        // Store back any generated ids into the config.
        self.config.lock().unwrap().vdpa = vdpa_devices;

        Ok(devices)
    }

    /// Returns the next unused device name of the form `<prefix><counter>`,
    /// skipping names already present in the boot id list or the device tree.
    /// Fails only after the wrapping counter has cycled through every value.
    fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
        let start_id = self.device_id_cnt;
        loop {
            // Generate the temporary name.
            let name = format!("{}{}", prefix, self.device_id_cnt);
            // Increment the counter.
            self.device_id_cnt += Wrapping(1);
            // Check if the name is already in use.
            if !self.boot_id_list.contains(&name)
                && !self.device_tree.lock().unwrap().contains_key(&name)
            {
                return Ok(name);
            }

            if self.device_id_cnt == start_id {
                // We went through a full loop and there's nothing else we can
                // do.
                break;
            }
        }
        Err(DeviceManagerError::NoAvailableDeviceName)
    }

    /// Adds a VFIO passthrough device described by `device_cfg`, lazily
    /// creating the hypervisor passthrough device on first use.
    fn add_passthrough_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // If the passthrough device has not been created yet, it is created
        // here and stored in the DeviceManager structure for future needs.
3228 if self.passthrough_device.is_none() { 3229 self.passthrough_device = Some( 3230 self.address_manager 3231 .vm 3232 .create_passthrough_device() 3233 .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?, 3234 ); 3235 } 3236 3237 self.add_vfio_device(device_cfg) 3238 } 3239 3240 fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> { 3241 let passthrough_device = self 3242 .passthrough_device 3243 .as_ref() 3244 .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?; 3245 3246 let dup = passthrough_device 3247 .try_clone() 3248 .map_err(DeviceManagerError::VfioCreate)?; 3249 3250 Ok(Arc::new( 3251 VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?, 3252 )) 3253 } 3254 3255 fn add_vfio_device( 3256 &mut self, 3257 device_cfg: &mut DeviceConfig, 3258 ) -> DeviceManagerResult<(PciBdf, String)> { 3259 let vfio_name = if let Some(id) = &device_cfg.id { 3260 id.clone() 3261 } else { 3262 let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?; 3263 device_cfg.id = Some(id.clone()); 3264 id 3265 }; 3266 3267 let (pci_segment_id, pci_device_bdf, resources) = 3268 self.pci_resources(&vfio_name, device_cfg.pci_segment)?; 3269 3270 let mut needs_dma_mapping = false; 3271 3272 // Here we create a new VFIO container for two reasons. Either this is 3273 // the first VFIO device, meaning we need a new VFIO container, which 3274 // will be shared with other VFIO devices. Or the new VFIO device is 3275 // attached to a vIOMMU, meaning we must create a dedicated VFIO 3276 // container. In the vIOMMU use case, we can't let all devices under 3277 // the same VFIO container since we couldn't map/unmap memory for each 3278 // device. That's simply because the map/unmap operations happen at the 3279 // VFIO container level. 
3280 let vfio_container = if device_cfg.iommu { 3281 let vfio_container = self.create_vfio_container()?; 3282 3283 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3284 Arc::clone(&vfio_container), 3285 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3286 Arc::clone(&self.mmio_regions), 3287 )); 3288 3289 if let Some(iommu) = &self.iommu_device { 3290 iommu 3291 .lock() 3292 .unwrap() 3293 .add_external_mapping(pci_device_bdf.into(), vfio_mapping); 3294 } else { 3295 return Err(DeviceManagerError::MissingVirtualIommu); 3296 } 3297 3298 vfio_container 3299 } else if let Some(vfio_container) = &self.vfio_container { 3300 Arc::clone(vfio_container) 3301 } else { 3302 let vfio_container = self.create_vfio_container()?; 3303 needs_dma_mapping = true; 3304 self.vfio_container = Some(Arc::clone(&vfio_container)); 3305 3306 vfio_container 3307 }; 3308 3309 let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container)) 3310 .map_err(DeviceManagerError::VfioCreate)?; 3311 3312 if needs_dma_mapping { 3313 // Register DMA mapping in IOMMU. 3314 // Do not register virtio-mem regions, as they are handled directly by 3315 // virtio-mem device itself. 
3316 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3317 for region in zone.regions() { 3318 vfio_container 3319 .vfio_dma_map( 3320 region.start_addr().raw_value(), 3321 region.len(), 3322 region.as_ptr() as u64, 3323 ) 3324 .map_err(DeviceManagerError::VfioDmaMap)?; 3325 } 3326 } 3327 3328 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3329 Arc::clone(&vfio_container), 3330 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3331 Arc::clone(&self.mmio_regions), 3332 )); 3333 3334 for virtio_mem_device in self.virtio_mem_devices.iter() { 3335 virtio_mem_device 3336 .lock() 3337 .unwrap() 3338 .add_dma_mapping_handler( 3339 VirtioMemMappingSource::Container, 3340 vfio_mapping.clone(), 3341 ) 3342 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3343 } 3344 } 3345 3346 let legacy_interrupt_group = 3347 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3348 Some( 3349 legacy_interrupt_manager 3350 .create_group(LegacyIrqGroupConfig { 3351 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3352 [pci_device_bdf.device() as usize] 3353 as InterruptIndex, 3354 }) 3355 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3356 ) 3357 } else { 3358 None 3359 }; 3360 3361 let memory_manager = self.memory_manager.clone(); 3362 3363 let vfio_pci_device = VfioPciDevice::new( 3364 vfio_name.clone(), 3365 &self.address_manager.vm, 3366 vfio_device, 3367 vfio_container, 3368 self.msi_interrupt_manager.clone(), 3369 legacy_interrupt_group, 3370 device_cfg.iommu, 3371 pci_device_bdf, 3372 Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()), 3373 vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()), 3374 device_cfg.x_nv_gpudirect_clique, 3375 ) 3376 .map_err(DeviceManagerError::VfioPciCreate)?; 3377 3378 let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device)); 3379 3380 let new_resources = self.add_pci_device( 3381 vfio_pci_device.clone(), 3382 
vfio_pci_device.clone(), 3383 pci_segment_id, 3384 pci_device_bdf, 3385 resources, 3386 )?; 3387 3388 vfio_pci_device 3389 .lock() 3390 .unwrap() 3391 .map_mmio_regions() 3392 .map_err(DeviceManagerError::VfioMapRegion)?; 3393 3394 for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() { 3395 self.mmio_regions.lock().unwrap().push(mmio_region); 3396 } 3397 3398 let mut node = device_node!(vfio_name, vfio_pci_device); 3399 3400 // Update the device tree with correct resource information. 3401 node.resources = new_resources; 3402 node.pci_bdf = Some(pci_device_bdf); 3403 node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device)); 3404 3405 self.device_tree 3406 .lock() 3407 .unwrap() 3408 .insert(vfio_name.clone(), node); 3409 3410 Ok((pci_device_bdf, vfio_name)) 3411 } 3412 3413 fn add_pci_device( 3414 &mut self, 3415 bus_device: Arc<Mutex<dyn BusDevice>>, 3416 pci_device: Arc<Mutex<dyn PciDevice>>, 3417 segment_id: u16, 3418 bdf: PciBdf, 3419 resources: Option<Vec<Resource>>, 3420 ) -> DeviceManagerResult<Vec<Resource>> { 3421 let bars = pci_device 3422 .lock() 3423 .unwrap() 3424 .allocate_bars( 3425 &self.address_manager.allocator, 3426 &mut self.pci_segments[segment_id as usize] 3427 .mem32_allocator 3428 .lock() 3429 .unwrap(), 3430 &mut self.pci_segments[segment_id as usize] 3431 .mem64_allocator 3432 .lock() 3433 .unwrap(), 3434 resources, 3435 ) 3436 .map_err(DeviceManagerError::AllocateBars)?; 3437 3438 let mut pci_bus = self.pci_segments[segment_id as usize] 3439 .pci_bus 3440 .lock() 3441 .unwrap(); 3442 3443 pci_bus 3444 .add_device(bdf.device() as u32, pci_device) 3445 .map_err(DeviceManagerError::AddPciDevice)?; 3446 3447 self.bus_devices.push(Arc::clone(&bus_device)); 3448 3449 pci_bus 3450 .register_mapping( 3451 bus_device, 3452 #[cfg(target_arch = "x86_64")] 3453 self.address_manager.io_bus.as_ref(), 3454 self.address_manager.mmio_bus.as_ref(), 3455 bars.clone(), 3456 ) 3457 .map_err(DeviceManagerError::AddPciDevice)?; 3458 
3459 let mut new_resources = Vec::new(); 3460 for bar in bars { 3461 new_resources.push(Resource::PciBar { 3462 index: bar.idx(), 3463 base: bar.addr(), 3464 size: bar.size(), 3465 type_: bar.region_type().into(), 3466 prefetchable: bar.prefetchable().into(), 3467 }); 3468 } 3469 3470 Ok(new_resources) 3471 } 3472 3473 fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3474 let mut iommu_attached_device_ids = Vec::new(); 3475 let mut devices = self.config.lock().unwrap().devices.clone(); 3476 3477 if let Some(device_list_cfg) = &mut devices { 3478 for device_cfg in device_list_cfg.iter_mut() { 3479 let (device_id, _) = self.add_passthrough_device(device_cfg)?; 3480 if device_cfg.iommu && self.iommu_device.is_some() { 3481 iommu_attached_device_ids.push(device_id); 3482 } 3483 } 3484 } 3485 3486 // Update the list of devices 3487 self.config.lock().unwrap().devices = devices; 3488 3489 Ok(iommu_attached_device_ids) 3490 } 3491 3492 fn add_vfio_user_device( 3493 &mut self, 3494 device_cfg: &mut UserDeviceConfig, 3495 ) -> DeviceManagerResult<(PciBdf, String)> { 3496 let vfio_user_name = if let Some(id) = &device_cfg.id { 3497 id.clone() 3498 } else { 3499 let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?; 3500 device_cfg.id = Some(id.clone()); 3501 id 3502 }; 3503 3504 let (pci_segment_id, pci_device_bdf, resources) = 3505 self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?; 3506 3507 let legacy_interrupt_group = 3508 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3509 Some( 3510 legacy_interrupt_manager 3511 .create_group(LegacyIrqGroupConfig { 3512 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3513 [pci_device_bdf.device() as usize] 3514 as InterruptIndex, 3515 }) 3516 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3517 ) 3518 } else { 3519 None 3520 }; 3521 3522 let client = Arc::new(Mutex::new( 3523 vfio_user::Client::new(&device_cfg.socket) 3524 
.map_err(DeviceManagerError::VfioUserCreateClient)?, 3525 )); 3526 3527 let memory_manager = self.memory_manager.clone(); 3528 3529 let mut vfio_user_pci_device = VfioUserPciDevice::new( 3530 vfio_user_name.clone(), 3531 &self.address_manager.vm, 3532 client.clone(), 3533 self.msi_interrupt_manager.clone(), 3534 legacy_interrupt_group, 3535 pci_device_bdf, 3536 Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()), 3537 vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()), 3538 ) 3539 .map_err(DeviceManagerError::VfioUserCreate)?; 3540 3541 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3542 let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory))); 3543 for virtio_mem_device in self.virtio_mem_devices.iter() { 3544 virtio_mem_device 3545 .lock() 3546 .unwrap() 3547 .add_dma_mapping_handler( 3548 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3549 vfio_user_mapping.clone(), 3550 ) 3551 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3552 } 3553 3554 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3555 for region in zone.regions() { 3556 vfio_user_pci_device 3557 .dma_map(region) 3558 .map_err(DeviceManagerError::VfioUserDmaMap)?; 3559 } 3560 } 3561 3562 let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device)); 3563 3564 let new_resources = self.add_pci_device( 3565 vfio_user_pci_device.clone(), 3566 vfio_user_pci_device.clone(), 3567 pci_segment_id, 3568 pci_device_bdf, 3569 resources, 3570 )?; 3571 3572 // Note it is required to call 'add_pci_device()' in advance to have the list of 3573 // mmio regions provisioned correctly 3574 vfio_user_pci_device 3575 .lock() 3576 .unwrap() 3577 .map_mmio_regions() 3578 .map_err(DeviceManagerError::VfioUserMapRegion)?; 3579 3580 let mut node = device_node!(vfio_user_name, vfio_user_pci_device); 3581 3582 // Update the device tree with correct resource information. 
3583 node.resources = new_resources; 3584 node.pci_bdf = Some(pci_device_bdf); 3585 node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device)); 3586 3587 self.device_tree 3588 .lock() 3589 .unwrap() 3590 .insert(vfio_user_name.clone(), node); 3591 3592 Ok((pci_device_bdf, vfio_user_name)) 3593 } 3594 3595 fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3596 let mut user_devices = self.config.lock().unwrap().user_devices.clone(); 3597 3598 if let Some(device_list_cfg) = &mut user_devices { 3599 for device_cfg in device_list_cfg.iter_mut() { 3600 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?; 3601 } 3602 } 3603 3604 // Update the list of devices 3605 self.config.lock().unwrap().user_devices = user_devices; 3606 3607 Ok(vec![]) 3608 } 3609 3610 fn add_virtio_pci_device( 3611 &mut self, 3612 virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3613 iommu_mapping: &Option<Arc<IommuMapping>>, 3614 virtio_device_id: String, 3615 pci_segment_id: u16, 3616 dma_handler: Option<Arc<dyn ExternalDmaMapping>>, 3617 ) -> DeviceManagerResult<PciBdf> { 3618 let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}"); 3619 3620 // Add the new virtio-pci node to the device tree. 3621 let mut node = device_node!(id); 3622 node.children = vec![virtio_device_id.clone()]; 3623 3624 let (pci_segment_id, pci_device_bdf, resources) = 3625 self.pci_resources(&id, pci_segment_id)?; 3626 3627 // Update the existing virtio node by setting the parent. 3628 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) { 3629 node.parent = Some(id.clone()); 3630 } else { 3631 return Err(DeviceManagerError::MissingNode); 3632 } 3633 3634 // Allows support for one MSI-X vector per queue. It also adds 1 3635 // as we need to take into account the dedicated vector to notify 3636 // about a virtio config change. 
3637 let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16; 3638 3639 // Create the AccessPlatform trait from the implementation IommuMapping. 3640 // This will provide address translation for any virtio device sitting 3641 // behind a vIOMMU. 3642 let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping 3643 { 3644 Some(Arc::new(AccessPlatformMapping::new( 3645 pci_device_bdf.into(), 3646 mapping.clone(), 3647 ))) 3648 } else { 3649 None 3650 }; 3651 3652 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3653 3654 // Map DMA ranges if a DMA handler is available and if the device is 3655 // not attached to a virtual IOMMU. 3656 if let Some(dma_handler) = &dma_handler { 3657 if iommu_mapping.is_some() { 3658 if let Some(iommu) = &self.iommu_device { 3659 iommu 3660 .lock() 3661 .unwrap() 3662 .add_external_mapping(pci_device_bdf.into(), dma_handler.clone()); 3663 } else { 3664 return Err(DeviceManagerError::MissingVirtualIommu); 3665 } 3666 } else { 3667 // Let every virtio-mem device handle the DMA map/unmap through the 3668 // DMA handler provided. 3669 for virtio_mem_device in self.virtio_mem_devices.iter() { 3670 virtio_mem_device 3671 .lock() 3672 .unwrap() 3673 .add_dma_mapping_handler( 3674 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3675 dma_handler.clone(), 3676 ) 3677 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3678 } 3679 3680 // Do not register virtio-mem regions, as they are handled directly by 3681 // virtio-mem devices. 
3682 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3683 for region in zone.regions() { 3684 let gpa = region.start_addr().0; 3685 let size = region.len(); 3686 dma_handler 3687 .map(gpa, gpa, size) 3688 .map_err(DeviceManagerError::VirtioDmaMap)?; 3689 } 3690 } 3691 } 3692 } 3693 3694 let device_type = virtio_device.lock().unwrap().device_type(); 3695 let virtio_pci_device = Arc::new(Mutex::new( 3696 VirtioPciDevice::new( 3697 id.clone(), 3698 memory, 3699 virtio_device, 3700 msix_num, 3701 access_platform, 3702 &self.msi_interrupt_manager, 3703 pci_device_bdf.into(), 3704 self.activate_evt 3705 .try_clone() 3706 .map_err(DeviceManagerError::EventFd)?, 3707 // All device types *except* virtio block devices should be allocated a 64-bit bar 3708 // The block devices should be given a 32-bit BAR so that they are easily accessible 3709 // to firmware without requiring excessive identity mapping. 3710 // The exception being if not on the default PCI segment. 3711 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32, 3712 dma_handler, 3713 self.pending_activations.clone(), 3714 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()), 3715 ) 3716 .map_err(DeviceManagerError::VirtioDevice)?, 3717 )); 3718 3719 let new_resources = self.add_pci_device( 3720 virtio_pci_device.clone(), 3721 virtio_pci_device.clone(), 3722 pci_segment_id, 3723 pci_device_bdf, 3724 resources, 3725 )?; 3726 3727 let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr(); 3728 for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) { 3729 let io_addr = IoEventAddress::Mmio(addr); 3730 self.address_manager 3731 .vm 3732 .register_ioevent(event, &io_addr, None) 3733 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?; 3734 } 3735 3736 // Update the device tree with correct resource information. 
3737 node.resources = new_resources; 3738 node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>); 3739 node.pci_bdf = Some(pci_device_bdf); 3740 node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device)); 3741 self.device_tree.lock().unwrap().insert(id, node); 3742 3743 Ok(pci_device_bdf) 3744 } 3745 3746 fn add_pvpanic_device( 3747 &mut self, 3748 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> { 3749 let id = String::from(PVPANIC_DEVICE_NAME); 3750 let pci_segment_id = 0x0_u16; 3751 3752 info!("Creating pvpanic device {}", id); 3753 3754 let (pci_segment_id, pci_device_bdf, resources) = 3755 self.pci_resources(&id, pci_segment_id)?; 3756 3757 let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); 3758 3759 let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot) 3760 .map_err(DeviceManagerError::PvPanicCreate)?; 3761 3762 let pvpanic_device = Arc::new(Mutex::new(pvpanic_device)); 3763 3764 let new_resources = self.add_pci_device( 3765 pvpanic_device.clone(), 3766 pvpanic_device.clone(), 3767 pci_segment_id, 3768 pci_device_bdf, 3769 resources, 3770 )?; 3771 3772 let mut node = device_node!(id, pvpanic_device); 3773 3774 node.resources = new_resources; 3775 node.pci_bdf = Some(pci_device_bdf); 3776 node.pci_device_handle = None; 3777 3778 self.device_tree.lock().unwrap().insert(id, node); 3779 3780 Ok(Some(pvpanic_device)) 3781 } 3782 3783 fn pci_resources( 3784 &self, 3785 id: &str, 3786 pci_segment_id: u16, 3787 ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> { 3788 // Look for the id in the device tree. If it can be found, that means 3789 // the device is being restored, otherwise it's created from scratch. 
3790 Ok( 3791 if let Some(node) = self.device_tree.lock().unwrap().get(id) { 3792 info!("Restoring virtio-pci {} resources", id); 3793 let pci_device_bdf: PciBdf = node 3794 .pci_bdf 3795 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3796 let pci_segment_id = pci_device_bdf.segment(); 3797 3798 self.pci_segments[pci_segment_id as usize] 3799 .pci_bus 3800 .lock() 3801 .unwrap() 3802 .get_device_id(pci_device_bdf.device() as usize) 3803 .map_err(DeviceManagerError::GetPciDeviceId)?; 3804 3805 (pci_segment_id, pci_device_bdf, Some(node.resources.clone())) 3806 } else { 3807 let pci_device_bdf = 3808 self.pci_segments[pci_segment_id as usize].next_device_bdf()?; 3809 3810 (pci_segment_id, pci_device_bdf, None) 3811 }, 3812 ) 3813 } 3814 3815 #[cfg(target_arch = "x86_64")] 3816 pub fn io_bus(&self) -> &Arc<Bus> { 3817 &self.address_manager.io_bus 3818 } 3819 3820 pub fn mmio_bus(&self) -> &Arc<Bus> { 3821 &self.address_manager.mmio_bus 3822 } 3823 3824 pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> { 3825 &self.address_manager.allocator 3826 } 3827 3828 pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> { 3829 self.interrupt_controller 3830 .as_ref() 3831 .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>) 3832 } 3833 3834 pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> { 3835 &self.pci_segments 3836 } 3837 3838 pub fn console(&self) -> &Arc<Console> { 3839 &self.console 3840 } 3841 3842 #[cfg(target_arch = "aarch64")] 3843 pub fn cmdline_additions(&self) -> &[String] { 3844 self.cmdline_additions.as_slice() 3845 } 3846 3847 pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> { 3848 for handle in self.virtio_devices.iter() { 3849 handle 3850 .virtio_device 3851 .lock() 3852 .unwrap() 3853 .add_memory_region(new_region) 3854 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?; 3855 3856 if let Some(dma_handler) = &handle.dma_handler { 3857 if !handle.iommu { 3858 
let gpa = new_region.start_addr().0; 3859 let size = new_region.len(); 3860 dma_handler 3861 .map(gpa, gpa, size) 3862 .map_err(DeviceManagerError::VirtioDmaMap)?; 3863 } 3864 } 3865 } 3866 3867 // Take care of updating the memory for VFIO PCI devices. 3868 if let Some(vfio_container) = &self.vfio_container { 3869 vfio_container 3870 .vfio_dma_map( 3871 new_region.start_addr().raw_value(), 3872 new_region.len(), 3873 new_region.as_ptr() as u64, 3874 ) 3875 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?; 3876 } 3877 3878 // Take care of updating the memory for vfio-user devices. 3879 { 3880 let device_tree = self.device_tree.lock().unwrap(); 3881 for pci_device_node in device_tree.pci_devices() { 3882 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node 3883 .pci_device_handle 3884 .as_ref() 3885 .ok_or(DeviceManagerError::MissingPciDevice)? 3886 { 3887 vfio_user_pci_device 3888 .lock() 3889 .unwrap() 3890 .dma_map(new_region) 3891 .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?; 3892 } 3893 } 3894 } 3895 3896 Ok(()) 3897 } 3898 3899 pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> { 3900 for mut activator in self.pending_activations.lock().unwrap().drain(..) 
{ 3901 activator 3902 .activate() 3903 .map_err(DeviceManagerError::VirtioActivate)?; 3904 } 3905 Ok(()) 3906 } 3907 3908 pub fn notify_hotplug( 3909 &self, 3910 _notification_type: AcpiNotificationFlags, 3911 ) -> DeviceManagerResult<()> { 3912 return self 3913 .ged_notification_device 3914 .as_ref() 3915 .unwrap() 3916 .lock() 3917 .unwrap() 3918 .notify(_notification_type) 3919 .map_err(DeviceManagerError::HotPlugNotification); 3920 } 3921 3922 pub fn add_device( 3923 &mut self, 3924 device_cfg: &mut DeviceConfig, 3925 ) -> DeviceManagerResult<PciDeviceInfo> { 3926 self.validate_identifier(&device_cfg.id)?; 3927 3928 if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) { 3929 return Err(DeviceManagerError::InvalidIommuHotplug); 3930 } 3931 3932 let (bdf, device_name) = self.add_passthrough_device(device_cfg)?; 3933 3934 // Update the PCIU bitmap 3935 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3936 3937 Ok(PciDeviceInfo { 3938 id: device_name, 3939 bdf, 3940 }) 3941 } 3942 3943 pub fn add_user_device( 3944 &mut self, 3945 device_cfg: &mut UserDeviceConfig, 3946 ) -> DeviceManagerResult<PciDeviceInfo> { 3947 self.validate_identifier(&device_cfg.id)?; 3948 3949 let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?; 3950 3951 // Update the PCIU bitmap 3952 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 3953 3954 Ok(PciDeviceInfo { 3955 id: device_name, 3956 bdf, 3957 }) 3958 } 3959 3960 pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> { 3961 // The node can be directly a PCI node in case the 'id' refers to a 3962 // VFIO device or a virtio-pci one. 3963 // In case the 'id' refers to a virtio device, we must find the PCI 3964 // node by looking at the parent. 
3965 let device_tree = self.device_tree.lock().unwrap(); 3966 let node = device_tree 3967 .get(&id) 3968 .ok_or(DeviceManagerError::UnknownDeviceId(id))?; 3969 3970 let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() { 3971 node 3972 } else { 3973 let parent = node 3974 .parent 3975 .as_ref() 3976 .ok_or(DeviceManagerError::MissingNode)?; 3977 device_tree 3978 .get(parent) 3979 .ok_or(DeviceManagerError::MissingNode)? 3980 }; 3981 3982 let pci_device_bdf: PciBdf = pci_device_node 3983 .pci_bdf 3984 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3985 let pci_segment_id = pci_device_bdf.segment(); 3986 3987 let pci_device_handle = pci_device_node 3988 .pci_device_handle 3989 .as_ref() 3990 .ok_or(DeviceManagerError::MissingPciDevice)?; 3991 #[allow(irrefutable_let_patterns)] 3992 if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle { 3993 let device_type = VirtioDeviceType::from( 3994 virtio_pci_device 3995 .lock() 3996 .unwrap() 3997 .virtio_device() 3998 .lock() 3999 .unwrap() 4000 .device_type(), 4001 ); 4002 match device_type { 4003 VirtioDeviceType::Net 4004 | VirtioDeviceType::Block 4005 | VirtioDeviceType::Pmem 4006 | VirtioDeviceType::Fs 4007 | VirtioDeviceType::Vsock => {} 4008 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)), 4009 } 4010 } 4011 4012 // Update the PCID bitmap 4013 self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device(); 4014 4015 Ok(()) 4016 } 4017 4018 pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> { 4019 info!( 4020 "Ejecting device_id = {} on segment_id={}", 4021 device_id, pci_segment_id 4022 ); 4023 4024 // Convert the device ID into the corresponding b/d/f. 4025 let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0); 4026 4027 // Give the PCI device ID back to the PCI bus. 
4028 self.pci_segments[pci_segment_id as usize] 4029 .pci_bus 4030 .lock() 4031 .unwrap() 4032 .put_device_id(device_id as usize) 4033 .map_err(DeviceManagerError::PutPciDeviceId)?; 4034 4035 // Remove the device from the device tree along with its children. 4036 let mut device_tree = self.device_tree.lock().unwrap(); 4037 let pci_device_node = device_tree 4038 .remove_node_by_pci_bdf(pci_device_bdf) 4039 .ok_or(DeviceManagerError::MissingPciDevice)?; 4040 4041 // For VFIO and vfio-user the PCI device id is the id. 4042 // For virtio we overwrite it later as we want the id of the 4043 // underlying device. 4044 let mut id = pci_device_node.id; 4045 let pci_device_handle = pci_device_node 4046 .pci_device_handle 4047 .ok_or(DeviceManagerError::MissingPciDevice)?; 4048 if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) { 4049 // The virtio-pci device has a single child 4050 if !pci_device_node.children.is_empty() { 4051 assert_eq!(pci_device_node.children.len(), 1); 4052 let child_id = &pci_device_node.children[0]; 4053 id.clone_from(child_id); 4054 } 4055 } 4056 for child in pci_device_node.children.iter() { 4057 device_tree.remove(child); 4058 } 4059 4060 let mut iommu_attached = false; 4061 if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices { 4062 if iommu_attached_devices.contains(&pci_device_bdf) { 4063 iommu_attached = true; 4064 } 4065 } 4066 4067 let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { 4068 // No need to remove any virtio-mem mapping here as the container outlives all devices 4069 PciDeviceHandle::Vfio(vfio_pci_device) => { 4070 for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() { 4071 self.mmio_regions 4072 .lock() 4073 .unwrap() 4074 .retain(|x| x.start != mmio_region.start) 4075 } 4076 4077 ( 4078 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4079 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>, 4080 None as Option<Arc<Mutex<dyn 
virtio_devices::VirtioDevice>>>, 4081 false, 4082 ) 4083 } 4084 PciDeviceHandle::Virtio(virtio_pci_device) => { 4085 let dev = virtio_pci_device.lock().unwrap(); 4086 let bar_addr = dev.config_bar_addr(); 4087 for (event, addr) in dev.ioeventfds(bar_addr) { 4088 let io_addr = IoEventAddress::Mmio(addr); 4089 self.address_manager 4090 .vm 4091 .unregister_ioevent(event, &io_addr) 4092 .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; 4093 } 4094 4095 if let Some(dma_handler) = dev.dma_handler() { 4096 if !iommu_attached { 4097 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4098 for region in zone.regions() { 4099 let iova = region.start_addr().0; 4100 let size = region.len(); 4101 dma_handler 4102 .unmap(iova, size) 4103 .map_err(DeviceManagerError::VirtioDmaUnmap)?; 4104 } 4105 } 4106 } 4107 } 4108 4109 ( 4110 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4111 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>, 4112 Some(dev.virtio_device()), 4113 dev.dma_handler().is_some() && !iommu_attached, 4114 ) 4115 } 4116 PciDeviceHandle::VfioUser(vfio_user_pci_device) => { 4117 let mut dev = vfio_user_pci_device.lock().unwrap(); 4118 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4119 for region in zone.regions() { 4120 dev.dma_unmap(region) 4121 .map_err(DeviceManagerError::VfioUserDmaUnmap)?; 4122 } 4123 } 4124 4125 ( 4126 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>, 4127 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>, 4128 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 4129 true, 4130 ) 4131 } 4132 }; 4133 4134 if remove_dma_handler { 4135 for virtio_mem_device in self.virtio_mem_devices.iter() { 4136 virtio_mem_device 4137 .lock() 4138 .unwrap() 4139 .remove_dma_mapping_handler(VirtioMemMappingSource::Device( 4140 pci_device_bdf.into(), 4141 )) 4142 .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?; 4143 } 
4144 } 4145 4146 // Free the allocated BARs 4147 pci_device 4148 .lock() 4149 .unwrap() 4150 .free_bars( 4151 &mut self.address_manager.allocator.lock().unwrap(), 4152 &mut self.pci_segments[pci_segment_id as usize] 4153 .mem32_allocator 4154 .lock() 4155 .unwrap(), 4156 &mut self.pci_segments[pci_segment_id as usize] 4157 .mem64_allocator 4158 .lock() 4159 .unwrap(), 4160 ) 4161 .map_err(DeviceManagerError::FreePciBars)?; 4162 4163 // Remove the device from the PCI bus 4164 self.pci_segments[pci_segment_id as usize] 4165 .pci_bus 4166 .lock() 4167 .unwrap() 4168 .remove_by_device(&pci_device) 4169 .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?; 4170 4171 #[cfg(target_arch = "x86_64")] 4172 // Remove the device from the IO bus 4173 self.io_bus() 4174 .remove_by_device(&bus_device) 4175 .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?; 4176 4177 // Remove the device from the MMIO bus 4178 self.mmio_bus() 4179 .remove_by_device(&bus_device) 4180 .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?; 4181 4182 // Remove the device from the list of BusDevice held by the 4183 // DeviceManager. 
4184 self.bus_devices 4185 .retain(|dev| !Arc::ptr_eq(dev, &bus_device)); 4186 4187 // Shutdown and remove the underlying virtio-device if present 4188 if let Some(virtio_device) = virtio_device { 4189 for mapping in virtio_device.lock().unwrap().userspace_mappings() { 4190 self.memory_manager 4191 .lock() 4192 .unwrap() 4193 .remove_userspace_mapping( 4194 mapping.addr.raw_value(), 4195 mapping.len, 4196 mapping.host_addr, 4197 mapping.mergeable, 4198 mapping.mem_slot, 4199 ) 4200 .map_err(DeviceManagerError::MemoryManager)?; 4201 } 4202 4203 virtio_device.lock().unwrap().shutdown(); 4204 4205 self.virtio_devices 4206 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device)); 4207 } 4208 4209 event!( 4210 "vm", 4211 "device-removed", 4212 "id", 4213 &id, 4214 "bdf", 4215 pci_device_bdf.to_string() 4216 ); 4217 4218 // At this point, the device has been removed from all the list and 4219 // buses where it was stored. At the end of this function, after 4220 // any_device, bus_device and pci_device are released, the actual 4221 // device will be dropped. 4222 Ok(()) 4223 } 4224 4225 fn hotplug_virtio_pci_device( 4226 &mut self, 4227 handle: MetaVirtioDevice, 4228 ) -> DeviceManagerResult<PciDeviceInfo> { 4229 // Add the virtio device to the device manager list. This is important 4230 // as the list is used to notify virtio devices about memory updates 4231 // for instance. 
4232 self.virtio_devices.push(handle.clone()); 4233 4234 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 4235 self.iommu_mapping.clone() 4236 } else { 4237 None 4238 }; 4239 4240 let bdf = self.add_virtio_pci_device( 4241 handle.virtio_device, 4242 &mapping, 4243 handle.id.clone(), 4244 handle.pci_segment, 4245 handle.dma_handler, 4246 )?; 4247 4248 // Update the PCIU bitmap 4249 self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 4250 4251 Ok(PciDeviceInfo { id: handle.id, bdf }) 4252 } 4253 4254 fn is_iommu_segment(&self, pci_segment_id: u16) -> bool { 4255 self.config 4256 .lock() 4257 .as_ref() 4258 .unwrap() 4259 .platform 4260 .as_ref() 4261 .map(|pc| { 4262 pc.iommu_segments 4263 .as_ref() 4264 .map(|v| v.contains(&pci_segment_id)) 4265 .unwrap_or_default() 4266 }) 4267 .unwrap_or_default() 4268 } 4269 4270 pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> { 4271 self.validate_identifier(&disk_cfg.id)?; 4272 4273 if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) { 4274 return Err(DeviceManagerError::InvalidIommuHotplug); 4275 } 4276 4277 let device = self.make_virtio_block_device(disk_cfg)?; 4278 self.hotplug_virtio_pci_device(device) 4279 } 4280 4281 pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> { 4282 self.validate_identifier(&fs_cfg.id)?; 4283 4284 let device = self.make_virtio_fs_device(fs_cfg)?; 4285 self.hotplug_virtio_pci_device(device) 4286 } 4287 4288 pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> { 4289 self.validate_identifier(&pmem_cfg.id)?; 4290 4291 if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) { 4292 return Err(DeviceManagerError::InvalidIommuHotplug); 4293 } 4294 4295 let device = self.make_virtio_pmem_device(pmem_cfg)?; 4296 self.hotplug_virtio_pci_device(device) 4297 } 4298 4299 pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> 
    DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&net_cfg.id)?;

        if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_net_device(net_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a vDPA device described by `vdpa_cfg`.
    pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vdpa_cfg.id)?;

        if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_vdpa_device(vdpa_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-vsock device described by `vsock_cfg`.
    pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vsock_cfg.id)?;

        if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_vsock_device(vsock_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Collects per-device counters from every virtio device that exposes
    /// them, keyed by the device identifier.
    pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
        let mut counters = HashMap::new();

        for handle in &self.virtio_devices {
            let virtio_device = handle.virtio_device.lock().unwrap();
            if let Some(device_counters) = virtio_device.counters() {
                counters.insert(handle.id.clone(), device_counters.clone());
            }
        }

        counters
    }

    /// Asks the virtio-balloon device (if any) to resize to `size` bytes.
    ///
    /// Returns `MissingVirtioBalloon` when no balloon device was set up.
    pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
        if let Some(balloon) = &self.balloon {
            return balloon
                .lock()
                .unwrap()
                .resize(size)
                .map_err(DeviceManagerError::VirtioBalloonResize);
        }

        warn!("No balloon setup: Can't resize the balloon");
        Err(DeviceManagerError::MissingVirtioBalloon)
    }

    /// Returns the balloon's current actual size, or 0 when no balloon
    /// device is present.
    pub fn balloon_size(&self) -> u64 {
        if let
        Some(balloon) = &self.balloon {
            return balloon.lock().unwrap().get_actual();
        }

        0
    }

    /// Returns a shared handle to the device tree tracking all managed
    /// devices and their resources.
    pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
        self.device_tree.clone()
    }

    /// Notifies the guest of a power-button press via the ACPI GED device.
    // NOTE(review): panics if the GED notification device was never created
    // — `unwrap()` on `ged_notification_device`; confirm callers only invoke
    // this after ACPI devices are set up.
    #[cfg(target_arch = "x86_64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    /// Notifies the guest of a power-button press, covering both device-tree
    /// (GPIO) and ACPI (GED) boot flows on AArch64.
    #[cfg(target_arch = "aarch64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // There are two use cases:
        // 1. Users will use direct kernel boot with device tree.
        // 2. Users will use ACPI+UEFI boot.

        // Trigger a GPIO pin 3 event to satisfy use case 1.
        self.gpio_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .trigger_key(3)
            .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
        // Trigger a GED power button event to satisfy use case 2.
        // NOTE(review): trailing `return` is non-idiomatic; the expression
        // form (dropping `return`/`;`) would be equivalent.
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification);
    }

    /// Returns the IOMMU BDF and the BDFs attached behind it, if a virtual
    /// IOMMU was created.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }

    /// Validates a user-supplied device identifier: it must not use the
    /// reserved `__` prefix and must not collide with an existing device.
    fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
        if let Some(id) = id {
            // Identifiers starting with "__" are reserved for internal use.
            if id.starts_with("__") {
                return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
            }

            if self.device_tree.lock().unwrap().contains_key(id) {
                return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
            }
        }

        Ok(())
    }

    /// Returns the addresses of ACPI-described platform components.
    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
}

/// Looks up the NUMA node that contains the given memory zone, if any.
fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
            return Some(*numa_node_id);
        }
    }

    None
}

/// Looks up the NUMA node that owns the given PCI segment, defaulting to
/// node 0 when no node claims it.
fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.pci_segments.contains(&pci_segment_id) {
            return *numa_node_id;
        }
    }

    0
}

/// Marker type emitting the ACPI description of the TPM device.
struct TpmDevice {}

impl Aml for TpmDevice {
    // Emits a "TPM2" ACPI device (HID MSFT0101) with a fixed 32-bit memory
    // resource covering the TPM MMIO window.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        aml::Device::new(
            "TPM2".into(),
            vec![
                &aml::Name::new("_HID".into(), &"MSFT0101"),
                // _STA = 0xF: device present, enabled, shown in UI, functioning.
                &aml::Name::new("_STA".into(), &(0xF_usize)),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                        true,
                        layout::TPM_START.0 as u32,
                        layout::TPM_SIZE as u32,
                    )]),
                ),
            ],
        )
        .to_aml_bytes(sink)
    }
}

impl Aml for DeviceManager {
    /// Emits the ACPI (AML) description of the devices the manager owns:
    /// the PCI hotplug controller, the PCI segments, the motherboard
    /// resources, the serial device, power button, optional TPM and the GED
    /// notification device.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // Build a PCNT (re-scan) method call for every PCI segment; PSCN
        // below invokes all of them.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                // _STA = 0x0B: present, enabled, functioning, but hidden in UI.
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                // Field layout matches the *_FIELD_OFFSET constants consumed
                // by the BusDevice implementation below.
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                // PCEJ(slot, segment): eject a device by writing its slot
                // bitmap to B0EJ, guarded by the BLCK mutex.
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
// Write PCI bus number (in first argument) to I/O port via field 4536 &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)), 4537 // Release lock 4538 &aml::Release::new("BLCK".into()), 4539 // Return 0 4540 &aml::Return::new(&aml::ZERO), 4541 ], 4542 ), 4543 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner), 4544 ], 4545 ) 4546 .to_aml_bytes(sink); 4547 4548 for segment in &self.pci_segments { 4549 segment.to_aml_bytes(sink); 4550 } 4551 4552 let mut mbrd_memory = Vec::new(); 4553 4554 for segment in &self.pci_segments { 4555 mbrd_memory.push(aml::Memory32Fixed::new( 4556 true, 4557 segment.mmio_config_address as u32, 4558 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32, 4559 )) 4560 } 4561 4562 let mut mbrd_memory_refs = Vec::new(); 4563 for mbrd_memory_ref in &mbrd_memory { 4564 mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml); 4565 } 4566 4567 aml::Device::new( 4568 "_SB_.MBRD".into(), 4569 vec![ 4570 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")), 4571 &aml::Name::new("_UID".into(), &aml::ZERO), 4572 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)), 4573 ], 4574 ) 4575 .to_aml_bytes(sink); 4576 4577 // Serial device 4578 #[cfg(target_arch = "x86_64")] 4579 let serial_irq = 4; 4580 #[cfg(target_arch = "aarch64")] 4581 let serial_irq = 4582 if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off { 4583 self.get_device_info() 4584 .clone() 4585 .get(&(DeviceType::Serial, DeviceType::Serial.to_string())) 4586 .unwrap() 4587 .irq() 4588 } else { 4589 // If serial is turned off, add a fake device with invalid irq. 
                31
            };
        // Only describe COM1 when the serial console is actually enabled.
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            // x86_64 uses the legacy COM1 I/O ports; aarch64
                            // maps the UART into MMIO space.
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        // S5 (soft-off) sleep state.
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        // ACPI power button device.
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        // Finally the GED notification device used for hotplug notifications.
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}

impl Pausable for DeviceManager {
    /// Pauses every migratable device registered in the device tree.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    /// Resumes every migratable device registered in the device tree.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    /// Snapshots the manager's own state plus the snapshot of every
    /// migratable device it tracks.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let mut migratable = migratable.lock().unwrap();
                snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
            }
        }

        Ok(snapshot)
    }
}

impl Transportable for DeviceManager {}

// Migration hooks simply fan each call out to every migratable device.
impl Migratable for DeviceManager {
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    /// Aggregates the dirty-memory ranges reported by all devices into a
    /// single table.
    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        let mut tables = Vec::new();
        for (_, device_node) in
            self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_migration()?;
            }
        }
        Ok(())
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}

// Byte offsets and sizes of the PCI hotplug register fields inside the
// hotplug controller's MMIO window. These mirror the PCIU/PCID/B0EJ/PSEG
// AML Field entries emitted in `Aml for DeviceManager`.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;

// MMIO handler for the PCI hotplug controller registers.
impl BusDevice for DeviceManager {
    /// Handles guest reads of the hotplug registers. PCIU/PCID reads return
    /// and clear the plugged/unplugged device bitmaps of the currently
    /// selected segment.
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_up
                        .to_le_bytes(),
                );
                // Clear the PCIU bitmap
                self.pci_segments[self.selected_segment].pci_devices_up = 0;
            }
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_down
                        .to_le_bytes(),
                );
                // Clear the PCID bitmap
                self.pci_segments[self.selected_segment].pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty
                // bitmap since the eject is always
                // taken care of right away during a write access.
                data.fill(0);
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        )
    }

    /// Handles guest writes of the hotplug registers: B0EJ ejects every
    /// device whose bit is set in the written bitmap, PSEG selects which
    /// PCI segment subsequent accesses refer to.
    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
        match offset {
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let mut slot_bitmap = u32::from_le_bytes(data_array);

                // Eject each set slot, lowest bit first; a failed eject is
                // logged but does not stop the remaining ejects.
                while slot_bitmap > 0 {
                    let slot_id = slot_bitmap.trailing_zeros();
                    if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
                        error!("Failed ejecting device {}: {:?}", slot_id, e);
                    }
                    slot_bitmap &= !(1 << slot_id);
                }
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let selected_segment = u32::from_le_bytes(data_array) as usize;
                // Reject out-of-range segment selections instead of panicking
                // later on an out-of-bounds index.
                if selected_segment >= self.pci_segments.len() {
                    error!(
                        "Segment selection out of range: {} >= {}",
                        selected_segment,
                        self.pci_segments.len()
                    );
                    return None;
                }
                self.selected_segment = selected_segment;
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        );

        None
    }
}

impl Drop for DeviceManager {
    fn drop(&mut self) {
        // Wake up the DeviceManager threads (mainly virtio
        // device workers),
        // to avoid deadlock on waiting for paused/parked worker threads.
        if let Err(e) = self.resume() {
            error!("Error resuming DeviceManager: {:?}", e);
        }

        // Cleanly shut down every virtio device before the manager goes away.
        for handle in self.virtio_devices.drain(..) {
            handle.virtio_device.lock().unwrap().shutdown();
        }

        // Restore the host terminal settings if we changed them for the
        // guest console.
        if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
            // SAFETY: FFI call
            let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Verifies that the MMIO range [0x100000, 0x400000) is split across
    // segments proportionally to the supplied weights, with contiguous,
    // non-overlapping allocator ranges.
    #[test]
    fn test_create_mmio_allocators() {
        // Single segment, weight 1: gets the whole range.
        let res = create_mmio_allocators(0x100000, 0x400000, 1, vec![1], 4 << 10);
        assert_eq!(res.len(), 1);
        assert_eq!(
            res[0].lock().unwrap().base(),
            vm_memory::GuestAddress(0x100000)
        );
        assert_eq!(
            res[0].lock().unwrap().end(),
            vm_memory::GuestAddress(0x3fffff)
        );

        // Two segments with equal weights: the range is split in half.
        let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![1, 1], 4 << 10);
        assert_eq!(res.len(), 2);
        assert_eq!(
            res[0].lock().unwrap().base(),
            vm_memory::GuestAddress(0x100000)
        );
        assert_eq!(
            res[0].lock().unwrap().end(),
            vm_memory::GuestAddress(0x27ffff)
        );
        assert_eq!(
            res[1].lock().unwrap().base(),
            vm_memory::GuestAddress(0x280000)
        );
        assert_eq!(
            res[1].lock().unwrap().end(),
            vm_memory::GuestAddress(0x3fffff)
        );

        // Weights 2:1 — the first segment gets two thirds of the range.
        let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![2, 1], 4 << 10);
        assert_eq!(res.len(), 2);
        assert_eq!(
            res[0].lock().unwrap().base(),
            vm_memory::GuestAddress(0x100000)
        );
        assert_eq!(
            res[0].lock().unwrap().end(),
            vm_memory::GuestAddress(0x2fffff)
        );
        assert_eq!(
            res[1].lock().unwrap().base(),
            vm_memory::GuestAddress(0x300000)
        );
        assert_eq!(
            res[1].lock().unwrap().end(),
            vm_memory::GuestAddress(0x3fffff)
        );
    }
}