1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use std::collections::{BTreeMap, BTreeSet, HashMap}; 13 use std::fs::{File, OpenOptions}; 14 use std::io::{self, stdout, IsTerminal, Seek, SeekFrom}; 15 use std::num::Wrapping; 16 use std::os::unix::fs::OpenOptionsExt; 17 use std::os::unix::io::{AsRawFd, FromRawFd}; 18 use std::path::PathBuf; 19 use std::result; 20 use std::sync::{Arc, Mutex}; 21 use std::time::Instant; 22 23 use acpi_tables::sdt::GenericAddress; 24 use acpi_tables::{aml, Aml}; 25 use anyhow::anyhow; 26 #[cfg(target_arch = "x86_64")] 27 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 28 use arch::{layout, NumaNodes}; 29 #[cfg(target_arch = "aarch64")] 30 use arch::{DeviceType, MmioDeviceInfo}; 31 use block::async_io::DiskFile; 32 use block::fixed_vhd_sync::FixedVhdDiskSync; 33 use block::qcow_sync::QcowDiskSync; 34 use block::raw_async_aio::RawFileDiskAio; 35 use block::raw_sync::RawFileDiskSync; 36 use block::vhdx_sync::VhdxDiskSync; 37 use block::{ 38 block_aio_is_supported, block_io_uring_is_supported, detect_image_type, qcow, vhdx, ImageType, 39 }; 40 #[cfg(feature = "io_uring")] 41 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; 42 #[cfg(target_arch = "x86_64")] 43 use devices::debug_console::DebugConsole; 44 #[cfg(target_arch = "aarch64")] 45 use devices::gic; 46 use devices::interrupt_controller::InterruptController; 47 #[cfg(target_arch = "x86_64")] 48 use devices::ioapic; 49 #[cfg(target_arch = "aarch64")] 50 use devices::legacy::Pl011; 51 #[cfg(feature = "pvmemcontrol")] 52 use devices::pvmemcontrol::{PvmemcontrolBusDevice, PvmemcontrolPciDevice}; 53 use 
devices::{interrupt_controller, AcpiNotificationFlags}; 54 use hypervisor::IoEventAddress; 55 use libc::{ 56 tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE, 57 TCSANOW, 58 }; 59 use pci::{ 60 DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping, 61 VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError, 62 }; 63 use rate_limiter::group::RateLimiterGroup; 64 use seccompiler::SeccompAction; 65 use serde::{Deserialize, Serialize}; 66 use tracer::trace_scoped; 67 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 68 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator, VirtioTransport}; 69 use virtio_devices::vhost_user::VhostUserConfig; 70 use virtio_devices::{ 71 AccessPlatformMapping, ActivateError, Endpoint, IommuMapping, VdpaDmaMapping, 72 VirtioMemMappingSource, 73 }; 74 use vm_allocator::{AddressAllocator, SystemAllocator}; 75 use vm_device::dma_mapping::ExternalDmaMapping; 76 use vm_device::interrupt::{ 77 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 78 }; 79 use vm_device::{Bus, BusDevice, BusDeviceSync, Resource}; 80 use vm_memory::guest_memory::FileOffset; 81 use vm_memory::{Address, GuestAddress, GuestMemoryRegion, GuestUsize, MmapRegion}; 82 #[cfg(target_arch = "x86_64")] 83 use vm_memory::{GuestAddressSpace, GuestMemory}; 84 use vm_migration::protocol::MemoryRangeTable; 85 use vm_migration::{ 86 snapshot_from_id, state_from_id, Migratable, MigratableError, Pausable, Snapshot, SnapshotData, 87 Snapshottable, Transportable, 88 }; 89 use vm_virtio::{AccessPlatform, VirtioDeviceType}; 90 use vmm_sys_util::eventfd::EventFd; 91 #[cfg(target_arch = "x86_64")] 92 use {devices::debug_console, devices::legacy::Serial}; 93 94 use crate::console_devices::{ConsoleDeviceError, ConsoleInfo, ConsoleOutput}; 95 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; 96 use crate::device_tree::{DeviceNode, DeviceTree}; 97 
use crate::interrupt::{LegacyUserspaceInterruptManager, MsiInterruptManager}; 98 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 99 use crate::pci_segment::PciSegment; 100 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 101 use crate::vm_config::{ 102 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 103 VdpaConfig, VhostMode, VmConfig, VsockConfig, DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT, 104 }; 105 use crate::{device_node, GuestRegionMmap, PciDeviceInfo, DEVICE_MANAGER_SNAPSHOT_ID}; 106 107 #[cfg(target_arch = "aarch64")] 108 const MMIO_LEN: u64 = 0x1000; 109 110 // Singleton devices / devices the user cannot name 111 #[cfg(target_arch = "x86_64")] 112 const IOAPIC_DEVICE_NAME: &str = "__ioapic"; 113 const SERIAL_DEVICE_NAME: &str = "__serial"; 114 #[cfg(target_arch = "x86_64")] 115 const DEBUGCON_DEVICE_NAME: &str = "__debug_console"; 116 #[cfg(target_arch = "aarch64")] 117 const GPIO_DEVICE_NAME: &str = "__gpio"; 118 const RNG_DEVICE_NAME: &str = "__rng"; 119 const IOMMU_DEVICE_NAME: &str = "__iommu"; 120 #[cfg(feature = "pvmemcontrol")] 121 const PVMEMCONTROL_DEVICE_NAME: &str = "__pvmemcontrol"; 122 const BALLOON_DEVICE_NAME: &str = "__balloon"; 123 const CONSOLE_DEVICE_NAME: &str = "__console"; 124 const PVPANIC_DEVICE_NAME: &str = "__pvpanic"; 125 126 // Devices that the user may name and for which we generate 127 // identifiers if the user doesn't give one 128 const DISK_DEVICE_NAME_PREFIX: &str = "_disk"; 129 const FS_DEVICE_NAME_PREFIX: &str = "_fs"; 130 const NET_DEVICE_NAME_PREFIX: &str = "_net"; 131 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem"; 132 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa"; 133 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock"; 134 const WATCHDOG_DEVICE_NAME: &str = "__watchdog"; 135 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio"; 136 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user"; 137 const 
VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci"; 138 139 /// Errors associated with device manager 140 #[derive(Debug)] 141 pub enum DeviceManagerError { 142 /// Cannot create EventFd. 143 EventFd(io::Error), 144 145 /// Cannot open disk path 146 Disk(io::Error), 147 148 /// Cannot create vhost-user-net device 149 CreateVhostUserNet(virtio_devices::vhost_user::Error), 150 151 /// Cannot create virtio-blk device 152 CreateVirtioBlock(io::Error), 153 154 /// Cannot create virtio-net device 155 CreateVirtioNet(virtio_devices::net::Error), 156 157 /// Cannot create virtio-console device 158 CreateVirtioConsole(io::Error), 159 160 /// Cannot create virtio-rng device 161 CreateVirtioRng(io::Error), 162 163 /// Cannot create virtio-fs device 164 CreateVirtioFs(virtio_devices::vhost_user::Error), 165 166 /// Virtio-fs device was created without a socket. 167 NoVirtioFsSock, 168 169 /// Cannot create vhost-user-blk device 170 CreateVhostUserBlk(virtio_devices::vhost_user::Error), 171 172 /// Cannot create virtio-pmem device 173 CreateVirtioPmem(io::Error), 174 175 /// Cannot create vDPA device 176 CreateVdpa(virtio_devices::vdpa::Error), 177 178 /// Cannot create virtio-vsock device 179 CreateVirtioVsock(io::Error), 180 181 /// Cannot create tpm device 182 CreateTpmDevice(anyhow::Error), 183 184 /// Failed to convert Path to &str for the vDPA device. 185 CreateVdpaConvertPath, 186 187 /// Failed to convert Path to &str for the virtio-vsock device. 
188 CreateVsockConvertPath, 189 190 /// Cannot create virtio-vsock backend 191 CreateVsockBackend(virtio_devices::vsock::VsockUnixError), 192 193 /// Cannot create virtio-iommu device 194 CreateVirtioIommu(io::Error), 195 196 /// Cannot create virtio-balloon device 197 CreateVirtioBalloon(io::Error), 198 199 /// Cannot create pvmemcontrol device 200 #[cfg(feature = "pvmemcontrol")] 201 CreatePvmemcontrol(io::Error), 202 203 /// Cannot create virtio-watchdog device 204 CreateVirtioWatchdog(io::Error), 205 206 /// Failed to parse disk image format 207 DetectImageType(io::Error), 208 209 /// Cannot open qcow disk path 210 QcowDeviceCreate(qcow::Error), 211 212 /// Cannot create serial manager 213 CreateSerialManager(SerialManagerError), 214 215 /// Cannot spawn the serial manager thread 216 SpawnSerialManager(SerialManagerError), 217 218 /// Cannot open tap interface 219 OpenTap(net_util::TapError), 220 221 /// Cannot allocate IRQ. 222 AllocateIrq, 223 224 /// Cannot configure the IRQ. 225 Irq(vmm_sys_util::errno::Error), 226 227 /// Cannot allocate PCI BARs 228 AllocateBars(pci::PciDeviceError), 229 230 /// Could not free the BARs associated with a PCI device. 231 FreePciBars(pci::PciDeviceError), 232 233 /// Cannot register ioevent. 234 RegisterIoevent(anyhow::Error), 235 236 /// Cannot unregister ioevent. 
237 UnRegisterIoevent(anyhow::Error), 238 239 /// Cannot create virtio device 240 VirtioDevice(virtio_devices::transport::VirtioPciDeviceError), 241 242 /// Cannot add PCI device 243 AddPciDevice(pci::PciRootError), 244 245 /// Cannot open persistent memory file 246 PmemFileOpen(io::Error), 247 248 /// Cannot set persistent memory file size 249 PmemFileSetLen(io::Error), 250 251 /// Cannot find a memory range for persistent memory 252 PmemRangeAllocation, 253 254 /// Cannot find a memory range for virtio-fs 255 FsRangeAllocation, 256 257 /// Error creating serial output file 258 SerialOutputFileOpen(io::Error), 259 260 #[cfg(target_arch = "x86_64")] 261 /// Error creating debug-console output file 262 DebugconOutputFileOpen(io::Error), 263 264 /// Error creating console output file 265 ConsoleOutputFileOpen(io::Error), 266 267 /// Error creating serial pty 268 SerialPtyOpen(io::Error), 269 270 /// Error creating console pty 271 ConsolePtyOpen(io::Error), 272 273 /// Error creating console pty 274 DebugconPtyOpen(io::Error), 275 276 /// Error setting pty raw mode 277 SetPtyRaw(ConsoleDeviceError), 278 279 /// Error getting pty peer 280 GetPtyPeer(vmm_sys_util::errno::Error), 281 282 /// Cannot create a VFIO device 283 VfioCreate(vfio_ioctls::VfioError), 284 285 /// Cannot create a VFIO PCI device 286 VfioPciCreate(pci::VfioPciError), 287 288 /// Failed to map VFIO MMIO region. 289 VfioMapRegion(pci::VfioPciError), 290 291 /// Failed to DMA map VFIO device. 292 VfioDmaMap(vfio_ioctls::VfioError), 293 294 /// Failed to DMA unmap VFIO device. 295 VfioDmaUnmap(pci::VfioPciError), 296 297 /// Failed to create the passthrough device. 298 CreatePassthroughDevice(anyhow::Error), 299 300 /// Failed to memory map. 301 Mmap(io::Error), 302 303 /// Cannot add legacy device to Bus. 
304 BusError(vm_device::BusError), 305 306 /// Failed to allocate IO port 307 AllocateIoPort, 308 309 /// Failed to allocate MMIO address 310 AllocateMmioAddress, 311 312 /// Failed to make hotplug notification 313 HotPlugNotification(io::Error), 314 315 /// Error from a memory manager operation 316 MemoryManager(MemoryManagerError), 317 318 /// Failed to create new interrupt source group. 319 CreateInterruptGroup(io::Error), 320 321 /// Failed to update interrupt source group. 322 UpdateInterruptGroup(io::Error), 323 324 /// Failed to create interrupt controller. 325 CreateInterruptController(interrupt_controller::Error), 326 327 /// Failed to create a new MmapRegion instance. 328 NewMmapRegion(vm_memory::mmap::MmapRegionError), 329 330 /// Failed to clone a File. 331 CloneFile(io::Error), 332 333 /// Failed to create socket file 334 CreateSocketFile(io::Error), 335 336 /// Failed to spawn the network backend 337 SpawnNetBackend(io::Error), 338 339 /// Failed to spawn the block backend 340 SpawnBlockBackend(io::Error), 341 342 /// Missing PCI bus. 343 NoPciBus, 344 345 /// Could not find an available device name. 346 NoAvailableDeviceName, 347 348 /// Missing PCI device. 349 MissingPciDevice, 350 351 /// Failed to remove a PCI device from the PCI bus. 352 RemoveDeviceFromPciBus(pci::PciRootError), 353 354 /// Failed to remove a bus device from the IO bus. 355 RemoveDeviceFromIoBus(vm_device::BusError), 356 357 /// Failed to remove a bus device from the MMIO bus. 358 RemoveDeviceFromMmioBus(vm_device::BusError), 359 360 /// Failed to find the device corresponding to a specific PCI b/d/f. 361 UnknownPciBdf(u32), 362 363 /// Not allowed to remove this type of device from the VM. 364 RemovalNotAllowed(vm_virtio::VirtioDeviceType), 365 366 /// Failed to find device corresponding to the given identifier. 367 UnknownDeviceId(String), 368 369 /// Failed to find an available PCI device ID. 
370 NextPciDeviceId(pci::PciRootError), 371 372 /// Could not reserve the PCI device ID. 373 GetPciDeviceId(pci::PciRootError), 374 375 /// Could not give the PCI device ID back. 376 PutPciDeviceId(pci::PciRootError), 377 378 /// No disk path was specified when one was expected 379 NoDiskPath, 380 381 /// Failed to update guest memory for virtio device. 382 UpdateMemoryForVirtioDevice(virtio_devices::Error), 383 384 /// Cannot create virtio-mem device 385 CreateVirtioMem(io::Error), 386 387 /// Cannot find a memory range for virtio-mem memory 388 VirtioMemRangeAllocation, 389 390 /// Failed to update guest memory for VFIO PCI device. 391 UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError), 392 393 /// Trying to use a directory for pmem but no size specified 394 PmemWithDirectorySizeMissing, 395 396 /// Trying to use a size that is not multiple of 2MiB 397 PmemSizeNotAligned, 398 399 /// Could not find the node in the device tree. 400 MissingNode, 401 402 /// Resource was already found. 403 ResourceAlreadyExists, 404 405 /// Expected resources for virtio-pmem could not be found. 406 MissingVirtioPmemResources, 407 408 /// Missing PCI b/d/f from the DeviceNode. 409 MissingDeviceNodePciBdf, 410 411 /// No support for device passthrough 412 NoDevicePassthroughSupport, 413 414 /// No socket option support for console device 415 NoSocketOptionSupportForConsoleDevice, 416 417 /// Failed to resize virtio-balloon 418 VirtioBalloonResize(virtio_devices::balloon::Error), 419 420 /// Missing virtio-balloon, can't proceed as expected. 
421 MissingVirtioBalloon, 422 423 /// Missing virtual IOMMU device 424 MissingVirtualIommu, 425 426 /// Failed to do power button notification 427 PowerButtonNotification(io::Error), 428 429 /// Failed to do AArch64 GPIO power button notification 430 #[cfg(target_arch = "aarch64")] 431 AArch64PowerButtonNotification(devices::legacy::GpioDeviceError), 432 433 /// Failed to set O_DIRECT flag to file descriptor 434 SetDirectIo, 435 436 /// Failed to create FixedVhdDiskAsync 437 CreateFixedVhdDiskAsync(io::Error), 438 439 /// Failed to create FixedVhdDiskSync 440 CreateFixedVhdDiskSync(io::Error), 441 442 /// Failed to create QcowDiskSync 443 CreateQcowDiskSync(qcow::Error), 444 445 /// Failed to create FixedVhdxDiskSync 446 CreateFixedVhdxDiskSync(vhdx::VhdxError), 447 448 /// Failed to add DMA mapping handler to virtio-mem device. 449 AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error), 450 451 /// Failed to remove DMA mapping handler from virtio-mem device. 452 RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error), 453 454 /// Failed to create vfio-user client 455 VfioUserCreateClient(vfio_user::Error), 456 457 /// Failed to create VFIO user device 458 VfioUserCreate(VfioUserPciDeviceError), 459 460 /// Failed to map region from VFIO user device into guest 461 VfioUserMapRegion(VfioUserPciDeviceError), 462 463 /// Failed to DMA map VFIO user device. 464 VfioUserDmaMap(VfioUserPciDeviceError), 465 466 /// Failed to DMA unmap VFIO user device. 467 VfioUserDmaUnmap(VfioUserPciDeviceError), 468 469 /// Failed to update memory mappings for VFIO user device 470 UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError), 471 472 /// Cannot duplicate file descriptor 473 DupFd(vmm_sys_util::errno::Error), 474 475 /// Failed to DMA map virtio device. 476 VirtioDmaMap(std::io::Error), 477 478 /// Failed to DMA unmap virtio device. 
479 VirtioDmaUnmap(std::io::Error), 480 481 /// Cannot hotplug device behind vIOMMU 482 InvalidIommuHotplug, 483 484 /// Invalid identifier as it is not unique. 485 IdentifierNotUnique(String), 486 487 /// Invalid identifier 488 InvalidIdentifier(String), 489 490 /// Error activating virtio device 491 VirtioActivate(ActivateError), 492 493 /// Failed retrieving device state from snapshot 494 RestoreGetState(MigratableError), 495 496 /// Cannot create a PvPanic device 497 PvPanicCreate(devices::pvpanic::PvPanicError), 498 499 /// Cannot create a RateLimiterGroup 500 RateLimiterGroupCreate(rate_limiter::group::Error), 501 502 /// Cannot start sigwinch listener 503 StartSigwinchListener(std::io::Error), 504 505 // Invalid console info 506 InvalidConsoleInfo, 507 508 // Invalid console fd 509 InvalidConsoleFd, 510 } 511 512 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>; 513 514 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10; 515 516 #[derive(Default)] 517 pub struct Console { 518 console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>, 519 } 520 521 impl Console { 522 pub fn need_resize(&self) -> bool { 523 if let Some(_resizer) = self.console_resizer.as_ref() { 524 return true; 525 } 526 527 false 528 } 529 530 pub fn update_console_size(&self) { 531 if let Some(resizer) = self.console_resizer.as_ref() { 532 resizer.update_console_size() 533 } 534 } 535 } 536 537 pub(crate) struct AddressManager { 538 pub(crate) allocator: Arc<Mutex<SystemAllocator>>, 539 #[cfg(target_arch = "x86_64")] 540 pub(crate) io_bus: Arc<Bus>, 541 pub(crate) mmio_bus: Arc<Bus>, 542 pub(crate) vm: Arc<dyn hypervisor::Vm>, 543 device_tree: Arc<Mutex<DeviceTree>>, 544 pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 545 pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 546 } 547 548 impl DeviceRelocation for AddressManager { 549 fn move_bar( 550 &self, 551 old_base: u64, 552 new_base: u64, 553 len: u64, 554 pci_dev: &mut dyn PciDevice, 555 
region_type: PciBarRegionType, 556 ) -> std::result::Result<(), std::io::Error> { 557 match region_type { 558 PciBarRegionType::IoRegion => { 559 #[cfg(target_arch = "x86_64")] 560 { 561 // Update system allocator 562 self.allocator 563 .lock() 564 .unwrap() 565 .free_io_addresses(GuestAddress(old_base), len as GuestUsize); 566 567 self.allocator 568 .lock() 569 .unwrap() 570 .allocate_io_addresses( 571 Some(GuestAddress(new_base)), 572 len as GuestUsize, 573 None, 574 ) 575 .ok_or_else(|| { 576 io::Error::new(io::ErrorKind::Other, "failed allocating new IO range") 577 })?; 578 579 // Update PIO bus 580 self.io_bus 581 .update_range(old_base, len, new_base, len) 582 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 583 } 584 #[cfg(target_arch = "aarch64")] 585 error!("I/O region is not supported"); 586 } 587 PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => { 588 let allocators = if region_type == PciBarRegionType::Memory32BitRegion { 589 &self.pci_mmio32_allocators 590 } else { 591 &self.pci_mmio64_allocators 592 }; 593 594 // Find the specific allocator that this BAR was allocated from and use it for new one 595 for allocator in allocators { 596 let allocator_base = allocator.lock().unwrap().base(); 597 let allocator_end = allocator.lock().unwrap().end(); 598 599 if old_base >= allocator_base.0 && old_base <= allocator_end.0 { 600 allocator 601 .lock() 602 .unwrap() 603 .free(GuestAddress(old_base), len as GuestUsize); 604 605 allocator 606 .lock() 607 .unwrap() 608 .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len)) 609 .ok_or_else(|| { 610 io::Error::new( 611 io::ErrorKind::Other, 612 "failed allocating new MMIO range", 613 ) 614 })?; 615 616 break; 617 } 618 } 619 620 // Update MMIO bus 621 self.mmio_bus 622 .update_range(old_base, len, new_base, len) 623 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 624 } 625 } 626 627 // Update the device_tree resources associated with the device 628 if let 
Some(id) = pci_dev.id() { 629 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) { 630 let mut resource_updated = false; 631 for resource in node.resources.iter_mut() { 632 if let Resource::PciBar { base, type_, .. } = resource { 633 if PciBarRegionType::from(*type_) == region_type && *base == old_base { 634 *base = new_base; 635 resource_updated = true; 636 break; 637 } 638 } 639 } 640 641 if !resource_updated { 642 return Err(io::Error::new( 643 io::ErrorKind::Other, 644 format!( 645 "Couldn't find a resource with base 0x{old_base:x} for device {id}" 646 ), 647 )); 648 } 649 } else { 650 return Err(io::Error::new( 651 io::ErrorKind::Other, 652 format!("Couldn't find device {id} from device tree"), 653 )); 654 } 655 } 656 657 let any_dev = pci_dev.as_any(); 658 if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() { 659 let bar_addr = virtio_pci_dev.config_bar_addr(); 660 if bar_addr == new_base { 661 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) { 662 let io_addr = IoEventAddress::Mmio(addr); 663 self.vm.unregister_ioevent(event, &io_addr).map_err(|e| { 664 io::Error::new( 665 io::ErrorKind::Other, 666 format!("failed to unregister ioevent: {e:?}"), 667 ) 668 })?; 669 } 670 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) { 671 let io_addr = IoEventAddress::Mmio(addr); 672 self.vm 673 .register_ioevent(event, &io_addr, None) 674 .map_err(|e| { 675 io::Error::new( 676 io::ErrorKind::Other, 677 format!("failed to register ioevent: {e:?}"), 678 ) 679 })?; 680 } 681 } else { 682 let virtio_dev = virtio_pci_dev.virtio_device(); 683 let mut virtio_dev = virtio_dev.lock().unwrap(); 684 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() { 685 if shm_regions.addr.raw_value() == old_base { 686 let mem_region = self.vm.make_user_memory_region( 687 shm_regions.mem_slot, 688 old_base, 689 shm_regions.len, 690 shm_regions.host_addr, 691 false, 692 false, 693 ); 694 695 
self.vm.remove_user_memory_region(mem_region).map_err(|e| { 696 io::Error::new( 697 io::ErrorKind::Other, 698 format!("failed to remove user memory region: {e:?}"), 699 ) 700 })?; 701 702 // Create new mapping by inserting new region to KVM. 703 let mem_region = self.vm.make_user_memory_region( 704 shm_regions.mem_slot, 705 new_base, 706 shm_regions.len, 707 shm_regions.host_addr, 708 false, 709 false, 710 ); 711 712 self.vm.create_user_memory_region(mem_region).map_err(|e| { 713 io::Error::new( 714 io::ErrorKind::Other, 715 format!("failed to create user memory regions: {e:?}"), 716 ) 717 })?; 718 719 // Update shared memory regions to reflect the new mapping. 720 shm_regions.addr = GuestAddress(new_base); 721 virtio_dev.set_shm_regions(shm_regions).map_err(|e| { 722 io::Error::new( 723 io::ErrorKind::Other, 724 format!("failed to update shared memory regions: {e:?}"), 725 ) 726 })?; 727 } 728 } 729 } 730 } 731 732 pci_dev.move_bar(old_base, new_base) 733 } 734 } 735 736 #[derive(Serialize, Deserialize)] 737 struct DeviceManagerState { 738 device_tree: DeviceTree, 739 device_id_cnt: Wrapping<usize>, 740 } 741 742 #[derive(Debug)] 743 pub struct PtyPair { 744 pub main: File, 745 pub path: PathBuf, 746 } 747 748 impl Clone for PtyPair { 749 fn clone(&self) -> Self { 750 PtyPair { 751 main: self.main.try_clone().unwrap(), 752 path: self.path.clone(), 753 } 754 } 755 } 756 757 #[derive(Clone)] 758 pub enum PciDeviceHandle { 759 Vfio(Arc<Mutex<VfioPciDevice>>), 760 Virtio(Arc<Mutex<VirtioPciDevice>>), 761 VfioUser(Arc<Mutex<VfioUserPciDevice>>), 762 } 763 764 #[derive(Clone)] 765 struct MetaVirtioDevice { 766 virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 767 iommu: bool, 768 id: String, 769 pci_segment: u16, 770 dma_handler: Option<Arc<dyn ExternalDmaMapping>>, 771 } 772 773 #[derive(Default)] 774 pub struct AcpiPlatformAddresses { 775 pub pm_timer_address: Option<GenericAddress>, 776 pub reset_reg_address: Option<GenericAddress>, 777 pub 
sleep_control_reg_address: Option<GenericAddress>, 778 pub sleep_status_reg_address: Option<GenericAddress>, 779 } 780 781 #[cfg(all(feature = "mshv", feature = "sev_snp"))] 782 struct SevSnpPageAccessProxy { 783 vm: Arc<dyn hypervisor::Vm>, 784 } 785 786 #[cfg(all(feature = "mshv", feature = "sev_snp"))] 787 impl std::fmt::Debug for SevSnpPageAccessProxy { 788 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 789 write!(f, "SNP Page access proxy") 790 } 791 } 792 793 #[cfg(all(feature = "mshv", feature = "sev_snp"))] 794 impl SevSnpPageAccessProxy { 795 fn new(vm: Arc<dyn hypervisor::Vm>) -> SevSnpPageAccessProxy { 796 SevSnpPageAccessProxy { vm } 797 } 798 } 799 800 #[cfg(all(feature = "mshv", feature = "sev_snp"))] 801 impl AccessPlatform for SevSnpPageAccessProxy { 802 fn translate_gpa(&self, base: u64, _size: u64) -> std::result::Result<u64, std::io::Error> { 803 Ok(base) 804 } 805 806 fn translate_gva(&self, base: u64, size: u64) -> std::result::Result<u64, std::io::Error> { 807 self.vm 808 .gain_page_access(base, size as u32) 809 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 810 Ok(base) 811 } 812 } 813 814 pub struct DeviceManager { 815 // Manage address space related to devices 816 address_manager: Arc<AddressManager>, 817 818 // Console abstraction 819 console: Arc<Console>, 820 821 // Serial Manager 822 serial_manager: Option<Arc<SerialManager>>, 823 824 // pty foreground status, 825 console_resize_pipe: Option<Arc<File>>, 826 827 // To restore on exit. 828 original_termios_opt: Arc<Mutex<Option<termios>>>, 829 830 // Interrupt controller 831 #[cfg(target_arch = "x86_64")] 832 interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>, 833 #[cfg(target_arch = "aarch64")] 834 interrupt_controller: Option<Arc<Mutex<gic::Gic>>>, 835 836 // Things to be added to the commandline (e.g. 
aarch64 early console) 837 #[cfg(target_arch = "aarch64")] 838 cmdline_additions: Vec<String>, 839 840 // ACPI GED notification device 841 ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>, 842 843 // VM configuration 844 config: Arc<Mutex<VmConfig>>, 845 846 // Memory Manager 847 memory_manager: Arc<Mutex<MemoryManager>>, 848 849 // CPU Manager 850 cpu_manager: Arc<Mutex<CpuManager>>, 851 852 // The virtio devices on the system 853 virtio_devices: Vec<MetaVirtioDevice>, 854 855 // List of bus devices 856 // Let the DeviceManager keep strong references to the BusDevice devices. 857 // This allows the IO and MMIO buses to be provided with Weak references, 858 // which prevents cyclic dependencies. 859 bus_devices: Vec<Arc<dyn BusDeviceSync>>, 860 861 // Counter to keep track of the consumed device IDs. 862 device_id_cnt: Wrapping<usize>, 863 864 pci_segments: Vec<PciSegment>, 865 866 #[cfg_attr(target_arch = "aarch64", allow(dead_code))] 867 // MSI Interrupt Manager 868 msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, 869 870 #[cfg_attr(feature = "mshv", allow(dead_code))] 871 // Legacy Interrupt Manager 872 legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>, 873 874 // Passthrough device handle 875 passthrough_device: Option<VfioDeviceFd>, 876 877 // VFIO container 878 // Only one container can be created, therefore it is stored as part of the 879 // DeviceManager to be reused. 880 vfio_container: Option<Arc<VfioContainer>>, 881 882 // Paravirtualized IOMMU 883 iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>, 884 iommu_mapping: Option<Arc<IommuMapping>>, 885 886 // PCI information about devices attached to the paravirtualized IOMMU 887 // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF 888 // representing the devices attached to the virtual IOMMU. This is useful 889 // information for filling the ACPI VIOT table. 
890 iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>, 891 892 // Tree of devices, representing the dependencies between devices. 893 // Useful for introspection, snapshot and restore. 894 device_tree: Arc<Mutex<DeviceTree>>, 895 896 // Exit event 897 exit_evt: EventFd, 898 reset_evt: EventFd, 899 900 #[cfg(target_arch = "aarch64")] 901 id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>, 902 903 // seccomp action 904 seccomp_action: SeccompAction, 905 906 // List of guest NUMA nodes. 907 numa_nodes: NumaNodes, 908 909 // Possible handle to the virtio-balloon device 910 balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>, 911 912 // Virtio Device activation EventFd to allow the VMM thread to trigger device 913 // activation and thus start the threads from the VMM thread 914 activate_evt: EventFd, 915 916 acpi_address: GuestAddress, 917 918 selected_segment: usize, 919 920 // Possible handle to the virtio-mem device 921 virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>, 922 923 #[cfg(target_arch = "aarch64")] 924 // GPIO device for AArch64 925 gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>, 926 927 #[cfg(feature = "pvmemcontrol")] 928 pvmemcontrol_devices: Option<( 929 Arc<PvmemcontrolBusDevice>, 930 Arc<Mutex<PvmemcontrolPciDevice>>, 931 )>, 932 933 // pvpanic device 934 pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>, 935 936 // Flag to force setting the iommu on virtio devices 937 force_iommu: bool, 938 939 // io_uring availability if detected 940 io_uring_supported: Option<bool>, 941 942 // aio availability if detected 943 aio_supported: Option<bool>, 944 945 // List of unique identifiers provided at boot through the configuration. 946 boot_id_list: BTreeSet<String>, 947 948 // Start time of the VM 949 timestamp: Instant, 950 951 // Pending activations 952 pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>, 953 954 // Addresses for ACPI platform devices e.g. 
// ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    // Snapshot this DeviceManager was restored from, if any (None on cold boot).
    snapshot: Option<Snapshot>,

    // Shared rate-limiter groups, keyed by the group id from the VM config.
    rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,

    // MMIO regions currently claimed by passthrough (VFIO) devices.
    mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
}

/// Carves the MMIO window `[start, end]` into one aperture per PCI segment,
/// proportionally to `weights`, with every aperture starting on an
/// `alignment`-aligned boundary. Returns one allocator per segment.
///
/// NOTE(review): assumes `weights.len() >= num_pci_segments` and a non-zero
/// total weight (division below) — callers appear to guarantee this; confirm.
fn create_mmio_allocators(
    start: u64,
    end: u64,
    num_pci_segments: u16,
    weights: Vec<u32>,
    alignment: u64,
) -> Vec<Arc<Mutex<AddressAllocator>>> {
    let total_weight: u32 = weights.iter().sum();

    // Start each PCI segment mmio range on an aligned boundary
    let pci_segment_mmio_size = (end - start + 1) / (alignment * total_weight as u64) * alignment;

    let mut mmio_allocators = vec![];
    // `i` accumulates the weight consumed so far, i.e. the offset (in units of
    // `pci_segment_mmio_size`) at which the next segment's aperture begins.
    let mut i = 0;
    for segment_id in 0..num_pci_segments as u64 {
        let weight = weights[segment_id as usize] as u64;
        let mmio_start = start + i * pci_segment_mmio_size;
        let mmio_size = pci_segment_mmio_size * weight;
        let allocator = Arc::new(Mutex::new(
            AddressAllocator::new(GuestAddress(mmio_start), mmio_size).unwrap(),
        ));
        mmio_allocators.push(allocator);
        i += weight;
    }

    mmio_allocators
}

impl DeviceManager {
    /// Builds the `DeviceManager`, wiring up address allocation, the MSI
    /// interrupt manager, the PCI segments and (optionally) restoring state
    /// from `snapshot`. The returned value is already registered on the MMIO
    /// bus at its ACPI address.
    ///
    /// `dynamic` additionally exposes the `CpuManager` on the MMIO bus for
    /// CPU hotplug; `snapshot` being `Some` switches restore paths on.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
        mmio_bus: Arc<Bus>,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        cpu_manager: Arc<Mutex<CpuManager>>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
        snapshot: Option<Snapshot>,
        dynamic: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        // On restore, resume the device tree and the device-name counter from
        // the snapshot so new device ids don't collide with restored ones.
        let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
            let state: DeviceManagerState = snapshot.to_state().unwrap();
            (
                Arc::new(Mutex::new(state.device_tree.clone())),
                state.device_id_cnt,
            )
        } else {
            (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
        };

        let num_pci_segments =
            if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
                platform_config.num_pci_segments
            } else {
                1
            };

        // Per-segment 32-bit aperture weights: default everywhere, then
        // overridden by any explicit per-segment configuration.
        let mut mmio32_aperture_weights: Vec<u32> =
            std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
                .take(num_pci_segments.into())
                .collect();
        if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
            for pci_segment in pci_segments.iter() {
                mmio32_aperture_weights[pci_segment.pci_segment as usize] =
                    pci_segment.mmio32_aperture_weight
            }
        }

        let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0;
        let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE;
        // 4 KiB alignment for the 32-bit window.
        let pci_mmio32_allocators = create_mmio_allocators(
            start_of_mmio32_area,
            end_of_mmio32_area,
            num_pci_segments,
            mmio32_aperture_weights,
            4 << 10,
        );

        // Same scheme for the 64-bit device area.
        let mut mmio64_aperture_weights: Vec<u32> =
            std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
                .take(num_pci_segments.into())
                .collect();
        if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
            for pci_segment in pci_segments.iter() {
                mmio64_aperture_weights[pci_segment.pci_segment as usize] =
                    pci_segment.mmio64_aperture_weight
            }
        }

        let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0;
        let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0;
        // 4 GiB alignment for the 64-bit window.
        let pci_mmio64_allocators = create_mmio_allocators(
            start_of_mmio64_area,
            end_of_mmio64_area,
            num_pci_segments,
            mmio64_aperture_weights,
            4 << 30,
        );

        let address_manager = Arc::new(AddressManager {
            allocator: memory_manager.lock().unwrap().allocator(),
            #[cfg(target_arch = "x86_64")]
            io_bus,
            mmio_bus,
            vm: vm.clone(),
            device_tree: Arc::clone(&device_tree),
            pci_mmio32_allocators,
            pci_mmio64_allocators,
        });

        // First we create the MSI interrupt manager, the legacy one is created
        // later, after the IOAPIC device creation.
        // The reason we create the MSI one first is because the IOAPIC needs it,
        // and then the legacy interrupt manager needs an IOAPIC. So we're
        // handling a linear dependency chain:
        // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
        let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
            Arc::new(MsiInterruptManager::new(
                Arc::clone(&address_manager.allocator),
                vm,
            ));

        // NOTE(review): this is an MMIO allocation but failure maps to
        // `AllocateIoPort` — looks like it should be `AllocateMmioAddress`;
        // confirm against the error enum's intended use.
        let acpi_address = address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        // Legacy IRQ slots shared by all PCI devices (one entry per slot).
        let mut pci_irq_slots = [0; 32];
        PciSegment::reserve_legacy_interrupts_for_pci_devices(
            &address_manager,
            &mut pci_irq_slots,
        )?;

        // Segment 0 is the default segment; further segments carry their NUMA
        // node affinity.
        let mut pci_segments = vec![PciSegment::new_default_segment(
            &address_manager,
            Arc::clone(&address_manager.pci_mmio32_allocators[0]),
            Arc::clone(&address_manager.pci_mmio64_allocators[0]),
            &pci_irq_slots,
        )?];

        for i in 1..num_pci_segments as usize {
            pci_segments.push(PciSegment::new(
                i as u16,
                numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
                &address_manager,
                Arc::clone(&address_manager.pci_mmio32_allocators[i]),
                Arc::clone(&address_manager.pci_mmio64_allocators[i]),
                &pci_irq_slots,
            )?);
        }

        // With dynamic VM support, expose the CpuManager's ACPI region on the
        // MMIO bus so the guest can drive CPU hotplug.
        if dynamic {
            let acpi_address = address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
                .ok_or(DeviceManagerError::AllocateMmioAddress)?;

            address_manager
                .mmio_bus
                .insert(
                    cpu_manager.clone(),
                    acpi_address.0,
                    CPU_MANAGER_ACPI_SIZE as u64,
                )
                .map_err(DeviceManagerError::BusError)?;

            cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
        }

        // Build the configured rate-limiter groups and start their worker
        // threads; block devices reference these groups by id later.
        let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new();
        if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() {
            for rate_limit_group_cfg in rate_limit_groups_cfg {
                let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config;
                let bw = rate_limit_cfg.bandwidth.unwrap_or_default();
                let ops = rate_limit_cfg.ops.unwrap_or_default();
                let mut rate_limit_group = RateLimiterGroup::new(
                    &rate_limit_group_cfg.id,
                    bw.size,
                    bw.one_time_burst.unwrap_or(0),
                    bw.refill_time,
                    ops.size,
                    ops.one_time_burst.unwrap_or(0),
                    ops.refill_time,
                )
                .map_err(DeviceManagerError::RateLimiterGroupCreate)?;

                let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?;

                rate_limit_group.start_thread(exit_evt).unwrap();
                rate_limit_groups
                    .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group));
            }
        }

        let device_manager = DeviceManager {
            address_manager: Arc::clone(&address_manager),
            console: Arc::new(Console::default()),
            interrupt_controller: None,
            #[cfg(target_arch = "aarch64")]
            cmdline_additions: Vec::new(),
            ged_notification_device: None,
            config,
            memory_manager,
            cpu_manager,
            virtio_devices: Vec::new(),
            bus_devices: Vec::new(),
            device_id_cnt,
            msi_interrupt_manager,
            legacy_interrupt_manager: None,
            passthrough_device: None,
            vfio_container: None,
            iommu_device: None,
            iommu_mapping: None,
            iommu_attached_devices: None,
            pci_segments,
            device_tree,
            exit_evt,
            reset_evt,
            #[cfg(target_arch = "aarch64")]
            id_to_dev_info: HashMap::new(),
            seccomp_action,
            numa_nodes,
            balloon: None,
            activate_evt: activate_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            acpi_address,
            selected_segment: 0,
            serial_manager: None,
            console_resize_pipe: None,
            original_termios_opt: Arc::new(Mutex::new(None)),
            virtio_mem_devices: Vec::new(),
            #[cfg(target_arch = "aarch64")]
            gpio_device: None,
            #[cfg(feature = "pvmemcontrol")]
            pvmemcontrol_devices: None,
            pvpanic_device: None,
            force_iommu,
            io_uring_supported: None,
            aio_supported: None,
            boot_id_list,
            timestamp,
            pending_activations: Arc::new(Mutex::new(Vec::default())),
            acpi_platform_addresses: AcpiPlatformAddresses::default(),
            snapshot,
            rate_limit_groups,
            mmio_regions: Arc::new(Mutex::new(Vec::new())),
        };

        let device_manager = Arc::new(Mutex::new(device_manager));

        // Register the DeviceManager itself on the MMIO bus so the guest can
        // reach its ACPI region (device hotplug notifications etc.).
        address_manager
            .mmio_bus
            .insert(
                Arc::clone(&device_manager) as Arc<dyn BusDeviceSync>,
                acpi_address.0,
                DEVICE_MANAGER_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(device_manager)
    }

    /// Returns the pipe used to signal console resizes (SIGWINCH), if any.
    pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
        self.console_resize_pipe.clone()
    }

    /// Instantiates all platform devices in dependency order: interrupt
    /// controller, legacy interrupt manager, legacy/ACPI devices, consoles,
    /// then the virtio and PCI devices.
    pub fn create_devices(
        &mut self,
        console_info: Option<ConsoleInfo>,
        console_resize_pipe: Option<Arc<File>>,
        original_termios_opt: Arc<Mutex<Option<termios>>>,
    ) -> DeviceManagerResult<()> {
        trace_scoped!("create_devices");

        let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();

        let interrupt_controller = self.add_interrupt_controller()?;

        self.cpu_manager
            .lock()
            .unwrap()
            .set_interrupt_controller(interrupt_controller.clone());

        // Now we can create the legacy interrupt manager, which needs the freshly
        // formed IOAPIC device.
        let legacy_interrupt_manager: Arc<
            dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
        > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
            &interrupt_controller,
        )));

        // Expose the MemoryManager's ACPI region (memory hotplug) if it has one.
        {
            if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
                self.address_manager
                    .mmio_bus
                    .insert(
                        Arc::clone(&self.memory_manager) as Arc<dyn BusDeviceSync>,
                        acpi_address.0,
                        MEMORY_MANAGER_ACPI_SIZE as u64,
                    )
                    .map_err(DeviceManagerError::BusError)?;
            }
        }

        #[cfg(target_arch = "x86_64")]
        self.add_legacy_devices(
            self.reset_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )?;

        #[cfg(target_arch = "aarch64")]
        self.add_legacy_devices(&legacy_interrupt_manager)?;

        {
            self.ged_notification_device = self.add_acpi_devices(
                &legacy_interrupt_manager,
                self.reset_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )?;
        }

        // Keep the original terminal settings so they can be restored on exit.
        self.original_termios_opt = original_termios_opt;

        self.console = self.add_console_devices(
            &legacy_interrupt_manager,
            &mut virtio_devices,
            console_info,
            console_resize_pipe,
        )?;

        if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
            let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
            self.bus_devices
                .push(Arc::clone(&tpm_dev) as Arc<dyn BusDeviceSync>)
        }
        self.legacy_interrupt_manager = Some(legacy_interrupt_manager);

        virtio_devices.append(&mut self.make_virtio_devices()?);

        self.add_pci_devices(virtio_devices.clone())?;

        self.virtio_devices = virtio_devices;

        // Add pvmemcontrol if required
        #[cfg(feature = "pvmemcontrol")]
        {
            if self.config.lock().unwrap().pvmemcontrol.is_some() {
                let (pvmemcontrol_bus_device, pvmemcontrol_pci_device) =
                    self.make_pvmemcontrol_device()?;
                self.pvmemcontrol_devices =
                    Some((pvmemcontrol_bus_device, pvmemcontrol_pci_device));
            }
        }

        if self.config.clone().lock().unwrap().pvpanic {
            self.pvpanic_device = self.add_pvpanic_device()?;
        }

        Ok(())
    }

    /// Captures the snapshot-relevant state: the device tree and the device
    /// name counter.
    fn state(&self) -> DeviceManagerState {
        DeviceManagerState {
            device_tree: self.device_tree.lock().unwrap().clone(),
            device_id_cnt: self.device_id_cnt,
        }
    }

    /// Returns the inclusive guest-physical range used for MSI doorbells:
    /// derived from the vGIC ITS configuration on aarch64, the fixed APIC
    /// window on x86_64.
    fn get_msi_iova_space(&mut self) -> (u64, u64) {
        #[cfg(target_arch = "aarch64")]
        {
            let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
            let vgic_config = gic::Gic::create_default_config(vcpus.into());
            (
                vgic_config.msi_addr,
                vgic_config.msi_addr + vgic_config.msi_size - 1,
            )
        }
        #[cfg(target_arch = "x86_64")]
        (0xfee0_0000, 0xfeef_ffff)
    }

    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }

    /// Plugs the virtio devices, VFIO/vfio-user devices and (optionally) the
    /// virtio-iommu device onto the PCI segments, tracking which BDFs sit
    /// behind the IOMMU.
    #[allow(unused_variables)]
    fn add_pci_devices(
        &mut self,
        virtio_devices: Vec<MetaVirtioDevice>,
    ) -> DeviceManagerResult<()> {
        let iommu_id = String::from(IOMMU_DEVICE_NAME);

        let iommu_device = if self.config.lock().unwrap().iommu {
            let (device, mapping) = virtio_devices::Iommu::new(
                iommu_id.clone(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.get_msi_iova_space(),
                state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioIommu)?;
            let device = Arc::new(Mutex::new(device));
            self.iommu_device = Some(Arc::clone(&device));
            self.iommu_mapping = Some(mapping);

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(iommu_id.clone(), device_node!(iommu_id, device));

            Some(device)
        } else {
            None
        };

        let mut iommu_attached_devices = Vec::new();
        {
            for handle in virtio_devices {
                // Only hand the IOMMU mapping to devices configured behind it.
                let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
                    self.iommu_mapping.clone()
                } else {
                    None
                };

                let dev_id = self.add_virtio_pci_device(
                    handle.virtio_device,
                    &mapping,
                    handle.id,
                    handle.pci_segment,
                    handle.dma_handler,
                )?;

                if handle.iommu {
                    iommu_attached_devices.push(dev_id);
                }
            }

            let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
            iommu_attached_devices.append(&mut vfio_iommu_device_ids);

            let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
            iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);

            // Add all devices from forced iommu segments
            if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
                if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
                    for segment in iommu_segments {
                        // Every possible device slot on the segment (bus 0,
                        // devices 0..32, function 0) is declared IOMMU-attached.
                        for device in 0..32 {
                            let bdf = PciBdf::new(*segment, 0, device, 0);
                            if !iommu_attached_devices.contains(&bdf) {
                                iommu_attached_devices.push(bdf);
                            }
                        }
                    }
                }
            }

            // The IOMMU device itself goes on segment 0 and is never behind
            // itself (mapping is `None`).
            if let Some(iommu_device) = iommu_device {
                let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
                self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
            }
        }

        // Track config-space devices so they participate in pause/resume.
        for segment in &self.pci_segments {
            #[cfg(target_arch = "x86_64")]
            if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
                self.bus_devices
                    .push(Arc::clone(pci_config_io) as Arc<dyn BusDeviceSync>);
            }

            self.bus_devices
                .push(Arc::clone(&segment.pci_config_mmio) as Arc<dyn BusDeviceSync>);
        }

        Ok(())
    }

    /// Creates the aarch64 interrupt controller (vGIC), restoring its state
    /// (and PMU) from a snapshot when present.
    #[cfg(target_arch = "aarch64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
            gic::Gic::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
                self.address_manager.vm.clone(),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        // Restore the vGic if this is in the process of restoration
        let id = String::from(gic::GIC_SNAPSHOT_ID);
        if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
            // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
            if self
                .cpu_manager
                .lock()
                .unwrap()
                .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
                .is_err()
            {
                info!("Failed to initialize PMU");
            }

            let vgic_state = vgic_snapshot
                .to_state()
                .map_err(DeviceManagerError::RestoreGetState)?;
            let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
            interrupt_controller
                .lock()
                .unwrap()
                .restore_vgic(vgic_state, &saved_vcpu_states)
                .unwrap();
        }

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// Accessor for the vGIC, once `add_interrupt_controller` has run.
    #[cfg(target_arch = "aarch64")]
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }

    /// Creates the x86_64 interrupt controller (IOAPIC) and maps it on the
    /// MMIO bus at its fixed address.
    #[cfg(target_arch = "x86_64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let id = String::from(IOAPIC_DEVICE_NAME);

        // Create IOAPIC
        let interrupt_controller = Arc::new(Mutex::new(
            ioapic::Ioapic::new(
                id.clone(),
                APIC_START,
                Arc::clone(&self.msi_interrupt_manager),
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        self.address_manager
            .mmio_bus
            .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
            .map_err(DeviceManagerError::BusError)?;

        self.bus_devices
            .push(Arc::clone(&interrupt_controller) as Arc<dyn BusDeviceSync>);

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// Creates the ACPI platform devices: shutdown/reset registers, the
    /// Generic Event Device (GED, used for hotplug notifications) and the PM
    /// timer. Returns the GED device so callers can signal it later.
    fn add_acpi_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        reset_evt: EventFd,
        exit_evt: EventFd,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
            exit_evt,
            reset_evt,
            vcpus_kill_signalled,
        )));

        self.bus_devices
            .push(Arc::clone(&shutdown_device) as Arc<dyn BusDeviceSync>);

        #[cfg(target_arch = "x86_64")]
        {
            let shutdown_pio_address: u16 = 0x600;

            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            // NOTE(review): 0x8 ports are reserved above but only 0x4 are
            // mapped on the bus here — confirm whether the insert length
            // should match the allocation.
            self.address_manager
                .io_bus
                .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            // Sleep control/status and reset all share the same port; the
            // device discriminates on the written value.
            self.acpi_platform_addresses.sleep_control_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.sleep_status_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.reset_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
        }

        // GED: allocate its interrupt and MMIO window, then wire it up.
        let ged_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();
        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: ged_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;
        let ged_address = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(
                None,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
                None,
            )
            .ok_or(DeviceManagerError::AllocateMmioAddress)?;
        let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
            interrupt_group,
            ged_irq,
            ged_address,
        )));
        self.address_manager
            .mmio_bus
            .insert(
                ged_device.clone(),
                ged_address.0,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;
        self.bus_devices
            .push(Arc::clone(&ged_device) as Arc<dyn BusDeviceSync>);

        let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));

        self.bus_devices
            .push(Arc::clone(&pm_timer_device) as Arc<dyn BusDeviceSync>);

        #[cfg(target_arch = "x86_64")]
        {
            let pm_timer_pio_address: u16 = 0x608;

            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            self.acpi_platform_addresses.pm_timer_address =
                Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
        }

        Ok(Some(ged_device))
    }

    /// x86_64 legacy devices: i8042 (reset), CMOS (memory size reporting),
    /// firmware debug port (0x402) and the 0x80 POST/debug port.
    #[cfg(target_arch = "x86_64")]
    fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        // Add a shutdown device (i8042)
        let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
            reset_evt.try_clone().unwrap(),
            vcpus_kill_signalled.clone(),
        )));

        self.bus_devices
            .push(Arc::clone(&i8042) as Arc<dyn BusDeviceSync>);

        self.address_manager
            .io_bus
            .insert(i8042, 0x61, 0x4)
            .map_err(DeviceManagerError::BusError)?;
        {
            // Add a CMOS emulated device
            let mem_size = self
                .memory_manager
                .lock()
                .unwrap()
                .guest_memory()
                .memory()
                .last_addr()
                .0
                + 1;
            let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
            let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);

            let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
                mem_below_4g,
                mem_above_4g,
                reset_evt,
                Some(vcpus_kill_signalled),
            )));

            self.bus_devices
                .push(Arc::clone(&cmos) as Arc<dyn BusDeviceSync>);

            self.address_manager
                .io_bus
                .insert(cmos, 0x70, 0x2)
                .map_err(DeviceManagerError::BusError)?;

            let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));

            self.bus_devices
                .push(Arc::clone(&fwdebug) as Arc<dyn BusDeviceSync>);

            self.address_manager
                .io_bus
                .insert(fwdebug, 0x402, 0x1)
                .map_err(DeviceManagerError::BusError)?;
        }

        // 0x80 debug port
        let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
        self.bus_devices
            .push(Arc::clone(&debug_port) as Arc<dyn BusDeviceSync>);
        self.address_manager
            .io_bus
            .insert(debug_port, 0x80, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        Ok(())
    }

    /// aarch64 legacy devices: an MMIO RTC and an MMIO GPIO controller, both
    /// recorded in `id_to_dev_info` for FDT generation.
    #[cfg(target_arch = "aarch64")]
    fn add_legacy_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
    ) -> DeviceManagerResult<()> {
        // Add a RTC device
        let rtc_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: rtc_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));

        self.bus_devices
            .push(Arc::clone(&rtc_device) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(rtc_device, addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Rtc, "rtc".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: rtc_irq,
            },
        );

        // Add a GPIO device
        let id = String::from(GPIO_DEVICE_NAME);
        let gpio_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: gpio_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
            id.clone(),
            interrupt_group,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&gpio_device) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(gpio_device.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.gpio_device = Some(gpio_device.clone());

        self.id_to_dev_info.insert(
            (DeviceType::Gpio, "gpio".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: gpio_irq,
            },
        );

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, gpio_device));

        Ok(())
    }

    /// Creates the x86 debug-console (debugcon-style) device on its configured
    /// (or default) I/O port and writes output through `debug_console_writer`.
    #[cfg(target_arch = "x86_64")]
    fn add_debug_console_device(
        &mut self,
        debug_console_writer: Box<dyn io::Write + Send>,
    ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> {
        let id = String::from(DEBUGCON_DEVICE_NAME);
        let debug_console = Arc::new(Mutex::new(DebugConsole::new(
            id.clone(),
            debug_console_writer,
        )));

        // Use the configured iobase, falling back to the device default.
        let port = self
            .config
            .lock()
            .unwrap()
            .debug_console
            .clone()
            .iobase
            .map(|port| port as u64)
            .unwrap_or(debug_console::DEFAULT_PORT);

        self.bus_devices
            .push(Arc::clone(&debug_console) as Arc<dyn BusDeviceSync>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(debug_console.clone(), port, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, debug_console));

        Ok(debug_console)
    }

    /// x86_64: 16550-style serial at the conventional COM1 location
    /// (0x3f8, IRQ 4).
    #[cfg(target_arch = "x86_64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
        // Serial is tied to IRQ #4
        let serial_irq = 4;

        let id = String::from(SERIAL_DEVICE_NAME);

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(Serial::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(serial.clone(), 0x3f8, 0x8)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// aarch64: PL011 UART on the MMIO bus with a dynamically allocated IRQ;
    /// also appends the matching `earlycon` kernel command-line hint.
    #[cfg(target_arch = "aarch64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
        let id = String::from(SERIAL_DEVICE_NAME);

        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            self.timestamp,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: serial_irq,
            },
        );

        self.cmdline_additions
            .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// Creates the virtio-console device backed by `console_fd`, returning a
    /// resizer only when the console is attached to a real TTY.
    fn add_virtio_console_device(
        &mut self,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_fd: ConsoleOutput,
        resize_pipe: Option<Arc<File>>,
    ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
        let console_config = self.config.lock().unwrap().console.clone();
        let endpoint = match console_fd {
            ConsoleOutput::File(file) => Endpoint::File(file),
            ConsoleOutput::Pty(file) => {
                self.console_resize_pipe = resize_pipe;
                Endpoint::PtyPair(Arc::new(file.try_clone().unwrap()), file)
            }
            ConsoleOutput::Tty(stdout) => {
                // Resizing only makes sense when stdout is a real terminal.
                if stdout.is_terminal() {
                    self.console_resize_pipe = resize_pipe;
                }

                // If an interactive TTY then we can accept input
                // SAFETY: FFI call. Trivially safe.
                if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
                    // SAFETY: FFI call to dup. Trivially safe.
                    let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
                    if stdin == -1 {
                        return vmm_sys_util::errno::errno_result()
                            .map_err(DeviceManagerError::DupFd);
                    }
                    // SAFETY: stdin is valid and owned solely by us.
                    let stdin = unsafe { File::from_raw_fd(stdin) };
                    Endpoint::FilePair(stdout, Arc::new(stdin))
                } else {
                    Endpoint::File(stdout)
                }
            }
            ConsoleOutput::Socket(_) => {
                return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
            }
            ConsoleOutput::Null => Endpoint::Null,
            ConsoleOutput::Off => return Ok(None),
        };
        let id = String::from(CONSOLE_DEVICE_NAME);

        let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
            id.clone(),
            endpoint,
            self.console_resize_pipe
                .as_ref()
                .map(|p| p.try_clone().unwrap()),
            self.force_iommu | console_config.iommu,
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateVirtioConsole)?;
        let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
        virtio_devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_console_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: console_config.iommu,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_console_device));

        // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
        Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
            Some(console_resizer)
        } else {
            None
        })
    }

    /// Adds all devices that behave like a console with respect to the VM
    /// configuration. This includes:
    /// - debug-console
    /// - serial-console
    /// - virtio-console
    fn add_console_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_info: Option<ConsoleInfo>,
        console_resize_pipe: Option<Arc<File>>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        if console_info.is_none() {
            return Err(DeviceManagerError::InvalidConsoleInfo);
        }

        // SAFETY: console_info is Some, so it's safe to unwrap.
        let console_info = console_info.unwrap();

        let serial_writer: Option<Box<dyn io::Write + Send>> = match console_info.serial_main_fd {
            ConsoleOutput::File(ref file) | ConsoleOutput::Tty(ref file) => {
                Some(Box::new(Arc::clone(file)))
            }
            ConsoleOutput::Off
            | ConsoleOutput::Null
            | ConsoleOutput::Pty(_)
            | ConsoleOutput::Socket(_) => None,
        };

        if !matches!(console_info.serial_main_fd, ConsoleOutput::Off) {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            // Input-capable backends get a SerialManager thread to pump data
            // into the device.
            self.serial_manager = match console_info.serial_main_fd {
                ConsoleOutput::Pty(_) | ConsoleOutput::Tty(_) | ConsoleOutput::Socket(_) => {
                    let serial_manager = SerialManager::new(
                        serial,
                        console_info.serial_main_fd,
                        serial_config.socket,
                    )
                    .map_err(DeviceManagerError::CreateSerialManager)?;
                    if let Some(mut serial_manager) = serial_manager {
                        serial_manager
                            .start_thread(
                                self.exit_evt
                                    .try_clone()
                                    .map_err(DeviceManagerError::EventFd)?,
                            )
                            .map_err(DeviceManagerError::SpawnSerialManager)?;
                        Some(Arc::new(serial_manager))
                    } else {
                        None
                    }
                }
                _ => None,
            };
        }

        #[cfg(target_arch = "x86_64")]
        {
            let debug_console_writer: Option<Box<dyn io::Write + Send>> =
                match console_info.debug_main_fd {
                    ConsoleOutput::File(file) | ConsoleOutput::Tty(file) => Some(Box::new(file)),
                    ConsoleOutput::Off
                    | ConsoleOutput::Null
                    | ConsoleOutput::Pty(_)
                    | ConsoleOutput::Socket(_) => None,
                };
            if let Some(writer) = debug_console_writer {
                let _ = self.add_debug_console_device(writer)?;
            }
        }

        let console_resizer = self.add_virtio_console_device(
            virtio_devices,
            console_info.console_main_fd,
            console_resize_pipe,
        )?;

        Ok(Arc::new(Console { console_resizer }))
    }

    /// Creates the TPM device backed by the swtpm socket at `tpm_path` and
    /// maps it at the fixed TPM MMIO window.
    fn add_tpm_device(
        &mut self,
        tpm_path: PathBuf,
    ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
        // Create TPM Device
        let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
            DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
        })?;
        let tpm = Arc::new(Mutex::new(tpm));

        // Add TPM Device to mmio
        self.address_manager
            .mmio_bus
            .insert(
                tpm.clone(),
                arch::layout::TPM_START.0,
                arch::layout::TPM_SIZE,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(tpm)
    }

    /// Instantiates every configured virtio device, in a fixed order.
    fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices: Vec<MetaVirtioDevice> = Vec::new();

        // Create "standard" virtio devices (net/block/rng)
        devices.append(&mut self.make_virtio_block_devices()?);
        devices.append(&mut self.make_virtio_net_devices()?);
        devices.append(&mut self.make_virtio_rng_devices()?);

        // Add virtio-fs if required
        devices.append(&mut self.make_virtio_fs_devices()?);

        // Add virtio-pmem if required
        devices.append(&mut self.make_virtio_pmem_devices()?);

        // Add virtio-vsock if required
        devices.append(&mut self.make_virtio_vsock_devices()?);

        devices.append(&mut self.make_virtio_mem_devices()?);

        // Add
virtio-balloon if required 2209 devices.append(&mut self.make_virtio_balloon_devices()?); 2210 2211 // Add virtio-watchdog device 2212 devices.append(&mut self.make_virtio_watchdog_devices()?); 2213 2214 // Add vDPA devices if required 2215 devices.append(&mut self.make_vdpa_devices()?); 2216 2217 Ok(devices) 2218 } 2219 2220 // Cache whether aio is supported to avoid checking for very block device 2221 fn aio_is_supported(&mut self) -> bool { 2222 if let Some(supported) = self.aio_supported { 2223 return supported; 2224 } 2225 2226 let supported = block_aio_is_supported(); 2227 self.aio_supported = Some(supported); 2228 supported 2229 } 2230 2231 // Cache whether io_uring is supported to avoid probing for very block device 2232 fn io_uring_is_supported(&mut self) -> bool { 2233 if let Some(supported) = self.io_uring_supported { 2234 return supported; 2235 } 2236 2237 let supported = block_io_uring_is_supported(); 2238 self.io_uring_supported = Some(supported); 2239 supported 2240 } 2241 2242 fn make_virtio_block_device( 2243 &mut self, 2244 disk_cfg: &mut DiskConfig, 2245 ) -> DeviceManagerResult<MetaVirtioDevice> { 2246 let id = if let Some(id) = &disk_cfg.id { 2247 id.clone() 2248 } else { 2249 let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?; 2250 disk_cfg.id = Some(id.clone()); 2251 id 2252 }; 2253 2254 info!("Creating virtio-block device: {:?}", disk_cfg); 2255 2256 let (virtio_device, migratable_device) = if disk_cfg.vhost_user { 2257 let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone(); 2258 let vu_cfg = VhostUserConfig { 2259 socket, 2260 num_queues: disk_cfg.num_queues, 2261 queue_size: disk_cfg.queue_size, 2262 }; 2263 let vhost_user_block = Arc::new(Mutex::new( 2264 match virtio_devices::vhost_user::Blk::new( 2265 id.clone(), 2266 vu_cfg, 2267 self.seccomp_action.clone(), 2268 self.exit_evt 2269 .try_clone() 2270 .map_err(DeviceManagerError::EventFd)?, 2271 self.force_iommu, 2272 state_from_id(self.snapshot.as_ref(), id.as_str()) 2273 
                        .map_err(DeviceManagerError::RestoreGetState)?,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            // Pick the disk backend based on image format and which async
            // I/O engines (io_uring/aio) are both enabled and supported.
            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(
                                FixedVhdDiskAsync::new(file)
                                    .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                            ) as Box<dyn DiskFile>
                        }
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous RAW disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                        }
                    } else if !disk_cfg.disable_aio && self.aio_is_supported() {
                        info!("Using asynchronous RAW disk file (aio)");
                        Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            let rate_limit_group =
                if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
                    // Create an anonymous RateLimiterGroup that is dropped when the Disk
                    // is dropped.
                    let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
                    let ops = rate_limiter_cfg.ops.unwrap_or_default();
                    let mut rate_limit_group = RateLimiterGroup::new(
                        disk_cfg.id.as_ref().unwrap(),
                        bw.size,
                        bw.one_time_burst.unwrap_or(0),
                        bw.refill_time,
                        ops.size,
                        ops.one_time_burst.unwrap_or(0),
                        ops.refill_time,
                    )
                    .map_err(DeviceManagerError::RateLimiterGroupCreate)?;

                    rate_limit_group
                        .start_thread(
                            self.exit_evt
                                .try_clone()
                                .map_err(DeviceManagerError::EventFd)?,
                        )
                        .unwrap();

                    Some(Arc::new(rate_limit_group))
                } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
                    // Shared, named group configured at VM level.
                    self.rate_limit_groups.get(rate_limit_group).cloned()
                } else {
                    None
                };

            let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() {
                queue_affinity
                    .iter()
                    .map(|a| (a.queue_index, a.host_cpus.clone()))
                    .collect()
            } else {
                BTreeMap::new()
            };

            let virtio_block = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    disk_cfg.serial.clone(),
                    self.seccomp_action.clone(),
                    rate_limit_group,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    queue_affinity,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Create all disk-backed virtio-block devices from the VM config,
    /// writing any generated device ids back into the config.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }

    /// Create a single virtio-net or vhost-user-net device from its config.
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Reuse the configured id, or generate and persist a fresh one.
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    net_cfg.mtu,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(),
                        id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                ) {
                    Ok(vun_device) => vun_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserNet(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_net as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let state = state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?;
            // Three backends: a named TAP interface, pre-opened TAP fds, or a
            // TAP created from ip/mask settings.
            let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        Some(tap_if_name),
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else if let Some(fds) = &net_cfg.fds {
                let net = virtio_devices::Net::from_tap_fds(
                    id.clone(),
                    fds,
                    Some(net_cfg.mac),
                    net_cfg.mtu,
                    self.force_iommu | net_cfg.iommu,
                    net_cfg.queue_size,
                    self.seccomp_action.clone(),
                    net_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                )
                .map_err(DeviceManagerError::CreateVirtioNet)?;

                // SAFETY: 'fds' are valid because TAP devices are created successfully
                unsafe {
                    self.config.lock().unwrap().add_preserved_fds(fds.clone());
                }

                Arc::new(Mutex::new(net))
            } else {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        None,
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            };

            (
                Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_net as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: net_cfg.iommu,
            id,
            pci_segment: net_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add virtio-net and vhost-user-net devices
    fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        let mut net_devices = self.config.lock().unwrap().net.clone();
        if let Some(net_list_cfg) = &mut net_devices {
            for net_cfg in net_list_cfg.iter_mut() {
                devices.push(self.make_virtio_net_device(net_cfg)?);
            }
        }
        self.config.lock().unwrap().net = net_devices;

        Ok(devices)
    }

    /// Create the single virtio-rng device when the entropy source path is
    /// valid UTF-8.
    fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        if let Some(rng_path) = rng_config.src.to_str() {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_rng_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: rng_config.iommu,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_rng_device));
        }

        Ok(devices)
    }

    /// Create one virtio-fs (vhost-user-fs) device from its config.
    fn make_virtio_fs_device(
        &mut self,
        fs_cfg: &mut FsConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Reuse the configured id, or generate and persist a fresh one.
        let id = if let Some(id) = &fs_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
            fs_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-fs device: {:?}", fs_cfg);

        let mut node = device_node!(id);

        if let Some(fs_socket) = fs_cfg.socket.to_str() {
            let virtio_fs_device = Arc::new(Mutex::new(
                virtio_devices::vhost_user::Fs::new(
                    id.clone(),
                    fs_socket,
                    &fs_cfg.tag,
                    fs_cfg.num_queues,
                    fs_cfg.queue_size,
                    None,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioFs)?,
            ));

            // Update the device tree with the migratable device.
            node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_fs_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id,
                pci_segment: fs_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            Err(DeviceManagerError::NoVirtioFsSock)
        }
    }

    /// Create all configured virtio-fs devices, writing any generated ids
    /// back into the config.
    fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut fs_devices = self.config.lock().unwrap().fs.clone();
        if let Some(fs_list_cfg) = &mut fs_devices {
            for fs_cfg in fs_list_cfg.iter_mut() {
                devices.push(self.make_virtio_fs_device(fs_cfg)?);
            }
        }
        self.config.lock().unwrap().fs = fs_devices;

        Ok(devices)
    }

    /// Create a single virtio-pmem device: open (or create) the backing
    /// file, mmap it, register the mapping with the memory manager, and
    /// record its MMIO range in the device tree.
    fn make_virtio_pmem_device(
        &mut self,
        pmem_cfg: &mut PmemConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Reuse the configured id, or generate and persist a fresh one.
        let id = if let Some(id) = &pmem_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
            pmem_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-pmem device: {:?}", pmem_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
            info!("Restoring virtio-pmem {} resources", id);

            let mut region_range: Option<(u64, u64)> = None;
            for resource in node.resources.iter() {
                match resource {
                    Resource::MmioAddressRange { base, size } => {
                        // A pmem device owns exactly one MMIO range.
                        if region_range.is_some() {
                            return Err(DeviceManagerError::ResourceAlreadyExists);
                        }

                        region_range = Some((*base, *size));
                    }
                    _ => {
                        error!("Unexpected resource {:?} for {}", resource, id);
                    }
                }
            }

            if region_range.is_none() {
                return Err(DeviceManagerError::MissingVirtioPmemResources);
            }

            region_range
        } else {
            None
        };

        // A directory means we back the device with an unnamed temporary
        // file (O_TMPFILE) sized via set_len; a plain file is used as-is.
        let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
            if pmem_cfg.size.is_none() {
                return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
            }
            (O_TMPFILE, true)
        } else {
            (0, false)
        };

        let mut file = OpenOptions::new()
            .read(true)
            .write(!pmem_cfg.discard_writes)
            .custom_flags(custom_flags)
            .open(&pmem_cfg.file)
            .map_err(DeviceManagerError::PmemFileOpen)?;

        let size = if let Some(size) = pmem_cfg.size {
            if set_len {
                file.set_len(size)
                    .map_err(DeviceManagerError::PmemFileSetLen)?;
            }
            size
        } else {
            // No explicit size: use the backing file's length.
            file.seek(SeekFrom::End(0))
                .map_err(DeviceManagerError::PmemFileSetLen)?
        };

        if size % 0x20_0000 != 0 {
            return Err(DeviceManagerError::PmemSizeNotAligned);
        }

        let (region_base, region_size) = if let Some((base, size)) = region_range {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(
                    Some(GuestAddress(base)),
                    size as GuestUsize,
                    Some(0x0020_0000),
                )
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base, size)
        } else {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            let base = self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(None, size as GuestUsize, Some(0x0020_0000))
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base.raw_value(), size)
        };

        let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
        // MAP_PRIVATE keeps guest writes out of the backing file when
        // discard_writes is requested.
        let mmap_region = MmapRegion::build(
            Some(FileOffset::new(cloned_file, 0)),
            region_size as usize,
            PROT_READ | PROT_WRITE,
            MAP_NORESERVE
                | if pmem_cfg.discard_writes {
                    MAP_PRIVATE
                } else {
                    MAP_SHARED
                },
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;
        let host_addr: u64 = mmap_region.as_ptr() as u64;

        let mem_slot = self
            .memory_manager
            .lock()
            .unwrap()
            .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
            .map_err(DeviceManagerError::MemoryManager)?;

        let mapping = virtio_devices::UserspaceMapping {
            host_addr,
            mem_slot,
            addr: GuestAddress(region_base),
            len: region_size,
            mergeable: false,
        };

        let virtio_pmem_device = Arc::new(Mutex::new(
            virtio_devices::Pmem::new(
                id.clone(),
                file,
                GuestAddress(region_base),
                mapping,
                mmap_region,
                self.force_iommu | pmem_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioPmem)?,
        ));

        // Update the device tree with correct resource information and with
        // the migratable device.
        node.resources.push(Resource::MmioAddressRange {
            base: region_base,
            size: region_size,
        });
        node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_pmem_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: pmem_cfg.iommu,
            id,
            pci_segment: pmem_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Create all configured virtio-pmem devices, writing any generated ids
    /// back into the config.
    fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add virtio-pmem if required
        let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
        if let Some(pmem_list_cfg) = &mut pmem_devices {
            for pmem_cfg in pmem_list_cfg.iter_mut() {
                devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
            }
        }
        self.config.lock().unwrap().pmem = pmem_devices;

        Ok(devices)
    }

    /// Create a single virtio-vsock device backed by a Unix-socket backend.
    fn make_virtio_vsock_device(
        &mut self,
        vsock_cfg: &mut VsockConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Reuse the configured id, or generate and persist a fresh one.
        let id = if let Some(id) = &vsock_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
            vsock_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-vsock device: {:?}", vsock_cfg);

        let socket_path = vsock_cfg
            .socket
            .to_str()
            .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
        let backend =
            virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                .map_err(DeviceManagerError::CreateVsockBackend)?;

        let vsock_device = Arc::new(Mutex::new(
            virtio_devices::Vsock::new(
                id.clone(),
                vsock_cfg.cid,
                vsock_cfg.socket.clone(),
                backend,
                self.force_iommu | vsock_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioVsock)?,
        ));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vsock_device));

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&vsock_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vsock_cfg.iommu,
            id,
            pci_segment: vsock_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Create the (at most one) virtio-vsock device from the VM config.
    fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut vsock = self.config.lock().unwrap().vsock.clone();
        if let Some(ref mut vsock_cfg) = &mut vsock {
            devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
        }
        self.config.lock().unwrap().vsock = vsock;

        Ok(devices)
    }

    /// Create one virtio-mem device per memory zone that has virtio-mem
    /// enabled, wiring each device back into its zone for resize requests.
    fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mm = self.memory_manager.clone();
        let mut mm = mm.lock().unwrap();
        for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
            if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
                info!("Creating virtio-mem device: id = {}", memory_zone_id);

                let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
                    .map(|i| i as u16);

                let virtio_mem_device = Arc::new(Mutex::new(
                    virtio_devices::Mem::new(
                        memory_zone_id.clone(),
                        virtio_mem_zone.region(),
                        self.seccomp_action.clone(),
                        node_id,
                        virtio_mem_zone.hotplugged_size(),
                        virtio_mem_zone.hugepages(),
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        virtio_mem_zone.blocks_state().clone(),
                        state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
                            .map_err(DeviceManagerError::RestoreGetState)?,
                    )
                    .map_err(DeviceManagerError::CreateVirtioMem)?,
                ));

                // Update the virtio-mem zone so that it has a handle onto the
                // virtio-mem device, which will be used for triggering a resize
                // if needed.
                virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));

                self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));

                devices.push(MetaVirtioDevice {
                    virtio_device: Arc::clone(&virtio_mem_device)
                        as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                    iommu: false,
                    id: memory_zone_id.clone(),
                    pci_segment: 0,
                    dma_handler: None,
                });

                // Fill the device tree with a new node. In case of restore, we
                // know there is nothing to do, so we can simply override the
                // existing entry.
                self.device_tree.lock().unwrap().insert(
                    memory_zone_id.clone(),
                    device_node!(memory_zone_id, virtio_mem_device),
                );
            }
        }

        Ok(devices)
    }

    /// Create the pvmemcontrol device: a PCI device plus its companion bus
    /// device, registered in the device tree under a fixed name.
    #[cfg(feature = "pvmemcontrol")]
    fn make_pvmemcontrol_device(
        &mut self,
    ) -> DeviceManagerResult<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )> {
        let id = String::from(PVMEMCONTROL_DEVICE_NAME);
        // Always placed on the default PCI segment.
        let pci_segment_id = 0x0_u16;

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        info!("Creating pvmemcontrol device: id = {}", id);
        let (pvmemcontrol_pci_device, pvmemcontrol_bus_device) =
            devices::pvmemcontrol::PvmemcontrolDevice::make_device(
                id.clone(),
                self.memory_manager.lock().unwrap().guest_memory(),
            );

        let pvmemcontrol_pci_device = Arc::new(Mutex::new(pvmemcontrol_pci_device));
        let pvmemcontrol_bus_device = Arc::new(pvmemcontrol_bus_device);

        let new_resources = self.add_pci_device(
            pvmemcontrol_bus_device.clone(),
            pvmemcontrol_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let mut node = device_node!(id, pvmemcontrol_pci_device);

        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = None;

        self.device_tree.lock().unwrap().insert(id, node);

        Ok((pvmemcontrol_bus_device, pvmemcontrol_pci_device))
    }

    /// Create the (at most one) virtio-balloon device when configured,
    /// keeping a handle on it for later resize operations.
    fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
            let id = String::from(BALLOON_DEVICE_NAME);
            info!("Creating virtio-balloon device: id = {}", id);

            let virtio_balloon_device = Arc::new(Mutex::new(
                virtio_devices::Balloon::new(
                    id.clone(),
                    balloon_config.size,
                    balloon_config.deflate_on_oom,
                    balloon_config.free_page_reporting,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBalloon)?,
            ));

            // Keep a handle for balloon resize requests.
            self.balloon = Some(virtio_balloon_device.clone());

            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_balloon_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_balloon_device));
        }

        Ok(devices)
    }

    /// Create the virtio-watchdog device when enabled in the VM config; it
    /// triggers the VM reset event on expiry.
    fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if !self.config.lock().unwrap().watchdog {
            return Ok(devices);
        }

        let id = String::from(WATCHDOG_DEVICE_NAME);
        info!("Creating virtio-watchdog device: id = {}", id);

        let virtio_watchdog_device = Arc::new(Mutex::new(
            virtio_devices::Watchdog::new(
                id.clone(),
                self.reset_evt.try_clone().unwrap(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
        ));
        devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_watchdog_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: false,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_watchdog_device));

        Ok(devices)
    }

    /// Create a single vDPA device from its config, including the DMA
    /// mapping handler the device requires.
    fn make_vdpa_device(
        &mut self,
        vdpa_cfg: &mut VdpaConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Reuse the configured id, or generate and persist a fresh one.
        let id = if let Some(id) = &vdpa_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
            vdpa_cfg.id = Some(id.clone());
            id
        };

        info!("Creating vDPA device: {:?}", vdpa_cfg);

        let device_path = vdpa_cfg
            .path
            .to_str()
            .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;

        let vdpa_device = Arc::new(Mutex::new(
            virtio_devices::Vdpa::new(
                id.clone(),
                device_path,
                self.memory_manager.lock().unwrap().guest_memory(),
                vdpa_cfg.num_queues as u16,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVdpa)?,
        ));

        // Create the DMA handler that is required by the vDPA device
        let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
            Arc::clone(&vdpa_device),
            Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
        ));

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vdpa_device));

        Ok(MetaVirtioDevice {
            virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vdpa_cfg.iommu,
            id,
            pci_segment: vdpa_cfg.pci_segment,
            dma_handler: Some(vdpa_mapping),
        })
    }

    /// Create all configured vDPA devices, writing any generated ids back
    /// into the config.
    fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add vdpa if required
        let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
        if let Some(vdpa_list_cfg) = &mut vdpa_devices {
            for vdpa_cfg in vdpa_list_cfg.iter_mut() {
                devices.push(self.make_vdpa_device(vdpa_cfg)?);
            }
        }
        self.config.lock().unwrap().vdpa = vdpa_devices;

        Ok(devices)
    }

    /// Generate the next unused device name for the given prefix, skipping
    /// names already present in the boot list or the device tree. The
    /// counter wraps; a full cycle without a free name is an error.
    fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
        let start_id = self.device_id_cnt;
        loop {
            // Generate the temporary name.
            let name = format!("{}{}", prefix, self.device_id_cnt);
            // Increment the counter.
            self.device_id_cnt += Wrapping(1);
            // Check if the name is already in use.
            if !self.boot_id_list.contains(&name)
                && !self.device_tree.lock().unwrap().contains_key(&name)
            {
                return Ok(name);
            }

            if self.device_id_cnt == start_id {
                // We went through a full loop and there's nothing else we can
                // do.
                break;
            }
        }
        Err(DeviceManagerError::NoAvailableDeviceName)
    }

    /// Attach a host device to the guest via VFIO, lazily creating the
    /// hypervisor passthrough device on first use.
    fn add_passthrough_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // If the passthrough device has not been created yet, it is created
        // here and stored in the DeviceManager structure for future needs.
        if self.passthrough_device.is_none() {
            self.passthrough_device = Some(
                self.address_manager
                    .vm
                    .create_passthrough_device()
                    .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
            );
        }

        self.add_vfio_device(device_cfg)
    }

    /// Build a new VFIO container from a duplicate of the passthrough
    /// device fd; fails if passthrough support was never set up.
    fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
        let passthrough_device = self
            .passthrough_device
            .as_ref()
            .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

        let dup = passthrough_device
            .try_clone()
            .map_err(DeviceManagerError::VfioCreate)?;

        Ok(Arc::new(
            VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
        ))
    }

    fn add_vfio_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // Reuse the configured id, or generate and persist a fresh one.
        let vfio_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
        device_cfg.id = Some(id.clone());
        id
    };

    let (pci_segment_id, pci_device_bdf, resources) =
        self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

    let mut needs_dma_mapping = false;

    // Here we create a new VFIO container for two reasons. Either this is
    // the first VFIO device, meaning we need a new VFIO container, which
    // will be shared with other VFIO devices. Or the new VFIO device is
    // attached to a vIOMMU, meaning we must create a dedicated VFIO
    // container. In the vIOMMU use case, we can't let all devices under
    // the same VFIO container since we couldn't map/unmap memory for each
    // device. That's simply because the map/unmap operations happen at the
    // VFIO container level.
    let vfio_container = if device_cfg.iommu {
        let vfio_container = self.create_vfio_container()?;

        let vfio_mapping = Arc::new(VfioDmaMapping::new(
            Arc::clone(&vfio_container),
            Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
            Arc::clone(&self.mmio_regions),
        ));

        // A vIOMMU-attached device requires the virtual IOMMU to exist so
        // it can own the external DMA mapping for this BDF.
        if let Some(iommu) = &self.iommu_device {
            iommu
                .lock()
                .unwrap()
                .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
        } else {
            return Err(DeviceManagerError::MissingVirtualIommu);
        }

        vfio_container
    } else if let Some(vfio_container) = &self.vfio_container {
        // Reuse the shared container created by an earlier VFIO device.
        Arc::clone(vfio_container)
    } else {
        // First non-vIOMMU VFIO device: create the shared container and
        // remember that guest memory must be mapped into it below.
        let vfio_container = self.create_vfio_container()?;
        needs_dma_mapping = true;
        self.vfio_container = Some(Arc::clone(&vfio_container));

        vfio_container
    };

    let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
        .map_err(DeviceManagerError::VfioCreate)?;

    if needs_dma_mapping {
        // Register DMA mapping in IOMMU.
        // Do not register virtio-mem regions, as they are handled directly by
        // virtio-mem device itself.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_container
                    .vfio_dma_map(
                        region.start_addr().raw_value(),
                        region.len(),
                        region.as_ptr() as u64,
                    )
                    .map_err(DeviceManagerError::VfioDmaMap)?;
            }
        }

        let vfio_mapping = Arc::new(VfioDmaMapping::new(
            Arc::clone(&vfio_container),
            Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
            Arc::clone(&self.mmio_regions),
        ));

        // Let virtio-mem devices keep the container's mappings in sync as
        // they plug/unplug memory.
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Container,
                    vfio_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }
    }

    // INTx support: one legacy IRQ group per device slot when a legacy
    // interrupt manager is available.
    let legacy_interrupt_group =
        if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
            Some(
                legacy_interrupt_manager
                    .create_group(LegacyIrqGroupConfig {
                        irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                            [pci_device_bdf.device() as usize]
                            as InterruptIndex,
                    })
                    .map_err(DeviceManagerError::CreateInterruptGroup)?,
            )
        } else {
            None
        };

    let memory_manager = self.memory_manager.clone();

    let vfio_pci_device = VfioPciDevice::new(
        vfio_name.clone(),
        &self.address_manager.vm,
        vfio_device,
        vfio_container,
        self.msi_interrupt_manager.clone(),
        legacy_interrupt_group,
        device_cfg.iommu,
        pci_device_bdf,
        Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
        vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
        device_cfg.x_nv_gpudirect_clique,
    )
    .map_err(DeviceManagerError::VfioPciCreate)?;

    let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

    let new_resources = self.add_pci_device(
        vfio_pci_device.clone(),
        vfio_pci_device.clone(),
        pci_segment_id,
        pci_device_bdf,
        resources,
    )?;

    // BARs are allocated by add_pci_device() above; only then can the
    // device's MMIO regions be mapped.
    vfio_pci_device
        .lock()
        .unwrap()
        .map_mmio_regions()
        .map_err(DeviceManagerError::VfioMapRegion)?;

    for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
        self.mmio_regions.lock().unwrap().push(mmio_region);
    }

    let mut node = device_node!(vfio_name, vfio_pci_device);

    // Update the device tree with correct resource information.
    node.resources = new_resources;
    node.pci_bdf = Some(pci_device_bdf);
    node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

    self.device_tree
        .lock()
        .unwrap()
        .insert(vfio_name.clone(), node);

    Ok((pci_device_bdf, vfio_name))
}

/// Allocate BARs for `pci_device`, add it to the PCI bus of `segment_id`,
/// register its BAR mappings on the I/O (x86_64) and MMIO buses, and
/// return the resulting list of `Resource`s describing the BARs.
fn add_pci_device(
    &mut self,
    bus_device: Arc<dyn BusDeviceSync>,
    pci_device: Arc<Mutex<dyn PciDevice>>,
    segment_id: u16,
    bdf: PciBdf,
    resources: Option<Vec<Resource>>,
) -> DeviceManagerResult<Vec<Resource>> {
    let bars = pci_device
        .lock()
        .unwrap()
        .allocate_bars(
            &self.address_manager.allocator,
            &mut self.pci_segments[segment_id as usize]
                .mem32_allocator
                .lock()
                .unwrap(),
            &mut self.pci_segments[segment_id as usize]
                .mem64_allocator
                .lock()
                .unwrap(),
            resources,
        )
        .map_err(DeviceManagerError::AllocateBars)?;

    let mut pci_bus = self.pci_segments[segment_id as usize]
        .pci_bus
        .lock()
        .unwrap();

    pci_bus
        .add_device(bdf.device() as u32, pci_device)
        .map_err(DeviceManagerError::AddPciDevice)?;

    self.bus_devices.push(Arc::clone(&bus_device));

    pci_bus
        .register_mapping(
            bus_device,
            #[cfg(target_arch = "x86_64")]
            self.address_manager.io_bus.as_ref(),
            self.address_manager.mmio_bus.as_ref(),
            bars.clone(),
        )
        .map_err(DeviceManagerError::AddPciDevice)?;
    // Convert the allocated BARs into Resource entries for the device tree.
    let mut new_resources = Vec::new();
    for bar in bars {
        new_resources.push(Resource::PciBar {
            index: bar.idx(),
            base: bar.addr(),
            size: bar.size(),
            type_: bar.region_type().into(),
            prefetchable: bar.prefetchable().into(),
        });
    }

    Ok(new_resources)
}

/// Create every passthrough device listed in the VM config and return the
/// BDFs of those attached to the virtual IOMMU.
fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
    let mut iommu_attached_device_ids = Vec::new();
    let mut devices = self.config.lock().unwrap().devices.clone();

    if let Some(device_list_cfg) = &mut devices {
        for device_cfg in device_list_cfg.iter_mut() {
            let (device_id, _) = self.add_passthrough_device(device_cfg)?;
            if device_cfg.iommu && self.iommu_device.is_some() {
                iommu_attached_device_ids.push(device_id);
            }
        }
    }

    // Update the list of devices
    self.config.lock().unwrap().devices = devices;

    Ok(iommu_attached_device_ids)
}

/// Create a vfio-user PCI device from its config: connect to the vfio-user
/// socket, set up interrupts and DMA mappings, and plug it on the PCI bus.
fn add_vfio_user_device(
    &mut self,
    device_cfg: &mut UserDeviceConfig,
) -> DeviceManagerResult<(PciBdf, String)> {
    // Use the configured id, or generate one and write it back.
    let vfio_user_name = if let Some(id) = &device_cfg.id {
        id.clone()
    } else {
        let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
        device_cfg.id = Some(id.clone());
        id
    };

    let (pci_segment_id, pci_device_bdf, resources) =
        self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;

    // INTx support: one legacy IRQ group per device slot when available.
    let legacy_interrupt_group =
        if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
            Some(
                legacy_interrupt_manager
                    .create_group(LegacyIrqGroupConfig {
                        irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                            [pci_device_bdf.device() as usize]
                            as InterruptIndex,
                    })
                    .map_err(DeviceManagerError::CreateInterruptGroup)?,
            )
        } else {
            None
        };

    let client = Arc::new(Mutex::new(
        vfio_user::Client::new(&device_cfg.socket)
            .map_err(DeviceManagerError::VfioUserCreateClient)?,
    ));

    let memory_manager = self.memory_manager.clone();

    let mut vfio_user_pci_device = VfioUserPciDevice::new(
        vfio_user_name.clone(),
        &self.address_manager.vm,
        client.clone(),
        self.msi_interrupt_manager.clone(),
        legacy_interrupt_group,
        pci_device_bdf,
        Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
        vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
    )
    .map_err(DeviceManagerError::VfioUserCreate)?;

    let memory = self.memory_manager.lock().unwrap().guest_memory();
    let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
    // Let virtio-mem devices keep this device's DMA mappings in sync as
    // guest memory is plugged/unplugged.
    for virtio_mem_device in self.virtio_mem_devices.iter() {
        virtio_mem_device
            .lock()
            .unwrap()
            .add_dma_mapping_handler(
                VirtioMemMappingSource::Device(pci_device_bdf.into()),
                vfio_user_mapping.clone(),
            )
            .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
    }

    // Map every existing guest memory region for DMA by the device.
    for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
        for region in zone.regions() {
            vfio_user_pci_device
                .dma_map(region)
                .map_err(DeviceManagerError::VfioUserDmaMap)?;
        }
    }

    let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

    let new_resources = self.add_pci_device(
        vfio_user_pci_device.clone(),
        vfio_user_pci_device.clone(),
        pci_segment_id,
        pci_device_bdf,
        resources,
    )?;

    // Note it is required to call 'add_pci_device()' in advance to have the list of
    // mmio regions provisioned correctly
    vfio_user_pci_device
        .lock()
        .unwrap()
        .map_mmio_regions()
        .map_err(DeviceManagerError::VfioUserMapRegion)?;

    let mut node = device_node!(vfio_user_name, vfio_user_pci_device);

    // Update the device tree with correct resource information.
    node.resources = new_resources;
    node.pci_bdf = Some(pci_device_bdf);
    node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

    self.device_tree
        .lock()
        .unwrap()
        .insert(vfio_user_name.clone(), node);

    Ok((pci_device_bdf, vfio_user_name))
}

/// Create every vfio-user device listed in the VM config, writing back any
/// generated ids. Always returns an empty BDF list.
fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
    let mut user_devices = self.config.lock().unwrap().user_devices.clone();

    if let Some(device_list_cfg) = &mut user_devices {
        for device_cfg in device_list_cfg.iter_mut() {
            let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
        }
    }

    // Update the list of devices
    self.config.lock().unwrap().user_devices = user_devices;

    Ok(vec![])
}

/// Wrap a virtio device into a virtio-pci transport device, allocate its
/// PCI resources, set up DMA/IOMMU plumbing and register ioeventfds.
/// Returns the BDF the device was placed at.
fn add_virtio_pci_device(
    &mut self,
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    iommu_mapping: &Option<Arc<IommuMapping>>,
    virtio_device_id: String,
    pci_segment_id: u16,
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
) -> DeviceManagerResult<PciBdf> {
    let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");

    // Add the new virtio-pci node to the device tree.
    let mut node = device_node!(id);
    node.children = vec![virtio_device_id.clone()];

    // NOTE: pci_resources() may override pci_segment_id when the device is
    // being restored from a snapshot.
    let (pci_segment_id, pci_device_bdf, resources) =
        self.pci_resources(&id, pci_segment_id)?;

    // Update the existing virtio node by setting the parent.
    if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
        node.parent = Some(id.clone());
    } else {
        return Err(DeviceManagerError::MissingNode);
    }

    // Allows support for one MSI-X vector per queue. It also adds 1
    // as we need to take into account the dedicated vector to notify
    // about a virtio config change.
    let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;

    // Create the AccessPlatform trait from the implementation IommuMapping.
    // This will provide address translation for any virtio device sitting
    // behind a vIOMMU.
    let mut access_platform: Option<Arc<dyn AccessPlatform>> = None;

    if let Some(mapping) = iommu_mapping {
        access_platform = Some(Arc::new(AccessPlatformMapping::new(
            pci_device_bdf.into(),
            mapping.clone(),
        )));
    }

    // If SEV-SNP is enabled create the AccessPlatform from SevSnpPageAccessProxy
    #[cfg(feature = "sev_snp")]
    if self.config.lock().unwrap().is_sev_snp_enabled() {
        access_platform = Some(Arc::new(SevSnpPageAccessProxy::new(
            self.address_manager.vm.clone(),
        )));
    }

    let memory = self.memory_manager.lock().unwrap().guest_memory();

    // Map DMA ranges if a DMA handler is available and if the device is
    // not attached to a virtual IOMMU.
    if let Some(dma_handler) = &dma_handler {
        if iommu_mapping.is_some() {
            // Behind a vIOMMU: delegate the DMA mapping to the IOMMU device.
            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }
        } else {
            // Let every virtio-mem device handle the DMA map/unmap through the
            // DMA handler provided.
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Device(pci_device_bdf.into()),
                        dma_handler.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }

            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem devices.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    let gpa = region.start_addr().0;
                    let size = region.len();
                    dma_handler
                        .map(gpa, gpa, size)
                        .map_err(DeviceManagerError::VirtioDmaMap)?;
                }
            }
        }
    }

    let device_type = virtio_device.lock().unwrap().device_type();
    let virtio_pci_device = Arc::new(Mutex::new(
        VirtioPciDevice::new(
            id.clone(),
            memory,
            virtio_device,
            msix_num,
            access_platform,
            &self.msi_interrupt_manager,
            pci_device_bdf.into(),
            self.activate_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            // All device types *except* virtio block devices should be allocated a 64-bit bar
            // The block devices should be given a 32-bit BAR so that they are easily accessible
            // to firmware without requiring excessive identity mapping.
            // The exception being if not on the default PCI segment.
            pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
            dma_handler,
            self.pending_activations.clone(),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
        )
        .map_err(DeviceManagerError::VirtioDevice)?,
    ));

    let new_resources = self.add_pci_device(
        virtio_pci_device.clone(),
        virtio_pci_device.clone(),
        pci_segment_id,
        pci_device_bdf,
        resources,
    )?;

    // Register one ioeventfd per queue-notify address inside the config BAR
    // so queue kicks bypass a VM exit to userspace dispatch.
    let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
    for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
        let io_addr = IoEventAddress::Mmio(addr);
        self.address_manager
            .vm
            .register_ioevent(event, &io_addr, None)
            .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
    }

    // Update the device tree with correct resource information.
    node.resources = new_resources;
    node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
    node.pci_bdf = Some(pci_device_bdf);
    node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
    self.device_tree.lock().unwrap().insert(id, node);

    Ok(pci_device_bdf)
}

/// Create the pvpanic PCI device on segment 0 and register it in the
/// device tree. Returns the created device.
fn add_pvpanic_device(
    &mut self,
) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
    let id = String::from(PVPANIC_DEVICE_NAME);
    let pci_segment_id = 0x0_u16;

    info!("Creating pvpanic device {}", id);

    // NOTE: pci_resources() may shadow pci_segment_id on restore.
    let (pci_segment_id, pci_device_bdf, resources) =
        self.pci_resources(&id, pci_segment_id)?;

    let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

    let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
        .map_err(DeviceManagerError::PvPanicCreate)?;

    let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));

    let new_resources = self.add_pci_device(
        pvpanic_device.clone(),
        pvpanic_device.clone(),
        pci_segment_id,
        pci_device_bdf,
        resources,
    )?;

    let mut node = device_node!(id, pvpanic_device);

    node.resources = new_resources;
    node.pci_bdf = Some(pci_device_bdf);
    node.pci_device_handle = None;

    self.device_tree.lock().unwrap().insert(id, node);

    Ok(Some(pvpanic_device))
}

/// Resolve the PCI placement for device `id`: reuse the BDF and resources
/// stored in the device tree when restoring, otherwise allocate the next
/// free BDF on the requested segment.
fn pci_resources(
    &self,
    id: &str,
    pci_segment_id: u16,
) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
    // Look for the id in the device tree. If it can be found, that means
    // the device is being restored, otherwise it's created from scratch.
    Ok(
        if let Some(node) = self.device_tree.lock().unwrap().get(id) {
            info!("Restoring virtio-pci {} resources", id);
            let pci_device_bdf: PciBdf = node
                .pci_bdf
                .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
            let pci_segment_id = pci_device_bdf.segment();

            // Reserve the restored device slot on the bus.
            self.pci_segments[pci_segment_id as usize]
                .pci_bus
                .lock()
                .unwrap()
                .get_device_id(pci_device_bdf.device() as usize)
                .map_err(DeviceManagerError::GetPciDeviceId)?;

            (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
        } else {
            let pci_device_bdf =
                self.pci_segments[pci_segment_id as usize].next_device_bdf()?;

            (pci_segment_id, pci_device_bdf, None)
        },
    )
}

/// Port I/O bus (x86_64 only).
#[cfg(target_arch = "x86_64")]
pub fn io_bus(&self) -> &Arc<Bus> {
    &self.address_manager.io_bus
}

/// MMIO bus.
pub fn mmio_bus(&self) -> &Arc<Bus> {
    &self.address_manager.mmio_bus
}

/// System resource allocator.
pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
    &self.address_manager.allocator
}

/// The interrupt controller, if one has been created.
pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
    self.interrupt_controller
        .as_ref()
        .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
}

/// All PCI segments managed by this DeviceManager.
pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
    &self.pci_segments
}

/// Extra kernel command line entries collected while creating devices
/// (aarch64 only).
#[cfg(target_arch = "aarch64")]
pub fn cmdline_additions(&self) -> &[String] {
    self.cmdline_additions.as_slice()
}

/// Propagate a newly hotplugged memory region to every virtio device,
/// their DMA handlers, the shared VFIO container and vfio-user devices.
pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
    for handle in self.virtio_devices.iter() {
        handle
            .virtio_device
            .lock()
            .unwrap()
            .add_memory_region(new_region)
            .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;

        // Only devices not behind the vIOMMU get the identity DMA map.
        if let Some(dma_handler) = &handle.dma_handler {
            if !handle.iommu {
                let gpa = new_region.start_addr().0;
                let size = new_region.len();
                dma_handler
                    .map(gpa, gpa, size)
                    .map_err(DeviceManagerError::VirtioDmaMap)?;
            }
        }
    }

    // Take care of updating the memory for VFIO PCI devices.
    if let Some(vfio_container) = &self.vfio_container {
        vfio_container
            .vfio_dma_map(
                new_region.start_addr().raw_value(),
                new_region.len(),
                new_region.as_ptr() as u64,
            )
            .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
    }

    // Take care of updating the memory for vfio-user devices.
    {
        let device_tree = self.device_tree.lock().unwrap();
        for pci_device_node in device_tree.pci_devices() {
            if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
                .pci_device_handle
                .as_ref()
                .ok_or(DeviceManagerError::MissingPciDevice)?
            {
                vfio_user_pci_device
                    .lock()
                    .unwrap()
                    .dma_map(new_region)
                    .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
            }
        }
    }

    Ok(())
}

/// Drain the list of pending activations and activate each virtio device.
pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
    for mut activator in self.pending_activations.lock().unwrap().drain(..) {
        activator
            .activate()
            .map_err(DeviceManagerError::VirtioActivate)?;
    }
    Ok(())
}

/// Forward an ACPI hotplug notification to the GED device.
///
/// NOTE(review): unwraps the GED device — callers are expected to only
/// invoke this once the GED notification device exists.
pub fn notify_hotplug(
    &self,
    _notification_type: AcpiNotificationFlags,
) -> DeviceManagerResult<()> {
    return self
        .ged_notification_device
        .as_ref()
        .unwrap()
        .lock()
        .unwrap()
        .notify(_notification_type)
        .map_err(DeviceManagerError::HotPlugNotification);
}

/// Hotplug a passthrough (VFIO) device and mark it pending-up in the PCIU
/// bitmap so the guest picks it up on the next ACPI notification.
pub fn add_device(
    &mut self,
    device_cfg: &mut DeviceConfig,
) -> DeviceManagerResult<PciDeviceInfo> {
    self.validate_identifier(&device_cfg.id)?;

    if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
        return Err(DeviceManagerError::InvalidIommuHotplug);
    }

    let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;

    // Update the PCIU bitmap
    self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

    Ok(PciDeviceInfo {
        id: device_name,
        bdf,
    })
}

/// Hotplug a vfio-user device and mark it pending-up in the PCIU bitmap.
pub fn add_user_device(
    &mut self,
    device_cfg: &mut UserDeviceConfig,
) -> DeviceManagerResult<PciDeviceInfo> {
    self.validate_identifier(&device_cfg.id)?;

    let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;

    // Update the PCIU bitmap
    self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

    Ok(PciDeviceInfo {
        id: device_name,
        bdf,
    })
}

/// Request removal of the device identified by `id`: validate that it is
/// removable and mark it pending-down in the PCID bitmap. The actual
/// teardown happens later in eject_device().
pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
    // The node can be directly a PCI node in case the 'id' refers to a
    // VFIO device or a virtio-pci one.
    // In case the 'id' refers to a virtio device, we must find the PCI
    // node by looking at the parent.
    let device_tree = self.device_tree.lock().unwrap();
    let node = device_tree
        .get(&id)
        .ok_or(DeviceManagerError::UnknownDeviceId(id))?;

    let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
        node
    } else {
        // Virtio device: its PCI information lives on the parent
        // virtio-pci node.
        let parent = node
            .parent
            .as_ref()
            .ok_or(DeviceManagerError::MissingNode)?;
        device_tree
            .get(parent)
            .ok_or(DeviceManagerError::MissingNode)?
    };

    let pci_device_bdf: PciBdf = pci_device_node
        .pci_bdf
        .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
    let pci_segment_id = pci_device_bdf.segment();

    let pci_device_handle = pci_device_node
        .pci_device_handle
        .as_ref()
        .ok_or(DeviceManagerError::MissingPciDevice)?;
    // Only a subset of virtio device types supports removal.
    #[allow(irrefutable_let_patterns)]
    if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
        let device_type = VirtioDeviceType::from(
            virtio_pci_device
                .lock()
                .unwrap()
                .virtio_device()
                .lock()
                .unwrap()
                .device_type(),
        );
        match device_type {
            VirtioDeviceType::Net
            | VirtioDeviceType::Block
            | VirtioDeviceType::Pmem
            | VirtioDeviceType::Fs
            | VirtioDeviceType::Vsock => {}
            _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
        }
    }

    // Update the PCID bitmap
    self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();

    Ok(())
}

/// Tear down the device at (`pci_segment_id`, `device_id`) after the guest
/// acknowledged the ejection: return its slot to the bus, unmap DMA,
/// free BARs and drop it from every bus and internal list.
pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
    info!(
        "Ejecting device_id = {} on segment_id={}",
        device_id, pci_segment_id
    );

    // Convert the device ID into the corresponding b/d/f.
    let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);

    // Give the PCI device ID back to the PCI bus.
    self.pci_segments[pci_segment_id as usize]
        .pci_bus
        .lock()
        .unwrap()
        .put_device_id(device_id as usize)
        .map_err(DeviceManagerError::PutPciDeviceId)?;

    // Remove the device from the device tree along with its children.
    let mut device_tree = self.device_tree.lock().unwrap();
    let pci_device_node = device_tree
        .remove_node_by_pci_bdf(pci_device_bdf)
        .ok_or(DeviceManagerError::MissingPciDevice)?;

    // For VFIO and vfio-user the PCI device id is the id.
    // For virtio we overwrite it later as we want the id of the
    // underlying device.
    let mut id = pci_device_node.id;
    let pci_device_handle = pci_device_node
        .pci_device_handle
        .ok_or(DeviceManagerError::MissingPciDevice)?;
    if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
        // The virtio-pci device has a single child
        if !pci_device_node.children.is_empty() {
            assert_eq!(pci_device_node.children.len(), 1);
            let child_id = &pci_device_node.children[0];
            id.clone_from(child_id);
        }
    }
    for child in pci_device_node.children.iter() {
        device_tree.remove(child);
    }

    // Was this device attached to the virtual IOMMU?
    let mut iommu_attached = false;
    if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
        if iommu_attached_devices.contains(&pci_device_bdf) {
            iommu_attached = true;
        }
    }

    // Per-transport teardown. Each arm yields the device's PciDevice and
    // BusDeviceSync handles, the underlying virtio device (if any), and
    // whether virtio-mem DMA handlers must be dropped afterwards.
    let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
        // No need to remove any virtio-mem mapping here as the container outlives all devices
        PciDeviceHandle::Vfio(vfio_pci_device) => {
            for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
                self.mmio_regions
                    .lock()
                    .unwrap()
                    .retain(|x| x.start != mmio_region.start)
            }

            (
                Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                Arc::clone(&vfio_pci_device) as Arc<dyn BusDeviceSync>,
                None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                false,
            )
        }
        PciDeviceHandle::Virtio(virtio_pci_device) => {
            let dev = virtio_pci_device.lock().unwrap();
            // Unregister the per-queue ioeventfds set up at creation time.
            let bar_addr = dev.config_bar_addr();
            for (event, addr) in dev.ioeventfds(bar_addr) {
                let io_addr = IoEventAddress::Mmio(addr);
                self.address_manager
                    .vm
                    .unregister_ioevent(event, &io_addr)
                    .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
            }

            // Undo the identity DMA mappings created for devices not
            // behind the vIOMMU.
            if let Some(dma_handler) = dev.dma_handler() {
                if !iommu_attached {
                    for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                        for region in zone.regions() {
                            let iova = region.start_addr().0;
                            let size = region.len();
                            dma_handler
                                .unmap(iova, size)
                                .map_err(DeviceManagerError::VirtioDmaUnmap)?;
                        }
                    }
                }
            }

            (
                Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                Arc::clone(&virtio_pci_device) as Arc<dyn BusDeviceSync>,
                Some(dev.virtio_device()),
                dev.dma_handler().is_some() && !iommu_attached,
            )
        }
        PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
            let mut dev = vfio_user_pci_device.lock().unwrap();
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    dev.dma_unmap(region)
                        .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
                }
            }

            (
                Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
                Arc::clone(&vfio_user_pci_device) as Arc<dyn BusDeviceSync>,
                None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                true,
            )
        }
    };

    if remove_dma_handler {
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
                    pci_device_bdf.into(),
                ))
                .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
        }
    }

    // Free the allocated BARs
    pci_device
        .lock()
        .unwrap()
        .free_bars(
            &mut self.address_manager.allocator.lock().unwrap(),
            &mut self.pci_segments[pci_segment_id as usize]
                .mem32_allocator
                .lock()
                .unwrap(),
            &mut self.pci_segments[pci_segment_id as usize]
                .mem64_allocator
                .lock()
                .unwrap(),
        )
        .map_err(DeviceManagerError::FreePciBars)?;

    // Remove the device from the PCI bus
    self.pci_segments[pci_segment_id as usize]
        .pci_bus
        .lock()
        .unwrap()
        .remove_by_device(&pci_device)
        .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

    #[cfg(target_arch = "x86_64")]
    // Remove the device from the IO bus
    self.io_bus()
        .remove_by_device(&bus_device)
        .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

    // Remove the device from the MMIO bus
    self.mmio_bus()
        .remove_by_device(&bus_device)
        .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

    // Remove the device from the list of BusDevice held by the
    // DeviceManager.
    self.bus_devices
        .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

    // Shutdown and remove the underlying virtio-device if present
    if let Some(virtio_device) = virtio_device {
        for mapping in virtio_device.lock().unwrap().userspace_mappings() {
            self.memory_manager
                .lock()
                .unwrap()
                .remove_userspace_mapping(
                    mapping.addr.raw_value(),
                    mapping.len,
                    mapping.host_addr,
                    mapping.mergeable,
                    mapping.mem_slot,
                )
                .map_err(DeviceManagerError::MemoryManager)?;
        }

        virtio_device.lock().unwrap().shutdown();

        self.virtio_devices
            .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
    }

    event!(
        "vm",
        "device-removed",
        "id",
        &id,
        "bdf",
        pci_device_bdf.to_string()
    );

    // At this point, the device has been removed from all the list and
    // buses where it was stored. At the end of this function, after
    // any_device, bus_device and pci_device are released, the actual
    // device will be dropped.
    Ok(())
}

/// Hotplug a virtio device: track it, wrap it into a virtio-pci transport
/// and mark it pending-up in the PCIU bitmap.
fn hotplug_virtio_pci_device(
    &mut self,
    handle: MetaVirtioDevice,
) -> DeviceManagerResult<PciDeviceInfo> {
    // Add the virtio device to the device manager list. This is important
    // as the list is used to notify virtio devices about memory updates
    // for instance.
        self.virtio_devices.push(handle.clone());

        // Only route the device through the virtual IOMMU when the handle
        // asks for it.
        let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
            self.iommu_mapping.clone()
        } else {
            None
        };

        let bdf = self.add_virtio_pci_device(
            handle.virtio_device,
            &mapping,
            handle.id.clone(),
            handle.pci_segment,
            handle.dma_handler,
        )?;

        // Update the PCIU bitmap: flag this slot as "up" so the guest learns
        // about the new device through the ACPI hotplug register.
        self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo { id: handle.id, bdf })
    }

    // Returns true when the given PCI segment is listed in
    // platform.iommu_segments, i.e. placed behind the virtual IOMMU.
    fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
        self.config
            .lock()
            .as_ref()
            .unwrap()
            .platform
            .as_ref()
            .map(|pc| {
                pc.iommu_segments
                    .as_ref()
                    .map(|v| v.contains(&pci_segment_id))
                    .unwrap_or_default()
            })
            .unwrap_or_default()
    }

    // Hotplug a virtio-block device described by `disk_cfg`.
    pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&disk_cfg.id)?;

        // iommu=true is only valid on a segment behind the vIOMMU.
        if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_block_device(disk_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    // Hotplug a virtio-fs device (no iommu option for this device type).
    pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&fs_cfg.id)?;

        let device = self.make_virtio_fs_device(fs_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    // Hotplug a virtio-pmem device.
    pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&pmem_cfg.id)?;

        if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_pmem_device(pmem_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    // Hotplug a virtio-net device.
    pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&net_cfg.id)?;

        if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_net_device(net_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    // Hotplug a vDPA device.
    pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vdpa_cfg.id)?;

        if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_vdpa_device(vdpa_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    // Hotplug a virtio-vsock device.
    pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vsock_cfg.id)?;

        if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_vsock_device(vsock_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    // Collects per-device counters from every virtio device that exposes
    // them, keyed by the device identifier.
    pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
        let mut counters = HashMap::new();

        for handle in &self.virtio_devices {
            let virtio_device = handle.virtio_device.lock().unwrap();
            if let Some(device_counters) = virtio_device.counters() {
                counters.insert(handle.id.clone(), device_counters.clone());
            }
        }

        counters
    }

    // Asks the virtio-balloon device (if any) to resize to `size` bytes.
    // Errors with MissingVirtioBalloon when no balloon was configured.
    pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
        if let Some(balloon) = &self.balloon {
            return balloon
                .lock()
                .unwrap()
                .resize(size)
                .map_err(DeviceManagerError::VirtioBalloonResize);
        }

        warn!("No balloon setup: Can't resize the balloon");
        Err(DeviceManagerError::MissingVirtioBalloon)
    }

    // Current actual balloon size in bytes; 0 when no balloon is present.
    pub fn balloon_size(&self) -> u64 {
        if let Some(balloon) = &self.balloon {
            return balloon.lock().unwrap().get_actual();
        }

        0
    }

    pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
        self.device_tree.clone()
    }

    #[cfg(target_arch = "x86_64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // Forward the power-button press to the guest through the ACPI GED
        // notification device.
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    #[cfg(target_arch = "aarch64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // There are two use cases:
        // 1. Users will use direct kernel boot with device tree.
        // 2. Users will use ACPI+UEFI boot.

        // Trigger a GPIO pin 3 event to satisfy use case 1.
        self.gpio_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .trigger_key(3)
            .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
        // Trigger a GED power button event to satisfy use case 2.
4462 return self 4463 .ged_notification_device 4464 .as_ref() 4465 .unwrap() 4466 .lock() 4467 .unwrap() 4468 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4469 .map_err(DeviceManagerError::PowerButtonNotification); 4470 } 4471 4472 pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> { 4473 &self.iommu_attached_devices 4474 } 4475 4476 fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> { 4477 if let Some(id) = id { 4478 if id.starts_with("__") { 4479 return Err(DeviceManagerError::InvalidIdentifier(id.clone())); 4480 } 4481 4482 if self.device_tree.lock().unwrap().contains_key(id) { 4483 return Err(DeviceManagerError::IdentifierNotUnique(id.clone())); 4484 } 4485 } 4486 4487 Ok(()) 4488 } 4489 4490 pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses { 4491 &self.acpi_platform_addresses 4492 } 4493 } 4494 4495 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> { 4496 for (numa_node_id, numa_node) in numa_nodes.iter() { 4497 if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) { 4498 return Some(*numa_node_id); 4499 } 4500 } 4501 4502 None 4503 } 4504 4505 fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 { 4506 for (numa_node_id, numa_node) in numa_nodes.iter() { 4507 if numa_node.pci_segments.contains(&pci_segment_id) { 4508 return *numa_node_id; 4509 } 4510 } 4511 4512 0 4513 } 4514 4515 struct TpmDevice {} 4516 4517 impl Aml for TpmDevice { 4518 fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) { 4519 aml::Device::new( 4520 "TPM2".into(), 4521 vec![ 4522 &aml::Name::new("_HID".into(), &"MSFT0101"), 4523 &aml::Name::new("_STA".into(), &(0xF_usize)), 4524 &aml::Name::new( 4525 "_CRS".into(), 4526 &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new( 4527 true, 4528 layout::TPM_START.0 as u32, 4529 layout::TPM_SIZE as u32, 4530 )]), 4531 ), 4532 ], 4533 ) 4534 .to_aml_bytes(sink) 4535 } 
}

// Emits the DSDT AML fragments owned by the DeviceManager: the PCI hotplug
// controller, every PCI segment, the motherboard resources, the serial
// device, power-related objects, the optional TPM and the GED device.
impl Aml for DeviceManager {
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // One PCNT method call per PCI segment; these are chained into the
        // PSCN (scan) method below.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                // BLCK serializes guest accesses to the hotplug registers.
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                // The 32-bit fields below line up with the *_FIELD_OFFSET
                // constants handled by the BusDevice implementation.
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                // PCEJ(slot, segment): ejects a device by writing its slot
                // bit to B0EJ on the selected segment.
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write PCI bus number (in first argument) to I/O port via field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        // Motherboard resources: one fixed MMIO window per segment covering
        // its PCI MMIO config space.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        // PNP0501 = 16550-compatible UART; ARMH0011 = PL011.
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        // S5 sleep state (soft-off).
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        // ACPI power button device.
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}

impl Pausable for DeviceManager {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        // Pause every migratable device registered in the device tree.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        // Resume every migratable device registered in the device tree.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        // Start from the manager's own state, then attach each device's
        // snapshot under its own id.
        let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let mut migratable = migratable.lock().unwrap();
                snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
            }
        }

        Ok(snapshot)
    }
}

impl Transportable for DeviceManager {}

// Each Migratable hook simply fans out to every migratable device in the
// device tree.
impl Migratable for DeviceManager {
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        // Merge the per-device dirty range tables into a single table.
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_migration()?;
            }
        }
        Ok(())
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}

// Register layout of the PCI hotplug controller MMIO window; must match the
// PCIU/PCID/B0EJ/PSEG AML field declarations emitted above.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;

impl BusDevice for DeviceManager {
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                // Report slots that came up on the selected segment;
                // reading is destructive (read-to-clear).
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_up
                        .to_le_bytes(),
                );
                // Clear the PCIU bitmap
                self.pci_segments[self.selected_segment].pci_devices_up = 0;
            }
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                // Report slots that went down; also read-to-clear.
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_down
                        .to_le_bytes(),
                );
                // Clear the PCID bitmap
                self.pci_segments[self.selected_segment].pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty bitmap since the eject is always
                // taken care of right away during a write access.
                data.fill(0);
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        )
    }

    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
        match offset {
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let mut slot_bitmap = u32::from_le_bytes(data_array);

                // Eject every device whose slot bit is set, lowest bit first.
                while slot_bitmap > 0 {
                    let slot_id = slot_bitmap.trailing_zeros();
                    if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
                        error!("Failed ejecting device {}: {:?}", slot_id, e);
                    }
                    slot_bitmap &= !(1 << slot_id);
                }
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let selected_segment = u32::from_le_bytes(data_array) as usize;
                // Ignore (with an error log) out-of-range segment selections.
                if selected_segment >= self.pci_segments.len() {
                    error!(
                        "Segment selection out of range: {} >= {}",
                        selected_segment,
                        self.pci_segments.len()
                    );
                    return None;
                }
                self.selected_segment = selected_segment;
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        );

        None
    }
}

impl Drop for DeviceManager {
    fn drop(&mut self) {
        // Wake up the DeviceManager threads (mainly virtio
device workers), 4921 // to avoid deadlock on waiting for paused/parked worker threads. 4922 if let Err(e) = self.resume() { 4923 error!("Error resuming DeviceManager: {:?}", e); 4924 } 4925 4926 for handle in self.virtio_devices.drain(..) { 4927 handle.virtio_device.lock().unwrap().shutdown(); 4928 } 4929 4930 if let Some(termios) = *self.original_termios_opt.lock().unwrap() { 4931 // SAFETY: FFI call 4932 let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) }; 4933 } 4934 } 4935 } 4936 4937 #[cfg(test)] 4938 mod tests { 4939 use super::*; 4940 4941 #[test] 4942 fn test_create_mmio_allocators() { 4943 let res = create_mmio_allocators(0x100000, 0x400000, 1, vec![1], 4 << 10); 4944 assert_eq!(res.len(), 1); 4945 assert_eq!( 4946 res[0].lock().unwrap().base(), 4947 vm_memory::GuestAddress(0x100000) 4948 ); 4949 assert_eq!( 4950 res[0].lock().unwrap().end(), 4951 vm_memory::GuestAddress(0x3fffff) 4952 ); 4953 4954 let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![1, 1], 4 << 10); 4955 assert_eq!(res.len(), 2); 4956 assert_eq!( 4957 res[0].lock().unwrap().base(), 4958 vm_memory::GuestAddress(0x100000) 4959 ); 4960 assert_eq!( 4961 res[0].lock().unwrap().end(), 4962 vm_memory::GuestAddress(0x27ffff) 4963 ); 4964 assert_eq!( 4965 res[1].lock().unwrap().base(), 4966 vm_memory::GuestAddress(0x280000) 4967 ); 4968 assert_eq!( 4969 res[1].lock().unwrap().end(), 4970 vm_memory::GuestAddress(0x3fffff) 4971 ); 4972 4973 let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![2, 1], 4 << 10); 4974 assert_eq!(res.len(), 2); 4975 assert_eq!( 4976 res[0].lock().unwrap().base(), 4977 vm_memory::GuestAddress(0x100000) 4978 ); 4979 assert_eq!( 4980 res[0].lock().unwrap().end(), 4981 vm_memory::GuestAddress(0x2fffff) 4982 ); 4983 assert_eq!( 4984 res[1].lock().unwrap().base(), 4985 vm_memory::GuestAddress(0x300000) 4986 ); 4987 assert_eq!( 4988 res[1].lock().unwrap().end(), 4989 vm_memory::GuestAddress(0x3fffff) 4990 ); 4991 } 4992 } 4993