1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use std::collections::{BTreeMap, BTreeSet, HashMap}; 13 use std::fs::{File, OpenOptions}; 14 use std::io::{self, stdout, IsTerminal, Seek, SeekFrom}; 15 use std::num::Wrapping; 16 use std::os::unix::fs::OpenOptionsExt; 17 use std::os::unix::io::{AsRawFd, FromRawFd}; 18 use std::path::PathBuf; 19 use std::result; 20 use std::sync::{Arc, Mutex}; 21 #[cfg(not(target_arch = "riscv64"))] 22 use std::time::Instant; 23 24 use acpi_tables::sdt::GenericAddress; 25 #[cfg(not(target_arch = "riscv64"))] 26 use acpi_tables::{aml, Aml}; 27 #[cfg(not(target_arch = "riscv64"))] 28 use anyhow::anyhow; 29 #[cfg(target_arch = "x86_64")] 30 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 31 use arch::{layout, NumaNodes}; 32 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] 33 use arch::{DeviceType, MmioDeviceInfo}; 34 use block::async_io::DiskFile; 35 use block::fixed_vhd_sync::FixedVhdDiskSync; 36 use block::qcow_sync::QcowDiskSync; 37 use block::raw_async_aio::RawFileDiskAio; 38 use block::raw_sync::RawFileDiskSync; 39 use block::vhdx_sync::VhdxDiskSync; 40 use block::{ 41 block_aio_is_supported, block_io_uring_is_supported, detect_image_type, qcow, vhdx, ImageType, 42 }; 43 #[cfg(feature = "io_uring")] 44 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; 45 #[cfg(target_arch = "riscv64")] 46 use devices::aia; 47 #[cfg(target_arch = "x86_64")] 48 use devices::debug_console; 49 #[cfg(target_arch = "x86_64")] 50 use devices::debug_console::DebugConsole; 51 #[cfg(target_arch = "aarch64")] 52 use devices::gic; 53 use 
devices::interrupt_controller::InterruptController; 54 #[cfg(target_arch = "x86_64")] 55 use devices::ioapic; 56 #[cfg(target_arch = "aarch64")] 57 use devices::legacy::Pl011; 58 #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))] 59 use devices::legacy::Serial; 60 #[cfg(feature = "pvmemcontrol")] 61 use devices::pvmemcontrol::{PvmemcontrolBusDevice, PvmemcontrolPciDevice}; 62 use devices::{interrupt_controller, AcpiNotificationFlags}; 63 #[cfg(target_arch = "aarch64")] 64 use hypervisor::arch::aarch64::regs::AARCH64_PMU_IRQ; 65 use hypervisor::IoEventAddress; 66 use libc::{ 67 tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE, 68 TCSANOW, 69 }; 70 use pci::{ 71 DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping, 72 VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError, 73 }; 74 use rate_limiter::group::RateLimiterGroup; 75 use seccompiler::SeccompAction; 76 use serde::{Deserialize, Serialize}; 77 use thiserror::Error; 78 use tracer::trace_scoped; 79 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 80 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator, VirtioTransport}; 81 use virtio_devices::vhost_user::VhostUserConfig; 82 use virtio_devices::{ 83 AccessPlatformMapping, ActivateError, Block, Endpoint, IommuMapping, VdpaDmaMapping, 84 VirtioMemMappingSource, 85 }; 86 use vm_allocator::{AddressAllocator, SystemAllocator}; 87 use vm_device::dma_mapping::ExternalDmaMapping; 88 use vm_device::interrupt::{ 89 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 90 }; 91 use vm_device::{Bus, BusDevice, BusDeviceSync, Resource}; 92 use vm_memory::guest_memory::FileOffset; 93 use vm_memory::{Address, GuestAddress, GuestMemoryRegion, GuestUsize, MmapRegion}; 94 #[cfg(target_arch = "x86_64")] 95 use vm_memory::{GuestAddressSpace, GuestMemory}; 96 use vm_migration::protocol::MemoryRangeTable; 97 use vm_migration::{ 
98 snapshot_from_id, state_from_id, Migratable, MigratableError, Pausable, Snapshot, SnapshotData, 99 Snapshottable, Transportable, 100 }; 101 use vm_virtio::{AccessPlatform, VirtioDeviceType}; 102 use vmm_sys_util::eventfd::EventFd; 103 104 use crate::console_devices::{ConsoleDeviceError, ConsoleInfo, ConsoleOutput}; 105 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; 106 use crate::device_tree::{DeviceNode, DeviceTree}; 107 use crate::interrupt::{LegacyUserspaceInterruptManager, MsiInterruptManager}; 108 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 109 use crate::pci_segment::PciSegment; 110 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 111 use crate::vm_config::{ 112 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 113 VdpaConfig, VhostMode, VmConfig, VsockConfig, DEFAULT_IOMMU_ADDRESS_WIDTH_BITS, 114 DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT, 115 }; 116 use crate::{device_node, GuestRegionMmap, PciDeviceInfo, DEVICE_MANAGER_SNAPSHOT_ID}; 117 118 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] 119 const MMIO_LEN: u64 = 0x1000; 120 121 // Singleton devices / devices the user cannot name 122 #[cfg(target_arch = "x86_64")] 123 const IOAPIC_DEVICE_NAME: &str = "__ioapic"; 124 const SERIAL_DEVICE_NAME: &str = "__serial"; 125 #[cfg(target_arch = "x86_64")] 126 const DEBUGCON_DEVICE_NAME: &str = "__debug_console"; 127 #[cfg(target_arch = "aarch64")] 128 const GPIO_DEVICE_NAME: &str = "__gpio"; 129 const RNG_DEVICE_NAME: &str = "__rng"; 130 const IOMMU_DEVICE_NAME: &str = "__iommu"; 131 #[cfg(feature = "pvmemcontrol")] 132 const PVMEMCONTROL_DEVICE_NAME: &str = "__pvmemcontrol"; 133 const BALLOON_DEVICE_NAME: &str = "__balloon"; 134 const CONSOLE_DEVICE_NAME: &str = "__console"; 135 const PVPANIC_DEVICE_NAME: &str = "__pvpanic"; 136 137 // Devices that the user may name and for which we generate 138 // identifiers if the user 
doesn't give one 139 const DISK_DEVICE_NAME_PREFIX: &str = "_disk"; 140 const FS_DEVICE_NAME_PREFIX: &str = "_fs"; 141 const NET_DEVICE_NAME_PREFIX: &str = "_net"; 142 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem"; 143 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa"; 144 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock"; 145 const WATCHDOG_DEVICE_NAME: &str = "__watchdog"; 146 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio"; 147 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user"; 148 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci"; 149 150 /// Errors associated with device manager 151 #[derive(Error, Debug)] 152 pub enum DeviceManagerError { 153 /// Cannot create EventFd. 154 #[error("Cannot create EventFd")] 155 EventFd(#[source] io::Error), 156 157 /// Cannot open disk path 158 #[error("Cannot open disk path")] 159 Disk(#[source] io::Error), 160 161 /// Cannot create vhost-user-net device 162 #[error("Cannot create vhost-user-net device")] 163 CreateVhostUserNet(#[source] virtio_devices::vhost_user::Error), 164 165 /// Cannot create virtio-blk device 166 #[error("Cannot create virtio-blk device")] 167 CreateVirtioBlock(#[source] io::Error), 168 169 /// Cannot create virtio-net device 170 #[error("Cannot create virtio-net device")] 171 CreateVirtioNet(#[source] virtio_devices::net::Error), 172 173 /// Cannot create virtio-console device 174 #[error("Cannot create virtio-console device")] 175 CreateVirtioConsole(#[source] io::Error), 176 177 /// Cannot create virtio-rng device 178 #[error("Cannot create virtio-rng device")] 179 CreateVirtioRng(#[source] io::Error), 180 181 /// Cannot create virtio-fs device 182 #[error("Cannot create virtio-fs device")] 183 CreateVirtioFs(#[source] virtio_devices::vhost_user::Error), 184 185 /// Virtio-fs device was created without a socket. 
186 #[error("Virtio-fs device was created without a socket")] 187 NoVirtioFsSock, 188 189 /// Cannot create vhost-user-blk device 190 #[error("Cannot create vhost-user-blk device")] 191 CreateVhostUserBlk(#[source] virtio_devices::vhost_user::Error), 192 193 /// Cannot create virtio-pmem device 194 #[error("Cannot create virtio-pmem device")] 195 CreateVirtioPmem(#[source] io::Error), 196 197 /// Cannot create vDPA device 198 #[error("Cannot create vdpa device")] 199 CreateVdpa(#[source] virtio_devices::vdpa::Error), 200 201 /// Cannot create virtio-vsock device 202 #[error("Cannot create virtio-vsock device")] 203 CreateVirtioVsock(#[source] io::Error), 204 205 /// Cannot create tpm device 206 #[error("Cannot create tmp device")] 207 CreateTpmDevice(#[source] anyhow::Error), 208 209 /// Failed to convert Path to &str for the vDPA device. 210 #[error("Failed to convert Path to &str for the vDPA device")] 211 CreateVdpaConvertPath, 212 213 /// Failed to convert Path to &str for the virtio-vsock device. 
214 #[error("Failed to convert Path to &str for the virtio-vsock device")] 215 CreateVsockConvertPath, 216 217 /// Cannot create virtio-vsock backend 218 #[error("Cannot create virtio-vsock backend")] 219 CreateVsockBackend(#[source] virtio_devices::vsock::VsockUnixError), 220 221 /// Cannot create virtio-iommu device 222 #[error("Cannot create virtio-iommu device")] 223 CreateVirtioIommu(#[source] io::Error), 224 225 /// Cannot create virtio-balloon device 226 #[error("Cannot create virtio-balloon device")] 227 CreateVirtioBalloon(#[source] io::Error), 228 229 /// Cannot create pvmemcontrol device 230 #[cfg(feature = "pvmemcontrol")] 231 #[error("Cannot create pvmemcontrol device")] 232 CreatePvmemcontrol(#[source] io::Error), 233 234 /// Cannot create virtio-watchdog device 235 #[error("Cannot create virtio-watchdog device")] 236 CreateVirtioWatchdog(#[source] io::Error), 237 238 /// Failed to parse disk image format 239 #[error("Failed to parse disk image format")] 240 DetectImageType(#[source] io::Error), 241 242 /// Cannot open qcow disk path 243 #[error("Cannot open qcow disk path")] 244 QcowDeviceCreate(#[source] qcow::Error), 245 246 /// Cannot create serial manager 247 #[error("Cannot create serial manager")] 248 CreateSerialManager(#[source] SerialManagerError), 249 250 /// Cannot spawn the serial manager thread 251 #[error("Cannot spawn serial manager thread")] 252 SpawnSerialManager(#[source] SerialManagerError), 253 254 /// Cannot open tap interface 255 #[error("Cannot open tap interface")] 256 OpenTap(#[source] net_util::TapError), 257 258 /// Cannot allocate IRQ. 259 #[error("Cannot allocate IRQ")] 260 AllocateIrq, 261 262 /// Cannot configure the IRQ. 263 #[error("Cannot configure the IRQ")] 264 Irq(#[source] vmm_sys_util::errno::Error), 265 266 /// Cannot allocate PCI BARs 267 #[error("Cannot allocate PCI BARs")] 268 AllocateBars(#[source] pci::PciDeviceError), 269 270 /// Could not free the BARs associated with a PCI device. 
271 #[error("Could not free the BARs associated with a PCI device")] 272 FreePciBars(#[source] pci::PciDeviceError), 273 274 /// Cannot register ioevent. 275 #[error("Cannot register ioevent")] 276 RegisterIoevent(#[source] anyhow::Error), 277 278 /// Cannot unregister ioevent. 279 #[error("Cannot unregister ioevent")] 280 UnRegisterIoevent(#[source] anyhow::Error), 281 282 /// Cannot create virtio device 283 #[error("Cannot create virtio device")] 284 VirtioDevice(#[source] virtio_devices::transport::VirtioPciDeviceError), 285 286 /// Cannot add PCI device 287 #[error("Cannot add PCI device")] 288 AddPciDevice(#[source] pci::PciRootError), 289 290 /// Cannot open persistent memory file 291 #[error("Cannot open persistent memory file")] 292 PmemFileOpen(#[source] io::Error), 293 294 /// Cannot set persistent memory file size 295 #[error("Cannot set persistent memory file size")] 296 PmemFileSetLen(#[source] io::Error), 297 298 /// Cannot find a memory range for persistent memory 299 #[error("Cannot find a memory range for persistent memory")] 300 PmemRangeAllocation, 301 302 /// Cannot find a memory range for virtio-fs 303 #[error("Cannot find a memory range for virtio-fs")] 304 FsRangeAllocation, 305 306 /// Error creating serial output file 307 #[error("Error creating serial output file")] 308 SerialOutputFileOpen(#[source] io::Error), 309 310 /// Error creating debug-console output file 311 #[cfg(target_arch = "x86_64")] 312 #[error("Error creating debug-console output file")] 313 DebugconOutputFileOpen(#[source] io::Error), 314 315 /// Error creating console output file 316 #[error("Error creating console output file")] 317 ConsoleOutputFileOpen(#[source] io::Error), 318 319 /// Error creating serial pty 320 #[error("Error creating serial pty")] 321 SerialPtyOpen(#[source] io::Error), 322 323 /// Error creating console pty 324 #[error("Error creating console pty")] 325 ConsolePtyOpen(#[source] io::Error), 326 327 /// Error creating debugcon pty 328 
#[error("Error creating console pty")] 329 DebugconPtyOpen(#[source] io::Error), 330 331 /// Error setting pty raw mode 332 #[error("Error setting pty raw mode")] 333 SetPtyRaw(#[source] ConsoleDeviceError), 334 335 /// Error getting pty peer 336 #[error("Error getting pty peer")] 337 GetPtyPeer(#[source] vmm_sys_util::errno::Error), 338 339 /// Cannot create a VFIO device 340 #[error("Cannot create a VFIO device")] 341 VfioCreate(#[source] vfio_ioctls::VfioError), 342 343 /// Cannot create a VFIO PCI device 344 #[error("Cannot create a VFIO PCI device")] 345 VfioPciCreate(#[source] pci::VfioPciError), 346 347 /// Failed to map VFIO MMIO region. 348 #[error("Failed to map VFIO MMIO region")] 349 VfioMapRegion(#[source] pci::VfioPciError), 350 351 /// Failed to DMA map VFIO device. 352 #[error("Failed to DMA map VFIO device")] 353 VfioDmaMap(#[source] vfio_ioctls::VfioError), 354 355 /// Failed to DMA unmap VFIO device. 356 #[error("Failed to DMA unmap VFIO device")] 357 VfioDmaUnmap(#[source] pci::VfioPciError), 358 359 /// Failed to create the passthrough device. 360 #[error("Failed to create the passthrough device")] 361 CreatePassthroughDevice(#[source] anyhow::Error), 362 363 /// Failed to memory map. 364 #[error("Failed to memory map")] 365 Mmap(#[source] io::Error), 366 367 /// Cannot add legacy device to Bus. 
368 #[error("Cannot add legacy device to Bus")] 369 BusError(#[source] vm_device::BusError), 370 371 /// Failed to allocate IO port 372 #[error("Failed to allocate IO port")] 373 AllocateIoPort, 374 375 /// Failed to allocate MMIO address 376 #[error("Failed to allocate MMIO address")] 377 AllocateMmioAddress, 378 379 /// Failed to make hotplug notification 380 #[error("Failed to make hotplug notification")] 381 HotPlugNotification(#[source] io::Error), 382 383 /// Error from a memory manager operation 384 #[error("Error from a memory manager operation")] 385 MemoryManager(#[source] MemoryManagerError), 386 387 /// Failed to create new interrupt source group. 388 #[error("Failed to create new interrupt source group")] 389 CreateInterruptGroup(#[source] io::Error), 390 391 /// Failed to update interrupt source group. 392 #[error("Failed to update interrupt source group")] 393 UpdateInterruptGroup(#[source] io::Error), 394 395 /// Failed to create interrupt controller. 396 #[error("Failed to create interrupt controller")] 397 CreateInterruptController(#[source] interrupt_controller::Error), 398 399 /// Failed to create a new MmapRegion instance. 400 #[error("Failed to create a new MmapRegion instance")] 401 NewMmapRegion(#[source] vm_memory::mmap::MmapRegionError), 402 403 /// Failed to clone a File. 404 #[error("Failed to clone a File")] 405 CloneFile(#[source] io::Error), 406 407 /// Failed to create socket file 408 #[error("Failed to create socket file")] 409 CreateSocketFile(#[source] io::Error), 410 411 /// Failed to spawn the network backend 412 #[error("Failed to spawn the network backend")] 413 SpawnNetBackend(#[source] io::Error), 414 415 /// Failed to spawn the block backend 416 #[error("Failed to spawn the block backend")] 417 SpawnBlockBackend(#[source] io::Error), 418 419 /// Missing PCI bus. 420 #[error("Missing PCI bus")] 421 NoPciBus, 422 423 /// Could not find an available device name. 
424 #[error("Could not find an available device name")] 425 NoAvailableDeviceName, 426 427 /// Missing PCI device. 428 #[error("Missing PCI device")] 429 MissingPciDevice, 430 431 /// Failed to remove a PCI device from the PCI bus. 432 #[error("Failed to remove a PCI device from the PCI bus")] 433 RemoveDeviceFromPciBus(#[source] pci::PciRootError), 434 435 /// Failed to remove a bus device from the IO bus. 436 #[error("Failed to remove a bus device from the IO bus")] 437 RemoveDeviceFromIoBus(#[source] vm_device::BusError), 438 439 /// Failed to remove a bus device from the MMIO bus. 440 #[error("Failed to remove a bus device from the MMIO bus")] 441 RemoveDeviceFromMmioBus(#[source] vm_device::BusError), 442 443 /// Failed to find the device corresponding to a specific PCI b/d/f. 444 #[error("Failed to find the device corresponding to a specific PCI b/d/f")] 445 UnknownPciBdf(u32), 446 447 /// Not allowed to remove this type of device from the VM. 448 #[error("Not allowed to remove this type of device from the VM: {0}")] 449 RemovalNotAllowed(vm_virtio::VirtioDeviceType), 450 451 /// Failed to find device corresponding to the given identifier. 452 #[error("Failed to find device corresponding to the given identifier")] 453 UnknownDeviceId(String), 454 455 /// Failed to find an available PCI device ID. 456 #[error("Failed to find an available PCI device ID")] 457 NextPciDeviceId(#[source] pci::PciRootError), 458 459 /// Could not reserve the PCI device ID. 460 #[error("Could not reserve the PCI device ID")] 461 GetPciDeviceId(#[source] pci::PciRootError), 462 463 /// Could not give the PCI device ID back. 464 #[error("Could not give the PCI device ID back")] 465 PutPciDeviceId(#[source] pci::PciRootError), 466 467 /// No disk path was specified when one was expected 468 #[error("No disk path was specified when one was expected")] 469 NoDiskPath, 470 471 /// Failed to update guest memory for virtio device. 
472 #[error("Failed to update guest memory for virtio device")] 473 UpdateMemoryForVirtioDevice(#[source] virtio_devices::Error), 474 475 /// Cannot create virtio-mem device 476 #[error("Cannot create virtio-mem device")] 477 CreateVirtioMem(#[source] io::Error), 478 479 /// Cannot find a memory range for virtio-mem memory 480 #[error("Cannot find a memory range for virtio-mem memory")] 481 VirtioMemRangeAllocation, 482 483 /// Failed to update guest memory for VFIO PCI device. 484 #[error("Failed to update guest memory for VFIO PCI device")] 485 UpdateMemoryForVfioPciDevice(#[source] vfio_ioctls::VfioError), 486 487 /// Trying to use a directory for pmem but no size specified 488 #[error("Trying to use a directory for pmem but no size specified")] 489 PmemWithDirectorySizeMissing, 490 491 /// Trying to use a size that is not multiple of 2MiB 492 #[error("Trying to use a size that is not multiple of 2MiB")] 493 PmemSizeNotAligned, 494 495 /// Could not find the node in the device tree. 496 #[error("Could not find the node in the device tree")] 497 MissingNode, 498 499 /// Resource was already found. 500 #[error("Resource was already found")] 501 ResourceAlreadyExists, 502 503 /// Expected resources for virtio-pmem could not be found. 504 #[error("Expected resources for virtio-pmem could not be found")] 505 MissingVirtioPmemResources, 506 507 /// Missing PCI b/d/f from the DeviceNode. 508 #[error("Missing PCI b/d/f from the DeviceNode")] 509 MissingDeviceNodePciBdf, 510 511 /// No support for device passthrough 512 #[error("No support for device passthrough")] 513 NoDevicePassthroughSupport, 514 515 /// No socket option support for console device 516 #[error("No socket option support for console device")] 517 NoSocketOptionSupportForConsoleDevice, 518 519 /// Failed to resize virtio-balloon 520 #[error("Failed to resize virtio-balloon")] 521 VirtioBalloonResize(#[source] virtio_devices::balloon::Error), 522 523 /// Missing virtio-balloon, can't proceed as expected. 
524 #[error("Missing virtio-balloon, can't proceed as expected")] 525 MissingVirtioBalloon, 526 527 /// Missing virtual IOMMU device 528 #[error("Missing virtual IOMMU device")] 529 MissingVirtualIommu, 530 531 /// Failed to do power button notification 532 #[error("Failed to do power button notification")] 533 PowerButtonNotification(#[source] io::Error), 534 535 /// Failed to do AArch64 GPIO power button notification 536 #[cfg(target_arch = "aarch64")] 537 #[error("Failed to do AArch64 GPIO power button notification")] 538 AArch64PowerButtonNotification(#[source] devices::legacy::GpioDeviceError), 539 540 /// Failed to set O_DIRECT flag to file descriptor 541 #[error("Failed to set O_DIRECT flag to file descriptor")] 542 SetDirectIo, 543 544 /// Failed to create FixedVhdDiskAsync 545 #[error("Failed to create FixedVhdDiskAsync")] 546 CreateFixedVhdDiskAsync(#[source] io::Error), 547 548 /// Failed to create FixedVhdDiskSync 549 #[error("Failed to create FixedVhdDiskSync")] 550 CreateFixedVhdDiskSync(#[source] io::Error), 551 552 /// Failed to create QcowDiskSync 553 #[error("Failed to create QcowDiskSync")] 554 CreateQcowDiskSync(#[source] qcow::Error), 555 556 /// Failed to create FixedVhdxDiskSync 557 #[error("Failed to create FixedVhdxDiskSync")] 558 CreateFixedVhdxDiskSync(#[source] vhdx::VhdxError), 559 560 /// Failed to add DMA mapping handler to virtio-mem device. 561 #[error("Failed to add DMA mapping handler to virtio-mem device")] 562 AddDmaMappingHandlerVirtioMem(#[source] virtio_devices::mem::Error), 563 564 /// Failed to remove DMA mapping handler from virtio-mem device. 
565 #[error("Failed to remove DMA mapping handler from virtio-mem device")] 566 RemoveDmaMappingHandlerVirtioMem(#[source] virtio_devices::mem::Error), 567 568 /// Failed to create vfio-user client 569 #[error("Failed to create vfio-user client")] 570 VfioUserCreateClient(#[source] vfio_user::Error), 571 572 /// Failed to create VFIO user device 573 #[error("Failed to create VFIO user device")] 574 VfioUserCreate(#[source] VfioUserPciDeviceError), 575 576 /// Failed to map region from VFIO user device into guest 577 #[error("Failed to map region from VFIO user device into guest")] 578 VfioUserMapRegion(#[source] VfioUserPciDeviceError), 579 580 /// Failed to DMA map VFIO user device. 581 #[error("Failed to DMA map VFIO user device")] 582 VfioUserDmaMap(#[source] VfioUserPciDeviceError), 583 584 /// Failed to DMA unmap VFIO user device. 585 #[error("Failed to DMA unmap VFIO user device")] 586 VfioUserDmaUnmap(#[source] VfioUserPciDeviceError), 587 588 /// Failed to update memory mappings for VFIO user device 589 #[error("Failed to update memory mappings for VFIO user device")] 590 UpdateMemoryForVfioUserPciDevice(#[source] VfioUserPciDeviceError), 591 592 /// Cannot duplicate file descriptor 593 #[error("Cannot duplicate file descriptor")] 594 DupFd(#[source] vmm_sys_util::errno::Error), 595 596 /// Failed to DMA map virtio device. 597 #[error("Failed to DMA map virtio device")] 598 VirtioDmaMap(#[source] std::io::Error), 599 600 /// Failed to DMA unmap virtio device. 601 #[error("Failed to DMA unmap virtio device")] 602 VirtioDmaUnmap(#[source] std::io::Error), 603 604 /// Cannot hotplug device behind vIOMMU 605 #[error("Cannot hotplug device behind vIOMMU")] 606 InvalidIommuHotplug, 607 608 /// Invalid identifier as it is not unique. 
609 #[error("Invalid identifier as it is not unique: {0}")] 610 IdentifierNotUnique(String), 611 612 /// Invalid identifier 613 #[error("Invalid identifier: {0}")] 614 InvalidIdentifier(String), 615 616 /// Error activating virtio device 617 #[error("Error activating virtio device")] 618 VirtioActivate(#[source] ActivateError), 619 620 /// Failed retrieving device state from snapshot 621 #[error("Failed retrieving device state from snapshot")] 622 RestoreGetState(#[source] MigratableError), 623 624 /// Cannot create a PvPanic device 625 #[error("Cannot create a PvPanic device")] 626 PvPanicCreate(#[source] devices::pvpanic::PvPanicError), 627 628 /// Cannot create a RateLimiterGroup 629 #[error("Cannot create a RateLimiterGroup")] 630 RateLimiterGroupCreate(#[source] rate_limiter::group::Error), 631 632 /// Cannot start sigwinch listener 633 #[error("Cannot start sigwinch listener")] 634 StartSigwinchListener(#[source] std::io::Error), 635 636 // Invalid console info 637 #[error("Invalid console info")] 638 InvalidConsoleInfo, 639 640 // Invalid console fd 641 #[error("Invalid console fd")] 642 InvalidConsoleFd, 643 644 /// Cannot lock images of all block devices. 
    #[error("Cannot lock images of all block devices")]
    DiskLockError(#[source] virtio_devices::block::Error),
}

/// Shorthand result type used throughout the device manager.
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// Size of the MMIO window reserved for the device manager's own ACPI device.
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

/// Console abstraction exposed to the rest of the VMM.
///
/// Only holds an optional resizer handle; when present, the guest console
/// can be told about host terminal size changes.
#[derive(Default)]
pub struct Console {
    console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
}

impl Console {
    /// Returns true when a console resizer is attached, i.e. when
    /// `update_console_size()` would actually do something.
    pub fn need_resize(&self) -> bool {
        if let Some(_resizer) = self.console_resizer.as_ref() {
            return true;
        }

        false
    }

    /// Propagates the current host terminal size to the guest console,
    /// if a resizer is attached. No-op otherwise.
    pub fn update_console_size(&self) {
        if let Some(resizer) = self.console_resizer.as_ref() {
            resizer.update_console_size()
        }
    }
}

/// Owns the address-space bookkeeping shared by all devices: the system
/// allocator, the PIO/MMIO buses, and the per-segment PCI MMIO allocators.
/// Implements `DeviceRelocation` so PCI BARs can be moved at runtime.
pub(crate) struct AddressManager {
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    pub(crate) io_bus: Arc<Bus>,
    pub(crate) mmio_bus: Arc<Bus>,
    pub(crate) vm: Arc<dyn hypervisor::Vm>,
    device_tree: Arc<Mutex<DeviceTree>>,
    // One allocator per PCI segment for the 32-bit / 64-bit MMIO apertures.
    pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
    pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}

impl DeviceRelocation for AddressManager {
    // Relocates a PCI BAR from `old_base` to `new_base`:
    //  1. moves the range in the relevant allocator and bus,
    //  2. rewrites the matching resource in the device tree,
    //  3. for virtio-pci devices, re-registers ioeventfds or remaps the
    //     shared-memory region with the hypervisor,
    //  4. finally lets the device itself update its BAR state.
    // The ordering of these steps matters (unregister before register,
    // remove region before re-create) — do not reorder.
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                // Update system allocator: free the old IO range, then claim
                // the new one at the exact requested base.
                self.allocator
                    .lock()
                    .unwrap()
                    .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                self.allocator
                    .lock()
                    .unwrap()
                    .allocate_io_addresses(Some(GuestAddress(new_base)), len as GuestUsize, None)
                    .ok_or_else(|| io::Error::other("failed allocating new IO range"))?;

                // Update PIO bus
                self.io_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(io::Error::other)?;
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
                    &self.pci_mmio32_allocators
                } else {
                    &self.pci_mmio64_allocators
                };

                // Find the specific allocator that this BAR was allocated from and use it for new one
                for allocator in allocators {
                    let allocator_base = allocator.lock().unwrap().base();
                    let allocator_end = allocator.lock().unwrap().end();

                    if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                        allocator
                            .lock()
                            .unwrap()
                            .free(GuestAddress(old_base), len as GuestUsize);

                        allocator
                            .lock()
                            .unwrap()
                            .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
                            .ok_or_else(|| io::Error::other("failed allocating new MMIO range"))?;

                        break;
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(io::Error::other)?;
            }
        }

        // Update the device_tree resources associated with the device
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        // Match on both BAR type and old base so only the
                        // relocated BAR's resource entry is rewritten.
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::other(format!(
                        "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                    )));
                }
            } else {
                return Err(io::Error::other(format!(
                    "Couldn't find device {id} from device tree"
                )));
            }
        }

        let any_dev = pci_dev.as_any_mut();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The relocated BAR is the config BAR: move the ioeventfd
                // registrations from the old addresses to the new ones.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::other(format!("failed to unregister ioevent: {e:?}"))
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::other(format!("failed to register ioevent: {e:?}"))
                        })?;
                }
            } else {
                // Otherwise this may be the BAR backing the device's shared
                // memory regions (e.g. virtio-fs DAX window): remap the user
                // memory region with the hypervisor at the new address.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        // Remove the old mapping first...
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::other(format!("failed to remove user memory region: {e:?}"))
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::other(format!("failed to create user memory regions: {e:?}"))
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::other(format!(
                                "failed to update shared memory regions: {e:?}"
                            ))
                        })?;
                    }
                }
            }
        }

        // Let the device update its own internal BAR bookkeeping last.
        pci_dev.move_bar(old_base, new_base)
    }
}

/// Snapshot/restore payload for the device manager: the device tree plus
/// the counter used to mint new device identifiers.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

/// A pty's controlling end and its filesystem path.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    // Manual impl because `File` is not `Clone`; duplicates the fd.
    // NOTE(review): `try_clone` failure panics here — presumably acceptable
    // because running out of fds is unrecoverable for the VMM; confirm.
    fn clone(&self) -> Self {
        PtyPair {
            main: self.main.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

/// Handle to any of the hot-pluggable PCI device flavours.
#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}

/// A virtio device plus the placement metadata the device manager needs
/// to attach it (IOMMU participation, PCI segment, DMA handler).
#[derive(Clone)]
struct MetaVirtioDevice {
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // Whether the device sits behind the virtual IOMMU.
    iommu: bool,
    id: String,
    pci_segment: u16,
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}

/// Register addresses handed to the ACPI table builder.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

// Proxy that grants the guest access to pages on demand when running
// SEV-SNP guests under mshv (see the AccessPlatform impl below).
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
struct SevSnpPageAccessProxy {
    vm: Arc<dyn hypervisor::Vm>,
}

#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl
std::fmt::Debug for SevSnpPageAccessProxy { 891 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 892 write!(f, "SNP Page access proxy") 893 } 894 } 895 896 #[cfg(all(feature = "mshv", feature = "sev_snp"))] 897 impl SevSnpPageAccessProxy { 898 fn new(vm: Arc<dyn hypervisor::Vm>) -> SevSnpPageAccessProxy { 899 SevSnpPageAccessProxy { vm } 900 } 901 } 902 903 #[cfg(all(feature = "mshv", feature = "sev_snp"))] 904 impl AccessPlatform for SevSnpPageAccessProxy { 905 fn translate_gpa(&self, base: u64, _size: u64) -> std::result::Result<u64, std::io::Error> { 906 Ok(base) 907 } 908 909 fn translate_gva(&self, base: u64, size: u64) -> std::result::Result<u64, std::io::Error> { 910 self.vm 911 .gain_page_access(base, size as u32) 912 .map_err(io::Error::other)?; 913 Ok(base) 914 } 915 } 916 917 pub struct DeviceManager { 918 // Manage address space related to devices 919 address_manager: Arc<AddressManager>, 920 921 // Console abstraction 922 console: Arc<Console>, 923 924 // Serial Manager 925 serial_manager: Option<Arc<SerialManager>>, 926 927 // pty foreground status, 928 console_resize_pipe: Option<Arc<File>>, 929 930 // To restore on exit. 931 original_termios_opt: Arc<Mutex<Option<termios>>>, 932 933 // Interrupt controller 934 #[cfg(target_arch = "x86_64")] 935 interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>, 936 #[cfg(target_arch = "aarch64")] 937 interrupt_controller: Option<Arc<Mutex<gic::Gic>>>, 938 #[cfg(target_arch = "riscv64")] 939 interrupt_controller: Option<Arc<Mutex<aia::Aia>>>, 940 941 // Things to be added to the commandline (e.g. 
    // aarch64 or riscv64 early console)
    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    /// All disks. Needed for locking and unlocking the images.
    block_devices: Vec<Arc<Mutex<Block>>>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<dyn BusDeviceSync>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    // One entry per configured PCI segment.
    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit and reset events signalled towards the VMM.
    exit_evt: EventFd,
    reset_evt: EventFd,

    // MMIO device info keyed by (device type, id), used for FDT/cmdline.
    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    // MMIO address reserved for the DeviceManager's own ACPI device.
    #[cfg(not(target_arch = "riscv64"))]
    acpi_address: GuestAddress,

    // PCI segment currently selected for device addition.
    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    #[cfg(feature = "pvmemcontrol")]
    pvmemcontrol_devices: Option<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // aio availability if detected
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    #[cfg(not(target_arch = "riscv64"))]
    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    #[cfg(not(target_arch = "riscv64"))]
    // Addresses for ACPI platform devices e.g.
    // ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    // Snapshot this manager is being restored from, if any.
    snapshot: Option<Snapshot>,

    // Rate limiter groups shared between disks, keyed by group id.
    rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,

    // MMIO regions relocated/tracked for passthrough devices.
    mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
}

/// Splits the MMIO range `[start, end]` into one allocator per PCI
/// segment, sized proportionally to each segment's weight and starting
/// on an `alignment`-aligned boundary.
///
/// `weights` must hold one entry per segment; the unit range size is
/// `(end - start + 1) / (alignment * total_weight)` rounded down to a
/// multiple of `alignment`, and each segment receives `weight` units.
fn create_mmio_allocators(
    start: u64,
    end: u64,
    num_pci_segments: u16,
    weights: Vec<u32>,
    alignment: u64,
) -> Vec<Arc<Mutex<AddressAllocator>>> {
    let total_weight: u32 = weights.iter().sum();

    // Start each PCI segment mmio range on an aligned boundary
    let pci_segment_mmio_size = (end - start + 1) / (alignment * total_weight as u64) * alignment;

    let mut mmio_allocators = vec![];
    let mut i = 0;
    for segment_id in 0..num_pci_segments as u64 {
        let weight = weights[segment_id as usize] as u64;
        let mmio_start = start + i * pci_segment_mmio_size;
        let mmio_size = pci_segment_mmio_size * weight;
        let allocator = Arc::new(Mutex::new(
            AddressAllocator::new(GuestAddress(mmio_start), mmio_size).unwrap(),
        ));
        mmio_allocators.push(allocator);
        // `i` accumulates consumed weight units, not segment count.
        i += weight;
    }

    mmio_allocators
}

impl DeviceManager {
    /// Builds the DeviceManager: restores state from `snapshot` if given,
    /// carves out per-segment PCI MMIO apertures, creates the MSI
    /// interrupt manager and PCI segments, and registers the manager's
    /// own ACPI region on the MMIO bus.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        io_bus: Arc<Bus>,
        mmio_bus: Arc<Bus>,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        cpu_manager: Arc<Mutex<CpuManager>>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        boot_id_list: BTreeSet<String>,
        #[cfg(not(target_arch = "riscv64"))] timestamp: Instant,
        snapshot: Option<Snapshot>,
        dynamic: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        // On restore, resume the device tree and id counter from the snapshot.
        let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
            let state: DeviceManagerState
                = snapshot.to_state().unwrap();
            (
                Arc::new(Mutex::new(state.device_tree.clone())),
                state.device_id_cnt,
            )
        } else {
            (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
        };

        // Default to a single PCI segment unless the platform config says otherwise.
        let num_pci_segments =
            if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
                platform_config.num_pci_segments
            } else {
                1
            };

        // Per-segment 32-bit MMIO aperture weights, overridable per segment.
        let mut mmio32_aperture_weights: Vec<u32> =
            std::iter::repeat_n(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT, num_pci_segments.into())
                .collect();
        if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
            for pci_segment in pci_segments.iter() {
                mmio32_aperture_weights[pci_segment.pci_segment as usize] =
                    pci_segment.mmio32_aperture_weight
            }
        }

        let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0;
        let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE;
        // 32-bit apertures are aligned to 4 KiB.
        let pci_mmio32_allocators = create_mmio_allocators(
            start_of_mmio32_area,
            end_of_mmio32_area,
            num_pci_segments,
            mmio32_aperture_weights,
            4 << 10,
        );

        // Same again for the 64-bit MMIO apertures.
        let mut mmio64_aperture_weights: Vec<u32> =
            std::iter::repeat_n(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT, num_pci_segments.into())
                .collect();
        if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
            for pci_segment in pci_segments.iter() {
                mmio64_aperture_weights[pci_segment.pci_segment as usize] =
                    pci_segment.mmio64_aperture_weight
            }
        }

        let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0;
        let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0;
        // 64-bit apertures are aligned to 4 GiB.
        let pci_mmio64_allocators = create_mmio_allocators(
            start_of_mmio64_area,
            end_of_mmio64_area,
            num_pci_segments,
            mmio64_aperture_weights,
            4 << 30,
        );

        let address_manager = Arc::new(AddressManager {
            allocator: memory_manager.lock().unwrap().allocator(),
            io_bus,
            mmio_bus,
            vm: vm.clone(),
            device_tree: Arc::clone(&device_tree),
            pci_mmio32_allocators,
            pci_mmio64_allocators,
        });

        // First we create the MSI interrupt manager, the legacy one is created
        // later, after the IOAPIC device creation.
        // The reason we create the MSI one first is because the IOAPIC needs it,
        // and then the legacy interrupt manager needs an IOAPIC. So we're
        // handling a linear dependency chain:
        // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
        let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
            Arc::new(MsiInterruptManager::new(
                Arc::clone(&address_manager.allocator),
                vm,
            ));

        // Reserve the MMIO window backing the DeviceManager's own ACPI device.
        // NOTE(review): failure maps to AllocateIoPort although this is an
        // MMIO allocation — AllocateMmioAddress looks more accurate; confirm.
        let acpi_address = address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        let mut pci_irq_slots = [0; 32];
        PciSegment::reserve_legacy_interrupts_for_pci_devices(
            &address_manager,
            &mut pci_irq_slots,
        )?;

        // Segment 0 is the default segment; the rest get NUMA-aware placement.
        let mut pci_segments = vec![PciSegment::new_default_segment(
            &address_manager,
            Arc::clone(&address_manager.pci_mmio32_allocators[0]),
            Arc::clone(&address_manager.pci_mmio64_allocators[0]),
            &pci_irq_slots,
        )?];

        for i in 1..num_pci_segments as usize {
            pci_segments.push(PciSegment::new(
                i as u16,
                numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
                &address_manager,
                Arc::clone(&address_manager.pci_mmio32_allocators[i]),
                Arc::clone(&address_manager.pci_mmio64_allocators[i]),
                &pci_irq_slots,
            )?);
        }

        // With dynamic (hotplug-capable) configs the CpuManager also gets an
        // ACPI MMIO region of its own.
        if dynamic {
            let acpi_address = address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
                .ok_or(DeviceManagerError::AllocateMmioAddress)?;

            address_manager
                .mmio_bus
                .insert(
                    cpu_manager.clone(),
                    acpi_address.0,
                    CPU_MANAGER_ACPI_SIZE as u64,
                )
                .map_err(DeviceManagerError::BusError)?;

            cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
        }

        // Build the configured rate limiter groups and start their threads.
        let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new();
        if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() {
            for rate_limit_group_cfg in rate_limit_groups_cfg {
                let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config;
                let bw = rate_limit_cfg.bandwidth.unwrap_or_default();
                let ops = rate_limit_cfg.ops.unwrap_or_default();
                let mut rate_limit_group = RateLimiterGroup::new(
                    &rate_limit_group_cfg.id,
                    bw.size,
                    bw.one_time_burst.unwrap_or(0),
                    bw.refill_time,
                    ops.size,
                    ops.one_time_burst.unwrap_or(0),
                    ops.refill_time,
                )
                .map_err(DeviceManagerError::RateLimiterGroupCreate)?;

                let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?;

                rate_limit_group.start_thread(exit_evt).unwrap();
                rate_limit_groups
                    .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group));
            }
        }

        let device_manager = DeviceManager {
            address_manager: Arc::clone(&address_manager),
            console: Arc::new(Console::default()),
            interrupt_controller: None,
            #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
            cmdline_additions: Vec::new(),
            ged_notification_device: None,
            config,
            memory_manager,
            cpu_manager,
            virtio_devices: Vec::new(),
            block_devices: vec![],
            bus_devices: Vec::new(),
            device_id_cnt,
            msi_interrupt_manager,
            legacy_interrupt_manager: None,
            passthrough_device: None,
            vfio_container: None,
            iommu_device: None,
            iommu_mapping: None,
            iommu_attached_devices: None,
            pci_segments,
            device_tree,
            exit_evt,
            reset_evt,
            #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
            id_to_dev_info: HashMap::new(),
            seccomp_action,
            numa_nodes,
            balloon: None,
            activate_evt: activate_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            #[cfg(not(target_arch = "riscv64"))]
            acpi_address,
            selected_segment: 0,
            serial_manager: None,
            console_resize_pipe: None,
            original_termios_opt: Arc::new(Mutex::new(None)),
            virtio_mem_devices: Vec::new(),
            #[cfg(target_arch = "aarch64")]
            gpio_device: None,
            #[cfg(feature = "pvmemcontrol")]
            pvmemcontrol_devices: None,
            pvpanic_device: None,
            force_iommu,
            io_uring_supported: None,
            aio_supported: None,
            boot_id_list,
            #[cfg(not(target_arch = "riscv64"))]
            timestamp,
            pending_activations: Arc::new(Mutex::new(Vec::default())),
            #[cfg(not(target_arch = "riscv64"))]
            acpi_platform_addresses: AcpiPlatformAddresses::default(),
            snapshot,
            rate_limit_groups,
            mmio_regions: Arc::new(Mutex::new(Vec::new())),
        };

        let device_manager = Arc::new(Mutex::new(device_manager));

        // Expose the DeviceManager itself as an ACPI MMIO device at the
        // address reserved earlier.
        address_manager
            .mmio_bus
            .insert(
                Arc::clone(&device_manager) as Arc<dyn BusDeviceSync>,
                acpi_address.0,
                DEVICE_MANAGER_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(device_manager)
    }

    /// Returns a clone of the console resize pipe handle, if one exists.
    pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
        self.console_resize_pipe.clone()
    }

    /// Creates the architecture-specific interrupt controller
    /// (IOAPIC / GIC / AIA) and returns it as a trait object.
    pub fn create_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        self.add_interrupt_controller()
    }

    /// Creates all remaining devices: legacy and ACPI devices, consoles,
    /// virtio devices, PCI devices and optional pvmemcontrol/pvpanic.
    /// Must run after the interrupt controller exists.
    pub fn create_devices(
        &mut self,
        console_info: Option<ConsoleInfo>,
        console_resize_pipe: Option<Arc<File>>,
        original_termios_opt: Arc<Mutex<Option<termios>>>,
        interrupt_controller: Arc<Mutex<dyn
        InterruptController>>,
    ) -> DeviceManagerResult<()> {
        trace_scoped!("create_devices");

        let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();

        self.cpu_manager
            .lock()
            .unwrap()
            .set_interrupt_controller(interrupt_controller.clone());

        // Now we can create the legacy interrupt manager, which needs the freshly
        // formed IOAPIC device.
        let legacy_interrupt_manager: Arc<
            dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
        > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
            &interrupt_controller,
        )));

        {
            // Give the MemoryManager an ACPI MMIO slot if it reserved one.
            if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
                self.address_manager
                    .mmio_bus
                    .insert(
                        Arc::clone(&self.memory_manager) as Arc<dyn BusDeviceSync>,
                        acpi_address.0,
                        MEMORY_MANAGER_ACPI_SIZE as u64,
                    )
                    .map_err(DeviceManagerError::BusError)?;
            }
        }

        #[cfg(target_arch = "x86_64")]
        self.add_legacy_devices(
            self.reset_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )?;

        #[cfg(target_arch = "aarch64")]
        self.add_legacy_devices(&legacy_interrupt_manager)?;

        {
            self.ged_notification_device = self.add_acpi_devices(
                &legacy_interrupt_manager,
                self.reset_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )?;
        }

        self.original_termios_opt = original_termios_opt;

        self.console = self.add_console_devices(
            &legacy_interrupt_manager,
            &mut virtio_devices,
            console_info,
            console_resize_pipe,
        )?;

        // Optional TPM device, configured via the socket path.
        #[cfg(not(target_arch = "riscv64"))]
        if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
            let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
            self.bus_devices
                .push(Arc::clone(&tpm_dev) as Arc<dyn BusDeviceSync>)
        }
        self.legacy_interrupt_manager = Some(legacy_interrupt_manager);

        virtio_devices.append(&mut self.make_virtio_devices()?);

        self.add_pci_devices(virtio_devices.clone())?;

        self.virtio_devices = virtio_devices;

        // Add pvmemcontrol if required
        #[cfg(feature = "pvmemcontrol")]
        {
            if self.config.lock().unwrap().pvmemcontrol.is_some() {
                let (pvmemcontrol_bus_device, pvmemcontrol_pci_device) =
                    self.make_pvmemcontrol_device()?;
                self.pvmemcontrol_devices =
                    Some((pvmemcontrol_bus_device, pvmemcontrol_pci_device));
            }
        }

        if self.config.clone().lock().unwrap().pvpanic {
            self.pvpanic_device = self.add_pvpanic_device()?;
        }

        Ok(())
    }

    /// Captures the serializable state (device tree + id counter) for
    /// snapshotting.
    fn state(&self) -> DeviceManagerState {
        DeviceManagerState {
            device_tree: self.device_tree.lock().unwrap().clone(),
            device_id_cnt: self.device_id_cnt,
        }
    }

    /// Returns the inclusive (start, end) of the guest-physical range
    /// used for MSI doorbells on the current architecture.
    fn get_msi_iova_space(&mut self) -> (u64, u64) {
        #[cfg(target_arch = "aarch64")]
        {
            let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
            let vgic_config = gic::Gic::create_default_config(vcpus.into());
            (
                vgic_config.msi_addr,
                vgic_config.msi_addr + vgic_config.msi_size - 1,
            )
        }
        #[cfg(target_arch = "riscv64")]
        {
            let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
            let vaia_config = aia::Aia::create_default_config(vcpus.into());
            (
                vaia_config.imsic_addr,
                vaia_config.imsic_addr + vaia_config.vcpu_count as u64 * arch::layout::IMSIC_SIZE
                    - 1,
            )
        }
        // x86: the fixed LAPIC MSI window.
        #[cfg(target_arch = "x86_64")]
        (0xfee0_0000, 0xfeef_ffff)
    }

    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
    /// Gets the information of the devices registered up to some point in time.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }

    /// Places all virtio devices (and VFIO / vfio-user devices) behind
    /// PCI, optionally attaching them to the paravirtualized IOMMU, and
    /// records the IOMMU attachment list used for the ACPI VIOT table.
    #[allow(unused_variables)]
    fn add_pci_devices(
        &mut self,
        virtio_devices: Vec<MetaVirtioDevice>,
    ) -> DeviceManagerResult<()> {
        let iommu_id = String::from(IOMMU_DEVICE_NAME);

        let iommu_address_width_bits =
            if let Some(ref platform) = self.config.lock().unwrap().platform {
                platform.iommu_address_width_bits
            } else {
                DEFAULT_IOMMU_ADDRESS_WIDTH_BITS
            };

        // Create the virtual IOMMU first (if enabled) so devices below can
        // be attached to it; the IOMMU itself is added to PCI last.
        let iommu_device = if self.config.lock().unwrap().iommu {
            let (device, mapping) = virtio_devices::Iommu::new(
                iommu_id.clone(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.get_msi_iova_space(),
                iommu_address_width_bits,
                state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioIommu)?;
            let device = Arc::new(Mutex::new(device));
            self.iommu_device = Some(Arc::clone(&device));
            self.iommu_mapping = Some(mapping);

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(iommu_id.clone(), device_node!(iommu_id, device));

            Some(device)
        } else {
            None
        };

        let mut iommu_attached_devices = Vec::new();
        {
            for handle in virtio_devices {
                let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
                    self.iommu_mapping.clone()
                } else {
                    None
                };

                let dev_id = self.add_virtio_pci_device(
                    handle.virtio_device,
                    &mapping,
                    handle.id,
                    handle.pci_segment,
                    handle.dma_handler,
                )?;

                if handle.iommu {
                    iommu_attached_devices.push(dev_id);
                }
            }

            let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
            iommu_attached_devices.append(&mut vfio_iommu_device_ids);

            let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
            iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);

            // Add all devices from forced iommu segments
            if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
                if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
                    for segment in iommu_segments {
                        // Every possible device slot (0..32) on the segment is
                        // considered attached.
                        for device in 0..32 {
                            let bdf = PciBdf::new(*segment, 0, device, 0);
                            if !iommu_attached_devices.contains(&bdf) {
                                iommu_attached_devices.push(bdf);
                            }
                        }
                    }
                }
            }

            if let Some(iommu_device) = iommu_device {
                // The IOMMU itself always lives on segment 0 and is never
                // attached to itself.
                let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
                self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
            }
        }

        // Keep strong references to the per-segment config-space devices.
        for segment in &self.pci_segments {
            #[cfg(target_arch = "x86_64")]
            if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
                self.bus_devices
                    .push(Arc::clone(pci_config_io) as Arc<dyn BusDeviceSync>);
            }

            self.bus_devices
                .push(Arc::clone(&segment.pci_config_mmio) as Arc<dyn BusDeviceSync>);
        }

        Ok(())
    }
    /// Creates the aarch64 vGIC interrupt controller, restoring its state
    /// (and re-initializing the PMU) when a snapshot is present.
    #[cfg(target_arch = "aarch64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
            gic::Gic::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
                self.address_manager.vm.clone(),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        // Restore the vGic if this is in the process of restoration
        let id = String::from(gic::GIC_SNAPSHOT_ID);
        if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
            // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
            if self
                .cpu_manager
                .lock()
                .unwrap()
                // +16: the PMU PPI is offset past the SGI/PPI base — TODO confirm.
                .init_pmu(AARCH64_PMU_IRQ + 16)
                .is_err()
            {
                info!("Failed to initialize PMU");
            }

            let vgic_state = vgic_snapshot
                .to_state()
                .map_err(DeviceManagerError::RestoreGetState)?;
            let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
            interrupt_controller
                .lock()
                .unwrap()
                .restore_vgic(vgic_state, &saved_vcpu_states)
                .unwrap();
        }

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// Returns the concrete GIC handle, if created.
    #[cfg(target_arch = "aarch64")]
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }

    /// Creates the riscv64 AIA interrupt controller. Snapshot restore is
    /// not implemented yet on riscv64.
    #[cfg(target_arch = "riscv64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<aia::Aia>> = Arc::new(Mutex::new(
            aia::Aia::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
                self.address_manager.vm.clone(),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        // Restore the vAia if this is in the process of restoration
        let id = String::from(aia::_AIA_SNAPSHOT_ID);
        if let Some(_vaia_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
            // TODO: vAia snapshotting and restoration is scheduled to next stage of riscv64 support.
            // TODO: PMU support is scheduled to next stage of riscv64 support.
            // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
            unimplemented!()
        }

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// Returns the concrete AIA handle, if created.
    #[cfg(target_arch = "riscv64")]
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<aia::Aia>>> {
        self.interrupt_controller.as_ref()
    }

    /// Creates the x86_64 IOAPIC, maps it at its fixed MMIO window and
    /// registers it in the device tree.
    #[cfg(target_arch = "x86_64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let id = String::from(IOAPIC_DEVICE_NAME);

        // Create IOAPIC
        let interrupt_controller = Arc::new(Mutex::new(
            ioapic::Ioapic::new(
                id.clone(),
                APIC_START,
                Arc::clone(&self.msi_interrupt_manager),
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        self.address_manager
            .mmio_bus
            .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
            .map_err(DeviceManagerError::BusError)?;

        self.bus_devices
            .push(Arc::clone(&interrupt_controller) as Arc<dyn BusDeviceSync>);

        //
        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// Creates the ACPI shutdown device, the GED notification device and
    /// the PM timer, wiring the x86 variants to their fixed IO ports and
    /// recording their register addresses for the ACPI tables.
    fn add_acpi_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        reset_evt: EventFd,
        exit_evt: EventFd,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
            exit_evt,
            reset_evt,
            vcpus_kill_signalled,
        )));

        self.bus_devices
            .push(Arc::clone(&shutdown_device) as Arc<dyn BusDeviceSync>);

        #[cfg(target_arch = "x86_64")]
        {
            let shutdown_pio_address: u16 = 0x600;

            // NOTE(review): 0x8 ports are reserved in the allocator but only
            // 0x4 are inserted on the bus — confirm the discrepancy is intended.
            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            // Sleep control/status and reset all share the same port.
            self.acpi_platform_addresses.sleep_control_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.sleep_status_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.reset_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
        }

        let ged_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();
        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: ged_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;
        let ged_address = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(
                None,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
                None,
            )
            .ok_or(DeviceManagerError::AllocateMmioAddress)?;
        let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
            interrupt_group,
            ged_irq,
            ged_address,
        )));
        self.address_manager
            .mmio_bus
            .insert(
                ged_device.clone(),
                ged_address.0,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;
        self.bus_devices
            .push(Arc::clone(&ged_device) as Arc<dyn BusDeviceSync>);

        // ACPI PM timer device; on x86 it is exposed at a fixed IO port.
        let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));

        self.bus_devices
            .push(Arc::clone(&pm_timer_device) as Arc<dyn BusDeviceSync>);

        #[cfg(target_arch = "x86_64")]
        {
            let pm_timer_pio_address: u16 = 0x608;

            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            self.acpi_platform_addresses.pm_timer_address =
                Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
        }

        Ok(Some(ged_device))
    }

    /// Creates the x86 legacy devices: i8042 (shutdown), CMOS (memory
    /// size reporting), firmware debug port and the 0x80 debug port.
    #[cfg(target_arch = "x86_64")]
    fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        // Add a shutdown device (i8042)
        let i8042 =
            Arc::new(Mutex::new(devices::legacy::I8042Device::new(
                reset_evt.try_clone().unwrap(),
                vcpus_kill_signalled.clone(),
            )));

        self.bus_devices
            .push(Arc::clone(&i8042) as Arc<dyn BusDeviceSync>);

        self.address_manager
            .io_bus
            .insert(i8042, 0x61, 0x4)
            .map_err(DeviceManagerError::BusError)?;
        {
            // Add a CMOS emulated device
            let mem_size = self
                .memory_manager
                .lock()
                .unwrap()
                .guest_memory()
                .memory()
                .last_addr()
                .0
                + 1;
            // Split guest memory into below-4G and above-4G portions as
            // reported through CMOS registers.
            let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
            let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);

            let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
                mem_below_4g,
                mem_above_4g,
                reset_evt,
                Some(vcpus_kill_signalled),
            )));

            self.bus_devices
                .push(Arc::clone(&cmos) as Arc<dyn BusDeviceSync>);

            self.address_manager
                .io_bus
                .insert(cmos, 0x70, 0x2)
                .map_err(DeviceManagerError::BusError)?;

            // Firmware debug device at port 0x402.
            let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));

            self.bus_devices
                .push(Arc::clone(&fwdebug) as Arc<dyn BusDeviceSync>);

            self.address_manager
                .io_bus
                .insert(fwdebug, 0x402, 0x1)
                .map_err(DeviceManagerError::BusError)?;
        }

        // 0x80 debug port
        let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
        self.bus_devices
            .push(Arc::clone(&debug_port) as Arc<dyn BusDeviceSync>);
        self.address_manager
            .io_bus
            .insert(debug_port, 0x80, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        Ok(())
    }

    /// Creates the aarch64 legacy devices: PL031 RTC and PL061 GPIO,
    /// both mapped as MMIO and recorded for FDT generation.
    #[cfg(target_arch = "aarch64")]
    fn add_legacy_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
    ) -> DeviceManagerResult<()> {
        // Add a RTC device
        let rtc_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: rtc_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));

        self.bus_devices
            .push(Arc::clone(&rtc_device) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(rtc_device, addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        // Record MMIO placement so the FDT can describe the RTC.
        self.id_to_dev_info.insert(
            (DeviceType::Rtc, "rtc".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: rtc_irq,
            },
        );

        // Add a GPIO device
        let id = String::from(GPIO_DEVICE_NAME);
        let gpio_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: gpio_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
            id.clone(),
            interrupt_group,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&gpio_device) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(gpio_device.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.gpio_device = Some(gpio_device.clone());

        // Record MMIO placement so the FDT can describe the GPIO device.
        self.id_to_dev_info.insert(
            (DeviceType::Gpio, "gpio".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq:
gpio_irq, 2002 }, 2003 ); 2004 2005 self.device_tree 2006 .lock() 2007 .unwrap() 2008 .insert(id.clone(), device_node!(id, gpio_device)); 2009 2010 Ok(()) 2011 } 2012 2013 #[cfg(target_arch = "x86_64")] 2014 fn add_debug_console_device( 2015 &mut self, 2016 debug_console_writer: Box<dyn io::Write + Send>, 2017 ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> { 2018 let id = String::from(DEBUGCON_DEVICE_NAME); 2019 let debug_console = Arc::new(Mutex::new(DebugConsole::new( 2020 id.clone(), 2021 debug_console_writer, 2022 ))); 2023 2024 let port = self 2025 .config 2026 .lock() 2027 .unwrap() 2028 .debug_console 2029 .clone() 2030 .iobase 2031 .map(|port| port as u64) 2032 .unwrap_or(debug_console::DEFAULT_PORT); 2033 2034 self.bus_devices 2035 .push(Arc::clone(&debug_console) as Arc<dyn BusDeviceSync>); 2036 2037 self.address_manager 2038 .allocator 2039 .lock() 2040 .unwrap() 2041 .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None) 2042 .ok_or(DeviceManagerError::AllocateIoPort)?; 2043 2044 self.address_manager 2045 .io_bus 2046 .insert(debug_console.clone(), port, 0x1) 2047 .map_err(DeviceManagerError::BusError)?; 2048 2049 // Fill the device tree with a new node. In case of restore, we 2050 // know there is nothing to do, so we can simply override the 2051 // existing entry. 
2052 self.device_tree 2053 .lock() 2054 .unwrap() 2055 .insert(id.clone(), device_node!(id, debug_console)); 2056 2057 Ok(debug_console) 2058 } 2059 2060 #[cfg(target_arch = "x86_64")] 2061 fn add_serial_device( 2062 &mut self, 2063 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 2064 serial_writer: Option<Box<dyn io::Write + Send>>, 2065 ) -> DeviceManagerResult<Arc<Mutex<Serial>>> { 2066 // Serial is tied to IRQ #4 2067 let serial_irq = 4; 2068 2069 let id = String::from(SERIAL_DEVICE_NAME); 2070 2071 let interrupt_group = interrupt_manager 2072 .create_group(LegacyIrqGroupConfig { 2073 irq: serial_irq as InterruptIndex, 2074 }) 2075 .map_err(DeviceManagerError::CreateInterruptGroup)?; 2076 2077 let serial = Arc::new(Mutex::new(Serial::new( 2078 id.clone(), 2079 interrupt_group, 2080 serial_writer, 2081 state_from_id(self.snapshot.as_ref(), id.as_str()) 2082 .map_err(DeviceManagerError::RestoreGetState)?, 2083 ))); 2084 2085 self.bus_devices 2086 .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>); 2087 2088 self.address_manager 2089 .allocator 2090 .lock() 2091 .unwrap() 2092 .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None) 2093 .ok_or(DeviceManagerError::AllocateIoPort)?; 2094 2095 self.address_manager 2096 .io_bus 2097 .insert(serial.clone(), 0x3f8, 0x8) 2098 .map_err(DeviceManagerError::BusError)?; 2099 2100 // Fill the device tree with a new node. In case of restore, we 2101 // know there is nothing to do, so we can simply override the 2102 // existing entry. 
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// Creates the legacy serial device for aarch64, modelled as an ARM PL011
    /// UART.
    ///
    /// Allocates an IRQ from the system allocator, creates the matching legacy
    /// interrupt group, maps the device at the fixed
    /// `LEGACY_SERIAL_MAPPED_IO_START` MMIO address, records it in
    /// `id_to_dev_info` (consumed by FDT generation) and appends an `earlycon`
    /// hint to the kernel command line. On restore, prior device state is
    /// looked up from the snapshot by id.
    #[cfg(target_arch = "aarch64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
        let id = String::from(SERIAL_DEVICE_NAME);

        // Dynamically allocated IRQ (unlike x86_64, where the serial IRQ is
        // hard-wired to #4).
        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            self.timestamp,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: serial_irq,
            },
        );

        self.cmdline_additions
            .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// Creates the legacy serial device for riscv64.
    ///
    /// Same flow as the aarch64 variant (dynamic IRQ allocation, MMIO mapping
    /// at `LEGACY_SERIAL_MAPPED_IO_START`, `id_to_dev_info` registration), but
    /// uses the `Serial` device model shared with x86_64 and emits a generic
    /// `earlycon=uart,mmio,...` command-line hint instead of pl011.
    #[cfg(target_arch = "riscv64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
        let id = String::from(SERIAL_DEVICE_NAME);

        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(Serial::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: serial_irq,
            },
        );

        self.cmdline_additions
            .push(format!("earlycon=uart,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// Creates the virtio-console device from `console_fd` and pushes it onto
    /// `virtio_devices` for later PCI placement.
    ///
    /// Returns `Ok(None)` when the console output mode is `Off`; otherwise
    /// returns the `ConsoleResizer` only when the console is attached to a
    /// TTY (so the caller can hook up SIGWINCH-driven resizing).
    fn add_virtio_console_device(
        &mut self,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_fd: ConsoleOutput,
        resize_pipe: Option<Arc<File>>,
    ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
        // Clone the console config up front so the config lock is not held
        // for the rest of the setup.
        let console_config = self.config.lock().unwrap().console.clone();
        // Map the output mode to the virtio-console endpoint; Pty and
        // (interactive) Tty modes also stash the resize pipe on `self`.
        let endpoint = match console_fd {
            ConsoleOutput::File(file) => Endpoint::File(file),
            ConsoleOutput::Pty(file) => {
                self.console_resize_pipe = resize_pipe;
                // The same PTY fd serves both directions, hence the clone.
                Endpoint::PtyPair(Arc::new(file.try_clone().unwrap()), file)
            }
            ConsoleOutput::Tty(stdout) => {
                if stdout.is_terminal() {
                    self.console_resize_pipe = resize_pipe;
                }

                // If an interactive TTY then we can accept input
                // SAFETY: FFI call. Trivially safe.
                if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
                    // SAFETY: FFI call to dup. Trivially safe.
                    let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
                    if stdin == -1 {
                        return vmm_sys_util::errno::errno_result()
                            .map_err(DeviceManagerError::DupFd);
                    }
                    // SAFETY: stdin is valid and owned solely by us.
                    let stdin = unsafe { File::from_raw_fd(stdin) };
                    Endpoint::FilePair(stdout, Arc::new(stdin))
                } else {
                    Endpoint::File(stdout)
                }
            }
            ConsoleOutput::Socket(_) => {
                return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
            }
            ConsoleOutput::Null => Endpoint::Null,
            // Console disabled: nothing to create, no resizer.
            ConsoleOutput::Off => return Ok(None),
        };
        let id = String::from(CONSOLE_DEVICE_NAME);

        let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
            id.clone(),
            endpoint,
            self.console_resize_pipe
                .as_ref()
                .map(|p| p.try_clone().unwrap()),
            self.force_iommu | console_config.iommu,
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateVirtioConsole)?;
        let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
        virtio_devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_console_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: console_config.iommu,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_console_device));

        // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
        Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
            Some(console_resizer)
        } else {
            None
        })
    }

    /// Adds all devices that behave like a console with respect to the VM
    /// configuration.
This includes:
    /// - debug-console
    /// - serial-console
    /// - virtio-console
    fn add_console_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_info: Option<ConsoleInfo>,
        console_resize_pipe: Option<Arc<File>>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        if console_info.is_none() {
            return Err(DeviceManagerError::InvalidConsoleInfo);
        }

        // SAFETY: console_info is Some, so it's safe to unwrap.
        let console_info = console_info.unwrap();

        // Only file-backed and TTY outputs can act as a writer for the
        // serial device; the other modes deliver output through different
        // paths (or not at all).
        let serial_writer: Option<Box<dyn io::Write + Send>> = match console_info.serial_main_fd {
            ConsoleOutput::File(ref file) | ConsoleOutput::Tty(ref file) => {
                Some(Box::new(Arc::clone(file)))
            }
            ConsoleOutput::Off
            | ConsoleOutput::Null
            | ConsoleOutput::Pty(_)
            | ConsoleOutput::Socket(_) => None,
        };

        if !matches!(console_info.serial_main_fd, ConsoleOutput::Off) {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            // Pty/Tty/Socket outputs get a SerialManager with its own thread;
            // the other modes need no manager.
            self.serial_manager = match console_info.serial_main_fd {
                ConsoleOutput::Pty(_) | ConsoleOutput::Tty(_) | ConsoleOutput::Socket(_) => {
                    let serial_manager = SerialManager::new(
                        serial,
                        console_info.serial_main_fd,
                        serial_config.socket,
                    )
                    .map_err(DeviceManagerError::CreateSerialManager)?;
                    if let Some(mut serial_manager) = serial_manager {
                        serial_manager
                            .start_thread(
                                self.exit_evt
                                    .try_clone()
                                    .map_err(DeviceManagerError::EventFd)?,
                            )
                            .map_err(DeviceManagerError::SpawnSerialManager)?;
                        Some(Arc::new(serial_manager))
                    } else {
                        None
                    }
                }
                _ => None,
            };
        }

        // The debug console (I/O-port based) only exists on x86_64.
        #[cfg(target_arch = "x86_64")]
        {
            let debug_console_writer: Option<Box<dyn io::Write + Send>> =
                match
            console_info.debug_main_fd {
                    ConsoleOutput::File(file) | ConsoleOutput::Tty(file) => Some(Box::new(file)),
                    ConsoleOutput::Off
                    | ConsoleOutput::Null
                    | ConsoleOutput::Pty(_)
                    | ConsoleOutput::Socket(_) => None,
                };
            if let Some(writer) = debug_console_writer {
                let _ = self.add_debug_console_device(writer)?;
            }
        }

        // The virtio-console is created last; it returns the resizer (if any)
        // that ends up in the returned `Console`.
        let console_resizer = self.add_virtio_console_device(
            virtio_devices,
            console_info.console_main_fd,
            console_resize_pipe,
        )?;

        Ok(Arc::new(Console { console_resizer }))
    }

    /// Creates a TPM device backed by the (swtpm-style) socket/path at
    /// `tpm_path` and maps it into guest memory at the fixed
    /// `TPM_START`/`TPM_SIZE` MMIO window.
    #[cfg(not(target_arch = "riscv64"))]
    fn add_tpm_device(
        &mut self,
        tpm_path: PathBuf,
    ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
        // Create TPM Device
        let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
            DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
        })?;
        let tpm = Arc::new(Mutex::new(tpm));

        // Add TPM Device to mmio
        self.address_manager
            .mmio_bus
            .insert(
                tpm.clone(),
                arch::layout::TPM_START.0,
                arch::layout::TPM_SIZE,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(tpm)
    }

    /// Tries to acquire advisory locks for all disk images.
    ///
    /// This should only be called when a VM boots or VM state is restored.
    /// For live-migration, the locks must be released on the destination side
    /// before they are acquired again by the receiving side.
    pub fn try_lock_disks(&self) -> DeviceManagerResult<()> {
        for dev in &self.block_devices {
            let mut dev = dev.lock().unwrap();
            dev.try_lock_image()
                .map_err(DeviceManagerError::DiskLockError)?;
        }
        Ok(())
    }

    /// Release all advisory locks held for the disk images.
    ///
    /// This should only be called when the VM is stopped and the VMM supposed
    /// to shut down. A new VMM, either after a live migration or a
    /// state save/resume cycle, should then acquire all locks before the VM
    /// starts to run.
    pub fn release_disk_locks(&self) -> DeviceManagerResult<()> {
        for dev in &self.block_devices {
            let mut dev = dev.lock().unwrap();
            dev.unlock_image()
                .map_err(DeviceManagerError::DiskLockError)?;
        }
        Ok(())
    }

    /// Instantiates every configured virtio device, in a fixed order, and
    /// returns them for later placement on the PCI bus(es).
    fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices: Vec<MetaVirtioDevice> = Vec::new();

        // Create "standard" virtio devices (net/block/rng)
        devices.append(&mut self.make_virtio_block_devices()?);
        devices.append(&mut self.make_virtio_net_devices()?);
        devices.append(&mut self.make_virtio_rng_devices()?);

        // Add virtio-fs if required
        devices.append(&mut self.make_virtio_fs_devices()?);

        // Add virtio-pmem if required
        devices.append(&mut self.make_virtio_pmem_devices()?);

        // Add virtio-vsock if required
        devices.append(&mut self.make_virtio_vsock_devices()?);

        devices.append(&mut self.make_virtio_mem_devices()?);

        // Add virtio-balloon if required
        devices.append(&mut self.make_virtio_balloon_devices()?);

        // Add virtio-watchdog device
        devices.append(&mut self.make_virtio_watchdog_devices()?);

        // Add vDPA devices if required
        devices.append(&mut self.make_vdpa_devices()?);

        Ok(devices)
    }

    // Cache whether aio is supported to avoid checking for every block device
    fn aio_is_supported(&mut self) -> bool {
        if let Some(supported) = self.aio_supported {
            return supported;
        }

        let supported = block_aio_is_supported();
        self.aio_supported = Some(supported);
        supported
    }

    // Cache whether io_uring is supported to avoid probing for every block device
    fn io_uring_is_supported(&mut self) -> bool {
        if let Some(supported) =
self.io_uring_supported { 2500 return supported; 2501 } 2502 2503 let supported = block_io_uring_is_supported(); 2504 self.io_uring_supported = Some(supported); 2505 supported 2506 } 2507 2508 /// Creates a [`MetaVirtioDevice`] from the provided [`DiskConfig`]. 2509 /// 2510 /// Depending on the config, this is a [`vhost_user::Blk`] device or a [`virtio_devices::Block`] 2511 /// device. 2512 /// 2513 /// # Arguments 2514 /// - `disk_cfg`: The [`DiskConfig`] used to create the block device. 2515 /// - `is_hotplug`: Whether the device is being hotplugged and the lock for the disk image 2516 /// should be acquired right away. Locking will only happen for normal block devices, and not 2517 /// vhost-user devices. 2518 fn make_virtio_block_device( 2519 &mut self, 2520 disk_cfg: &mut DiskConfig, 2521 is_hotplug: bool, 2522 ) -> DeviceManagerResult<MetaVirtioDevice> { 2523 let id = if let Some(id) = &disk_cfg.id { 2524 id.clone() 2525 } else { 2526 let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?; 2527 disk_cfg.id = Some(id.clone()); 2528 id 2529 }; 2530 2531 info!("Creating virtio-block device: {:?}", disk_cfg); 2532 2533 let (virtio_device, migratable_device) = if disk_cfg.vhost_user { 2534 if is_hotplug { 2535 log::debug!("Acquiring image lock for vhost-user block device not supported"); 2536 } 2537 let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone(); 2538 let vu_cfg = VhostUserConfig { 2539 socket, 2540 num_queues: disk_cfg.num_queues, 2541 queue_size: disk_cfg.queue_size, 2542 }; 2543 let vhost_user_block = Arc::new(Mutex::new( 2544 match virtio_devices::vhost_user::Blk::new( 2545 id.clone(), 2546 vu_cfg, 2547 self.seccomp_action.clone(), 2548 self.exit_evt 2549 .try_clone() 2550 .map_err(DeviceManagerError::EventFd)?, 2551 self.force_iommu, 2552 state_from_id(self.snapshot.as_ref(), id.as_str()) 2553 .map_err(DeviceManagerError::RestoreGetState)?, 2554 ) { 2555 Ok(vub_device) => vub_device, 2556 Err(e) => { 2557 return 
Err(DeviceManagerError::CreateVhostUserBlk(e)); 2558 } 2559 }, 2560 )); 2561 2562 ( 2563 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2564 vhost_user_block as Arc<Mutex<dyn Migratable>>, 2565 ) 2566 } else { 2567 let mut options = OpenOptions::new(); 2568 options.read(true); 2569 options.write(!disk_cfg.readonly); 2570 if disk_cfg.direct { 2571 options.custom_flags(libc::O_DIRECT); 2572 } 2573 // Open block device path 2574 let mut file: File = options 2575 .open( 2576 disk_cfg 2577 .path 2578 .as_ref() 2579 .ok_or(DeviceManagerError::NoDiskPath)? 2580 .clone(), 2581 ) 2582 .map_err(DeviceManagerError::Disk)?; 2583 let image_type = 2584 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?; 2585 2586 let image = match image_type { 2587 ImageType::FixedVhd => { 2588 // Use asynchronous backend relying on io_uring if the 2589 // syscalls are supported. 2590 if cfg!(feature = "io_uring") 2591 && !disk_cfg.disable_io_uring 2592 && self.io_uring_is_supported() 2593 { 2594 info!("Using asynchronous fixed VHD disk file (io_uring)"); 2595 2596 #[cfg(not(feature = "io_uring"))] 2597 unreachable!("Checked in if statement above"); 2598 #[cfg(feature = "io_uring")] 2599 { 2600 Box::new( 2601 FixedVhdDiskAsync::new(file) 2602 .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?, 2603 ) as Box<dyn DiskFile> 2604 } 2605 } else { 2606 info!("Using synchronous fixed VHD disk file"); 2607 Box::new( 2608 FixedVhdDiskSync::new(file) 2609 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?, 2610 ) as Box<dyn DiskFile> 2611 } 2612 } 2613 ImageType::Raw => { 2614 // Use asynchronous backend relying on io_uring if the 2615 // syscalls are supported. 
2616 if cfg!(feature = "io_uring") 2617 && !disk_cfg.disable_io_uring 2618 && self.io_uring_is_supported() 2619 { 2620 info!("Using asynchronous RAW disk file (io_uring)"); 2621 2622 #[cfg(not(feature = "io_uring"))] 2623 unreachable!("Checked in if statement above"); 2624 #[cfg(feature = "io_uring")] 2625 { 2626 Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile> 2627 } 2628 } else if !disk_cfg.disable_aio && self.aio_is_supported() { 2629 info!("Using asynchronous RAW disk file (aio)"); 2630 Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile> 2631 } else { 2632 info!("Using synchronous RAW disk file"); 2633 Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile> 2634 } 2635 } 2636 ImageType::Qcow2 => { 2637 info!("Using synchronous QCOW2 disk file"); 2638 Box::new( 2639 QcowDiskSync::new(file, disk_cfg.direct) 2640 .map_err(DeviceManagerError::CreateQcowDiskSync)?, 2641 ) as Box<dyn DiskFile> 2642 } 2643 ImageType::Vhdx => { 2644 info!("Using synchronous VHDX disk file"); 2645 Box::new( 2646 VhdxDiskSync::new(file) 2647 .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?, 2648 ) as Box<dyn DiskFile> 2649 } 2650 }; 2651 2652 let rate_limit_group = 2653 if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() { 2654 // Create an anonymous RateLimiterGroup that is dropped when the Disk 2655 // is dropped. 
2656 let bw = rate_limiter_cfg.bandwidth.unwrap_or_default(); 2657 let ops = rate_limiter_cfg.ops.unwrap_or_default(); 2658 let mut rate_limit_group = RateLimiterGroup::new( 2659 disk_cfg.id.as_ref().unwrap(), 2660 bw.size, 2661 bw.one_time_burst.unwrap_or(0), 2662 bw.refill_time, 2663 ops.size, 2664 ops.one_time_burst.unwrap_or(0), 2665 ops.refill_time, 2666 ) 2667 .map_err(DeviceManagerError::RateLimiterGroupCreate)?; 2668 2669 rate_limit_group 2670 .start_thread( 2671 self.exit_evt 2672 .try_clone() 2673 .map_err(DeviceManagerError::EventFd)?, 2674 ) 2675 .unwrap(); 2676 2677 Some(Arc::new(rate_limit_group)) 2678 } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() { 2679 self.rate_limit_groups.get(rate_limit_group).cloned() 2680 } else { 2681 None 2682 }; 2683 2684 let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() { 2685 queue_affinity 2686 .iter() 2687 .map(|a| (a.queue_index, a.host_cpus.clone())) 2688 .collect() 2689 } else { 2690 BTreeMap::new() 2691 }; 2692 2693 let mut virtio_block = virtio_devices::Block::new( 2694 id.clone(), 2695 image, 2696 disk_cfg 2697 .path 2698 .as_ref() 2699 .ok_or(DeviceManagerError::NoDiskPath)? 2700 .clone(), 2701 disk_cfg.readonly, 2702 self.force_iommu | disk_cfg.iommu, 2703 disk_cfg.num_queues, 2704 disk_cfg.queue_size, 2705 disk_cfg.serial.clone(), 2706 self.seccomp_action.clone(), 2707 rate_limit_group, 2708 self.exit_evt 2709 .try_clone() 2710 .map_err(DeviceManagerError::EventFd)?, 2711 state_from_id(self.snapshot.as_ref(), id.as_str()) 2712 .map_err(DeviceManagerError::RestoreGetState)?, 2713 queue_affinity, 2714 ) 2715 .map_err(DeviceManagerError::CreateVirtioBlock)?; 2716 2717 // We lock the file here only for hotplugging. In normal operation, 2718 // state save/resume, and live-migration, locking is part of the outer control flow 2719 // to ensure proper order of (un)locking. 
2720 if is_hotplug { 2721 log::debug!("Acquiring lock for hotplugged image"); 2722 virtio_block 2723 .try_lock_image() 2724 .map_err(DeviceManagerError::DiskLockError)?; 2725 } 2726 2727 let virtio_block = Arc::new(Mutex::new(virtio_block)); 2728 2729 self.block_devices.push(virtio_block.clone()); 2730 2731 ( 2732 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2733 virtio_block as Arc<Mutex<dyn Migratable>>, 2734 ) 2735 }; 2736 2737 // Fill the device tree with a new node. In case of restore, we 2738 // know there is nothing to do, so we can simply override the 2739 // existing entry. 2740 self.device_tree 2741 .lock() 2742 .unwrap() 2743 .insert(id.clone(), device_node!(id, migratable_device)); 2744 2745 Ok(MetaVirtioDevice { 2746 virtio_device, 2747 iommu: disk_cfg.iommu, 2748 id, 2749 pci_segment: disk_cfg.pci_segment, 2750 dma_handler: None, 2751 }) 2752 } 2753 2754 fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2755 let mut devices = Vec::new(); 2756 2757 let mut block_devices = self.config.lock().unwrap().disks.clone(); 2758 if let Some(disk_list_cfg) = &mut block_devices { 2759 for disk_cfg in disk_list_cfg.iter_mut() { 2760 devices.push(self.make_virtio_block_device(disk_cfg, false)?); 2761 } 2762 } 2763 self.config.lock().unwrap().disks = block_devices; 2764 2765 Ok(devices) 2766 } 2767 2768 fn make_virtio_net_device( 2769 &mut self, 2770 net_cfg: &mut NetConfig, 2771 ) -> DeviceManagerResult<MetaVirtioDevice> { 2772 let id = if let Some(id) = &net_cfg.id { 2773 id.clone() 2774 } else { 2775 let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?; 2776 net_cfg.id = Some(id.clone()); 2777 id 2778 }; 2779 info!("Creating virtio-net device: {:?}", net_cfg); 2780 2781 let (virtio_device, migratable_device) = if net_cfg.vhost_user { 2782 let socket = net_cfg.vhost_socket.as_ref().unwrap().clone(); 2783 let vu_cfg = VhostUserConfig { 2784 socket, 2785 num_queues: net_cfg.num_queues, 2786 
queue_size: net_cfg.queue_size, 2787 }; 2788 let server = match net_cfg.vhost_mode { 2789 VhostMode::Client => false, 2790 VhostMode::Server => true, 2791 }; 2792 let vhost_user_net = Arc::new(Mutex::new( 2793 match virtio_devices::vhost_user::Net::new( 2794 id.clone(), 2795 net_cfg.mac, 2796 net_cfg.mtu, 2797 vu_cfg, 2798 server, 2799 self.seccomp_action.clone(), 2800 self.exit_evt 2801 .try_clone() 2802 .map_err(DeviceManagerError::EventFd)?, 2803 self.force_iommu, 2804 state_from_id(self.snapshot.as_ref(), id.as_str()) 2805 .map_err(DeviceManagerError::RestoreGetState)?, 2806 net_cfg.offload_tso, 2807 net_cfg.offload_ufo, 2808 net_cfg.offload_csum, 2809 ) { 2810 Ok(vun_device) => vun_device, 2811 Err(e) => { 2812 return Err(DeviceManagerError::CreateVhostUserNet(e)); 2813 } 2814 }, 2815 )); 2816 2817 ( 2818 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2819 vhost_user_net as Arc<Mutex<dyn Migratable>>, 2820 ) 2821 } else { 2822 let state = state_from_id(self.snapshot.as_ref(), id.as_str()) 2823 .map_err(DeviceManagerError::RestoreGetState)?; 2824 let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap { 2825 Arc::new(Mutex::new( 2826 virtio_devices::Net::new( 2827 id.clone(), 2828 Some(tap_if_name), 2829 Some(net_cfg.ip), 2830 Some(net_cfg.mask), 2831 Some(net_cfg.mac), 2832 &mut net_cfg.host_mac, 2833 net_cfg.mtu, 2834 self.force_iommu | net_cfg.iommu, 2835 net_cfg.num_queues, 2836 net_cfg.queue_size, 2837 self.seccomp_action.clone(), 2838 net_cfg.rate_limiter_config, 2839 self.exit_evt 2840 .try_clone() 2841 .map_err(DeviceManagerError::EventFd)?, 2842 state, 2843 net_cfg.offload_tso, 2844 net_cfg.offload_ufo, 2845 net_cfg.offload_csum, 2846 ) 2847 .map_err(DeviceManagerError::CreateVirtioNet)?, 2848 )) 2849 } else if let Some(fds) = &net_cfg.fds { 2850 let net = virtio_devices::Net::from_tap_fds( 2851 id.clone(), 2852 fds, 2853 Some(net_cfg.mac), 2854 net_cfg.mtu, 2855 self.force_iommu | net_cfg.iommu, 2856 
net_cfg.queue_size, 2857 self.seccomp_action.clone(), 2858 net_cfg.rate_limiter_config, 2859 self.exit_evt 2860 .try_clone() 2861 .map_err(DeviceManagerError::EventFd)?, 2862 state, 2863 net_cfg.offload_tso, 2864 net_cfg.offload_ufo, 2865 net_cfg.offload_csum, 2866 ) 2867 .map_err(DeviceManagerError::CreateVirtioNet)?; 2868 2869 // SAFETY: 'fds' are valid because TAP devices are created successfully 2870 unsafe { 2871 self.config.lock().unwrap().add_preserved_fds(fds.clone()); 2872 } 2873 2874 Arc::new(Mutex::new(net)) 2875 } else { 2876 Arc::new(Mutex::new( 2877 virtio_devices::Net::new( 2878 id.clone(), 2879 None, 2880 Some(net_cfg.ip), 2881 Some(net_cfg.mask), 2882 Some(net_cfg.mac), 2883 &mut net_cfg.host_mac, 2884 net_cfg.mtu, 2885 self.force_iommu | net_cfg.iommu, 2886 net_cfg.num_queues, 2887 net_cfg.queue_size, 2888 self.seccomp_action.clone(), 2889 net_cfg.rate_limiter_config, 2890 self.exit_evt 2891 .try_clone() 2892 .map_err(DeviceManagerError::EventFd)?, 2893 state, 2894 net_cfg.offload_tso, 2895 net_cfg.offload_ufo, 2896 net_cfg.offload_csum, 2897 ) 2898 .map_err(DeviceManagerError::CreateVirtioNet)?, 2899 )) 2900 }; 2901 2902 ( 2903 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2904 virtio_net as Arc<Mutex<dyn Migratable>>, 2905 ) 2906 }; 2907 2908 // Fill the device tree with a new node. In case of restore, we 2909 // know there is nothing to do, so we can simply override the 2910 // existing entry. 
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: net_cfg.iommu,
            id,
            pci_segment: net_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add virtio-net and vhost-user-net devices
    fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Work on a clone of the config, then write it back: device creation
        // may have filled in generated ids on each NetConfig.
        let mut net_devices = self.config.lock().unwrap().net.clone();
        if let Some(net_list_cfg) = &mut net_devices {
            for net_cfg in net_list_cfg.iter_mut() {
                devices.push(self.make_virtio_net_device(net_cfg)?);
            }
        }
        self.config.lock().unwrap().net = net_devices;

        Ok(devices)
    }

    /// Creates the virtio-rng device when the configured entropy source path
    /// is valid UTF-8; otherwise no device is created.
    fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        if let Some(rng_path) = rng_config.src.to_str() {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_rng_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: rng_config.iommu,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Fill the device tree with a new node.
In case of restore, we 2972 // know there is nothing to do, so we can simply override the 2973 // existing entry. 2974 self.device_tree 2975 .lock() 2976 .unwrap() 2977 .insert(id.clone(), device_node!(id, virtio_rng_device)); 2978 } 2979 2980 Ok(devices) 2981 } 2982 2983 fn make_virtio_fs_device( 2984 &mut self, 2985 fs_cfg: &mut FsConfig, 2986 ) -> DeviceManagerResult<MetaVirtioDevice> { 2987 let id = if let Some(id) = &fs_cfg.id { 2988 id.clone() 2989 } else { 2990 let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?; 2991 fs_cfg.id = Some(id.clone()); 2992 id 2993 }; 2994 2995 info!("Creating virtio-fs device: {:?}", fs_cfg); 2996 2997 let mut node = device_node!(id); 2998 2999 if let Some(fs_socket) = fs_cfg.socket.to_str() { 3000 let virtio_fs_device = Arc::new(Mutex::new( 3001 virtio_devices::vhost_user::Fs::new( 3002 id.clone(), 3003 fs_socket, 3004 &fs_cfg.tag, 3005 fs_cfg.num_queues, 3006 fs_cfg.queue_size, 3007 None, 3008 self.seccomp_action.clone(), 3009 self.exit_evt 3010 .try_clone() 3011 .map_err(DeviceManagerError::EventFd)?, 3012 self.force_iommu, 3013 state_from_id(self.snapshot.as_ref(), id.as_str()) 3014 .map_err(DeviceManagerError::RestoreGetState)?, 3015 ) 3016 .map_err(DeviceManagerError::CreateVirtioFs)?, 3017 )); 3018 3019 // Update the device tree with the migratable device. 
3020 node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>); 3021 self.device_tree.lock().unwrap().insert(id.clone(), node); 3022 3023 Ok(MetaVirtioDevice { 3024 virtio_device: Arc::clone(&virtio_fs_device) 3025 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3026 iommu: false, 3027 id, 3028 pci_segment: fs_cfg.pci_segment, 3029 dma_handler: None, 3030 }) 3031 } else { 3032 Err(DeviceManagerError::NoVirtioFsSock) 3033 } 3034 } 3035 3036 fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3037 let mut devices = Vec::new(); 3038 3039 let mut fs_devices = self.config.lock().unwrap().fs.clone(); 3040 if let Some(fs_list_cfg) = &mut fs_devices { 3041 for fs_cfg in fs_list_cfg.iter_mut() { 3042 devices.push(self.make_virtio_fs_device(fs_cfg)?); 3043 } 3044 } 3045 self.config.lock().unwrap().fs = fs_devices; 3046 3047 Ok(devices) 3048 } 3049 3050 fn make_virtio_pmem_device( 3051 &mut self, 3052 pmem_cfg: &mut PmemConfig, 3053 ) -> DeviceManagerResult<MetaVirtioDevice> { 3054 let id = if let Some(id) = &pmem_cfg.id { 3055 id.clone() 3056 } else { 3057 let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?; 3058 pmem_cfg.id = Some(id.clone()); 3059 id 3060 }; 3061 3062 info!("Creating virtio-pmem device: {:?}", pmem_cfg); 3063 3064 let mut node = device_node!(id); 3065 3066 // Look for the id in the device tree. If it can be found, that means 3067 // the device is being restored, otherwise it's created from scratch. 
        let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
            info!("Restoring virtio-pmem {} resources", id);

            // A restored pmem device must have exactly one MMIO address range
            // resource recorded in its device-tree node.
            let mut region_range: Option<(u64, u64)> = None;
            for resource in node.resources.iter() {
                match resource {
                    Resource::MmioAddressRange { base, size } => {
                        if region_range.is_some() {
                            return Err(DeviceManagerError::ResourceAlreadyExists);
                        }

                        region_range = Some((*base, *size));
                    }
                    _ => {
                        error!("Unexpected resource {:?} for {}", resource, id);
                    }
                }
            }

            if region_range.is_none() {
                return Err(DeviceManagerError::MissingVirtioPmemResources);
            }

            region_range
        } else {
            None
        };

        // Backing a directory requires an explicit size (an O_TMPFILE temp
        // file is created inside it and truncated to that size).
        let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
            if pmem_cfg.size.is_none() {
                return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
            }
            (O_TMPFILE, true)
        } else {
            (0, false)
        };

        let mut file = OpenOptions::new()
            .read(true)
            .write(!pmem_cfg.discard_writes)
            .custom_flags(custom_flags)
            .open(&pmem_cfg.file)
            .map_err(DeviceManagerError::PmemFileOpen)?;

        // Size comes from the config when given, otherwise from the backing
        // file's length (seek to end).
        let size = if let Some(size) = pmem_cfg.size {
            if set_len {
                file.set_len(size)
                    .map_err(DeviceManagerError::PmemFileSetLen)?;
            }
            size
        } else {
            file.seek(SeekFrom::End(0))
                .map_err(DeviceManagerError::PmemFileSetLen)?
        };

        // The region must be a multiple of 2MiB (0x20_0000).
        if size % 0x20_0000 != 0 {
            return Err(DeviceManagerError::PmemSizeNotAligned);
        }

        let (region_base, region_size) = if let Some((base, size)) = region_range {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(
                    Some(GuestAddress(base)),
                    size as GuestUsize,
                    Some(0x0020_0000),
                )
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base, size)
        } else {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            let base = self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(None, size as GuestUsize, Some(0x0020_0000))
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base.raw_value(), size)
        };

        let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
        // MAP_PRIVATE keeps guest writes out of the backing file when
        // discard_writes is requested; otherwise writes go through (MAP_SHARED).
        let mmap_region = MmapRegion::build(
            Some(FileOffset::new(cloned_file, 0)),
            region_size as usize,
            PROT_READ | PROT_WRITE,
            MAP_NORESERVE
                | if pmem_cfg.discard_writes {
                    MAP_PRIVATE
                } else {
                    MAP_SHARED
                },
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;
        let host_addr: u64 = mmap_region.as_ptr() as u64;

        let mem_slot = self
            .memory_manager
            .lock()
            .unwrap()
            .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
            .map_err(DeviceManagerError::MemoryManager)?;

        let mapping = virtio_devices::UserspaceMapping {
            host_addr,
            mem_slot,
            addr: GuestAddress(region_base),
            len: region_size,
            mergeable: false,
        };

        let virtio_pmem_device = Arc::new(Mutex::new(
            virtio_devices::Pmem::new(
                id.clone(),
                file,
                GuestAddress(region_base),
                mapping,
                mmap_region,
                self.force_iommu | pmem_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioPmem)?,
        ));

        // Update the device tree with correct resource information and with
        // the migratable device.
        node.resources.push(Resource::MmioAddressRange {
            base: region_base,
            size: region_size,
        });
        node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_pmem_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: pmem_cfg.iommu,
            id,
            pci_segment: pmem_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Create every virtio-pmem device listed in the VM config, writing the
    /// (possibly generated) ids back into the config.
    fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add virtio-pmem if required
        let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
        if let Some(pmem_list_cfg) = &mut pmem_devices {
            for pmem_cfg in pmem_list_cfg.iter_mut() {
                devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
            }
        }
        self.config.lock().unwrap().pmem = pmem_devices;

        Ok(devices)
    }

    /// Create a single virtio-vsock device from `vsock_cfg`, backed by a Unix
    /// socket on the host.
    fn make_virtio_vsock_device(
        &mut self,
        vsock_cfg: &mut VsockConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vsock_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
            vsock_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-vsock device: {:?}", vsock_cfg);

        let socket_path = vsock_cfg
            .socket
            .to_str()
            .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
        let backend =
            virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                .map_err(DeviceManagerError::CreateVsockBackend)?;

        let vsock_device = Arc::new(Mutex::new(
            virtio_devices::Vsock::new(
                id.clone(),
                vsock_cfg.cid,
                vsock_cfg.socket.clone(),
                backend,
                self.force_iommu | vsock_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioVsock)?,
        ));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vsock_device));

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&vsock_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vsock_cfg.iommu,
            id,
            pci_segment: vsock_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Create the (at most one) virtio-vsock device from the VM config,
    /// writing the possibly generated id back into the config.
    fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut vsock = self.config.lock().unwrap().vsock.clone();
        if let Some(ref mut vsock_cfg) = &mut vsock {
            devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
        }
        self.config.lock().unwrap().vsock = vsock;

        Ok(devices)
    }

    /// Create one virtio-mem device per memory zone that carries a virtio-mem
    /// region, wiring each device back into its zone so the zone can trigger
    /// resizes later.
    fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mm = self.memory_manager.clone();
        let mut mm = mm.lock().unwrap();
        for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
            if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
                info!("Creating virtio-mem device: id = {}", memory_zone_id);

                // NUMA node the zone belongs to, if any; used as the device's
                // node id hint.
                let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
                    .map(|i| i as u16);

                let virtio_mem_device = Arc::new(Mutex::new(
                    virtio_devices::Mem::new(
                        memory_zone_id.clone(),
                        virtio_mem_zone.region(),
                        self.seccomp_action.clone(),
                        node_id,
                        virtio_mem_zone.hotplugged_size(),
                        virtio_mem_zone.hugepages(),
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        virtio_mem_zone.blocks_state().clone(),
                        state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
                            .map_err(DeviceManagerError::RestoreGetState)?,
                    )
                    .map_err(DeviceManagerError::CreateVirtioMem)?,
                ));

                // Update the virtio-mem zone so that it has a handle onto the
                // virtio-mem device, which will be used for triggering a resize
                // if needed.
                virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));

                self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));

                devices.push(MetaVirtioDevice {
                    virtio_device: Arc::clone(&virtio_mem_device)
                        as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                    iommu: false,
                    id: memory_zone_id.clone(),
                    pci_segment: 0,
                    dma_handler: None,
                });

                // Fill the device tree with a new node. In case of restore, we
                // know there is nothing to do, so we can simply override the
                // existing entry.
                self.device_tree.lock().unwrap().insert(
                    memory_zone_id.clone(),
                    device_node!(memory_zone_id, virtio_mem_device),
                );
            }
        }

        Ok(devices)
    }

    /// Create the pvmemcontrol PCI device pair (PCI device + bus device) and
    /// register it on segment 0.
    #[cfg(feature = "pvmemcontrol")]
    fn make_pvmemcontrol_device(
        &mut self,
    ) -> DeviceManagerResult<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )> {
        let id = String::from(PVMEMCONTROL_DEVICE_NAME);
        let pci_segment_id = 0x0_u16;

        // pci_resources() may override the segment id when the device is
        // being restored from a snapshot.
        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        info!("Creating pvmemcontrol device: id = {}", id);
        let (pvmemcontrol_pci_device, pvmemcontrol_bus_device) =
            devices::pvmemcontrol::PvmemcontrolDevice::make_device(
                id.clone(),
                self.memory_manager.lock().unwrap().guest_memory(),
            );

        let pvmemcontrol_pci_device = Arc::new(Mutex::new(pvmemcontrol_pci_device));
        let pvmemcontrol_bus_device = Arc::new(pvmemcontrol_bus_device);

        let new_resources = self.add_pci_device(
            pvmemcontrol_bus_device.clone(),
            pvmemcontrol_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let mut node = device_node!(id, pvmemcontrol_pci_device);

        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = None;

        self.device_tree.lock().unwrap().insert(id, node);

        Ok((pvmemcontrol_bus_device, pvmemcontrol_pci_device))
    }

    /// Create the (at most one) virtio-balloon device if the VM config asks
    /// for one, keeping a handle in `self.balloon` for later resizing.
    fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
            let id = String::from(BALLOON_DEVICE_NAME);
            info!("Creating virtio-balloon device: id = {}", id);

            let virtio_balloon_device = Arc::new(Mutex::new(
                virtio_devices::Balloon::new(
                    id.clone(),
                    balloon_config.size,
                    balloon_config.deflate_on_oom,
                    balloon_config.free_page_reporting,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBalloon)?,
            ));

            // Keep a direct handle so the balloon can be resized later.
            self.balloon = Some(virtio_balloon_device.clone());

            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_balloon_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_balloon_device));
        }

        Ok(devices)
    }

    /// Create the virtio-watchdog device when enabled in the VM config. The
    /// watchdog fires the VM reset event on expiry.
    fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if !self.config.lock().unwrap().watchdog {
            return Ok(devices);
        }

        let id = String::from(WATCHDOG_DEVICE_NAME);
        info!("Creating virtio-watchdog device: id = {}", id);

        let virtio_watchdog_device = Arc::new(Mutex::new(
            virtio_devices::Watchdog::new(
                id.clone(),
                self.reset_evt.try_clone().unwrap(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
        ));
        devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_watchdog_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: false,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_watchdog_device));

        Ok(devices)
    }

    /// Create a single vDPA device from `vdpa_cfg`, together with the DMA
    /// mapping handler the device requires.
    fn make_vdpa_device(
        &mut self,
        vdpa_cfg: &mut VdpaConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vdpa_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
            vdpa_cfg.id = Some(id.clone());
            id
        };

        info!("Creating vDPA device: {:?}", vdpa_cfg);

        let device_path = vdpa_cfg
            .path
            .to_str()
            .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;

        let vdpa_device = Arc::new(Mutex::new(
            virtio_devices::Vdpa::new(
                id.clone(),
                device_path,
                self.memory_manager.lock().unwrap().guest_memory(),
                vdpa_cfg.num_queues as u16,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVdpa)?,
        ));

        // Create the DMA handler that is required by the vDPA device
        let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
            Arc::clone(&vdpa_device),
            Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
        ));

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vdpa_device));

        Ok(MetaVirtioDevice {
            virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vdpa_cfg.iommu,
            id,
            pci_segment: vdpa_cfg.pci_segment,
            dma_handler: Some(vdpa_mapping),
        })
    }

    /// Create every vDPA device listed in the VM config, writing the
    /// (possibly generated) ids back into the config.
    fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add vdpa if required
        let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
        if let Some(vdpa_list_cfg) = &mut vdpa_devices {
            for vdpa_cfg in vdpa_list_cfg.iter_mut() {
                devices.push(self.make_vdpa_device(vdpa_cfg)?);
            }
        }
        self.config.lock().unwrap().vdpa = vdpa_devices;

        Ok(devices)
    }

    /// Generate the next free device name for `prefix` by scanning the
    /// wrapping counter until a name is neither in the boot id list nor in
    /// the device tree; errors once a full counter cycle finds nothing.
    fn next_device_name(&mut
self, prefix: &str) -> DeviceManagerResult<String> {
        let start_id = self.device_id_cnt;
        loop {
            // Generate the temporary name.
            let name = format!("{}{}", prefix, self.device_id_cnt);
            // Increment the counter.
            self.device_id_cnt += Wrapping(1);
            // Check if the name is already in use.
            if !self.boot_id_list.contains(&name)
                && !self.device_tree.lock().unwrap().contains_key(&name)
            {
                return Ok(name);
            }

            if self.device_id_cnt == start_id {
                // We went through a full loop and there's nothing else we can
                // do.
                break;
            }
        }
        Err(DeviceManagerError::NoAvailableDeviceName)
    }

    /// Add a VFIO passthrough device, lazily creating the hypervisor-level
    /// passthrough device (shared by all VFIO devices) on first use.
    fn add_passthrough_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // If the passthrough device has not been created yet, it is created
        // here and stored in the DeviceManager structure for future needs.
        if self.passthrough_device.is_none() {
            self.passthrough_device = Some(
                self.address_manager
                    .vm
                    .create_passthrough_device()
                    .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
            );
        }

        self.add_vfio_device(device_cfg)
    }

    /// Build a new VFIO container from a duplicate of the passthrough device
    /// fd; fails if device passthrough is not available.
    fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
        let passthrough_device = self
            .passthrough_device
            .as_ref()
            .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

        let dup = passthrough_device
            .try_clone()
            .map_err(DeviceManagerError::VfioCreate)?;

        Ok(Arc::new(
            VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
        ))
    }

    /// Create and register one VFIO PCI device from `device_cfg`, choosing or
    /// creating the appropriate VFIO container and setting up its DMA
    /// mappings. Returns the device's BDF and its name.
    fn add_vfio_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't let all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device. That's simply because the map/unmap operations happen at the
        // VFIO container level.
        let vfio_container = if device_cfg.iommu {
            let vfio_container = self.create_vfio_container()?;

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }

            vfio_container
        } else if let Some(vfio_container) = &self.vfio_container {
            Arc::clone(vfio_container)
        } else {
            let vfio_container = self.create_vfio_container()?;
            needs_dma_mapping = true;
            self.vfio_container = Some(Arc::clone(&vfio_container));

            vfio_container
        };

        let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
            .map_err(DeviceManagerError::VfioCreate)?;

        if needs_dma_mapping {
            // Register DMA mapping in IOMMU.
            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem device itself.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    vfio_container
                        .vfio_dma_map(
                            region.start_addr().raw_value(),
                            region.len(),
                            region.as_ptr() as u64,
                        )
                        .map_err(DeviceManagerError::VfioDmaMap)?;
                }
            }

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            // Let hotpluggable virtio-mem regions be mapped/unmapped through
            // the shared container as they grow or shrink.
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Container,
                        vfio_mapping.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }
        }

        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let memory_manager = self.memory_manager.clone();

        let vfio_pci_device = VfioPciDevice::new(
            vfio_name.clone(),
            &self.address_manager.vm,
            vfio_device,
            vfio_container,
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            device_cfg.iommu,
            pci_device_bdf,
            memory_manager.lock().unwrap().memory_slot_allocator(),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
            device_cfg.x_nv_gpudirect_clique,
            device_cfg.path.clone(),
        )
        .map_err(DeviceManagerError::VfioPciCreate)?;

        let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

        let new_resources = self.add_pci_device(
            vfio_pci_device.clone(),
            vfio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        vfio_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioMapRegion)?;

        // Track the device's MMIO regions so DMA mapping code can see them.
        for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
            self.mmio_regions.lock().unwrap().push(mmio_region);
        }

        let mut node = device_node!(vfio_name, vfio_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_name.clone(), node);

        Ok((pci_device_bdf, vfio_name))
    }

    /// Allocate BARs for `pci_device`, attach it to the right PCI bus and
    /// register its bus mappings; returns the allocated BARs as resources
    /// suitable for recording in the device tree.
    fn add_pci_device(
        &mut self,
        bus_device: Arc<dyn BusDeviceSync>,
        pci_device: Arc<Mutex<dyn PciDevice>>,
        segment_id: u16,
        bdf: PciBdf,
        resources: Option<Vec<Resource>>,
    ) -> DeviceManagerResult<Vec<Resource>> {
        let bars = pci_device
            .lock()
            .unwrap()
            .allocate_bars(
                &self.address_manager.allocator,
                &mut self.pci_segments[segment_id as usize]
                    .mem32_allocator
                    .lock()
                    .unwrap(),
                &mut self.pci_segments[segment_id as usize]
                    .mem64_allocator
                    .lock()
                    .unwrap(),
                resources,
            )
            .map_err(DeviceManagerError::AllocateBars)?;

        let mut pci_bus = self.pci_segments[segment_id as usize]
            .pci_bus
            .lock()
            .unwrap();

        pci_bus
            .add_device(bdf.device() as u32, pci_device)
            .map_err(DeviceManagerError::AddPciDevice)?;

        self.bus_devices.push(Arc::clone(&bus_device));

        pci_bus
            .register_mapping(
                bus_device,
                self.address_manager.io_bus.as_ref(),
                self.address_manager.mmio_bus.as_ref(),
                bars.clone(),
            )
            .map_err(DeviceManagerError::AddPciDevice)?;

        let mut new_resources = Vec::new();
        for bar in bars {
            new_resources.push(Resource::PciBar {
                index: bar.idx(),
                base: bar.addr(),
                size: bar.size(),
                type_: bar.region_type().into(),
                prefetchable: bar.prefetchable().into(),
            });
        }

        Ok(new_resources)
    }

    /// Add every VFIO passthrough device from the VM config; returns the BDFs
    /// of those attached to the virtual IOMMU.
    fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut iommu_attached_device_ids = Vec::new();
        let mut devices = self.config.lock().unwrap().devices.clone();

        if let Some(device_list_cfg) = &mut devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (device_id, _) = self.add_passthrough_device(device_cfg)?;
                if device_cfg.iommu && self.iommu_device.is_some() {
                    iommu_attached_device_ids.push(device_id);
                }
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().devices = devices;

        Ok(iommu_attached_device_ids)
    }

    /// Create and register one vfio-user PCI device from `device_cfg`,
    /// connecting to the backend over the configured socket and installing
    /// DMA mappings for all guest memory zones.
    fn add_vfio_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_user_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;

        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let client = Arc::new(Mutex::new(
            vfio_user::Client::new(&device_cfg.socket)
                .map_err(DeviceManagerError::VfioUserCreateClient)?,
        ));

        let memory_manager = self.memory_manager.clone();

        let mut vfio_user_pci_device = VfioUserPciDevice::new(
            vfio_user_name.clone(),
            &self.address_manager.vm,
            client.clone(),
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            pci_device_bdf,
            memory_manager.lock().unwrap().memory_slot_allocator(),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
        )
        .map_err(DeviceManagerError::VfioUserCreate)?;

        let memory = self.memory_manager.lock().unwrap().guest_memory();
        let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
        // Let virtio-mem devices drive this device's DMA map/unmap as their
        // regions are plugged/unplugged.
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Device(pci_device_bdf.into()),
                    vfio_user_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }

        // Map all existing guest memory regions for DMA.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_user_pci_device
                    .dma_map(region)
                    .map_err(DeviceManagerError::VfioUserDmaMap)?;
            }
        }

        let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

        let new_resources = self.add_pci_device(
            vfio_user_pci_device.clone(),
            vfio_user_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Note it is required to call 'add_pci_device()' in advance to have the list of
        // mmio regions provisioned correctly
        vfio_user_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioUserMapRegion)?;

        let mut node = device_node!(vfio_user_name, vfio_user_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_user_name.clone(), node);

        Ok((pci_device_bdf, vfio_user_name))
    }

    /// Add every vfio-user device from the VM config, writing the (possibly
    /// generated) ids back into the config.
    fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut user_devices = self.config.lock().unwrap().user_devices.clone();

        if let Some(device_list_cfg) = &mut user_devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().user_devices = user_devices;

        Ok(vec![])
    }

    /// Wrap a virtio device into a virtio-pci transport device, wiring up
    /// MSI-X vectors, optional vIOMMU translation, DMA handlers and
    /// ioeventfds, and register it on the requested PCI segment.
    fn add_virtio_pci_device(
        &mut self,
        virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu_mapping: &Option<Arc<IommuMapping>>,
        virtio_device_id: String,
        pci_segment_id: u16,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> DeviceManagerResult<PciBdf> {
        let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");

        // Add the new virtio-pci node to the device tree.
        let mut node = device_node!(id);
        node.children = vec![virtio_device_id.clone()];

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        // Update the existing virtio node by setting the parent.
        if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
            node.parent = Some(id.clone());
        } else {
            return Err(DeviceManagerError::MissingNode);
        }

        // Allows support for one MSI-X vector per queue. It also adds 1
        // as we need to take into account the dedicated vector to notify
        // about a virtio config change.
3992 let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16; 3993 3994 // Create the AccessPlatform trait from the implementation IommuMapping. 3995 // This will provide address translation for any virtio device sitting 3996 // behind a vIOMMU. 3997 let mut access_platform: Option<Arc<dyn AccessPlatform>> = None; 3998 3999 if let Some(mapping) = iommu_mapping { 4000 access_platform = Some(Arc::new(AccessPlatformMapping::new( 4001 pci_device_bdf.into(), 4002 mapping.clone(), 4003 ))); 4004 } 4005 4006 // If SEV-SNP is enabled create the AccessPlatform from SevSnpPageAccessProxy 4007 #[cfg(feature = "sev_snp")] 4008 if self.config.lock().unwrap().is_sev_snp_enabled() { 4009 access_platform = Some(Arc::new(SevSnpPageAccessProxy::new( 4010 self.address_manager.vm.clone(), 4011 ))); 4012 } 4013 4014 let memory = self.memory_manager.lock().unwrap().guest_memory(); 4015 4016 // Map DMA ranges if a DMA handler is available and if the device is 4017 // not attached to a virtual IOMMU. 4018 if let Some(dma_handler) = &dma_handler { 4019 if iommu_mapping.is_some() { 4020 if let Some(iommu) = &self.iommu_device { 4021 iommu 4022 .lock() 4023 .unwrap() 4024 .add_external_mapping(pci_device_bdf.into(), dma_handler.clone()); 4025 } else { 4026 return Err(DeviceManagerError::MissingVirtualIommu); 4027 } 4028 } else { 4029 // Let every virtio-mem device handle the DMA map/unmap through the 4030 // DMA handler provided. 4031 for virtio_mem_device in self.virtio_mem_devices.iter() { 4032 virtio_mem_device 4033 .lock() 4034 .unwrap() 4035 .add_dma_mapping_handler( 4036 VirtioMemMappingSource::Device(pci_device_bdf.into()), 4037 dma_handler.clone(), 4038 ) 4039 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 4040 } 4041 4042 // Do not register virtio-mem regions, as they are handled directly by 4043 // virtio-mem devices. 
4044 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4045 for region in zone.regions() { 4046 let gpa = region.start_addr().0; 4047 let size = region.len(); 4048 dma_handler 4049 .map(gpa, gpa, size) 4050 .map_err(DeviceManagerError::VirtioDmaMap)?; 4051 } 4052 } 4053 } 4054 } 4055 4056 let device_type = virtio_device.lock().unwrap().device_type(); 4057 let virtio_pci_device = Arc::new(Mutex::new( 4058 VirtioPciDevice::new( 4059 id.clone(), 4060 memory, 4061 virtio_device, 4062 msix_num, 4063 access_platform, 4064 &self.msi_interrupt_manager, 4065 pci_device_bdf.into(), 4066 self.activate_evt 4067 .try_clone() 4068 .map_err(DeviceManagerError::EventFd)?, 4069 // All device types *except* virtio block devices should be allocated a 64-bit bar 4070 // The block devices should be given a 32-bit BAR so that they are easily accessible 4071 // to firmware without requiring excessive identity mapping. 4072 // The exception being if not on the default PCI segment. 4073 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32, 4074 dma_handler, 4075 self.pending_activations.clone(), 4076 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()), 4077 ) 4078 .map_err(DeviceManagerError::VirtioDevice)?, 4079 )); 4080 4081 let new_resources = self.add_pci_device( 4082 virtio_pci_device.clone(), 4083 virtio_pci_device.clone(), 4084 pci_segment_id, 4085 pci_device_bdf, 4086 resources, 4087 )?; 4088 4089 let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr(); 4090 for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) { 4091 let io_addr = IoEventAddress::Mmio(addr); 4092 self.address_manager 4093 .vm 4094 .register_ioevent(event, &io_addr, None) 4095 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?; 4096 } 4097 4098 // Update the device tree with correct resource information. 
        // (tail of add_virtio_pci_device) Record the final resources and the
        // virtio-pci handle in the device tree so the device can later be
        // migrated, snapshotted or removed by id.
        node.resources = new_resources;
        node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
        self.device_tree.lock().unwrap().insert(id, node);

        Ok(pci_device_bdf)
    }

    /// Create the pvpanic device, plug it on PCI segment 0 and register it
    /// in the device tree. Returns the created device so the caller can keep
    /// a handle on it.
    fn add_pvpanic_device(
        &mut self,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
        let id = String::from(PVPANIC_DEVICE_NAME);
        // pvpanic always lives on the default PCI segment.
        let pci_segment_id = 0x0_u16;

        info!("Creating pvpanic device {}", id);

        // Either restore (segment, BDF, resources) from the device tree or
        // allocate a fresh BDF on the requested segment.
        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
            .map_err(DeviceManagerError::PvPanicCreate)?;

        let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));

        let new_resources = self.add_pci_device(
            pvpanic_device.clone(),
            pvpanic_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let mut node = device_node!(id, pvpanic_device);

        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        // pvpanic is not tracked through a PciDeviceHandle.
        node.pci_device_handle = None;

        self.device_tree.lock().unwrap().insert(id, node);

        Ok(Some(pvpanic_device))
    }

    /// Resolve the (PCI segment, BDF, resources) triple for device `id`.
    ///
    /// If `id` is already present in the device tree, the device is being
    /// restored: its previous BDF is re-reserved on the owning bus and the
    /// saved resources are returned. Otherwise a fresh BDF is allocated on
    /// `pci_segment_id` and no resources are returned.
    fn pci_resources(
        &self,
        id: &str,
        pci_segment_id: u16,
    ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        let (pci_device_bdf, resources) =
            if let Some(node) = self.device_tree.lock().unwrap().get(id) {
                info!("Restoring virtio-pci {} resources", id);
                let pci_device_bdf: PciBdf = node
                    .pci_bdf
                    .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
                (Some(pci_device_bdf), Some(node.resources.clone()))
            } else {
                (None, None)
            };

        Ok(if let Some(pci_device_bdf) = pci_device_bdf {
            // Restore path: the segment comes from the saved BDF, and the
            // device slot must be re-reserved on that segment's bus.
            let pci_segment_id = pci_device_bdf.segment();

            self.pci_segments[pci_segment_id as usize]
                .pci_bus
                .lock()
                .unwrap()
                .get_device_id(pci_device_bdf.device() as usize)
                .map_err(DeviceManagerError::GetPciDeviceId)?;

            (pci_segment_id, pci_device_bdf, resources)
        } else {
            // Fresh device: allocate the next free BDF on the requested segment.
            let pci_device_bdf = self.pci_segments[pci_segment_id as usize].next_device_bdf()?;

            (pci_segment_id, pci_device_bdf, None)
        })
    }

    /// Port-I/O bus shared with the address manager (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }

    /// MMIO bus shared with the address manager.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }

    /// System-wide resource allocator shared with the address manager.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }

    /// Interrupt controller as a trait object, if one has been created.
    pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
        self.interrupt_controller
            .as_ref()
            .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
    }

    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }

    /// Extra kernel command-line fragments accumulated while creating devices.
    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
    pub fn cmdline_additions(&self) -> &[String] {
        self.cmdline_additions.as_slice()
    }

    /// Propagate a newly added guest memory region to every device that
    /// needs to know about it: virtio devices, their DMA handlers (when not
    /// behind the vIOMMU), the VFIO container and vfio-user devices.
    pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
        for handle in self.virtio_devices.iter() {
            handle
                .virtio_device
                .lock()
                .unwrap()
                .add_memory_region(new_region)
                .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;

            // Identity-map the new region (iova == gpa) for devices with a
            // DMA handler, unless the device sits behind the vIOMMU.
            if let Some(dma_handler) = &handle.dma_handler {
                if !handle.iommu {
                    let gpa = new_region.start_addr().0;
                    let size = new_region.len();
                    dma_handler
                        .map(gpa, gpa, size)
                        .map_err(DeviceManagerError::VirtioDmaMap)?;
                }
            }
        }

        // Take care of updating the memory for VFIO PCI devices.
        if let Some(vfio_container) = &self.vfio_container {
            vfio_container
                .vfio_dma_map(
                    new_region.start_addr().raw_value(),
                    new_region.len(),
                    new_region.as_ptr() as u64,
                )
                .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
        }

        // Take care of updating the memory for vfio-user devices.
        {
            let device_tree = self.device_tree.lock().unwrap();
            for pci_device_node in device_tree.pci_devices() {
                if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
                    .pci_device_handle
                    .as_ref()
                    .ok_or(DeviceManagerError::MissingPciDevice)?
                {
                    vfio_user_pci_device
                        .lock()
                        .unwrap()
                        .dma_map(new_region)
                        .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
                }
            }
        }

        Ok(())
    }

    /// Drain the pending-activation queue and activate each virtio device.
    pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
        for mut activator in self.pending_activations.lock().unwrap().drain(..) {
            activator
                .activate()
                .map_err(DeviceManagerError::VirtioActivate)?;
        }
        Ok(())
    }

    /// Forward a hotplug notification to the guest through the GED device.
    ///
    /// Panics if no GED notification device was created (unwrap below);
    /// presumably callers only invoke this on configurations where the GED
    /// device exists — TODO confirm.
    pub fn notify_hotplug(
        &self,
        _notification_type: AcpiNotificationFlags,
    ) -> DeviceManagerResult<()> {
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(_notification_type)
            .map_err(DeviceManagerError::HotPlugNotification);
    }

    /// Hotplug a passthrough (VFIO) device and flag it as newly plugged in
    /// the segment's PCIU bitmap so the guest picks it up on the next scan.
    pub fn add_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&device_cfg.id)?;

        // An IOMMU-attached device can only go on a segment declared as an
        // IOMMU segment in the platform config.
        if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;

        // Update the PCIU bitmap
        self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo {
            id: device_name,
            bdf,
        })
    }

    /// Hotplug a vfio-user device and flag it in the PCIU bitmap.
    pub fn add_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&device_cfg.id)?;

        let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;

        // Update the PCIU bitmap
        self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo {
            id: device_name,
            bdf,
        })
    }

    /// Request removal of the device identified by `id`.
    ///
    /// This only flags the device in the segment's PCID bitmap; the actual
    /// teardown happens when the guest ejects the slot (see `eject_device`).
    /// Only Net/Block/Pmem/Fs/Vsock virtio devices may be removed.
    pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
        // The node can be directly a PCI node in case the 'id' refers to a
        // VFIO device or a virtio-pci one.
        // In case the 'id' refers to a virtio device, we must find the PCI
        // node by looking at the parent.
        let device_tree = self.device_tree.lock().unwrap();
        let node = device_tree
            .get(&id)
            .ok_or(DeviceManagerError::UnknownDeviceId(id.clone()))?;

        // Release advisory locks by dropping all references.
        // Linux automatically releases all locks of that file if the last open FD is closed.
        {
            let maybe_block_device_index = self
                .block_devices
                .iter()
                .enumerate()
                .find(|(_, dev)| {
                    let dev = dev.lock().unwrap();
                    dev.id() == id
                })
                .map(|(i, _)| i);
            if let Some(index) = maybe_block_device_index {
                // Order of block_devices does not need to be preserved here,
                // so the O(1) swap_remove is used.
                let _ = self.block_devices.swap_remove(index);
            }
        }

        let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
            node
        } else {
            let parent = node
                .parent
                .as_ref()
                .ok_or(DeviceManagerError::MissingNode)?;
            device_tree
                .get(parent)
                .ok_or(DeviceManagerError::MissingNode)?
        };

        let pci_device_bdf: PciBdf = pci_device_node
            .pci_bdf
            .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
        let pci_segment_id = pci_device_bdf.segment();

        let pci_device_handle = pci_device_node
            .pci_device_handle
            .as_ref()
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        #[allow(irrefutable_let_patterns)]
        if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
            let device_type = VirtioDeviceType::from(
                virtio_pci_device
                    .lock()
                    .unwrap()
                    .virtio_device()
                    .lock()
                    .unwrap()
                    .device_type(),
            );
            // Reject removal of virtio device types that are not safe/meant
            // to be unplugged at runtime.
            match device_type {
                VirtioDeviceType::Net
                | VirtioDeviceType::Block
                | VirtioDeviceType::Pmem
                | VirtioDeviceType::Fs
                | VirtioDeviceType::Vsock => {}
                _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
            }
        }

        // Update the PCID bitmap
        self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();

        Ok(())
    }

    /// Tear down the device in slot `device_id` on `pci_segment_id` after the
    /// guest has ejected it.
    ///
    /// Teardown order matters: return the slot id to the bus, drop the device
    /// tree node(s), unregister ioeventfds / unmap DMA, free BARs, then remove
    /// the device from the PCI/IO/MMIO buses and shut down any virtio backend.
    pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
        info!(
            "Ejecting device_id = {} on segment_id={}",
            device_id, pci_segment_id
        );

        // Convert the device ID into the corresponding b/d/f.
        let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);

        // Give the PCI device ID back to the PCI bus.
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .put_device_id(device_id as usize)
            .map_err(DeviceManagerError::PutPciDeviceId)?;

        let (pci_device_handle, id) = {
            // Remove the device from the device tree along with its children.
            let mut device_tree = self.device_tree.lock().unwrap();
            let pci_device_node = device_tree
                .remove_node_by_pci_bdf(pci_device_bdf)
                .ok_or(DeviceManagerError::MissingPciDevice)?;

            // For VFIO and vfio-user the PCI device id is the id.
            // For virtio we overwrite it later as we want the id of the
            // underlying device.
            let mut id = pci_device_node.id;
            let pci_device_handle = pci_device_node
                .pci_device_handle
                .ok_or(DeviceManagerError::MissingPciDevice)?;
            if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
                // The virtio-pci device has a single child
                if !pci_device_node.children.is_empty() {
                    assert_eq!(pci_device_node.children.len(), 1);
                    let child_id = &pci_device_node.children[0];
                    id.clone_from(child_id);
                }
            }
            for child in pci_device_node.children.iter() {
                device_tree.remove(child);
            }

            (pci_device_handle, id)
        };

        // DMA unmapping below must be skipped for vIOMMU-attached devices,
        // since their mappings are managed by the vIOMMU instead.
        let mut iommu_attached = false;
        if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
            if iommu_attached_devices.contains(&pci_device_bdf) {
                iommu_attached = true;
            }
        }

        let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
            // No need to remove any virtio-mem mapping here as the container outlives all devices
            PciDeviceHandle::Vfio(vfio_pci_device) => {
                for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
                    self.mmio_regions
                        .lock()
                        .unwrap()
                        .retain(|x| x.start != mmio_region.start)
                }

                (
                    Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_pci_device) as Arc<dyn BusDeviceSync>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    false,
                )
            }
            PciDeviceHandle::Virtio(virtio_pci_device) => {
                let dev = virtio_pci_device.lock().unwrap();
                // Undo the ioeventfd registrations done when the device was added.
                let bar_addr = dev.config_bar_addr();
                for (event, addr) in dev.ioeventfds(bar_addr) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.address_manager
                        .vm
                        .unregister_ioevent(event, &io_addr)
                        .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
                }

                // Unmap the identity DMA mappings that were established for
                // every memory zone region (mirrors add_virtio_pci_device).
                if let Some(dma_handler) = dev.dma_handler() {
                    if !iommu_attached {
                        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                            for region in zone.regions() {
                                let iova = region.start_addr().0;
                                let size = region.len();
                                dma_handler
                                    .unmap(iova, size)
                                    .map_err(DeviceManagerError::VirtioDmaUnmap)?;
                            }
                        }
                    }
                }

                (
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&virtio_pci_device) as Arc<dyn BusDeviceSync>,
                    Some(dev.virtio_device()),
                    dev.dma_handler().is_some() && !iommu_attached,
                )
            }
            PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
                let mut dev = vfio_user_pci_device.lock().unwrap();
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        dev.dma_unmap(region)
                            .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
                    }
                }

                (
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_user_pci_device) as Arc<dyn BusDeviceSync>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    true,
                )
            }
        };

        // Detach this device's DMA handler from every virtio-mem device so
        // future plug/unplug operations stop notifying it.
        if remove_dma_handler {
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
                        pci_device_bdf.into(),
                    ))
                    .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
            }
        }

        // Free the allocated BARs
        pci_device
            .lock()
            .unwrap()
            .free_bars(
                &mut self.address_manager.allocator.lock().unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .mem32_allocator
                    .lock()
                    .unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .mem64_allocator
                    .lock()
                    .unwrap(),
            )
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the list and
        // buses where it was stored. At the end of this function, after
        // any_device, bus_device and pci_device are released, the actual
        // device will be dropped.
        Ok(())
    }

    /// Wrap a freshly built virtio device into a virtio-pci transport,
    /// register it, and flag it in the PCIU bitmap for guest notification.
    fn hotplug_virtio_pci_device(
        &mut self,
        handle: MetaVirtioDevice,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        // Add the virtio device to the device manager list. This is important
        // as the list is used to notify virtio devices about memory updates
        // for instance.
        self.virtio_devices.push(handle.clone());

        // Only hand the vIOMMU mapping to devices that asked to be attached
        // to the vIOMMU.
        let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
            self.iommu_mapping.clone()
        } else {
            None
        };

        let bdf = self.add_virtio_pci_device(
            handle.virtio_device,
            &mapping,
            handle.id.clone(),
            handle.pci_segment,
            handle.dma_handler,
        )?;

        // Update the PCIU bitmap
        self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo { id: handle.id, bdf })
    }

    /// Whether `pci_segment_id` was declared as an IOMMU segment in the
    /// platform configuration. Defaults to false if no platform config or
    /// no iommu_segments list is present.
    fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
        self.config
            .lock()
            .as_ref()
            .unwrap()
            .platform
            .as_ref()
            .map(|pc| {
                pc.iommu_segments
                    .as_ref()
                    .map(|v| v.contains(&pci_segment_id))
                    .unwrap_or_default()
            })
            .unwrap_or_default()
    }

    /// Hotplug a virtio-block device.
    pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&disk_cfg.id)?;

        if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_block_device(disk_cfg, true)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplug a virtio-fs device. Note: no IOMMU-segment check here,
    /// matching the absence of an `iommu` knob on this path.
    pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&fs_cfg.id)?;

        let device = self.make_virtio_fs_device(fs_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplug a virtio-pmem device.
    pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&pmem_cfg.id)?;

        if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_pmem_device(pmem_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplug a virtio-net device.
    pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&net_cfg.id)?;

        if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_net_device(net_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplug a vDPA device.
    pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vdpa_cfg.id)?;

        if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_vdpa_device(vdpa_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplug a virtio-vsock device.
    pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vsock_cfg.id)?;

        if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_vsock_device(vsock_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Collect per-device counters from every virtio device that exposes
    /// them, keyed by device id.
    pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
        let mut counters = HashMap::new();

        for handle in &self.virtio_devices {
            let virtio_device = handle.virtio_device.lock().unwrap();
            if let Some(device_counters) = virtio_device.counters() {
                counters.insert(handle.id.clone(), device_counters.clone());
            }
        }

        counters
    }

    /// Ask the virtio-balloon device to resize to `size` bytes.
    /// Fails with MissingVirtioBalloon when no balloon was configured.
    pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
        if let Some(balloon) = &self.balloon {
            return balloon
                .lock()
                .unwrap()
                .resize(size)
                .map_err(DeviceManagerError::VirtioBalloonResize);
        }

        warn!("No balloon setup: Can't resize the balloon");
        Err(DeviceManagerError::MissingVirtioBalloon)
    }

    /// Current actual balloon size, or 0 when no balloon is configured.
    pub fn balloon_size(&self) -> u64 {
        if let Some(balloon) = &self.balloon {
            return balloon.lock().unwrap().get_actual();
        }

        0
    }

    /// Shared handle to the device tree.
    pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
        self.device_tree.clone()
    }

    /// Signal a power-button press to the guest via the GED device.
    #[cfg(target_arch = "x86_64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    /// Signal a power-button press to the guest, covering both boot flows.
    #[cfg(target_arch = "aarch64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // There are two use cases:
        // 1. Users will use direct kernel boot with device tree.
        // 2. Users will use ACPI+UEFI boot.

        // Trigger a GPIO pin 3 event to satisfy use case 1.
        self.gpio_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .trigger_key(3)
            .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
        // Trigger a GED power button event to satisfy use case 2.
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification);
    }

    /// The (vIOMMU BDF, attached device BDFs) pair, if a vIOMMU exists.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }

    /// Reject ids that use the reserved "__" prefix or collide with an
    /// existing device tree entry. A None id is always accepted.
    fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
        if let Some(id) = id {
            // "__"-prefixed ids are reserved (internally generated names).
            if id.starts_with("__") {
                return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
            }

            if self.device_tree.lock().unwrap().contains_key(id) {
                return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
            }
        }

        Ok(())
    }

    /// Addresses of ACPI-described platform devices collected during setup.
    #[cfg(not(target_arch = "riscv64"))]
    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
}

/// Find the NUMA node owning the given memory zone id, if any.
fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
            return Some(*numa_node_id);
        }
    }

    None
}

/// Find the NUMA node owning the given PCI segment; defaults to node 0
/// when the segment is not listed on any node.
fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.pci_segments.contains(&pci_segment_id) {
            return *numa_node_id;
        }
    }

    0
}

/// Marker type used to emit the TPM 2.0 ACPI device description.
#[cfg(not(target_arch = "riscv64"))]
struct TpmDevice {}

#[cfg(not(target_arch = "riscv64"))]
impl Aml for TpmDevice {
    // Emit a "TPM2" ACPI device (_HID MSFT0101) with a fixed MMIO window
    // at the layout-defined TPM address range.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        aml::Device::new(
            "TPM2".into(),
            vec![
                &aml::Name::new("_HID".into(), &"MSFT0101"),
                &aml::Name::new("_STA".into(), &(0xF_usize)),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                        true,
                        layout::TPM_START.0 as u32,
                        layout::TPM_SIZE as u32,
                    )]),
                ),
            ],
        )
        .to_aml_bytes(sink)
    }
}

#[cfg(not(target_arch = "riscv64"))]
impl Aml for DeviceManager {
    /// Emit the DeviceManager's share of the DSDT: the PCI hotplug
    /// controller (PHPR), the PCI segments, the motherboard resource
    /// device, serial/power-button/TPM devices and the GED device.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // One PCNT (re-scan) method call per PCI segment; PSCN below
        // invokes all of them.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                // Field layout matches the *_FIELD_OFFSET constants handled
                // by the BusDevice impl below.
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write PCI bus number (in first argument) to I/O port via field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        // Motherboard resource device claiming the per-segment PCI MMIO
        // config windows so the OS does not assign them elsewhere.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        // S5 (soft-off) sleep state package.
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        // ACPI power button device.
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}

impl Pausable for DeviceManager {
    /// Pause every migratable device registered in the device tree.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    /// Resume every migratable device registered in the device tree.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }
        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    /// Snapshot the DeviceManager state plus the state of every migratable
    /// device, nested under this snapshot by device id.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let mut migratable = migratable.lock().unwrap();
                snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
            }
        }

        Ok(snapshot)
    }
}

impl Transportable for DeviceManager {}

// Each Migratable hook simply fans out to every migratable device found in
// the device tree.
impl Migratable for DeviceManager {
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    /// Merge the dirty-page ranges reported by every device into one table.
    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_migration()?;
            }
        }
        Ok(())
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}

// Layout of the PCI hotplug controller MMIO register window. These offsets
// and sizes must stay in sync with the "PCST" ACPI field definitions emitted
// in `impl Aml for DeviceManager`.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;

// MMIO handler for the PCI hotplug controller registers accessed by the
// guest's ACPI methods (PSCN/PCEJ).
impl BusDevice for DeviceManager {
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            // PCIU/PCID are read-to-clear: reading returns the pending
            // plug/unplug bitmap for the selected segment and resets it.
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_up
                        .to_le_bytes(),
                );
                // Clear the PCIU bitmap
                self.pci_segments[self.selected_segment].pci_devices_up = 0;
            }
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_down
                        .to_le_bytes(),
                );
                // Clear the PCID bitmap
                self.pci_segments[self.selected_segment].pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty bitmap since the eject is always
                // taken care of right away during a write access.
                data.fill(0);
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        )
    }

    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
        match offset {
            // B0EJ: each set bit is a device slot to eject on the currently
            // selected segment.
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let mut slot_bitmap = u32::from_le_bytes(data_array);

                while slot_bitmap > 0 {
                    let slot_id = slot_bitmap.trailing_zeros();
                    if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
                        // Eject failures are logged but must not stop the
                        // processing of the remaining slots.
                        error!("Failed ejecting device {}: {:?}", slot_id, e);
                    }
                    slot_bitmap &= !(1 << slot_id);
                }
            }
            // PSEG: select which PCI segment subsequent accesses refer to.
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let selected_segment = u32::from_le_bytes(data_array) as usize;
                if selected_segment >= self.pci_segments.len() {
                    error!(
                        "Segment selection out of range: {} >= {}",
                        selected_segment,
                        self.pci_segments.len()
                    );
                    return None;
                }
                self.selected_segment = selected_segment;
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        );

        None
    }
}

impl Drop for DeviceManager {
    fn drop(&mut self) {
        // Wake up the DeviceManager threads (mainly virtio device workers),
        // to avoid deadlock
on waiting for paused/parked worker threads. 5242 if let Err(e) = self.resume() { 5243 error!("Error resuming DeviceManager: {:?}", e); 5244 } 5245 5246 for handle in self.virtio_devices.drain(..) { 5247 handle.virtio_device.lock().unwrap().shutdown(); 5248 } 5249 5250 if let Some(termios) = *self.original_termios_opt.lock().unwrap() { 5251 // SAFETY: FFI call 5252 let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) }; 5253 } 5254 } 5255 } 5256 5257 #[cfg(test)] 5258 mod tests { 5259 use super::*; 5260 5261 #[test] 5262 fn test_create_mmio_allocators() { 5263 let res = create_mmio_allocators(0x100000, 0x400000, 1, vec![1], 4 << 10); 5264 assert_eq!(res.len(), 1); 5265 assert_eq!( 5266 res[0].lock().unwrap().base(), 5267 vm_memory::GuestAddress(0x100000) 5268 ); 5269 assert_eq!( 5270 res[0].lock().unwrap().end(), 5271 vm_memory::GuestAddress(0x3fffff) 5272 ); 5273 5274 let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![1, 1], 4 << 10); 5275 assert_eq!(res.len(), 2); 5276 assert_eq!( 5277 res[0].lock().unwrap().base(), 5278 vm_memory::GuestAddress(0x100000) 5279 ); 5280 assert_eq!( 5281 res[0].lock().unwrap().end(), 5282 vm_memory::GuestAddress(0x27ffff) 5283 ); 5284 assert_eq!( 5285 res[1].lock().unwrap().base(), 5286 vm_memory::GuestAddress(0x280000) 5287 ); 5288 assert_eq!( 5289 res[1].lock().unwrap().end(), 5290 vm_memory::GuestAddress(0x3fffff) 5291 ); 5292 5293 let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![2, 1], 4 << 10); 5294 assert_eq!(res.len(), 2); 5295 assert_eq!( 5296 res[0].lock().unwrap().base(), 5297 vm_memory::GuestAddress(0x100000) 5298 ); 5299 assert_eq!( 5300 res[0].lock().unwrap().end(), 5301 vm_memory::GuestAddress(0x2fffff) 5302 ); 5303 assert_eq!( 5304 res[1].lock().unwrap().base(), 5305 vm_memory::GuestAddress(0x300000) 5306 ); 5307 assert_eq!( 5308 res[1].lock().unwrap().end(), 5309 vm_memory::GuestAddress(0x3fffff) 5310 ); 5311 } 5312 } 5313