1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use std::collections::{BTreeMap, BTreeSet, HashMap}; 13 use std::fs::{File, OpenOptions}; 14 use std::io::{self, stdout, IsTerminal, Seek, SeekFrom}; 15 use std::num::Wrapping; 16 use std::os::unix::fs::OpenOptionsExt; 17 use std::os::unix::io::{AsRawFd, FromRawFd}; 18 use std::path::PathBuf; 19 use std::result; 20 use std::sync::{Arc, Mutex}; 21 use std::time::Instant; 22 23 use acpi_tables::sdt::GenericAddress; 24 use acpi_tables::{aml, Aml}; 25 use anyhow::anyhow; 26 #[cfg(target_arch = "x86_64")] 27 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 28 use arch::{layout, NumaNodes}; 29 #[cfg(target_arch = "aarch64")] 30 use arch::{DeviceType, MmioDeviceInfo}; 31 use block::async_io::DiskFile; 32 use block::fixed_vhd_sync::FixedVhdDiskSync; 33 use block::qcow_sync::QcowDiskSync; 34 use block::raw_async_aio::RawFileDiskAio; 35 use block::raw_sync::RawFileDiskSync; 36 use block::vhdx_sync::VhdxDiskSync; 37 use block::{ 38 block_aio_is_supported, block_io_uring_is_supported, detect_image_type, qcow, vhdx, ImageType, 39 }; 40 #[cfg(feature = "io_uring")] 41 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; 42 #[cfg(target_arch = "x86_64")] 43 use devices::debug_console::DebugConsole; 44 #[cfg(target_arch = "aarch64")] 45 use devices::gic; 46 use devices::interrupt_controller::InterruptController; 47 #[cfg(target_arch = "x86_64")] 48 use devices::ioapic; 49 #[cfg(target_arch = "aarch64")] 50 use devices::legacy::Pl011; 51 #[cfg(feature = "pvmemcontrol")] 52 use devices::pvmemcontrol::{PvmemcontrolBusDevice, PvmemcontrolPciDevice}; 53 use 
devices::{interrupt_controller, AcpiNotificationFlags}; 54 use hypervisor::IoEventAddress; 55 use libc::{ 56 tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE, 57 TCSANOW, 58 }; 59 use pci::{ 60 DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping, 61 VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError, 62 }; 63 use rate_limiter::group::RateLimiterGroup; 64 use seccompiler::SeccompAction; 65 use serde::{Deserialize, Serialize}; 66 use tracer::trace_scoped; 67 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 68 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator, VirtioTransport}; 69 use virtio_devices::vhost_user::VhostUserConfig; 70 use virtio_devices::{ 71 AccessPlatformMapping, ActivateError, Endpoint, IommuMapping, VdpaDmaMapping, 72 VirtioMemMappingSource, 73 }; 74 use vm_allocator::{AddressAllocator, SystemAllocator}; 75 use vm_device::dma_mapping::ExternalDmaMapping; 76 use vm_device::interrupt::{ 77 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 78 }; 79 use vm_device::{Bus, BusDevice, BusDeviceSync, Resource}; 80 use vm_memory::guest_memory::FileOffset; 81 use vm_memory::{Address, GuestAddress, GuestMemoryRegion, GuestUsize, MmapRegion}; 82 #[cfg(target_arch = "x86_64")] 83 use vm_memory::{GuestAddressSpace, GuestMemory}; 84 use vm_migration::protocol::MemoryRangeTable; 85 use vm_migration::{ 86 snapshot_from_id, state_from_id, Migratable, MigratableError, Pausable, Snapshot, SnapshotData, 87 Snapshottable, Transportable, 88 }; 89 use vm_virtio::{AccessPlatform, VirtioDeviceType}; 90 use vmm_sys_util::eventfd::EventFd; 91 #[cfg(target_arch = "x86_64")] 92 use {devices::debug_console, devices::legacy::Serial}; 93 94 use crate::config::{ 95 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 96 VdpaConfig, VhostMode, VmConfig, VsockConfig, 97 }; 98 use 
crate::console_devices::{ConsoleDeviceError, ConsoleInfo, ConsoleOutput};
use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE};
use crate::device_tree::{DeviceNode, DeviceTree};
use crate::interrupt::{LegacyUserspaceInterruptManager, MsiInterruptManager};
use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
use crate::pci_segment::PciSegment;
use crate::serial_manager::{Error as SerialManagerError, SerialManager};
use crate::vm_config::DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT;
use crate::{device_node, GuestRegionMmap, PciDeviceInfo, DEVICE_MANAGER_SNAPSHOT_ID};

// Size of the MMIO window given to each AArch64 legacy MMIO device.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name.
// The double underscore prefix keeps these out of the user-visible namespace.
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "x86_64")]
const DEBUGCON_DEVICE_NAME: &str = "__debug_console";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
#[cfg(feature = "pvmemcontrol")]
const PVMEMCONTROL_DEVICE_NAME: &str = "__pvmemcontrol";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
const PVPANIC_DEVICE_NAME: &str = "__pvpanic";

// Devices that the user may name and for which we generate
// identifiers (prefix + counter) if the user doesn't give one.
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str =
"_vfio_user"; 138 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci"; 139 140 /// Errors associated with device manager 141 #[derive(Debug)] 142 pub enum DeviceManagerError { 143 /// Cannot create EventFd. 144 EventFd(io::Error), 145 146 /// Cannot open disk path 147 Disk(io::Error), 148 149 /// Cannot create vhost-user-net device 150 CreateVhostUserNet(virtio_devices::vhost_user::Error), 151 152 /// Cannot create virtio-blk device 153 CreateVirtioBlock(io::Error), 154 155 /// Cannot create virtio-net device 156 CreateVirtioNet(virtio_devices::net::Error), 157 158 /// Cannot create virtio-console device 159 CreateVirtioConsole(io::Error), 160 161 /// Cannot create virtio-rng device 162 CreateVirtioRng(io::Error), 163 164 /// Cannot create virtio-fs device 165 CreateVirtioFs(virtio_devices::vhost_user::Error), 166 167 /// Virtio-fs device was created without a socket. 168 NoVirtioFsSock, 169 170 /// Cannot create vhost-user-blk device 171 CreateVhostUserBlk(virtio_devices::vhost_user::Error), 172 173 /// Cannot create virtio-pmem device 174 CreateVirtioPmem(io::Error), 175 176 /// Cannot create vDPA device 177 CreateVdpa(virtio_devices::vdpa::Error), 178 179 /// Cannot create virtio-vsock device 180 CreateVirtioVsock(io::Error), 181 182 /// Cannot create tpm device 183 CreateTpmDevice(anyhow::Error), 184 185 /// Failed to convert Path to &str for the vDPA device. 186 CreateVdpaConvertPath, 187 188 /// Failed to convert Path to &str for the virtio-vsock device. 
189 CreateVsockConvertPath, 190 191 /// Cannot create virtio-vsock backend 192 CreateVsockBackend(virtio_devices::vsock::VsockUnixError), 193 194 /// Cannot create virtio-iommu device 195 CreateVirtioIommu(io::Error), 196 197 /// Cannot create virtio-balloon device 198 CreateVirtioBalloon(io::Error), 199 200 /// Cannot create pvmemcontrol device 201 #[cfg(feature = "pvmemcontrol")] 202 CreatePvmemcontrol(io::Error), 203 204 /// Cannot create virtio-watchdog device 205 CreateVirtioWatchdog(io::Error), 206 207 /// Failed to parse disk image format 208 DetectImageType(io::Error), 209 210 /// Cannot open qcow disk path 211 QcowDeviceCreate(qcow::Error), 212 213 /// Cannot create serial manager 214 CreateSerialManager(SerialManagerError), 215 216 /// Cannot spawn the serial manager thread 217 SpawnSerialManager(SerialManagerError), 218 219 /// Cannot open tap interface 220 OpenTap(net_util::TapError), 221 222 /// Cannot allocate IRQ. 223 AllocateIrq, 224 225 /// Cannot configure the IRQ. 226 Irq(vmm_sys_util::errno::Error), 227 228 /// Cannot allocate PCI BARs 229 AllocateBars(pci::PciDeviceError), 230 231 /// Could not free the BARs associated with a PCI device. 232 FreePciBars(pci::PciDeviceError), 233 234 /// Cannot register ioevent. 235 RegisterIoevent(anyhow::Error), 236 237 /// Cannot unregister ioevent. 
238 UnRegisterIoevent(anyhow::Error), 239 240 /// Cannot create virtio device 241 VirtioDevice(virtio_devices::transport::VirtioPciDeviceError), 242 243 /// Cannot add PCI device 244 AddPciDevice(pci::PciRootError), 245 246 /// Cannot open persistent memory file 247 PmemFileOpen(io::Error), 248 249 /// Cannot set persistent memory file size 250 PmemFileSetLen(io::Error), 251 252 /// Cannot find a memory range for persistent memory 253 PmemRangeAllocation, 254 255 /// Cannot find a memory range for virtio-fs 256 FsRangeAllocation, 257 258 /// Error creating serial output file 259 SerialOutputFileOpen(io::Error), 260 261 #[cfg(target_arch = "x86_64")] 262 /// Error creating debug-console output file 263 DebugconOutputFileOpen(io::Error), 264 265 /// Error creating console output file 266 ConsoleOutputFileOpen(io::Error), 267 268 /// Error creating serial pty 269 SerialPtyOpen(io::Error), 270 271 /// Error creating console pty 272 ConsolePtyOpen(io::Error), 273 274 /// Error creating console pty 275 DebugconPtyOpen(io::Error), 276 277 /// Error setting pty raw mode 278 SetPtyRaw(ConsoleDeviceError), 279 280 /// Error getting pty peer 281 GetPtyPeer(vmm_sys_util::errno::Error), 282 283 /// Cannot create a VFIO device 284 VfioCreate(vfio_ioctls::VfioError), 285 286 /// Cannot create a VFIO PCI device 287 VfioPciCreate(pci::VfioPciError), 288 289 /// Failed to map VFIO MMIO region. 290 VfioMapRegion(pci::VfioPciError), 291 292 /// Failed to DMA map VFIO device. 293 VfioDmaMap(vfio_ioctls::VfioError), 294 295 /// Failed to DMA unmap VFIO device. 296 VfioDmaUnmap(pci::VfioPciError), 297 298 /// Failed to create the passthrough device. 299 CreatePassthroughDevice(anyhow::Error), 300 301 /// Failed to memory map. 302 Mmap(io::Error), 303 304 /// Cannot add legacy device to Bus. 
305 BusError(vm_device::BusError), 306 307 /// Failed to allocate IO port 308 AllocateIoPort, 309 310 /// Failed to allocate MMIO address 311 AllocateMmioAddress, 312 313 /// Failed to make hotplug notification 314 HotPlugNotification(io::Error), 315 316 /// Error from a memory manager operation 317 MemoryManager(MemoryManagerError), 318 319 /// Failed to create new interrupt source group. 320 CreateInterruptGroup(io::Error), 321 322 /// Failed to update interrupt source group. 323 UpdateInterruptGroup(io::Error), 324 325 /// Failed to create interrupt controller. 326 CreateInterruptController(interrupt_controller::Error), 327 328 /// Failed to create a new MmapRegion instance. 329 NewMmapRegion(vm_memory::mmap::MmapRegionError), 330 331 /// Failed to clone a File. 332 CloneFile(io::Error), 333 334 /// Failed to create socket file 335 CreateSocketFile(io::Error), 336 337 /// Failed to spawn the network backend 338 SpawnNetBackend(io::Error), 339 340 /// Failed to spawn the block backend 341 SpawnBlockBackend(io::Error), 342 343 /// Missing PCI bus. 344 NoPciBus, 345 346 /// Could not find an available device name. 347 NoAvailableDeviceName, 348 349 /// Missing PCI device. 350 MissingPciDevice, 351 352 /// Failed to remove a PCI device from the PCI bus. 353 RemoveDeviceFromPciBus(pci::PciRootError), 354 355 /// Failed to remove a bus device from the IO bus. 356 RemoveDeviceFromIoBus(vm_device::BusError), 357 358 /// Failed to remove a bus device from the MMIO bus. 359 RemoveDeviceFromMmioBus(vm_device::BusError), 360 361 /// Failed to find the device corresponding to a specific PCI b/d/f. 362 UnknownPciBdf(u32), 363 364 /// Not allowed to remove this type of device from the VM. 365 RemovalNotAllowed(vm_virtio::VirtioDeviceType), 366 367 /// Failed to find device corresponding to the given identifier. 368 UnknownDeviceId(String), 369 370 /// Failed to find an available PCI device ID. 
371 NextPciDeviceId(pci::PciRootError), 372 373 /// Could not reserve the PCI device ID. 374 GetPciDeviceId(pci::PciRootError), 375 376 /// Could not give the PCI device ID back. 377 PutPciDeviceId(pci::PciRootError), 378 379 /// No disk path was specified when one was expected 380 NoDiskPath, 381 382 /// Failed to update guest memory for virtio device. 383 UpdateMemoryForVirtioDevice(virtio_devices::Error), 384 385 /// Cannot create virtio-mem device 386 CreateVirtioMem(io::Error), 387 388 /// Cannot find a memory range for virtio-mem memory 389 VirtioMemRangeAllocation, 390 391 /// Failed to update guest memory for VFIO PCI device. 392 UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError), 393 394 /// Trying to use a directory for pmem but no size specified 395 PmemWithDirectorySizeMissing, 396 397 /// Trying to use a size that is not multiple of 2MiB 398 PmemSizeNotAligned, 399 400 /// Could not find the node in the device tree. 401 MissingNode, 402 403 /// Resource was already found. 404 ResourceAlreadyExists, 405 406 /// Expected resources for virtio-pmem could not be found. 407 MissingVirtioPmemResources, 408 409 /// Missing PCI b/d/f from the DeviceNode. 410 MissingDeviceNodePciBdf, 411 412 /// No support for device passthrough 413 NoDevicePassthroughSupport, 414 415 /// No socket option support for console device 416 NoSocketOptionSupportForConsoleDevice, 417 418 /// Failed to resize virtio-balloon 419 VirtioBalloonResize(virtio_devices::balloon::Error), 420 421 /// Missing virtio-balloon, can't proceed as expected. 
422 MissingVirtioBalloon, 423 424 /// Missing virtual IOMMU device 425 MissingVirtualIommu, 426 427 /// Failed to do power button notification 428 PowerButtonNotification(io::Error), 429 430 /// Failed to do AArch64 GPIO power button notification 431 #[cfg(target_arch = "aarch64")] 432 AArch64PowerButtonNotification(devices::legacy::GpioDeviceError), 433 434 /// Failed to set O_DIRECT flag to file descriptor 435 SetDirectIo, 436 437 /// Failed to create FixedVhdDiskAsync 438 CreateFixedVhdDiskAsync(io::Error), 439 440 /// Failed to create FixedVhdDiskSync 441 CreateFixedVhdDiskSync(io::Error), 442 443 /// Failed to create QcowDiskSync 444 CreateQcowDiskSync(qcow::Error), 445 446 /// Failed to create FixedVhdxDiskSync 447 CreateFixedVhdxDiskSync(vhdx::VhdxError), 448 449 /// Failed to add DMA mapping handler to virtio-mem device. 450 AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error), 451 452 /// Failed to remove DMA mapping handler from virtio-mem device. 453 RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error), 454 455 /// Failed to create vfio-user client 456 VfioUserCreateClient(vfio_user::Error), 457 458 /// Failed to create VFIO user device 459 VfioUserCreate(VfioUserPciDeviceError), 460 461 /// Failed to map region from VFIO user device into guest 462 VfioUserMapRegion(VfioUserPciDeviceError), 463 464 /// Failed to DMA map VFIO user device. 465 VfioUserDmaMap(VfioUserPciDeviceError), 466 467 /// Failed to DMA unmap VFIO user device. 468 VfioUserDmaUnmap(VfioUserPciDeviceError), 469 470 /// Failed to update memory mappings for VFIO user device 471 UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError), 472 473 /// Cannot duplicate file descriptor 474 DupFd(vmm_sys_util::errno::Error), 475 476 /// Failed to DMA map virtio device. 477 VirtioDmaMap(std::io::Error), 478 479 /// Failed to DMA unmap virtio device. 
480 VirtioDmaUnmap(std::io::Error), 481 482 /// Cannot hotplug device behind vIOMMU 483 InvalidIommuHotplug, 484 485 /// Invalid identifier as it is not unique. 486 IdentifierNotUnique(String), 487 488 /// Invalid identifier 489 InvalidIdentifier(String), 490 491 /// Error activating virtio device 492 VirtioActivate(ActivateError), 493 494 /// Failed retrieving device state from snapshot 495 RestoreGetState(MigratableError), 496 497 /// Cannot create a PvPanic device 498 PvPanicCreate(devices::pvpanic::PvPanicError), 499 500 /// Cannot create a RateLimiterGroup 501 RateLimiterGroupCreate(rate_limiter::group::Error), 502 503 /// Cannot start sigwinch listener 504 StartSigwinchListener(std::io::Error), 505 506 // Invalid console info 507 InvalidConsoleInfo, 508 509 // Invalid console fd 510 InvalidConsoleFd, 511 } 512 513 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>; 514 515 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10; 516 517 #[derive(Default)] 518 pub struct Console { 519 console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>, 520 } 521 522 impl Console { 523 pub fn need_resize(&self) -> bool { 524 if let Some(_resizer) = self.console_resizer.as_ref() { 525 return true; 526 } 527 528 false 529 } 530 531 pub fn update_console_size(&self) { 532 if let Some(resizer) = self.console_resizer.as_ref() { 533 resizer.update_console_size() 534 } 535 } 536 } 537 538 pub(crate) struct AddressManager { 539 pub(crate) allocator: Arc<Mutex<SystemAllocator>>, 540 #[cfg(target_arch = "x86_64")] 541 pub(crate) io_bus: Arc<Bus>, 542 pub(crate) mmio_bus: Arc<Bus>, 543 pub(crate) vm: Arc<dyn hypervisor::Vm>, 544 device_tree: Arc<Mutex<DeviceTree>>, 545 pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 546 pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 547 } 548 549 impl DeviceRelocation for AddressManager { 550 fn move_bar( 551 &self, 552 old_base: u64, 553 new_base: u64, 554 len: u64, 555 pci_dev: &mut dyn PciDevice, 556 
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        // First move the BAR's backing range in the relevant allocator and
        // bus (IO vs MMIO), then patch the device tree, then fix up any
        // ioeventfds / user memory regions the device registered at the
        // old address. Order matters: the old range must be freed before
        // the new one is claimed.
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
                    &self.pci_mmio32_allocators
                } else {
                    &self.pci_mmio64_allocators
                };

                // Find the specific allocator that this BAR was allocated from and use it for new one
                for allocator in allocators {
                    let allocator_base = allocator.lock().unwrap().base();
                    let allocator_end = allocator.lock().unwrap().end();

                    if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                        allocator
                            .lock()
                            .unwrap()
                            .free(GuestAddress(old_base), len as GuestUsize);

                        allocator
                            .lock()
                            .unwrap()
                            .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
                            .ok_or_else(|| {
                                io::Error::new(
                                    io::ErrorKind::Other,
                                    "failed allocating new MMIO range",
                                )
                            })?;

                        break;
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    // Only the PciBar resource matching both the region type
                    // and the old base address is rewritten.
                    if let Resource::PciBar { base, type_, .. } = resource {
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {id} from device tree"),
                ));
            }
        }

        // virtio-pci devices need extra fix-ups beyond the BAR itself.
        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The moved BAR is the config BAR: re-register every queue
                // notification ioeventfd from the old address to the new one.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {e:?}"),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {e:?}"),
                            )
                        })?;
                }
            } else {
                // Otherwise, if the moved BAR backs the device's shared
                // memory regions, the host mapping must be moved in the
                // hypervisor: remove the old user memory region and insert
                // one at the new guest address.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {e:?}"),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {e:?}"),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {e:?}"),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the device itself update its internal BAR bookkeeping.
        pci_dev.move_bar(old_base, new_base)
    }
}

/// Serializable state of the DeviceManager: the device tree plus the
/// device-ID counter, restored on snapshot/restore.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

/// A pty's controlling end and its filesystem path.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    // Manual Clone: File is not Clone, so the fd is duplicated via
    // try_clone(). The unwrap panics if fd duplication fails.
    fn clone(&self) -> Self {
        PtyPair {
            main: self.main.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

/// Handle to a PCI device, by passthrough/transport flavor.
#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}

/// A virtio device plus the placement/IOMMU metadata needed to attach it.
#[derive(Clone)]
struct MetaVirtioDevice {
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // Whether the device sits behind the virtual IOMMU.
    iommu: bool,
    id: String,
    pci_segment: u16,
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}

/// Generic addresses of ACPI platform registers, filled while the
/// platform devices are created and later used to build ACPI tables.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub
sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

// Proxy that makes guest pages accessible on SEV-SNP before they are
// touched by the VMM (mshv-only).
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
struct SevSnpPageAccessProxy {
    vm: Arc<dyn hypervisor::Vm>,
}

#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl std::fmt::Debug for SevSnpPageAccessProxy {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "SNP Page access proxy")
    }
}

#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl SevSnpPageAccessProxy {
    fn new(vm: Arc<dyn hypervisor::Vm>) -> SevSnpPageAccessProxy {
        SevSnpPageAccessProxy { vm }
    }
}

#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl AccessPlatform for SevSnpPageAccessProxy {
    // GPA translation is the identity here; no address rewriting is done.
    fn translate_gpa(&self, base: u64, _size: u64) -> std::result::Result<u64, std::io::Error> {
        Ok(base)
    }

    // Asks the hypervisor to grant access to the page range before
    // returning the (unchanged) address.
    fn translate_gva(&self, base: u64, size: u64) -> std::result::Result<u64, std::io::Error> {
        self.vm
            .gain_page_access(base, size as u32)
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
        Ok(base)
    }
}

/// Owns every device of the VM and the address space they live in.
pub struct DeviceManager {
    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // pty foreground status,
    console_resize_pipe: Option<Arc<File>>,

    // To restore on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<dyn BusDeviceSync>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    exit_evt: EventFd,
    reset_evt: EventFd,

    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    acpi_address: GuestAddress,

    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    #[cfg(feature = "pvmemcontrol")]
    pvmemcontrol_devices: Option<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // aio availability if detected
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g.
// ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    // Snapshot this DeviceManager was restored from, if any.
    snapshot: Option<Snapshot>,

    // Named rate limiter groups shared between block devices.
    rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,

    mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
}

/// Splits the [start, end] MMIO range into one AddressAllocator per PCI
/// segment, sized proportionally to the per-segment `weights`.
///
/// `weights` must contain one entry per segment (indexing panics
/// otherwise). Each segment starts on an `alignment` boundary and gets
/// `weight` aligned units of the range.
fn create_mmio_allocators(
    start: u64,
    end: u64,
    num_pci_segments: u16,
    weights: Vec<u32>,
    alignment: u64,
) -> Vec<Arc<Mutex<AddressAllocator>>> {
    let total_weight: u32 = weights.iter().sum();

    // Start each PCI segment mmio range on an aligned boundary
    let pci_segment_mmio_size = (end - start + 1) / (alignment * total_weight as u64) * alignment;

    let mut mmio_allocators = vec![];
    let mut i = 0;
    for segment_id in 0..num_pci_segments as u64 {
        let weight = weights[segment_id as usize] as u64;
        let mmio_start = start + i * pci_segment_mmio_size;
        let mmio_size = pci_segment_mmio_size * weight;
        let allocator = Arc::new(Mutex::new(
            AddressAllocator::new(GuestAddress(mmio_start), mmio_size).unwrap(),
        ));
        mmio_allocators.push(allocator);
        i += weight;
    }

    mmio_allocators
}

impl DeviceManager {
    /// Builds the DeviceManager: restores the device tree from `snapshot`
    /// when present, carves up the 32-bit and 64-bit PCI MMIO apertures,
    /// and creates the MSI interrupt manager and PCI segments.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
        mmio_bus: Arc<Bus>,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        cpu_manager: Arc<Mutex<CpuManager>>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
        snapshot: Option<Snapshot>,
        dynamic: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        // Restore the device tree and ID counter from the snapshot when
        // restoring; start fresh otherwise.
        let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
            let state: DeviceManagerState = snapshot.to_state().unwrap();
            (
Arc::new(Mutex::new(state.device_tree.clone())), 1019 state.device_id_cnt, 1020 ) 1021 } else { 1022 (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0)) 1023 }; 1024 1025 let num_pci_segments = 1026 if let Some(platform_config) = config.lock().unwrap().platform.as_ref() { 1027 platform_config.num_pci_segments 1028 } else { 1029 1 1030 }; 1031 1032 let mut mmio32_aperture_weights: Vec<u32> = 1033 std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT) 1034 .take(num_pci_segments.into()) 1035 .collect(); 1036 if let Some(pci_segments) = &config.lock().unwrap().pci_segments { 1037 for pci_segment in pci_segments.iter() { 1038 mmio32_aperture_weights[pci_segment.pci_segment as usize] = 1039 pci_segment.mmio32_aperture_weight 1040 } 1041 } 1042 1043 let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0; 1044 let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE; 1045 let pci_mmio32_allocators = create_mmio_allocators( 1046 start_of_mmio32_area, 1047 end_of_mmio32_area, 1048 num_pci_segments, 1049 mmio32_aperture_weights, 1050 4 << 10, 1051 ); 1052 1053 let mut mmio64_aperture_weights: Vec<u32> = 1054 std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT) 1055 .take(num_pci_segments.into()) 1056 .collect(); 1057 if let Some(pci_segments) = &config.lock().unwrap().pci_segments { 1058 for pci_segment in pci_segments.iter() { 1059 mmio64_aperture_weights[pci_segment.pci_segment as usize] = 1060 pci_segment.mmio64_aperture_weight 1061 } 1062 } 1063 1064 let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0; 1065 let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0; 1066 let pci_mmio64_allocators = create_mmio_allocators( 1067 start_of_mmio64_area, 1068 end_of_mmio64_area, 1069 num_pci_segments, 1070 mmio64_aperture_weights, 1071 4 << 30, 1072 ); 1073 1074 let address_manager = Arc::new(AddressManager { 1075 allocator: memory_manager.lock().unwrap().allocator(), 1076 
#[cfg(target_arch = "x86_64")] 1077 io_bus, 1078 mmio_bus, 1079 vm: vm.clone(), 1080 device_tree: Arc::clone(&device_tree), 1081 pci_mmio32_allocators, 1082 pci_mmio64_allocators, 1083 }); 1084 1085 // First we create the MSI interrupt manager, the legacy one is created 1086 // later, after the IOAPIC device creation. 1087 // The reason we create the MSI one first is because the IOAPIC needs it, 1088 // and then the legacy interrupt manager needs an IOAPIC. So we're 1089 // handling a linear dependency chain: 1090 // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager. 1091 let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> = 1092 Arc::new(MsiInterruptManager::new( 1093 Arc::clone(&address_manager.allocator), 1094 vm, 1095 )); 1096 1097 let acpi_address = address_manager 1098 .allocator 1099 .lock() 1100 .unwrap() 1101 .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None) 1102 .ok_or(DeviceManagerError::AllocateIoPort)?; 1103 1104 let mut pci_irq_slots = [0; 32]; 1105 PciSegment::reserve_legacy_interrupts_for_pci_devices( 1106 &address_manager, 1107 &mut pci_irq_slots, 1108 )?; 1109 1110 let mut pci_segments = vec![PciSegment::new_default_segment( 1111 &address_manager, 1112 Arc::clone(&address_manager.pci_mmio32_allocators[0]), 1113 Arc::clone(&address_manager.pci_mmio64_allocators[0]), 1114 &pci_irq_slots, 1115 )?]; 1116 1117 for i in 1..num_pci_segments as usize { 1118 pci_segments.push(PciSegment::new( 1119 i as u16, 1120 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16), 1121 &address_manager, 1122 Arc::clone(&address_manager.pci_mmio32_allocators[i]), 1123 Arc::clone(&address_manager.pci_mmio64_allocators[i]), 1124 &pci_irq_slots, 1125 )?); 1126 } 1127 1128 if dynamic { 1129 let acpi_address = address_manager 1130 .allocator 1131 .lock() 1132 .unwrap() 1133 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None) 1134 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 
1135 1136 address_manager 1137 .mmio_bus 1138 .insert( 1139 cpu_manager.clone(), 1140 acpi_address.0, 1141 CPU_MANAGER_ACPI_SIZE as u64, 1142 ) 1143 .map_err(DeviceManagerError::BusError)?; 1144 1145 cpu_manager.lock().unwrap().set_acpi_address(acpi_address); 1146 } 1147 1148 let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new(); 1149 if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() { 1150 for rate_limit_group_cfg in rate_limit_groups_cfg { 1151 let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config; 1152 let bw = rate_limit_cfg.bandwidth.unwrap_or_default(); 1153 let ops = rate_limit_cfg.ops.unwrap_or_default(); 1154 let mut rate_limit_group = RateLimiterGroup::new( 1155 &rate_limit_group_cfg.id, 1156 bw.size, 1157 bw.one_time_burst.unwrap_or(0), 1158 bw.refill_time, 1159 ops.size, 1160 ops.one_time_burst.unwrap_or(0), 1161 ops.refill_time, 1162 ) 1163 .map_err(DeviceManagerError::RateLimiterGroupCreate)?; 1164 1165 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?; 1166 1167 rate_limit_group.start_thread(exit_evt).unwrap(); 1168 rate_limit_groups 1169 .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group)); 1170 } 1171 } 1172 1173 let device_manager = DeviceManager { 1174 address_manager: Arc::clone(&address_manager), 1175 console: Arc::new(Console::default()), 1176 interrupt_controller: None, 1177 #[cfg(target_arch = "aarch64")] 1178 cmdline_additions: Vec::new(), 1179 ged_notification_device: None, 1180 config, 1181 memory_manager, 1182 cpu_manager, 1183 virtio_devices: Vec::new(), 1184 bus_devices: Vec::new(), 1185 device_id_cnt, 1186 msi_interrupt_manager, 1187 legacy_interrupt_manager: None, 1188 passthrough_device: None, 1189 vfio_container: None, 1190 iommu_device: None, 1191 iommu_mapping: None, 1192 iommu_attached_devices: None, 1193 pci_segments, 1194 device_tree, 1195 exit_evt, 1196 reset_evt, 1197 #[cfg(target_arch = "aarch64")] 1198 id_to_dev_info: 
HashMap::new(), 1199 seccomp_action, 1200 numa_nodes, 1201 balloon: None, 1202 activate_evt: activate_evt 1203 .try_clone() 1204 .map_err(DeviceManagerError::EventFd)?, 1205 acpi_address, 1206 selected_segment: 0, 1207 serial_manager: None, 1208 console_resize_pipe: None, 1209 original_termios_opt: Arc::new(Mutex::new(None)), 1210 virtio_mem_devices: Vec::new(), 1211 #[cfg(target_arch = "aarch64")] 1212 gpio_device: None, 1213 #[cfg(feature = "pvmemcontrol")] 1214 pvmemcontrol_devices: None, 1215 pvpanic_device: None, 1216 force_iommu, 1217 io_uring_supported: None, 1218 aio_supported: None, 1219 boot_id_list, 1220 timestamp, 1221 pending_activations: Arc::new(Mutex::new(Vec::default())), 1222 acpi_platform_addresses: AcpiPlatformAddresses::default(), 1223 snapshot, 1224 rate_limit_groups, 1225 mmio_regions: Arc::new(Mutex::new(Vec::new())), 1226 }; 1227 1228 let device_manager = Arc::new(Mutex::new(device_manager)); 1229 1230 address_manager 1231 .mmio_bus 1232 .insert( 1233 Arc::clone(&device_manager) as Arc<dyn BusDeviceSync>, 1234 acpi_address.0, 1235 DEVICE_MANAGER_ACPI_SIZE as u64, 1236 ) 1237 .map_err(DeviceManagerError::BusError)?; 1238 1239 Ok(device_manager) 1240 } 1241 1242 pub fn console_resize_pipe(&self) -> Option<Arc<File>> { 1243 self.console_resize_pipe.clone() 1244 } 1245 1246 pub fn create_devices( 1247 &mut self, 1248 console_info: Option<ConsoleInfo>, 1249 console_resize_pipe: Option<Arc<File>>, 1250 original_termios_opt: Arc<Mutex<Option<termios>>>, 1251 ) -> DeviceManagerResult<()> { 1252 trace_scoped!("create_devices"); 1253 1254 let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new(); 1255 1256 let interrupt_controller = self.add_interrupt_controller()?; 1257 1258 self.cpu_manager 1259 .lock() 1260 .unwrap() 1261 .set_interrupt_controller(interrupt_controller.clone()); 1262 1263 // Now we can create the legacy interrupt manager, which needs the freshly 1264 // formed IOAPIC device. 
1265 let legacy_interrupt_manager: Arc< 1266 dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>, 1267 > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone( 1268 &interrupt_controller, 1269 ))); 1270 1271 { 1272 if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() { 1273 self.address_manager 1274 .mmio_bus 1275 .insert( 1276 Arc::clone(&self.memory_manager) as Arc<dyn BusDeviceSync>, 1277 acpi_address.0, 1278 MEMORY_MANAGER_ACPI_SIZE as u64, 1279 ) 1280 .map_err(DeviceManagerError::BusError)?; 1281 } 1282 } 1283 1284 #[cfg(target_arch = "x86_64")] 1285 self.add_legacy_devices( 1286 self.reset_evt 1287 .try_clone() 1288 .map_err(DeviceManagerError::EventFd)?, 1289 )?; 1290 1291 #[cfg(target_arch = "aarch64")] 1292 self.add_legacy_devices(&legacy_interrupt_manager)?; 1293 1294 { 1295 self.ged_notification_device = self.add_acpi_devices( 1296 &legacy_interrupt_manager, 1297 self.reset_evt 1298 .try_clone() 1299 .map_err(DeviceManagerError::EventFd)?, 1300 self.exit_evt 1301 .try_clone() 1302 .map_err(DeviceManagerError::EventFd)?, 1303 )?; 1304 } 1305 1306 self.original_termios_opt = original_termios_opt; 1307 1308 self.console = self.add_console_devices( 1309 &legacy_interrupt_manager, 1310 &mut virtio_devices, 1311 console_info, 1312 console_resize_pipe, 1313 )?; 1314 1315 if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() { 1316 let tpm_dev = self.add_tpm_device(tpm.socket.clone())?; 1317 self.bus_devices 1318 .push(Arc::clone(&tpm_dev) as Arc<dyn BusDeviceSync>) 1319 } 1320 self.legacy_interrupt_manager = Some(legacy_interrupt_manager); 1321 1322 virtio_devices.append(&mut self.make_virtio_devices()?); 1323 1324 self.add_pci_devices(virtio_devices.clone())?; 1325 1326 self.virtio_devices = virtio_devices; 1327 1328 // Add pvmemcontrol if required 1329 #[cfg(feature = "pvmemcontrol")] 1330 { 1331 if self.config.lock().unwrap().pvmemcontrol.is_some() { 1332 let (pvmemcontrol_bus_device, pvmemcontrol_pci_device) 
=
                    self.make_pvmemcontrol_device()?;
                self.pvmemcontrol_devices =
                    Some((pvmemcontrol_bus_device, pvmemcontrol_pci_device));
            }
        }

        // Add the pvpanic device when enabled in the VM config.
        if self.config.clone().lock().unwrap().pvpanic {
            self.pvpanic_device = self.add_pvpanic_device()?;
        }

        Ok(())
    }

    /// Captures the serializable state (device tree and device-id counter)
    /// used when snapshotting the device manager.
    fn state(&self) -> DeviceManagerState {
        DeviceManagerState {
            device_tree: self.device_tree.lock().unwrap().clone(),
            device_id_cnt: self.device_id_cnt,
        }
    }

    /// Returns the inclusive `(start, end)` guest-address range used for MSI:
    /// the vGIC MSI window on aarch64, the fixed APIC MMIO range on x86_64.
    fn get_msi_iova_space(&mut self) -> (u64, u64) {
        #[cfg(target_arch = "aarch64")]
        {
            let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
            let vgic_config = gic::Gic::create_default_config(vcpus.into());
            (
                vgic_config.msi_addr,
                vgic_config.msi_addr + vgic_config.msi_size - 1,
            )
        }
        #[cfg(target_arch = "x86_64")]
        (0xfee0_0000, 0xfeef_ffff)
    }

    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }

    /// Plugs all virtio, VFIO and vfio-user devices onto their PCI segments.
    /// When enabled, the virtio-iommu device is created first so that other
    /// devices can be recorded as attached to it; the iommu device itself is
    /// added last, once the full list of attached BDFs is known.
    #[allow(unused_variables)]
    fn add_pci_devices(
        &mut self,
        virtio_devices: Vec<MetaVirtioDevice>,
    ) -> DeviceManagerResult<()> {
        let iommu_id = String::from(IOMMU_DEVICE_NAME);

        let iommu_device = if self.config.lock().unwrap().iommu {
            let (device, mapping) = virtio_devices::Iommu::new(
                iommu_id.clone(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.get_msi_iova_space(),
                state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioIommu)?;
            let device = Arc::new(Mutex::new(device));
            self.iommu_device = Some(Arc::clone(&device));
            self.iommu_mapping = Some(mapping);

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
1399 self.device_tree 1400 .lock() 1401 .unwrap() 1402 .insert(iommu_id.clone(), device_node!(iommu_id, device)); 1403 1404 Some(device) 1405 } else { 1406 None 1407 }; 1408 1409 let mut iommu_attached_devices = Vec::new(); 1410 { 1411 for handle in virtio_devices { 1412 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 1413 self.iommu_mapping.clone() 1414 } else { 1415 None 1416 }; 1417 1418 let dev_id = self.add_virtio_pci_device( 1419 handle.virtio_device, 1420 &mapping, 1421 handle.id, 1422 handle.pci_segment, 1423 handle.dma_handler, 1424 )?; 1425 1426 if handle.iommu { 1427 iommu_attached_devices.push(dev_id); 1428 } 1429 } 1430 1431 let mut vfio_iommu_device_ids = self.add_vfio_devices()?; 1432 iommu_attached_devices.append(&mut vfio_iommu_device_ids); 1433 1434 let mut vfio_user_iommu_device_ids = self.add_user_devices()?; 1435 iommu_attached_devices.append(&mut vfio_user_iommu_device_ids); 1436 1437 // Add all devices from forced iommu segments 1438 if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() { 1439 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { 1440 for segment in iommu_segments { 1441 for device in 0..32 { 1442 let bdf = PciBdf::new(*segment, 0, device, 0); 1443 if !iommu_attached_devices.contains(&bdf) { 1444 iommu_attached_devices.push(bdf); 1445 } 1446 } 1447 } 1448 } 1449 } 1450 1451 if let Some(iommu_device) = iommu_device { 1452 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?; 1453 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices)); 1454 } 1455 } 1456 1457 for segment in &self.pci_segments { 1458 #[cfg(target_arch = "x86_64")] 1459 if let Some(pci_config_io) = segment.pci_config_io.as_ref() { 1460 self.bus_devices 1461 .push(Arc::clone(pci_config_io) as Arc<dyn BusDeviceSync>); 1462 } 1463 1464 self.bus_devices 1465 .push(Arc::clone(&segment.pci_config_mmio) as Arc<dyn BusDeviceSync>); 1466 } 1467 1468 Ok(()) 1469 } 1470 1471 
#[cfg(target_arch = "aarch64")]
    /// Creates the vGIC interrupt controller and records it in the device
    /// tree. When restoring from a snapshot, the saved vGIC state is restored
    /// and the PMU is initialized on a best-effort basis.
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
            gic::Gic::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
                self.address_manager.vm.clone(),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        // Restore the vGic if this is in the process of restoration
        let id = String::from(gic::GIC_SNAPSHOT_ID);
        if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
            // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
            if self
                .cpu_manager
                .lock()
                .unwrap()
                .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
                .is_err()
            {
                info!("Failed to initialize PMU");
            }

            let vgic_state = vgic_snapshot
                .to_state()
                .map_err(DeviceManagerError::RestoreGetState)?;
            let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
            interrupt_controller
                .lock()
                .unwrap()
                .restore_vgic(vgic_state, &saved_vcpu_states)
                .unwrap();
        }

        // Record the controller in the device tree; on restore this simply
        // overrides the existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    #[cfg(target_arch = "aarch64")]
    /// Returns the GIC created by `add_interrupt_controller`, if any.
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }

    #[cfg(target_arch = "x86_64")]
    /// Creates the IOAPIC interrupt controller, maps it on the MMIO bus and
    /// records it in the device tree.
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let id = String::from(IOAPIC_DEVICE_NAME);

        // Create IOAPIC
        let interrupt_controller = Arc::new(Mutex::new(
            ioapic::Ioapic::new(
id.clone(), 1534 APIC_START, 1535 Arc::clone(&self.msi_interrupt_manager), 1536 state_from_id(self.snapshot.as_ref(), id.as_str()) 1537 .map_err(DeviceManagerError::RestoreGetState)?, 1538 ) 1539 .map_err(DeviceManagerError::CreateInterruptController)?, 1540 )); 1541 1542 self.interrupt_controller = Some(interrupt_controller.clone()); 1543 1544 self.address_manager 1545 .mmio_bus 1546 .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE) 1547 .map_err(DeviceManagerError::BusError)?; 1548 1549 self.bus_devices 1550 .push(Arc::clone(&interrupt_controller) as Arc<dyn BusDeviceSync>); 1551 1552 // Fill the device tree with a new node. In case of restore, we 1553 // know there is nothing to do, so we can simply override the 1554 // existing entry. 1555 self.device_tree 1556 .lock() 1557 .unwrap() 1558 .insert(id.clone(), device_node!(id, interrupt_controller)); 1559 1560 Ok(interrupt_controller) 1561 } 1562 1563 fn add_acpi_devices( 1564 &mut self, 1565 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1566 reset_evt: EventFd, 1567 exit_evt: EventFd, 1568 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> { 1569 let vcpus_kill_signalled = self 1570 .cpu_manager 1571 .lock() 1572 .unwrap() 1573 .vcpus_kill_signalled() 1574 .clone(); 1575 let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new( 1576 exit_evt, 1577 reset_evt, 1578 vcpus_kill_signalled, 1579 ))); 1580 1581 self.bus_devices 1582 .push(Arc::clone(&shutdown_device) as Arc<dyn BusDeviceSync>); 1583 1584 #[cfg(target_arch = "x86_64")] 1585 { 1586 let shutdown_pio_address: u16 = 0x600; 1587 1588 self.address_manager 1589 .allocator 1590 .lock() 1591 .unwrap() 1592 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None) 1593 .ok_or(DeviceManagerError::AllocateIoPort)?; 1594 1595 self.address_manager 1596 .io_bus 1597 .insert(shutdown_device, shutdown_pio_address.into(), 0x4) 1598 
.map_err(DeviceManagerError::BusError)?; 1599 1600 self.acpi_platform_addresses.sleep_control_reg_address = 1601 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1602 self.acpi_platform_addresses.sleep_status_reg_address = 1603 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1604 self.acpi_platform_addresses.reset_reg_address = 1605 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1606 } 1607 1608 let ged_irq = self 1609 .address_manager 1610 .allocator 1611 .lock() 1612 .unwrap() 1613 .allocate_irq() 1614 .unwrap(); 1615 let interrupt_group = interrupt_manager 1616 .create_group(LegacyIrqGroupConfig { 1617 irq: ged_irq as InterruptIndex, 1618 }) 1619 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1620 let ged_address = self 1621 .address_manager 1622 .allocator 1623 .lock() 1624 .unwrap() 1625 .allocate_platform_mmio_addresses( 1626 None, 1627 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1628 None, 1629 ) 1630 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 1631 let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new( 1632 interrupt_group, 1633 ged_irq, 1634 ged_address, 1635 ))); 1636 self.address_manager 1637 .mmio_bus 1638 .insert( 1639 ged_device.clone(), 1640 ged_address.0, 1641 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1642 ) 1643 .map_err(DeviceManagerError::BusError)?; 1644 self.bus_devices 1645 .push(Arc::clone(&ged_device) as Arc<dyn BusDeviceSync>); 1646 1647 let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new())); 1648 1649 self.bus_devices 1650 .push(Arc::clone(&pm_timer_device) as Arc<dyn BusDeviceSync>); 1651 1652 #[cfg(target_arch = "x86_64")] 1653 { 1654 let pm_timer_pio_address: u16 = 0x608; 1655 1656 self.address_manager 1657 .allocator 1658 .lock() 1659 .unwrap() 1660 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None) 1661 .ok_or(DeviceManagerError::AllocateIoPort)?; 1662 1663 self.address_manager 1664 .io_bus 1665 
.insert(pm_timer_device, pm_timer_pio_address.into(), 0x4) 1666 .map_err(DeviceManagerError::BusError)?; 1667 1668 self.acpi_platform_addresses.pm_timer_address = 1669 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address)); 1670 } 1671 1672 Ok(Some(ged_device)) 1673 } 1674 1675 #[cfg(target_arch = "x86_64")] 1676 fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> { 1677 let vcpus_kill_signalled = self 1678 .cpu_manager 1679 .lock() 1680 .unwrap() 1681 .vcpus_kill_signalled() 1682 .clone(); 1683 // Add a shutdown device (i8042) 1684 let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new( 1685 reset_evt.try_clone().unwrap(), 1686 vcpus_kill_signalled.clone(), 1687 ))); 1688 1689 self.bus_devices 1690 .push(Arc::clone(&i8042) as Arc<dyn BusDeviceSync>); 1691 1692 self.address_manager 1693 .io_bus 1694 .insert(i8042, 0x61, 0x4) 1695 .map_err(DeviceManagerError::BusError)?; 1696 { 1697 // Add a CMOS emulated device 1698 let mem_size = self 1699 .memory_manager 1700 .lock() 1701 .unwrap() 1702 .guest_memory() 1703 .memory() 1704 .last_addr() 1705 .0 1706 + 1; 1707 let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size); 1708 let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0); 1709 1710 let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new( 1711 mem_below_4g, 1712 mem_above_4g, 1713 reset_evt, 1714 Some(vcpus_kill_signalled), 1715 ))); 1716 1717 self.bus_devices 1718 .push(Arc::clone(&cmos) as Arc<dyn BusDeviceSync>); 1719 1720 self.address_manager 1721 .io_bus 1722 .insert(cmos, 0x70, 0x2) 1723 .map_err(DeviceManagerError::BusError)?; 1724 1725 let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new())); 1726 1727 self.bus_devices 1728 .push(Arc::clone(&fwdebug) as Arc<dyn BusDeviceSync>); 1729 1730 self.address_manager 1731 .io_bus 1732 .insert(fwdebug, 0x402, 0x1) 1733 .map_err(DeviceManagerError::BusError)?; 1734 } 1735 1736 // 0x80 debug port 
1737 let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp))); 1738 self.bus_devices 1739 .push(Arc::clone(&debug_port) as Arc<dyn BusDeviceSync>); 1740 self.address_manager 1741 .io_bus 1742 .insert(debug_port, 0x80, 0x1) 1743 .map_err(DeviceManagerError::BusError)?; 1744 1745 Ok(()) 1746 } 1747 1748 #[cfg(target_arch = "aarch64")] 1749 fn add_legacy_devices( 1750 &mut self, 1751 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1752 ) -> DeviceManagerResult<()> { 1753 // Add a RTC device 1754 let rtc_irq = self 1755 .address_manager 1756 .allocator 1757 .lock() 1758 .unwrap() 1759 .allocate_irq() 1760 .unwrap(); 1761 1762 let interrupt_group = interrupt_manager 1763 .create_group(LegacyIrqGroupConfig { 1764 irq: rtc_irq as InterruptIndex, 1765 }) 1766 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1767 1768 let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group))); 1769 1770 self.bus_devices 1771 .push(Arc::clone(&rtc_device) as Arc<dyn BusDeviceSync>); 1772 1773 let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START; 1774 1775 self.address_manager 1776 .mmio_bus 1777 .insert(rtc_device, addr.0, MMIO_LEN) 1778 .map_err(DeviceManagerError::BusError)?; 1779 1780 self.id_to_dev_info.insert( 1781 (DeviceType::Rtc, "rtc".to_string()), 1782 MmioDeviceInfo { 1783 addr: addr.0, 1784 len: MMIO_LEN, 1785 irq: rtc_irq, 1786 }, 1787 ); 1788 1789 // Add a GPIO device 1790 let id = String::from(GPIO_DEVICE_NAME); 1791 let gpio_irq = self 1792 .address_manager 1793 .allocator 1794 .lock() 1795 .unwrap() 1796 .allocate_irq() 1797 .unwrap(); 1798 1799 let interrupt_group = interrupt_manager 1800 .create_group(LegacyIrqGroupConfig { 1801 irq: gpio_irq as InterruptIndex, 1802 }) 1803 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1804 1805 let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new( 1806 id.clone(), 1807 interrupt_group, 1808 state_from_id(self.snapshot.as_ref(), 
id.as_str()) 1809 .map_err(DeviceManagerError::RestoreGetState)?, 1810 ))); 1811 1812 self.bus_devices 1813 .push(Arc::clone(&gpio_device) as Arc<dyn BusDeviceSync>); 1814 1815 let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START; 1816 1817 self.address_manager 1818 .mmio_bus 1819 .insert(gpio_device.clone(), addr.0, MMIO_LEN) 1820 .map_err(DeviceManagerError::BusError)?; 1821 1822 self.gpio_device = Some(gpio_device.clone()); 1823 1824 self.id_to_dev_info.insert( 1825 (DeviceType::Gpio, "gpio".to_string()), 1826 MmioDeviceInfo { 1827 addr: addr.0, 1828 len: MMIO_LEN, 1829 irq: gpio_irq, 1830 }, 1831 ); 1832 1833 self.device_tree 1834 .lock() 1835 .unwrap() 1836 .insert(id.clone(), device_node!(id, gpio_device)); 1837 1838 Ok(()) 1839 } 1840 1841 #[cfg(target_arch = "x86_64")] 1842 fn add_debug_console_device( 1843 &mut self, 1844 debug_console_writer: Box<dyn io::Write + Send>, 1845 ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> { 1846 let id = String::from(DEBUGCON_DEVICE_NAME); 1847 let debug_console = Arc::new(Mutex::new(DebugConsole::new( 1848 id.clone(), 1849 debug_console_writer, 1850 ))); 1851 1852 let port = self 1853 .config 1854 .lock() 1855 .unwrap() 1856 .debug_console 1857 .clone() 1858 .iobase 1859 .map(|port| port as u64) 1860 .unwrap_or(debug_console::DEFAULT_PORT); 1861 1862 self.bus_devices 1863 .push(Arc::clone(&debug_console) as Arc<dyn BusDeviceSync>); 1864 1865 self.address_manager 1866 .allocator 1867 .lock() 1868 .unwrap() 1869 .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None) 1870 .ok_or(DeviceManagerError::AllocateIoPort)?; 1871 1872 self.address_manager 1873 .io_bus 1874 .insert(debug_console.clone(), port, 0x1) 1875 .map_err(DeviceManagerError::BusError)?; 1876 1877 // Fill the device tree with a new node. In case of restore, we 1878 // know there is nothing to do, so we can simply override the 1879 // existing entry. 
1880 self.device_tree 1881 .lock() 1882 .unwrap() 1883 .insert(id.clone(), device_node!(id, debug_console)); 1884 1885 Ok(debug_console) 1886 } 1887 1888 #[cfg(target_arch = "x86_64")] 1889 fn add_serial_device( 1890 &mut self, 1891 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1892 serial_writer: Option<Box<dyn io::Write + Send>>, 1893 ) -> DeviceManagerResult<Arc<Mutex<Serial>>> { 1894 // Serial is tied to IRQ #4 1895 let serial_irq = 4; 1896 1897 let id = String::from(SERIAL_DEVICE_NAME); 1898 1899 let interrupt_group = interrupt_manager 1900 .create_group(LegacyIrqGroupConfig { 1901 irq: serial_irq as InterruptIndex, 1902 }) 1903 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1904 1905 let serial = Arc::new(Mutex::new(Serial::new( 1906 id.clone(), 1907 interrupt_group, 1908 serial_writer, 1909 state_from_id(self.snapshot.as_ref(), id.as_str()) 1910 .map_err(DeviceManagerError::RestoreGetState)?, 1911 ))); 1912 1913 self.bus_devices 1914 .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>); 1915 1916 self.address_manager 1917 .allocator 1918 .lock() 1919 .unwrap() 1920 .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None) 1921 .ok_or(DeviceManagerError::AllocateIoPort)?; 1922 1923 self.address_manager 1924 .io_bus 1925 .insert(serial.clone(), 0x3f8, 0x8) 1926 .map_err(DeviceManagerError::BusError)?; 1927 1928 // Fill the device tree with a new node. In case of restore, we 1929 // know there is nothing to do, so we can simply override the 1930 // existing entry. 
1931 self.device_tree 1932 .lock() 1933 .unwrap() 1934 .insert(id.clone(), device_node!(id, serial)); 1935 1936 Ok(serial) 1937 } 1938 1939 #[cfg(target_arch = "aarch64")] 1940 fn add_serial_device( 1941 &mut self, 1942 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1943 serial_writer: Option<Box<dyn io::Write + Send>>, 1944 ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> { 1945 let id = String::from(SERIAL_DEVICE_NAME); 1946 1947 let serial_irq = self 1948 .address_manager 1949 .allocator 1950 .lock() 1951 .unwrap() 1952 .allocate_irq() 1953 .unwrap(); 1954 1955 let interrupt_group = interrupt_manager 1956 .create_group(LegacyIrqGroupConfig { 1957 irq: serial_irq as InterruptIndex, 1958 }) 1959 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1960 1961 let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new( 1962 id.clone(), 1963 interrupt_group, 1964 serial_writer, 1965 self.timestamp, 1966 state_from_id(self.snapshot.as_ref(), id.as_str()) 1967 .map_err(DeviceManagerError::RestoreGetState)?, 1968 ))); 1969 1970 self.bus_devices 1971 .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>); 1972 1973 let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START; 1974 1975 self.address_manager 1976 .mmio_bus 1977 .insert(serial.clone(), addr.0, MMIO_LEN) 1978 .map_err(DeviceManagerError::BusError)?; 1979 1980 self.id_to_dev_info.insert( 1981 (DeviceType::Serial, DeviceType::Serial.to_string()), 1982 MmioDeviceInfo { 1983 addr: addr.0, 1984 len: MMIO_LEN, 1985 irq: serial_irq, 1986 }, 1987 ); 1988 1989 self.cmdline_additions 1990 .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0)); 1991 1992 // Fill the device tree with a new node. In case of restore, we 1993 // know there is nothing to do, so we can simply override the 1994 // existing entry. 
self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// Creates the virtio-console device backed by `console_fd`, hooking up
    /// `resize_pipe` for PTY and interactive-TTY outputs, and queues it onto
    /// `virtio_devices` for later PCI placement. Returns the console resizer
    /// only when the configured console mode is TTY.
    fn add_virtio_console_device(
        &mut self,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_fd: ConsoleOutput,
        resize_pipe: Option<Arc<File>>,
    ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
        let console_config = self.config.lock().unwrap().console.clone();
        let endpoint = match console_fd {
            ConsoleOutput::File(file) => Endpoint::File(file),
            ConsoleOutput::Pty(file) => {
                self.console_resize_pipe = resize_pipe;
                Endpoint::PtyPair(Arc::new(file.try_clone().unwrap()), file)
            }
            ConsoleOutput::Tty(stdout) => {
                // Only hook up resize handling when stdout really is a terminal.
                if stdout.is_terminal() {
                    self.console_resize_pipe = resize_pipe;
                }

                // If an interactive TTY then we can accept input
                // SAFETY: FFI call. Trivially safe.
                if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
                    // SAFETY: FFI call to dup. Trivially safe.
                    let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
                    if stdin == -1 {
                        return vmm_sys_util::errno::errno_result()
                            .map_err(DeviceManagerError::DupFd);
                    }
                    // SAFETY: stdin is valid and owned solely by us.
2031 let stdin = unsafe { File::from_raw_fd(stdin) }; 2032 Endpoint::FilePair(stdout, Arc::new(stdin)) 2033 } else { 2034 Endpoint::File(stdout) 2035 } 2036 } 2037 ConsoleOutput::Socket(_) => { 2038 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice); 2039 } 2040 ConsoleOutput::Null => Endpoint::Null, 2041 ConsoleOutput::Off => return Ok(None), 2042 }; 2043 let id = String::from(CONSOLE_DEVICE_NAME); 2044 2045 let (virtio_console_device, console_resizer) = virtio_devices::Console::new( 2046 id.clone(), 2047 endpoint, 2048 self.console_resize_pipe 2049 .as_ref() 2050 .map(|p| p.try_clone().unwrap()), 2051 self.force_iommu | console_config.iommu, 2052 self.seccomp_action.clone(), 2053 self.exit_evt 2054 .try_clone() 2055 .map_err(DeviceManagerError::EventFd)?, 2056 state_from_id(self.snapshot.as_ref(), id.as_str()) 2057 .map_err(DeviceManagerError::RestoreGetState)?, 2058 ) 2059 .map_err(DeviceManagerError::CreateVirtioConsole)?; 2060 let virtio_console_device = Arc::new(Mutex::new(virtio_console_device)); 2061 virtio_devices.push(MetaVirtioDevice { 2062 virtio_device: Arc::clone(&virtio_console_device) 2063 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2064 iommu: console_config.iommu, 2065 id: id.clone(), 2066 pci_segment: 0, 2067 dma_handler: None, 2068 }); 2069 2070 // Fill the device tree with a new node. In case of restore, we 2071 // know there is nothing to do, so we can simply override the 2072 // existing entry. 2073 self.device_tree 2074 .lock() 2075 .unwrap() 2076 .insert(id.clone(), device_node!(id, virtio_console_device)); 2077 2078 // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY 2079 Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) { 2080 Some(console_resizer) 2081 } else { 2082 None 2083 }) 2084 } 2085 2086 /// Adds all devices that behave like a console with respect to the VM 2087 /// configuration. 
    /// This includes:
    /// - debug-console
    /// - serial-console
    /// - virtio-console
    fn add_console_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_info: Option<ConsoleInfo>,
        console_resize_pipe: Option<Arc<File>>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        if console_info.is_none() {
            return Err(DeviceManagerError::InvalidConsoleInfo);
        }

        // SAFETY: console_info is Some, so it's safe to unwrap.
        let console_info = console_info.unwrap();

        // The serial device only gets a writer when its output goes to a
        // file or the TTY; pty/socket outputs are driven by the SerialManager
        // thread instead.
        let serial_writer: Option<Box<dyn io::Write + Send>> = match console_info.serial_main_fd {
            ConsoleOutput::File(ref file) | ConsoleOutput::Tty(ref file) => {
                Some(Box::new(Arc::clone(file)))
            }
            ConsoleOutput::Off
            | ConsoleOutput::Null
            | ConsoleOutput::Pty(_)
            | ConsoleOutput::Socket(_) => None,
        };

        if !matches!(console_info.serial_main_fd, ConsoleOutput::Off) {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            self.serial_manager = match console_info.serial_main_fd {
                ConsoleOutput::Pty(_) | ConsoleOutput::Tty(_) | ConsoleOutput::Socket(_) => {
                    // Input-capable outputs need a SerialManager thread to pump
                    // host-side input into the guest serial device.
                    let serial_manager = SerialManager::new(
                        serial,
                        console_info.serial_main_fd,
                        serial_config.socket,
                    )
                    .map_err(DeviceManagerError::CreateSerialManager)?;
                    if let Some(mut serial_manager) = serial_manager {
                        serial_manager
                            .start_thread(
                                self.exit_evt
                                    .try_clone()
                                    .map_err(DeviceManagerError::EventFd)?,
                            )
                            .map_err(DeviceManagerError::SpawnSerialManager)?;
                        Some(Arc::new(serial_manager))
                    } else {
                        None
                    }
                }
                _ => None,
            };
        }

        #[cfg(target_arch = "x86_64")]
        {
            let debug_console_writer: Option<Box<dyn io::Write + Send>> =
                match console_info.debug_main_fd {
                    ConsoleOutput::File(file) | ConsoleOutput::Tty(file) => Some(Box::new(file)),
                    ConsoleOutput::Off
                    | ConsoleOutput::Null
                    | ConsoleOutput::Pty(_)
                    | ConsoleOutput::Socket(_) => None,
                };
            if let Some(writer) = debug_console_writer {
                let _ = self.add_debug_console_device(writer)?;
            }
        }

        let console_resizer = self.add_virtio_console_device(
            virtio_devices,
            console_info.console_main_fd,
            console_resize_pipe,
        )?;

        Ok(Arc::new(Console { console_resizer }))
    }

    /// Creates the TPM device backed by `tpm_path` and maps it on the MMIO
    /// bus at the architecture's fixed TPM address range.
    ///
    /// # Errors
    /// Returns `CreateTpmDevice` if the backend cannot be created and
    /// `BusError` if the MMIO range cannot be inserted.
    fn add_tpm_device(
        &mut self,
        tpm_path: PathBuf,
    ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
        // Create TPM Device
        let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
            DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
        })?;
        let tpm = Arc::new(Mutex::new(tpm));

        // Add TPM Device to mmio
        self.address_manager
            .mmio_bus
            .insert(
                tpm.clone(),
                arch::layout::TPM_START.0,
                arch::layout::TPM_SIZE,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(tpm)
    }

    /// Instantiates every virtio device requested by the VM configuration
    /// and returns them as a flat list ready to be placed on the PCI bus.
    fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices: Vec<MetaVirtioDevice> = Vec::new();

        // Create "standard" virtio devices (net/block/rng)
        devices.append(&mut self.make_virtio_block_devices()?);
        devices.append(&mut self.make_virtio_net_devices()?);
        devices.append(&mut self.make_virtio_rng_devices()?);

        // Add virtio-fs if required
        devices.append(&mut self.make_virtio_fs_devices()?);

        // Add virtio-pmem if required
        devices.append(&mut self.make_virtio_pmem_devices()?);

        // Add virtio-vsock if required
        devices.append(&mut self.make_virtio_vsock_devices()?);

        devices.append(&mut self.make_virtio_mem_devices()?);

        // Add
        // virtio-balloon if required
        devices.append(&mut self.make_virtio_balloon_devices()?);

        // Add virtio-watchdog device
        devices.append(&mut self.make_virtio_watchdog_devices()?);

        // Add vDPA devices if required
        devices.append(&mut self.make_vdpa_devices()?);

        Ok(devices)
    }

    // Cache whether aio is supported to avoid checking for every block device
    fn aio_is_supported(&mut self) -> bool {
        if let Some(supported) = self.aio_supported {
            return supported;
        }

        let supported = block_aio_is_supported();
        self.aio_supported = Some(supported);
        supported
    }

    // Cache whether io_uring is supported to avoid probing for every block device
    fn io_uring_is_supported(&mut self) -> bool {
        if let Some(supported) = self.io_uring_supported {
            return supported;
        }

        let supported = block_io_uring_is_supported();
        self.io_uring_supported = Some(supported);
        supported
    }

    /// Builds a single virtio-block device (or its vhost-user counterpart)
    /// from `disk_cfg`, picking the fastest supported disk backend
    /// (io_uring > aio > synchronous) for the detected image type.
    ///
    /// Assigns `disk_cfg.id` in place if the config did not carry one.
    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let vhost_user_block = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(
                    id.clone(),
                    vu_cfg,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");

                        // The cfg!() check above guarantees the feature is on
                        // whenever this branch is reached at runtime.
                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(
                                FixedVhdDiskAsync::new(file)
                                    .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                            ) as Box<dyn DiskFile>
                        }
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous RAW disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                        }
                    } else if !disk_cfg.disable_aio && self.aio_is_supported() {
                        info!("Using asynchronous RAW disk file (aio)");
                        Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            // Per-disk rate limiter config takes precedence over a named,
            // shared rate-limit group.
            let rate_limit_group =
                if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
                    // Create an anonymous RateLimiterGroup that is dropped when the Disk
                    // is dropped.
                    let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
                    let ops = rate_limiter_cfg.ops.unwrap_or_default();
                    let mut rate_limit_group = RateLimiterGroup::new(
                        disk_cfg.id.as_ref().unwrap(),
                        bw.size,
                        bw.one_time_burst.unwrap_or(0),
                        bw.refill_time,
                        ops.size,
                        ops.one_time_burst.unwrap_or(0),
                        ops.refill_time,
                    )
                    .map_err(DeviceManagerError::RateLimiterGroupCreate)?;

                    rate_limit_group
                        .start_thread(
                            self.exit_evt
                                .try_clone()
                                .map_err(DeviceManagerError::EventFd)?,
                        )
                        .unwrap();

                    Some(Arc::new(rate_limit_group))
                } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
                    self.rate_limit_groups.get(rate_limit_group).cloned()
                } else {
                    None
                };

            let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() {
                queue_affinity
                    .iter()
                    .map(|a| (a.queue_index, a.host_cpus.clone()))
                    .collect()
            } else {
                BTreeMap::new()
            };

            let virtio_block = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    disk_cfg.serial.clone(),
                    self.seccomp_action.clone(),
                    rate_limit_group,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    queue_affinity,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node.
        // In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Builds one virtio-block device per disk in the VM configuration and
    /// writes any generated device ids back into the config.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }

    /// Builds a single virtio-net device from `net_cfg`: a vhost-user-net
    /// device, a device bound to a named TAP interface, one built from
    /// pre-opened TAP fds, or a freshly created TAP, in that order of
    /// precedence. Assigns `net_cfg.id` in place if the config did not
    /// carry one.
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    net_cfg.mtu,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                ) {
                    Ok(vun_device) => vun_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserNet(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_net as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let state = state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?;
            let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        Some(tap_if_name),
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else if let Some(fds) = &net_cfg.fds {
                let net = virtio_devices::Net::from_tap_fds(
                    id.clone(),
                    fds,
                    Some(net_cfg.mac),
                    net_cfg.mtu,
                    self.force_iommu | net_cfg.iommu,
                    net_cfg.queue_size,
                    self.seccomp_action.clone(),
                    net_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                )
                .map_err(DeviceManagerError::CreateVirtioNet)?;

                // SAFETY: 'fds' are valid because TAP devices are created successfully
                unsafe {
                    self.config.lock().unwrap().add_preserved_fds(fds.clone());
                }

                Arc::new(Mutex::new(net))
            } else {
                // No TAP name and no fds: let the Net device create the TAP
                // interface itself.
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        None,
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            };

            (
                Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_net as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: net_cfg.iommu,
            id,
            pci_segment: net_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add virtio-net and vhost-user-net devices
    fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        let mut net_devices = self.config.lock().unwrap().net.clone();
        if let Some(net_list_cfg) = &mut net_devices {
            for net_cfg in net_list_cfg.iter_mut() {
                devices.push(self.make_virtio_net_device(net_cfg)?);
            }
        }
        self.config.lock().unwrap().net = net_devices;

        Ok(devices)
    }

    /// Builds the virtio-rng device backed by the entropy source configured
    /// in the VM config (skipped when the source path is not valid UTF-8).
    fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        if let Some(rng_path) = rng_config.src.to_str()
        {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_rng_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: rng_config.iommu,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_rng_device));
        }

        Ok(devices)
    }

    /// Builds a single vhost-user-fs (virtio-fs) device from `fs_cfg`,
    /// connecting to the virtiofsd backend over the configured socket.
    /// Assigns `fs_cfg.id` in place if the config did not carry one.
    ///
    /// # Errors
    /// Returns `NoVirtioFsSock` when the socket path is not valid UTF-8.
    fn make_virtio_fs_device(
        &mut self,
        fs_cfg: &mut FsConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &fs_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
            fs_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-fs device: {:?}", fs_cfg);

        let mut node = device_node!(id);

        if let Some(fs_socket) = fs_cfg.socket.to_str() {
            let virtio_fs_device = Arc::new(Mutex::new(
                virtio_devices::vhost_user::Fs::new(
                    id.clone(),
                    fs_socket,
                    &fs_cfg.tag,
                    fs_cfg.num_queues,
                    fs_cfg.queue_size,
                    None,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioFs)?,
            ));

            // Update the device tree with the migratable device.
            node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_fs_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id,
                pci_segment: fs_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            Err(DeviceManagerError::NoVirtioFsSock)
        }
    }

    /// Builds one virtio-fs device per `fs` entry in the VM configuration.
    fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut fs_devices = self.config.lock().unwrap().fs.clone();
        if let Some(fs_list_cfg) = &mut fs_devices {
            for fs_cfg in fs_list_cfg.iter_mut() {
                devices.push(self.make_virtio_fs_device(fs_cfg)?);
            }
        }
        self.config.lock().unwrap().fs = fs_devices;

        Ok(devices)
    }

    /// Builds a single virtio-pmem device from `pmem_cfg`: opens (or, for a
    /// directory, creates a temporary) backing file, mmaps it, allocates a
    /// 2MiB-aligned guest range and registers the userspace mapping with the
    /// memory manager. Assigns `pmem_cfg.id` in place if needed.
    fn make_virtio_pmem_device(
        &mut self,
        pmem_cfg: &mut PmemConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &pmem_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
            pmem_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-pmem device: {:?}", pmem_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
            info!("Restoring virtio-pmem {} resources", id);

            // A restored pmem device must carry exactly one MMIO range
            // resource; anything else is a corrupted snapshot.
            let mut region_range: Option<(u64, u64)> = None;
            for resource in node.resources.iter() {
                match resource {
                    Resource::MmioAddressRange { base, size } => {
                        if region_range.is_some() {
                            return Err(DeviceManagerError::ResourceAlreadyExists);
                        }

                        region_range = Some((*base, *size));
                    }
                    _ => {
                        error!("Unexpected resource {:?} for {}", resource, id);
                    }
                }
            }

            if region_range.is_none() {
                return Err(DeviceManagerError::MissingVirtioPmemResources);
            }

            region_range
        } else {
            None
        };

        // A directory backing store requires an explicit size and is backed
        // by an unnamed temporary file (O_TMPFILE) created in it.
        let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
            if pmem_cfg.size.is_none() {
                return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
            }
            (O_TMPFILE, true)
        } else {
            (0, false)
        };

        let mut file = OpenOptions::new()
            .read(true)
            .write(!pmem_cfg.discard_writes)
            .custom_flags(custom_flags)
            .open(&pmem_cfg.file)
            .map_err(DeviceManagerError::PmemFileOpen)?;

        let size = if let Some(size) = pmem_cfg.size {
            if set_len {
                file.set_len(size)
                    .map_err(DeviceManagerError::PmemFileSetLen)?;
            }
            size
        } else {
            // No explicit size: use the current file length.
            file.seek(SeekFrom::End(0))
                .map_err(DeviceManagerError::PmemFileSetLen)?
        };

        // The region must be a multiple of 2MiB.
        if size % 0x20_0000 != 0 {
            return Err(DeviceManagerError::PmemSizeNotAligned);
        }

        let (region_base, region_size) = if let Some((base, size)) = region_range {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(
                    Some(GuestAddress(base)),
                    size as GuestUsize,
                    Some(0x0020_0000),
                )
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base, size)
        } else {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            let base = self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(None, size as GuestUsize, Some(0x0020_0000))
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base.raw_value(), size)
        };

        let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
        // discard_writes maps the file privately so guest writes never reach
        // the backing file.
        let mmap_region = MmapRegion::build(
            Some(FileOffset::new(cloned_file, 0)),
            region_size as usize,
            PROT_READ | PROT_WRITE,
            MAP_NORESERVE
                | if pmem_cfg.discard_writes {
                    MAP_PRIVATE
                } else {
                    MAP_SHARED
                },
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;
        let host_addr: u64 = mmap_region.as_ptr() as u64;

        let mem_slot = self
            .memory_manager
            .lock()
            .unwrap()
            .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
            .map_err(DeviceManagerError::MemoryManager)?;

        let mapping = virtio_devices::UserspaceMapping {
            host_addr,
            mem_slot,
            addr: GuestAddress(region_base),
            len: region_size,
            mergeable: false,
        };

        let virtio_pmem_device = Arc::new(Mutex::new(
            virtio_devices::Pmem::new(
                id.clone(),
                file,
                GuestAddress(region_base),
                mapping,
                mmap_region,
                self.force_iommu | pmem_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioPmem)?,
        ));

        // Update the device tree with correct resource information and with
        // the migratable device.
        node.resources.push(Resource::MmioAddressRange {
            base: region_base,
            size: region_size,
        });
        node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_pmem_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: pmem_cfg.iommu,
            id,
            pci_segment: pmem_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Builds one virtio-pmem device per `pmem` entry in the VM configuration.
    fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add virtio-pmem if required
        let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
        if let Some(pmem_list_cfg) = &mut pmem_devices {
            for pmem_cfg in pmem_list_cfg.iter_mut() {
                devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
            }
        }
        self.config.lock().unwrap().pmem = pmem_devices;

        Ok(devices)
    }

    /// Builds the virtio-vsock device from `vsock_cfg`, backed by a Unix
    /// socket on the host. Assigns `vsock_cfg.id` in place if needed.
    fn make_virtio_vsock_device(
        &mut self,
        vsock_cfg: &mut VsockConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vsock_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
            vsock_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-vsock device: {:?}", vsock_cfg);

        let socket_path = vsock_cfg
            .socket
            .to_str()
            .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
        let backend =
            virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                .map_err(DeviceManagerError::CreateVsockBackend)?;

        let vsock_device = Arc::new(Mutex::new(
            virtio_devices::Vsock::new(
                id.clone(),
                vsock_cfg.cid,
                vsock_cfg.socket.clone(),
                backend,
                self.force_iommu | vsock_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioVsock)?,
        ));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vsock_device));

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&vsock_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vsock_cfg.iommu,
            id,
            pci_segment: vsock_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Builds the (at most one) virtio-vsock device from the VM configuration.
    fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut vsock = self.config.lock().unwrap().vsock.clone();
        if let Some(ref mut vsock_cfg) = &mut vsock {
            devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
        }
        self.config.lock().unwrap().vsock = vsock;

        Ok(devices)
    }

    /// Builds one virtio-mem device per memory zone that has a virtio-mem
    /// region, wiring each device back into its zone for hotplug resizing.
    fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mm = self.memory_manager.clone();
        let mut mm = mm.lock().unwrap();
        for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
            if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
                info!("Creating virtio-mem device: id = {}", memory_zone_id);

                let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
                    .map(|i| i as u16);

                let virtio_mem_device = Arc::new(Mutex::new(
                    virtio_devices::Mem::new(
                        memory_zone_id.clone(),
                        virtio_mem_zone.region(),
                        self.seccomp_action.clone(),
                        node_id,
                        virtio_mem_zone.hotplugged_size(),
                        virtio_mem_zone.hugepages(),
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        virtio_mem_zone.blocks_state().clone(),
                        state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
                            .map_err(DeviceManagerError::RestoreGetState)?,
                    )
                    .map_err(DeviceManagerError::CreateVirtioMem)?,
                ));

                // Update the virtio-mem zone so that it has a handle onto the
                // virtio-mem device, which will be used for triggering a resize
                // if needed.
                virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));

                self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));

                devices.push(MetaVirtioDevice {
                    virtio_device: Arc::clone(&virtio_mem_device)
                        as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                    iommu: false,
                    id: memory_zone_id.clone(),
                    pci_segment: 0,
                    dma_handler: None,
                });

                // Fill the device tree with a new node. In case of restore, we
                // know there is nothing to do, so we can simply override the
                // existing entry.
                self.device_tree.lock().unwrap().insert(
                    memory_zone_id.clone(),
                    device_node!(memory_zone_id, virtio_mem_device),
                );
            }
        }

        Ok(devices)
    }

    /// Builds the pvmemcontrol PCI device pair (bus device + PCI device)
    /// and registers it on segment 0.
    #[cfg(feature = "pvmemcontrol")]
    fn make_pvmemcontrol_device(
        &mut self,
    ) -> DeviceManagerResult<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )> {
        let id = String::from(PVMEMCONTROL_DEVICE_NAME);
        let pci_segment_id = 0x0_u16;

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        info!("Creating pvmemcontrol device: id = {}", id);
        let (pvmemcontrol_pci_device, pvmemcontrol_bus_device) =
            devices::pvmemcontrol::PvmemcontrolDevice::make_device(
                id.clone(),
                self.memory_manager.lock().unwrap().guest_memory(),
            );

        let pvmemcontrol_pci_device = Arc::new(Mutex::new(pvmemcontrol_pci_device));
        let pvmemcontrol_bus_device = Arc::new(pvmemcontrol_bus_device);

        let new_resources = self.add_pci_device(
            pvmemcontrol_bus_device.clone(),
            pvmemcontrol_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let mut node = device_node!(id, pvmemcontrol_pci_device);

        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = None;

        self.device_tree.lock().unwrap().insert(id, node);

        Ok((pvmemcontrol_bus_device, pvmemcontrol_pci_device))
    }

    /// Builds the virtio-balloon device when the VM configuration requests
    /// one, and keeps a handle in `self.balloon` for later resizing.
    fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
            let id = String::from(BALLOON_DEVICE_NAME);
            info!("Creating virtio-balloon device: id = {}", id);

            let virtio_balloon_device = Arc::new(Mutex::new(
                virtio_devices::Balloon::new(
                    id.clone(),
                    balloon_config.size,
balloon_config.deflate_on_oom, 3127 balloon_config.free_page_reporting, 3128 self.seccomp_action.clone(), 3129 self.exit_evt 3130 .try_clone() 3131 .map_err(DeviceManagerError::EventFd)?, 3132 state_from_id(self.snapshot.as_ref(), id.as_str()) 3133 .map_err(DeviceManagerError::RestoreGetState)?, 3134 ) 3135 .map_err(DeviceManagerError::CreateVirtioBalloon)?, 3136 )); 3137 3138 self.balloon = Some(virtio_balloon_device.clone()); 3139 3140 devices.push(MetaVirtioDevice { 3141 virtio_device: Arc::clone(&virtio_balloon_device) 3142 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3143 iommu: false, 3144 id: id.clone(), 3145 pci_segment: 0, 3146 dma_handler: None, 3147 }); 3148 3149 self.device_tree 3150 .lock() 3151 .unwrap() 3152 .insert(id.clone(), device_node!(id, virtio_balloon_device)); 3153 } 3154 3155 Ok(devices) 3156 } 3157 3158 fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3159 let mut devices = Vec::new(); 3160 3161 if !self.config.lock().unwrap().watchdog { 3162 return Ok(devices); 3163 } 3164 3165 let id = String::from(WATCHDOG_DEVICE_NAME); 3166 info!("Creating virtio-watchdog device: id = {}", id); 3167 3168 let virtio_watchdog_device = Arc::new(Mutex::new( 3169 virtio_devices::Watchdog::new( 3170 id.clone(), 3171 self.reset_evt.try_clone().unwrap(), 3172 self.seccomp_action.clone(), 3173 self.exit_evt 3174 .try_clone() 3175 .map_err(DeviceManagerError::EventFd)?, 3176 state_from_id(self.snapshot.as_ref(), id.as_str()) 3177 .map_err(DeviceManagerError::RestoreGetState)?, 3178 ) 3179 .map_err(DeviceManagerError::CreateVirtioWatchdog)?, 3180 )); 3181 devices.push(MetaVirtioDevice { 3182 virtio_device: Arc::clone(&virtio_watchdog_device) 3183 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3184 iommu: false, 3185 id: id.clone(), 3186 pci_segment: 0, 3187 dma_handler: None, 3188 }); 3189 3190 self.device_tree 3191 .lock() 3192 .unwrap() 3193 .insert(id.clone(), device_node!(id, virtio_watchdog_device)); 3194 3195 
Ok(devices) 3196 } 3197 3198 fn make_vdpa_device( 3199 &mut self, 3200 vdpa_cfg: &mut VdpaConfig, 3201 ) -> DeviceManagerResult<MetaVirtioDevice> { 3202 let id = if let Some(id) = &vdpa_cfg.id { 3203 id.clone() 3204 } else { 3205 let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?; 3206 vdpa_cfg.id = Some(id.clone()); 3207 id 3208 }; 3209 3210 info!("Creating vDPA device: {:?}", vdpa_cfg); 3211 3212 let device_path = vdpa_cfg 3213 .path 3214 .to_str() 3215 .ok_or(DeviceManagerError::CreateVdpaConvertPath)?; 3216 3217 let vdpa_device = Arc::new(Mutex::new( 3218 virtio_devices::Vdpa::new( 3219 id.clone(), 3220 device_path, 3221 self.memory_manager.lock().unwrap().guest_memory(), 3222 vdpa_cfg.num_queues as u16, 3223 state_from_id(self.snapshot.as_ref(), id.as_str()) 3224 .map_err(DeviceManagerError::RestoreGetState)?, 3225 ) 3226 .map_err(DeviceManagerError::CreateVdpa)?, 3227 )); 3228 3229 // Create the DMA handler that is required by the vDPA device 3230 let vdpa_mapping = Arc::new(VdpaDmaMapping::new( 3231 Arc::clone(&vdpa_device), 3232 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3233 )); 3234 3235 self.device_tree 3236 .lock() 3237 .unwrap() 3238 .insert(id.clone(), device_node!(id, vdpa_device)); 3239 3240 Ok(MetaVirtioDevice { 3241 virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3242 iommu: vdpa_cfg.iommu, 3243 id, 3244 pci_segment: vdpa_cfg.pci_segment, 3245 dma_handler: Some(vdpa_mapping), 3246 }) 3247 } 3248 3249 fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3250 let mut devices = Vec::new(); 3251 // Add vdpa if required 3252 let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone(); 3253 if let Some(vdpa_list_cfg) = &mut vdpa_devices { 3254 for vdpa_cfg in vdpa_list_cfg.iter_mut() { 3255 devices.push(self.make_vdpa_device(vdpa_cfg)?); 3256 } 3257 } 3258 self.config.lock().unwrap().vdpa = vdpa_devices; 3259 3260 Ok(devices) 3261 } 3262 3263 fn next_device_name(&mut 
        self, prefix: &str) -> DeviceManagerResult<String> {
        let start_id = self.device_id_cnt;
        loop {
            // Generate the temporary name.
            let name = format!("{}{}", prefix, self.device_id_cnt);
            // Increment the counter.
            self.device_id_cnt += Wrapping(1);
            // Check if the name is already in use.
            if !self.boot_id_list.contains(&name)
                && !self.device_tree.lock().unwrap().contains_key(&name)
            {
                return Ok(name);
            }

            if self.device_id_cnt == start_id {
                // We went through a full loop and there's nothing else we can
                // do.
                break;
            }
        }
        Err(DeviceManagerError::NoAvailableDeviceName)
    }

    /// Adds a VFIO passthrough device described by `device_cfg`.
    ///
    /// Lazily creates the hypervisor-level passthrough device on first use
    /// and caches it in `self.passthrough_device` for subsequent VFIO
    /// devices, then delegates the actual device creation to
    /// `add_vfio_device()`.
    fn add_passthrough_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // If the passthrough device has not been created yet, it is created
        // here and stored in the DeviceManager structure for future needs.
        if self.passthrough_device.is_none() {
            self.passthrough_device = Some(
                self.address_manager
                    .vm
                    .create_passthrough_device()
                    .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
            );
        }

        self.add_vfio_device(device_cfg)
    }

    /// Builds a new VFIO container from a duplicate of the cached
    /// passthrough device fd.
    ///
    /// Fails with `NoDevicePassthroughSupport` when no passthrough device
    /// has been created yet (see `add_passthrough_device()`).
    fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
        let passthrough_device = self
            .passthrough_device
            .as_ref()
            .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

        let dup = passthrough_device
            .try_clone()
            .map_err(DeviceManagerError::VfioCreate)?;

        Ok(Arc::new(
            VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
        ))
    }

    /// Creates a VFIO PCI device from `device_cfg` and plugs it on the
    /// requested PCI segment.
    ///
    /// Chooses (or creates) the right VFIO container depending on whether
    /// the device sits behind the virtual IOMMU, performs the initial guest
    /// RAM DMA mappings when a brand new shared container is created, sets
    /// up MSI and (optionally) INTx interrupts, maps the device MMIO
    /// regions, and records the device in the device tree.
    ///
    /// Returns the assigned PCI b/d/f and the device name.
    fn add_vfio_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't let all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device. That's simply because the map/unmap operations happen at the
        // VFIO container level.
        let vfio_container = if device_cfg.iommu {
            let vfio_container = self.create_vfio_container()?;

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            // A vIOMMU-attached device gets its DMA mappings driven by the
            // virtual IOMMU; requires the vIOMMU to actually exist.
            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }

            vfio_container
        } else if let Some(vfio_container) = &self.vfio_container {
            // Reuse the shared container created by a previous VFIO device.
            Arc::clone(vfio_container)
        } else {
            // First non-vIOMMU VFIO device: create the shared container and
            // remember that guest RAM still has to be DMA-mapped into it.
            let vfio_container = self.create_vfio_container()?;
            needs_dma_mapping = true;
            self.vfio_container = Some(Arc::clone(&vfio_container));

            vfio_container
        };

        let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
            .map_err(DeviceManagerError::VfioCreate)?;

        if needs_dma_mapping {
            // Register DMA mapping in IOMMU.
            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem device itself.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    vfio_container
                        .vfio_dma_map(
                            region.start_addr().raw_value(),
                            region.len(),
                            region.as_ptr() as u64,
                        )
                        .map_err(DeviceManagerError::VfioDmaMap)?;
                }
            }

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            // Let virtio-mem devices drive map/unmap for their own regions
            // through this container-level handler.
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Container,
                        vfio_mapping.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }
        }

        // Legacy (INTx) interrupt group, only when a legacy interrupt
        // manager is available. The IRQ comes from the per-segment slot
        // assigned to this device number.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let memory_manager = self.memory_manager.clone();

        let vfio_pci_device = VfioPciDevice::new(
            vfio_name.clone(),
            &self.address_manager.vm,
            vfio_device,
            vfio_container,
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            device_cfg.iommu,
            pci_device_bdf,
            // Closure handing out KVM/hypervisor memory slots for the
            // device's mappable MMIO regions.
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
            device_cfg.x_nv_gpudirect_clique,
        )
        .map_err(DeviceManagerError::VfioPciCreate)?;

        let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

        let new_resources = self.add_pci_device(
            vfio_pci_device.clone(),
            vfio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        vfio_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioMapRegion)?;

        // Track the device MMIO regions so DMA mapping helpers can resolve
        // peer-to-peer addresses.
        for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
            self.mmio_regions.lock().unwrap().push(mmio_region);
        }

        let mut node = device_node!(vfio_name, vfio_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_name.clone(), node);

        Ok((pci_device_bdf, vfio_name))
    }

    /// Registers a PCI device on its segment's bus.
    ///
    /// Allocates the device BARs (honoring any previously saved `resources`
    /// when restoring), adds the device to the segment's PCI bus, records
    /// it in `bus_devices`, and registers the BAR ranges on the I/O
    /// (x86_64 only) and MMIO buses.
    ///
    /// Returns the list of `Resource::PciBar` entries describing the
    /// allocated BARs, suitable for storing in the device tree.
    fn add_pci_device(
        &mut self,
        bus_device: Arc<dyn BusDeviceSync>,
        pci_device: Arc<Mutex<dyn PciDevice>>,
        segment_id: u16,
        bdf: PciBdf,
        resources: Option<Vec<Resource>>,
    ) -> DeviceManagerResult<Vec<Resource>> {
        let bars = pci_device
            .lock()
            .unwrap()
            .allocate_bars(
                &self.address_manager.allocator,
                &mut self.pci_segments[segment_id as usize]
                    .mem32_allocator
                    .lock()
                    .unwrap(),
                &mut self.pci_segments[segment_id as usize]
                    .mem64_allocator
                    .lock()
                    .unwrap(),
                resources,
            )
            .map_err(DeviceManagerError::AllocateBars)?;

        let mut pci_bus = self.pci_segments[segment_id as usize]
            .pci_bus
            .lock()
            .unwrap();

        pci_bus
            .add_device(bdf.device() as u32, pci_device)
            .map_err(DeviceManagerError::AddPciDevice)?;

        self.bus_devices.push(Arc::clone(&bus_device));

        pci_bus
            .register_mapping(
                bus_device,
                #[cfg(target_arch = "x86_64")]
                self.address_manager.io_bus.as_ref(),
                self.address_manager.mmio_bus.as_ref(),
                bars.clone(),
            )
            .map_err(DeviceManagerError::AddPciDevice)?;
        // Convert the allocated BARs into device-tree resources.
        let mut new_resources = Vec::new();
        for bar in bars {
            new_resources.push(Resource::PciBar {
                index: bar.idx(),
                base: bar.addr(),
                size: bar.size(),
                type_: bar.region_type().into(),
                prefetchable: bar.prefetchable().into(),
            });
        }

        Ok(new_resources)
    }

    /// Creates every VFIO passthrough device listed in the VM
    /// configuration.
    ///
    /// Returns the b/d/f of each device that is attached to the virtual
    /// IOMMU, so the caller can wire them to the vIOMMU. Generated device
    /// ids are written back into the configuration.
    fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut iommu_attached_device_ids = Vec::new();
        let mut devices = self.config.lock().unwrap().devices.clone();

        if let Some(device_list_cfg) = &mut devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (device_id, _) = self.add_passthrough_device(device_cfg)?;
                if device_cfg.iommu && self.iommu_device.is_some() {
                    iommu_attached_device_ids.push(device_id);
                }
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().devices = devices;

        Ok(iommu_attached_device_ids)
    }

    /// Creates a vfio-user PCI device from `device_cfg`.
    ///
    /// Connects to the backend over the configured socket, registers DMA
    /// mapping handlers with every virtio-mem device, DMA-maps all current
    /// guest memory regions into the backend, plugs the device on its PCI
    /// segment, maps the MMIO regions and records the device in the device
    /// tree.
    ///
    /// Returns the assigned PCI b/d/f and the device name.
    fn add_vfio_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_user_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;

        // Optional legacy (INTx) interrupt group, from the per-segment IRQ
        // slot assigned to this device number.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let client = Arc::new(Mutex::new(
            vfio_user::Client::new(&device_cfg.socket)
                .map_err(DeviceManagerError::VfioUserCreateClient)?,
        ));

        let memory_manager = self.memory_manager.clone();

        let mut vfio_user_pci_device = VfioUserPciDevice::new(
            vfio_user_name.clone(),
            &self.address_manager.vm,
            client.clone(),
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
        )
        .map_err(DeviceManagerError::VfioUserCreate)?;

        let memory = self.memory_manager.lock().unwrap().guest_memory();
        let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
        // virtio-mem regions are mapped/unmapped by the virtio-mem devices
        // themselves through this per-device handler.
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Device(pci_device_bdf.into()),
                    vfio_user_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }

        // Map all current guest memory regions into the vfio-user backend.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_user_pci_device
                    .dma_map(region)
                    .map_err(DeviceManagerError::VfioUserDmaMap)?;
            }
        }

        let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

        let new_resources = self.add_pci_device(
            vfio_user_pci_device.clone(),
            vfio_user_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Note it is required to call 'add_pci_device()' in advance to have the list of
        // mmio regions provisioned correctly
        vfio_user_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioUserMapRegion)?;

        let mut node = device_node!(vfio_user_name, vfio_user_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_user_name.clone(), node);

        Ok((pci_device_bdf, vfio_user_name))
    }

    /// Creates every vfio-user device listed in the VM configuration and
    /// writes back any generated ids.
    ///
    /// Always returns an empty vector: vfio-user devices are never
    /// vIOMMU-attached through this path.
    fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut user_devices = self.config.lock().unwrap().user_devices.clone();

        if let Some(device_list_cfg) = &mut user_devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().user_devices = user_devices;

        Ok(vec![])
    }

    /// Wraps a virtio device into a virtio-pci transport device and plugs
    /// it on the requested PCI segment.
    ///
    /// Builds the device-tree node (parenting the underlying virtio node),
    /// sets up address translation when the device sits behind the vIOMMU
    /// (or SEV-SNP when enabled), performs DMA mappings through the
    /// optional `dma_handler`, registers ioeventfds for queue
    /// notifications, and returns the assigned PCI b/d/f.
    fn add_virtio_pci_device(
        &mut self,
        virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu_mapping: &Option<Arc<IommuMapping>>,
        virtio_device_id: String,
        pci_segment_id: u16,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> DeviceManagerResult<PciBdf> {
        let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");

        // Add the new virtio-pci node to the device tree.
        let mut node = device_node!(id);
        node.children = vec![virtio_device_id.clone()];

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        // Update the existing virtio node by setting the parent.
        if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
            node.parent = Some(id.clone());
        } else {
            return Err(DeviceManagerError::MissingNode);
        }

        // Allows support for one MSI-X vector per queue. It also adds 1
        // as we need to take into account the dedicated vector to notify
        // about a virtio config change.
        let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;

        // Create the AccessPlatform trait from the implementation IommuMapping.
        // This will provide address translation for any virtio device sitting
        // behind a vIOMMU.
        let mut access_platform: Option<Arc<dyn AccessPlatform>> = None;

        if let Some(mapping) = iommu_mapping {
            access_platform = Some(Arc::new(AccessPlatformMapping::new(
                pci_device_bdf.into(),
                mapping.clone(),
            )));
        }

        // If SEV-SNP is enabled create the AccessPlatform from SevSnpPageAccessProxy
        #[cfg(feature = "sev_snp")]
        if self.config.lock().unwrap().is_sev_snp_enabled() {
            access_platform = Some(Arc::new(SevSnpPageAccessProxy::new(
                self.address_manager.vm.clone(),
            )));
        }

        let memory = self.memory_manager.lock().unwrap().guest_memory();

        // Map DMA ranges if a DMA handler is available and if the device is
        // not attached to a virtual IOMMU.
        if let Some(dma_handler) = &dma_handler {
            if iommu_mapping.is_some() {
                // vIOMMU case: the virtual IOMMU drives map/unmap through
                // this external mapping; the vIOMMU must exist.
                if let Some(iommu) = &self.iommu_device {
                    iommu
                        .lock()
                        .unwrap()
                        .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
                } else {
                    return Err(DeviceManagerError::MissingVirtualIommu);
                }
            } else {
                // Let every virtio-mem device handle the DMA map/unmap through the
                // DMA handler provided.
                for virtio_mem_device in self.virtio_mem_devices.iter() {
                    virtio_mem_device
                        .lock()
                        .unwrap()
                        .add_dma_mapping_handler(
                            VirtioMemMappingSource::Device(pci_device_bdf.into()),
                            dma_handler.clone(),
                        )
                        .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
                }

                // Do not register virtio-mem regions, as they are handled directly by
                // virtio-mem devices.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        // Identity (gpa -> gpa) mapping of guest RAM.
                        let gpa = region.start_addr().0;
                        let size = region.len();
                        dma_handler
                            .map(gpa, gpa, size)
                            .map_err(DeviceManagerError::VirtioDmaMap)?;
                    }
                }
            }
        }

        let device_type = virtio_device.lock().unwrap().device_type();
        let virtio_pci_device = Arc::new(Mutex::new(
            VirtioPciDevice::new(
                id.clone(),
                memory,
                virtio_device,
                msix_num,
                access_platform,
                &self.msi_interrupt_manager,
                pci_device_bdf.into(),
                self.activate_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                // All device types *except* virtio block devices should be allocated a 64-bit bar
                // The block devices should be given a 32-bit BAR so that they are easily accessible
                // to firmware without requiring excessive identity mapping.
                // The exception being if not on the default PCI segment.
                pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
                dma_handler,
                self.pending_activations.clone(),
                vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
            )
            .map_err(DeviceManagerError::VirtioDevice)?,
        ));

        let new_resources = self.add_pci_device(
            virtio_pci_device.clone(),
            virtio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Register one ioeventfd per queue notification address so guest
        // kicks bypass the VM exit handler path.
        let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
        for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
            let io_addr = IoEventAddress::Mmio(addr);
            self.address_manager
                .vm
                .register_ioevent(event, &io_addr, None)
                .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
        }

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
        self.device_tree.lock().unwrap().insert(id, node);

        Ok(pci_device_bdf)
    }

    /// Creates the pvpanic PCI device on segment 0 and records it in the
    /// device tree (with no PCI device handle, so it is not hot-removable
    /// through the generic handle paths).
    ///
    /// Returns a handle to the created device.
    fn add_pvpanic_device(
        &mut self,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
        let id = String::from(PVPANIC_DEVICE_NAME);
        let pci_segment_id = 0x0_u16;

        info!("Creating pvpanic device {}", id);

        // May be shadowed by the restored segment id when the device is
        // found in the device tree (snapshot restore).
        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
            .map_err(DeviceManagerError::PvPanicCreate)?;

        let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));

        let new_resources = self.add_pci_device(
            pvpanic_device.clone(),
            pvpanic_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let mut node = device_node!(id, pvpanic_device);

        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = None;

        self.device_tree.lock().unwrap().insert(id, node);

        Ok(Some(pvpanic_device))
    }

    /// Resolves the PCI placement for device `id`.
    ///
    /// Returns `(segment id, b/d/f, saved resources)`. When the id already
    /// exists in the device tree the device is being restored: its saved
    /// b/d/f, segment, and resources are reused (and the bus slot is
    /// re-claimed). Otherwise a fresh b/d/f is allocated on the requested
    /// `pci_segment_id` and no resources are returned.
    fn pci_resources(
        &self,
        id: &str,
        pci_segment_id: u16,
    ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        Ok(
            if let Some(node) = self.device_tree.lock().unwrap().get(id) {
                info!("Restoring virtio-pci {} resources", id);
                let pci_device_bdf: PciBdf = node
                    .pci_bdf
                    .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
                // The restored b/d/f dictates the segment, overriding the
                // caller-provided segment id.
                let pci_segment_id = pci_device_bdf.segment();

                // Re-claim the device slot on the bus.
                self.pci_segments[pci_segment_id as usize]
                    .pci_bus
                    .lock()
                    .unwrap()
                    .get_device_id(pci_device_bdf.device() as usize)
                    .map_err(DeviceManagerError::GetPciDeviceId)?;

                (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
            } else {
                let pci_device_bdf =
                    self.pci_segments[pci_segment_id as usize].next_device_bdf()?;

                (pci_segment_id, pci_device_bdf, None)
            },
        )
    }

    /// Returns the PIO bus (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }

    /// Returns the MMIO bus.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }

    /// Returns the system address allocator.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }

    /// Returns the interrupt controller, if one has been created, as a
    /// trait object.
    pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
        self.interrupt_controller
            .as_ref()
            .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
    }

    /// Returns the list of PCI segments managed by this DeviceManager.
    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }

    /// Extra kernel command line fragments collected while creating
    /// devices (aarch64 only).
    #[cfg(target_arch = "aarch64")]
    pub fn cmdline_additions(&self) -> &[String] {
        self.cmdline_additions.as_slice()
    }

    /// Propagates a newly added guest memory region to every consumer:
    /// each virtio device, each non-vIOMMU DMA handler (identity-mapped),
    /// the shared VFIO container if any, and every vfio-user device.
    pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
        for handle in self.virtio_devices.iter() {
            handle
                .virtio_device
                .lock()
                .unwrap()
                .add_memory_region(new_region)
                .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;

            if let Some(dma_handler) = &handle.dma_handler {
                if !handle.iommu {
                    // Identity (gpa -> gpa) DMA mapping of the new region.
                    let gpa = new_region.start_addr().0;
                    let size = new_region.len();
                    dma_handler
                        .map(gpa, gpa, size)
                        .map_err(DeviceManagerError::VirtioDmaMap)?;
                }
            }
        }

        // Take care of updating the memory for VFIO PCI devices.
        if let Some(vfio_container) = &self.vfio_container {
            vfio_container
                .vfio_dma_map(
                    new_region.start_addr().raw_value(),
                    new_region.len(),
                    new_region.as_ptr() as u64,
                )
                .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
        }

        // Take care of updating the memory for vfio-user devices.
        {
            let device_tree = self.device_tree.lock().unwrap();
            for pci_device_node in device_tree.pci_devices() {
                if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
                    .pci_device_handle
                    .as_ref()
                    .ok_or(DeviceManagerError::MissingPciDevice)?
                {
                    vfio_user_pci_device
                        .lock()
                        .unwrap()
                        .dma_map(new_region)
                        .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
                }
            }
        }

        Ok(())
    }

    /// Drains the list of pending virtio device activations and activates
    /// each of them, failing on the first activation error.
    pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
        for mut activator in self.pending_activations.lock().unwrap().drain(..) {
            activator
                .activate()
                .map_err(DeviceManagerError::VirtioActivate)?;
        }
        Ok(())
    }

    /// Notifies the guest about a hotplug event through the GED
    /// notification device.
    ///
    /// Panics if no GED notification device was created (the `unwrap()` on
    /// `ged_notification_device`).
    pub fn notify_hotplug(
        &self,
        _notification_type: AcpiNotificationFlags,
    ) -> DeviceManagerResult<()> {
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(_notification_type)
            .map_err(DeviceManagerError::HotPlugNotification);
    }

    /// Hot-adds a VFIO passthrough device.
    ///
    /// Validates the requested id, rejects vIOMMU attachment on segments
    /// not configured as IOMMU segments, and flags the new device in the
    /// segment's PCIU (devices-up) bitmap so the guest picks it up.
    pub fn add_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&device_cfg.id)?;

        if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;

        // Update the PCIU bitmap
        self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo {
            id: device_name,
            bdf,
        })
    }

    /// Hot-adds a vfio-user device and flags it in the segment's PCIU
    /// (devices-up) bitmap.
    pub fn add_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&device_cfg.id)?;

        let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;

        // Update the PCIU bitmap
        self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo {
            id: device_name,
            bdf,
        })
    }

    /// Requests removal of the device identified by `id`.
    ///
    /// Resolves the PCI node (directly, or via the parent for virtio
    /// devices), refuses removal for virtio device types other than
    /// net/block/pmem/fs/vsock, and flags the device in the segment's PCID
    /// (devices-down) bitmap. The actual teardown happens later in
    /// `eject_device()`, presumably once the guest acknowledges the
    /// ejection — TODO confirm against the ACPI GED flow.
    pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
        // The node can be directly a PCI node in case the 'id' refers to a
        // VFIO device or a virtio-pci one.
        // In case the 'id' refers to a virtio device, we must find the PCI
        // node by looking at the parent.
        let device_tree = self.device_tree.lock().unwrap();
        let node = device_tree
            .get(&id)
            .ok_or(DeviceManagerError::UnknownDeviceId(id))?;

        // Either the node itself is the PCI node, or its parent is.
        let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
            node
        } else {
            let parent = node
                .parent
                .as_ref()
                .ok_or(DeviceManagerError::MissingNode)?;
            device_tree
                .get(parent)
                .ok_or(DeviceManagerError::MissingNode)?
        };

        let pci_device_bdf: PciBdf = pci_device_node
            .pci_bdf
            .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
        let pci_segment_id = pci_device_bdf.segment();

        let pci_device_handle = pci_device_node
            .pci_device_handle
            .as_ref()
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        #[allow(irrefutable_let_patterns)]
        if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
            let device_type = VirtioDeviceType::from(
                virtio_pci_device
                    .lock()
                    .unwrap()
                    .virtio_device()
                    .lock()
                    .unwrap()
                    .device_type(),
            );
            // Only these virtio device types support hot-removal.
            match device_type {
                VirtioDeviceType::Net
                | VirtioDeviceType::Block
                | VirtioDeviceType::Pmem
                | VirtioDeviceType::Fs
                | VirtioDeviceType::Vsock => {}
                _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
            }
        }

        // Update the PCID bitmap
        self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();

        Ok(())
    }

    /// Ejects the device at `device_id` on `pci_segment_id`, tearing down
    /// all of its state.
    ///
    /// Releases the bus slot, removes the device-tree node and its
    /// children, unregisters ioeventfds / DMA mappings depending on the
    /// device kind (VFIO, virtio or vfio-user), frees the BARs, detaches
    /// the device from the PCI/IO/MMIO buses and shuts down the underlying
    /// virtio device when present.
    pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
        info!(
            "Ejecting device_id = {} on segment_id={}",
            device_id, pci_segment_id
        );

        // Convert the device ID into the corresponding b/d/f.
        let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);

        // Give the PCI device ID back to the PCI bus.
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .put_device_id(device_id as usize)
            .map_err(DeviceManagerError::PutPciDeviceId)?;

        // Remove the device from the device tree along with its children.
        let mut device_tree = self.device_tree.lock().unwrap();
        let pci_device_node = device_tree
            .remove_node_by_pci_bdf(pci_device_bdf)
            .ok_or(DeviceManagerError::MissingPciDevice)?;

        // For VFIO and vfio-user the PCI device id is the id.
        // For virtio we overwrite it later as we want the id of the
        // underlying device.
        let mut id = pci_device_node.id;
        let pci_device_handle = pci_device_node
            .pci_device_handle
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
            // The virtio-pci device has a single child
            if !pci_device_node.children.is_empty() {
                assert_eq!(pci_device_node.children.len(), 1);
                let child_id = &pci_device_node.children[0];
                id.clone_from(child_id);
            }
        }
        for child in pci_device_node.children.iter() {
            device_tree.remove(child);
        }

        // Whether this device's DMA was managed by the virtual IOMMU.
        let mut iommu_attached = false;
        if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
            if iommu_attached_devices.contains(&pci_device_bdf) {
                iommu_attached = true;
            }
        }

        // Per-kind teardown; yields the trait-object handles needed for the
        // common cleanup below plus whether a per-device DMA handler has to
        // be removed from the virtio-mem devices.
        let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
            // No need to remove any virtio-mem mapping here as the container outlives all devices
            PciDeviceHandle::Vfio(vfio_pci_device) => {
                for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
                    self.mmio_regions
                        .lock()
                        .unwrap()
                        .retain(|x| x.start != mmio_region.start)
                }

                (
                    Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_pci_device) as Arc<dyn BusDeviceSync>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    false,
                )
            }
            PciDeviceHandle::Virtio(virtio_pci_device) => {
                let dev = virtio_pci_device.lock().unwrap();
                // Unregister the per-queue ioeventfds registered at plug time.
                let bar_addr = dev.config_bar_addr();
                for (event, addr) in dev.ioeventfds(bar_addr) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.address_manager
                        .vm
                        .unregister_ioevent(event, &io_addr)
                        .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
                }

                // Undo the identity DMA mappings done when the device was
                // added (only done when not behind the vIOMMU).
                if let Some(dma_handler) = dev.dma_handler() {
                    if !iommu_attached {
                        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                            for region in zone.regions() {
                                let iova = region.start_addr().0;
                                let size = region.len();
                                dma_handler
                                    .unmap(iova, size)
                                    .map_err(DeviceManagerError::VirtioDmaUnmap)?;
                            }
                        }
                    }
                }

                (
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&virtio_pci_device) as Arc<dyn BusDeviceSync>,
                    Some(dev.virtio_device()),
                    dev.dma_handler().is_some() && !iommu_attached,
                )
            }
            PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
                let mut dev = vfio_user_pci_device.lock().unwrap();
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        dev.dma_unmap(region)
                            .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
                    }
                }

                (
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_user_pci_device) as Arc<dyn BusDeviceSync>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    true,
                )
            }
        };

        // Detach the per-device DMA handler from every virtio-mem device.
        if remove_dma_handler {
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
                        pci_device_bdf.into(),
                    ))
                    .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
            }
        }

        // Free the allocated BARs
        pci_device
            .lock()
            .unwrap()
            .free_bars(
                &mut self.address_manager.allocator.lock().unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .mem32_allocator
                    .lock()
                    .unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .mem64_allocator
                    .lock()
                    .unwrap(),
            )
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            // Drop any userspace (host) mappings the device had installed
            // into the guest address space.
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the list and
        // buses where it was stored. At the end of this function, after
        // any_device, bus_device and pci_device are released, the actual
        // device will be dropped.
        Ok(())
    }

    /// Hot-plugs an already-built virtio device behind a virtio-pci
    /// transport and flags it in the segment's PCIU (devices-up) bitmap so
    /// the guest notices it.
    fn hotplug_virtio_pci_device(
        &mut self,
        handle: MetaVirtioDevice,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        // Add the virtio device to the device manager list. This is important
        // as the list is used to notify virtio devices about memory updates
        // for instance.
        self.virtio_devices.push(handle.clone());

        // Only route the device through the virtual IOMMU when the handle
        // requests it.
        let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
            self.iommu_mapping.clone()
        } else {
            None
        };

        let bdf = self.add_virtio_pci_device(
            handle.virtio_device,
            &mapping,
            handle.id.clone(),
            handle.pci_segment,
            handle.dma_handler,
        )?;

        // Update the PCIU bitmap
        self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo { id: handle.id, bdf })
    }

    /// Returns true if `pci_segment_id` is listed in the platform
    /// configuration's `iommu_segments`, i.e. devices placed on that segment
    /// are expected to sit behind the virtual IOMMU.
    fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
        self.config
            .lock()
            .as_ref()
            .unwrap()
            .platform
            .as_ref()
            .map(|pc| {
                pc.iommu_segments
                    .as_ref()
                    .map(|v| v.contains(&pci_segment_id))
                    .unwrap_or_default()
            })
            // No platform config or no iommu_segments list -> false.
            .unwrap_or_default()
    }

    /// Hot-plugs a virtio-block device described by `disk_cfg`.
    pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&disk_cfg.id)?;

        // iommu=true is only allowed on a segment declared as an IOMMU segment.
        if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_block_device(disk_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hot-plugs a virtio-fs device described by `fs_cfg`.
    pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&fs_cfg.id)?;

        let device = self.make_virtio_fs_device(fs_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hot-plugs a virtio-pmem device described by `pmem_cfg`.
    pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&pmem_cfg.id)?;

        if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_pmem_device(pmem_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hot-plugs a virtio-net device described by `net_cfg`.
    pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&net_cfg.id)?;

        if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_net_device(net_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hot-plugs a vDPA device described by `vdpa_cfg`.
    pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vdpa_cfg.id)?;

        if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_vdpa_device(vdpa_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hot-plugs a virtio-vsock device described by `vsock_cfg`.
    pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vsock_cfg.id)?;

        if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_vsock_device(vsock_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Collects per-device counters from every virtio device that exposes
    /// them, keyed by the device id.
    pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
        let mut counters = HashMap::new();

        for handle in &self.virtio_devices {
            let virtio_device = handle.virtio_device.lock().unwrap();
            if let Some(device_counters) = virtio_device.counters() {
                counters.insert(handle.id.clone(), device_counters.clone());
            }
        }

        counters
    }

    /// Asks the virtio-balloon device (if one is set up) to resize to `size`.
    ///
    /// Returns `MissingVirtioBalloon` when no balloon device exists.
    pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
        if let Some(balloon) = &self.balloon {
            return balloon
                .lock()
                .unwrap()
                .resize(size)
                .map_err(DeviceManagerError::VirtioBalloonResize);
        }

        warn!("No balloon setup: Can't resize the balloon");
        Err(DeviceManagerError::MissingVirtioBalloon)
    }

    /// Returns the balloon device's current "actual" value (via
    /// `get_actual()`), or 0 when no balloon device is present.
    pub fn balloon_size(&self) -> u64 {
        if let
Some(balloon) = &self.balloon {
            return balloon.lock().unwrap().get_actual();
        }

        // No balloon device configured.
        0
    }

    /// Returns a shared handle to the device tree.
    pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
        self.device_tree.clone()
    }

    /// Signals a power-button press to the guest through the ACPI GED
    /// notification device.
    #[cfg(target_arch = "x86_64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    /// Signals a power-button press to the guest, covering both boot flows
    /// supported on AArch64 (device tree and ACPI+UEFI).
    #[cfg(target_arch = "aarch64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // There are two use cases:
        // 1. Users will use direct kernel boot with device tree.
        // 2. Users will use ACPI+UEFI boot.

        // Trigger a GPIO pin 3 event to satisfy use case 1.
        self.gpio_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .trigger_key(3)
            .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
        // Trigger a GED power button event to satisfy use case 2.
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification);
    }

    /// Returns the IOMMU device BDF and the list of BDFs attached to it,
    /// if an IOMMU was created.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }

    /// Validates a user-supplied device identifier: it must not use the
    /// reserved `__` prefix and must not already exist in the device tree.
    fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
        if let Some(id) = id {
            // Identifiers starting with "__" are reserved for internal use.
            if id.starts_with("__") {
                return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
            }

            if self.device_tree.lock().unwrap().contains_key(id) {
                return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
            }
        }

        // A missing identifier is valid; one will presumably be generated
        // elsewhere.
        Ok(())
    }

    /// Returns the ACPI platform addresses collected during device creation.
    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
}

/// Returns the NUMA node id owning the given memory zone, if any.
fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
            return Some(*numa_node_id);
        }
    }

    None
}

/// Returns the NUMA node id owning the given PCI segment, defaulting to
/// node 0 when no node claims the segment.
fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.pci_segments.contains(&pci_segment_id) {
            return *numa_node_id;
        }
    }

    0
}

/// Marker type used only to emit the ACPI TPM 2.0 device description.
struct TpmDevice {}

impl Aml for TpmDevice {
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        // TPM2 device with a fixed MMIO window at the layout-defined TPM
        // address range; _STA 0xF marks it present and enabled.
        aml::Device::new(
            "TPM2".into(),
            vec![
                &aml::Name::new("_HID".into(), &"MSFT0101"),
                &aml::Name::new("_STA".into(), &(0xF_usize)),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                        true,
                        layout::TPM_START.0 as u32,
                        layout::TPM_SIZE as u32,
                    )]),
                ),
            ],
        )
        .to_aml_bytes(sink)
    }
}

impl Aml for DeviceManager {
    /// Emits the ACPI AML describing the devices managed here: the PCI
    /// hotplug controller (PHPR), each PCI segment, the motherboard
    /// resources (MBRD), the serial port (COM1), the S5 sleep state, the
    /// power button (PWRB), the optional TPM, and the GED device.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // Build one PCNT method call per PCI segment; PSCN below invokes
        // them all to rescan every segment.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    // The controller's register window is the DeviceManager's
                    // own MMIO range (served by the BusDevice impl below).
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write PCI bus number (in first argument) to I/O port via field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        // Reserve each segment's MMIO config window as motherboard resources.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        // 16550-compatible on x86, ARM PL011 on aarch64.
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        // S5 (soft-off) sleep state.
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        // ACPI power button device.
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}

impl Pausable for DeviceManager {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        // Pause every migratable device registered in the device tree.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        // Resume every migratable device registered in the device tree.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        // Start from the DeviceManager's own serialized state...
        let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let mut migratable = migratable.lock().unwrap();
                snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
            }
        }

        Ok(snapshot)
    }
}

impl Transportable for DeviceManager {}

impl Migratable for DeviceManager {
    // Each hook simply fans out to every migratable device in the device tree.
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        // Merge the dirty ranges reported by each device into one table.
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_migration()?;
            }
        }
        Ok(())
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}

// Layout of the PCI hotplug register window exposed to the guest (matching
// the "PCST" OpRegion/Field definitions in `impl Aml for DeviceManager`):
// four consecutive 32-bit little-endian fields.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;

impl BusDevice for DeviceManager {
    /// Guest read of the PCI hotplug register window.
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                // Report devices plugged in on the selected segment since the
                // last read (read-to-clear).
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_up
                        .to_le_bytes(),
                );
                // Clear the PCIU bitmap
                self.pci_segments[self.selected_segment].pci_devices_up = 0;
            }
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                // Report devices unplugged on the selected segment since the
                // last read (read-to-clear).
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_down
                        .to_le_bytes(),
                );
                // Clear the PCID bitmap
                self.pci_segments[self.selected_segment].pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty bitmap since the eject is always
                // taken care of right away during a write access.
                data.fill(0);
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        )
    }

    /// Guest write to the PCI hotplug register window.
    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
        match offset {
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let mut slot_bitmap = u32::from_le_bytes(data_array);

                // Eject every device whose slot bit is set, clearing each bit
                // as it is handled.
                while slot_bitmap > 0 {
                    let slot_id = slot_bitmap.trailing_zeros();
                    if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
                        error!("Failed ejecting device {}: {:?}", slot_id, e);
                    }
                    slot_bitmap &= !(1 << slot_id);
                }
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let selected_segment = u32::from_le_bytes(data_array) as usize;
                // Reject out-of-range selections, keeping the current segment.
                if selected_segment >= self.pci_segments.len() {
                    error!(
                        "Segment selection out of range: {} >= {}",
                        selected_segment,
                        self.pci_segments.len()
                    );
                    return None;
                }
                self.selected_segment = selected_segment;
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        );

        None
    }
}

impl Drop for DeviceManager {
    fn drop(&mut self) {
        // Wake up the DeviceManager threads (mainly virtio device workers),
        // to avoid deadlock on waiting for paused/parked worker threads.
        if let Err(e) = self.resume() {
            error!("Error resuming DeviceManager: {:?}", e);
        }

        for handle in self.virtio_devices.drain(..) {
            handle.virtio_device.lock().unwrap().shutdown();
        }

        // Restore the terminal attributes saved earlier, if any were stashed
        // (e.g. before switching the console to raw mode).
        if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
            // SAFETY: FFI call
            let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_create_mmio_allocators() {
        // A single segment with weight 1 gets the whole [0x100000, 0x400000) range.
        let res = create_mmio_allocators(0x100000, 0x400000, 1, vec![1], 4 << 10);
        assert_eq!(res.len(), 1);
        assert_eq!(
            res[0].lock().unwrap().base(),
            vm_memory::GuestAddress(0x100000)
        );
        assert_eq!(
            res[0].lock().unwrap().end(),
            vm_memory::GuestAddress(0x3fffff)
        );

        // Two equally-weighted segments split the range in half.
        let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![1, 1], 4 << 10);
        assert_eq!(res.len(), 2);
        assert_eq!(
            res[0].lock().unwrap().base(),
            vm_memory::GuestAddress(0x100000)
        );
        assert_eq!(
            res[0].lock().unwrap().end(),
            vm_memory::GuestAddress(0x27ffff)
        );
        assert_eq!(
            res[1].lock().unwrap().base(),
            vm_memory::GuestAddress(0x280000)
        );
        assert_eq!(
            res[1].lock().unwrap().end(),
            vm_memory::GuestAddress(0x3fffff)
        );

        // Weights 2:1 give the first segment two thirds of the range.
        let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![2, 1], 4 << 10);
        assert_eq!(res.len(), 2);
        assert_eq!(
            res[0].lock().unwrap().base(),
            vm_memory::GuestAddress(0x100000)
        );
        assert_eq!(
            res[0].lock().unwrap().end(),
            vm_memory::GuestAddress(0x2fffff)
        );
        assert_eq!(
            res[1].lock().unwrap().base(),
            vm_memory::GuestAddress(0x300000)
        );
        assert_eq!(
            res[1].lock().unwrap().end(),
            vm_memory::GuestAddress(0x3fffff)
        );
    }
}