1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use crate::config::{ 13 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 14 VdpaConfig, VhostMode, VmConfig, VsockConfig, 15 }; 16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; 17 use crate::device_tree::{DeviceNode, DeviceTree}; 18 use crate::interrupt::LegacyUserspaceInterruptManager; 19 use crate::interrupt::MsiInterruptManager; 20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 21 use crate::pci_segment::PciSegment; 22 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 23 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 24 use crate::sigwinch_listener::start_sigwinch_listener; 25 use crate::GuestRegionMmap; 26 use crate::PciDeviceInfo; 27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; 28 use acpi_tables::sdt::GenericAddress; 29 use acpi_tables::{aml, Aml}; 30 use anyhow::anyhow; 31 use arch::layout; 32 #[cfg(target_arch = "x86_64")] 33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 34 use arch::NumaNodes; 35 #[cfg(target_arch = "aarch64")] 36 use arch::{DeviceType, MmioDeviceInfo}; 37 use block::{ 38 async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type, 39 fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio, 40 raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType, 41 }; 42 #[cfg(feature = "io_uring")] 43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; 44 #[cfg(target_arch = "x86_64")] 45 use 
devices::debug_console::DebugConsole; 46 #[cfg(target_arch = "aarch64")] 47 use devices::gic; 48 #[cfg(target_arch = "x86_64")] 49 use devices::ioapic; 50 #[cfg(target_arch = "aarch64")] 51 use devices::legacy::Pl011; 52 use devices::{ 53 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 54 }; 55 use hypervisor::{HypervisorType, IoEventAddress}; 56 use libc::{ 57 cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, 58 O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, 59 }; 60 use pci::{ 61 DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping, 62 VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError, 63 }; 64 use rate_limiter::group::RateLimiterGroup; 65 use seccompiler::SeccompAction; 66 use serde::{Deserialize, Serialize}; 67 use std::collections::{BTreeMap, BTreeSet, HashMap}; 68 use std::fs::{read_link, File, OpenOptions}; 69 use std::io::{self, stdout, Seek, SeekFrom}; 70 use std::mem::zeroed; 71 use std::num::Wrapping; 72 use std::os::unix::fs::OpenOptionsExt; 73 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 74 use std::path::PathBuf; 75 use std::result; 76 use std::sync::{Arc, Mutex}; 77 use std::time::Instant; 78 use tracer::trace_scoped; 79 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 80 use virtio_devices::transport::VirtioTransport; 81 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 82 use virtio_devices::vhost_user::VhostUserConfig; 83 use virtio_devices::{ 84 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 85 }; 86 use virtio_devices::{Endpoint, IommuMapping}; 87 use vm_allocator::{AddressAllocator, SystemAllocator}; 88 use vm_device::dma_mapping::ExternalDmaMapping; 89 use vm_device::interrupt::{ 90 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 91 }; 92 use vm_device::{Bus, BusDevice, Resource}; 93 use 
vm_memory::guest_memory::FileOffset; 94 use vm_memory::GuestMemoryRegion; 95 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion}; 96 #[cfg(target_arch = "x86_64")] 97 use vm_memory::{GuestAddressSpace, GuestMemory}; 98 use vm_migration::{ 99 protocol::MemoryRangeTable, snapshot_from_id, versioned_state_from_id, Migratable, 100 MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, Transportable, 101 }; 102 use vm_virtio::AccessPlatform; 103 use vm_virtio::VirtioDeviceType; 104 use vmm_sys_util::eventfd::EventFd; 105 #[cfg(target_arch = "x86_64")] 106 use {devices::debug_console, devices::legacy::Serial}; 107 108 #[cfg(target_arch = "aarch64")] 109 const MMIO_LEN: u64 = 0x1000; 110 111 // Singleton devices / devices the user cannot name 112 #[cfg(target_arch = "x86_64")] 113 const IOAPIC_DEVICE_NAME: &str = "__ioapic"; 114 const SERIAL_DEVICE_NAME: &str = "__serial"; 115 #[cfg(target_arch = "x86_64")] 116 const DEBUGCON_DEVICE_NAME: &str = "__debug_console"; 117 #[cfg(target_arch = "aarch64")] 118 const GPIO_DEVICE_NAME: &str = "__gpio"; 119 const RNG_DEVICE_NAME: &str = "__rng"; 120 const IOMMU_DEVICE_NAME: &str = "__iommu"; 121 const BALLOON_DEVICE_NAME: &str = "__balloon"; 122 const CONSOLE_DEVICE_NAME: &str = "__console"; 123 const PVPANIC_DEVICE_NAME: &str = "__pvpanic"; 124 125 // Devices that the user may name and for which we generate 126 // identifiers if the user doesn't give one 127 const DISK_DEVICE_NAME_PREFIX: &str = "_disk"; 128 const FS_DEVICE_NAME_PREFIX: &str = "_fs"; 129 const NET_DEVICE_NAME_PREFIX: &str = "_net"; 130 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem"; 131 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa"; 132 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock"; 133 const WATCHDOG_DEVICE_NAME: &str = "__watchdog"; 134 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio"; 135 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user"; 136 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci"; 137 138 /// Errors associated with 
/// the device manager.
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Cannot create tpm device
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    #[cfg(target_arch = "x86_64")]
    /// Error creating debug-console output file
    DebugconOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error creating debug-console pty
    DebugconPtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// No socket option support for console device
    NoSocketOptionSupportForConsoleDevice,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier
    InvalidIdentifier(String),

    /// Error activating virtio device
    VirtioActivate(ActivateError),

    /// Failed retrieving device state from snapshot
    RestoreGetState(MigratableError),

    /// Cannot create a PvPanic device
    PvPanicCreate(devices::pvpanic::PvPanicError),

    /// Cannot create a RateLimiterGroup
    RateLimiterGroupCreate(rate_limiter::group::Error),
}

/// Convenience alias for results whose error type is [`DeviceManagerError`].
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// Size of the MMIO window claimed by the DeviceManager's own ACPI device.
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

// ioctl numbers for pty management: unlock a pty pair (TIOCSPTLCK) and
// open the peer end of a ptmx descriptor (TIOCGTPEER).
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
const TIOCGTPEER: libc::c_int = 0x5441;

/// Creates a new pseudo-terminal pair.
///
/// Returns the main (ptmx) end, the sub (peer) end, and the filesystem
/// path of the sub end, or an `io::Error` if any step fails.
pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
    // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
    // This is done to try and use the devpts filesystem that
    // could be available for use in the process's namespace first.
    // Ideally these are all the same file though but different
    // kernels could have things setup differently.
    // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
    // for further details.
513 514 let custom_flags = libc::O_NONBLOCK; 515 let main = match OpenOptions::new() 516 .read(true) 517 .write(true) 518 .custom_flags(custom_flags) 519 .open("/dev/pts/ptmx") 520 { 521 Ok(f) => f, 522 _ => OpenOptions::new() 523 .read(true) 524 .write(true) 525 .custom_flags(custom_flags) 526 .open("/dev/ptmx")?, 527 }; 528 let mut unlock: libc::c_ulong = 0; 529 // SAFETY: FFI call into libc, trivially safe 530 unsafe { libc::ioctl(main.as_raw_fd(), TIOCSPTLCK as _, &mut unlock) }; 531 532 // SAFETY: FFI call into libc, trivially safe 533 let sub_fd = unsafe { 534 libc::ioctl( 535 main.as_raw_fd(), 536 TIOCGTPEER as _, 537 libc::O_NOCTTY | libc::O_RDWR, 538 ) 539 }; 540 if sub_fd == -1 { 541 return vmm_sys_util::errno::errno_result().map_err(|e| e.into()); 542 } 543 544 let proc_path = PathBuf::from(format!("/proc/self/fd/{sub_fd}")); 545 let path = read_link(proc_path)?; 546 547 // SAFETY: sub_fd is checked to be valid before being wrapped in File 548 Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path)) 549 } 550 551 #[derive(Default)] 552 pub struct Console { 553 console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>, 554 } 555 556 impl Console { 557 pub fn need_resize(&self) -> bool { 558 if let Some(_resizer) = self.console_resizer.as_ref() { 559 return true; 560 } 561 562 false 563 } 564 565 pub fn update_console_size(&self) { 566 if let Some(resizer) = self.console_resizer.as_ref() { 567 resizer.update_console_size() 568 } 569 } 570 } 571 572 pub(crate) struct AddressManager { 573 pub(crate) allocator: Arc<Mutex<SystemAllocator>>, 574 #[cfg(target_arch = "x86_64")] 575 pub(crate) io_bus: Arc<Bus>, 576 pub(crate) mmio_bus: Arc<Bus>, 577 pub(crate) vm: Arc<dyn hypervisor::Vm>, 578 device_tree: Arc<Mutex<DeviceTree>>, 579 pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 580 pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 581 } 582 583 impl DeviceRelocation for AddressManager { 584 fn move_bar( 585 &self, 586 old_base: u64, 587 
new_base: u64, 588 len: u64, 589 pci_dev: &mut dyn PciDevice, 590 region_type: PciBarRegionType, 591 ) -> std::result::Result<(), std::io::Error> { 592 match region_type { 593 PciBarRegionType::IoRegion => { 594 #[cfg(target_arch = "x86_64")] 595 { 596 // Update system allocator 597 self.allocator 598 .lock() 599 .unwrap() 600 .free_io_addresses(GuestAddress(old_base), len as GuestUsize); 601 602 self.allocator 603 .lock() 604 .unwrap() 605 .allocate_io_addresses( 606 Some(GuestAddress(new_base)), 607 len as GuestUsize, 608 None, 609 ) 610 .ok_or_else(|| { 611 io::Error::new(io::ErrorKind::Other, "failed allocating new IO range") 612 })?; 613 614 // Update PIO bus 615 self.io_bus 616 .update_range(old_base, len, new_base, len) 617 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 618 } 619 #[cfg(target_arch = "aarch64")] 620 error!("I/O region is not supported"); 621 } 622 PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => { 623 let allocators = if region_type == PciBarRegionType::Memory32BitRegion { 624 &self.pci_mmio32_allocators 625 } else { 626 &self.pci_mmio64_allocators 627 }; 628 629 // Find the specific allocator that this BAR was allocated from and use it for new one 630 for allocator in allocators { 631 let allocator_base = allocator.lock().unwrap().base(); 632 let allocator_end = allocator.lock().unwrap().end(); 633 634 if old_base >= allocator_base.0 && old_base <= allocator_end.0 { 635 allocator 636 .lock() 637 .unwrap() 638 .free(GuestAddress(old_base), len as GuestUsize); 639 640 allocator 641 .lock() 642 .unwrap() 643 .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len)) 644 .ok_or_else(|| { 645 io::Error::new( 646 io::ErrorKind::Other, 647 "failed allocating new MMIO range", 648 ) 649 })?; 650 651 break; 652 } 653 } 654 655 // Update MMIO bus 656 self.mmio_bus 657 .update_range(old_base, len, new_base, len) 658 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 659 } 660 } 661 662 // Update the 
device_tree resources associated with the device 663 if let Some(id) = pci_dev.id() { 664 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) { 665 let mut resource_updated = false; 666 for resource in node.resources.iter_mut() { 667 if let Resource::PciBar { base, type_, .. } = resource { 668 if PciBarRegionType::from(*type_) == region_type && *base == old_base { 669 *base = new_base; 670 resource_updated = true; 671 break; 672 } 673 } 674 } 675 676 if !resource_updated { 677 return Err(io::Error::new( 678 io::ErrorKind::Other, 679 format!( 680 "Couldn't find a resource with base 0x{old_base:x} for device {id}" 681 ), 682 )); 683 } 684 } else { 685 return Err(io::Error::new( 686 io::ErrorKind::Other, 687 format!("Couldn't find device {id} from device tree"), 688 )); 689 } 690 } 691 692 let any_dev = pci_dev.as_any(); 693 if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() { 694 let bar_addr = virtio_pci_dev.config_bar_addr(); 695 if bar_addr == new_base { 696 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) { 697 let io_addr = IoEventAddress::Mmio(addr); 698 self.vm.unregister_ioevent(event, &io_addr).map_err(|e| { 699 io::Error::new( 700 io::ErrorKind::Other, 701 format!("failed to unregister ioevent: {e:?}"), 702 ) 703 })?; 704 } 705 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) { 706 let io_addr = IoEventAddress::Mmio(addr); 707 self.vm 708 .register_ioevent(event, &io_addr, None) 709 .map_err(|e| { 710 io::Error::new( 711 io::ErrorKind::Other, 712 format!("failed to register ioevent: {e:?}"), 713 ) 714 })?; 715 } 716 } else { 717 let virtio_dev = virtio_pci_dev.virtio_device(); 718 let mut virtio_dev = virtio_dev.lock().unwrap(); 719 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() { 720 if shm_regions.addr.raw_value() == old_base { 721 let mem_region = self.vm.make_user_memory_region( 722 shm_regions.mem_slot, 723 old_base, 724 shm_regions.len, 725 shm_regions.host_addr, 726 false, 727 false, 728 
); 729 730 self.vm.remove_user_memory_region(mem_region).map_err(|e| { 731 io::Error::new( 732 io::ErrorKind::Other, 733 format!("failed to remove user memory region: {e:?}"), 734 ) 735 })?; 736 737 // Create new mapping by inserting new region to KVM. 738 let mem_region = self.vm.make_user_memory_region( 739 shm_regions.mem_slot, 740 new_base, 741 shm_regions.len, 742 shm_regions.host_addr, 743 false, 744 false, 745 ); 746 747 self.vm.create_user_memory_region(mem_region).map_err(|e| { 748 io::Error::new( 749 io::ErrorKind::Other, 750 format!("failed to create user memory regions: {e:?}"), 751 ) 752 })?; 753 754 // Update shared memory regions to reflect the new mapping. 755 shm_regions.addr = GuestAddress(new_base); 756 virtio_dev.set_shm_regions(shm_regions).map_err(|e| { 757 io::Error::new( 758 io::ErrorKind::Other, 759 format!("failed to update shared memory regions: {e:?}"), 760 ) 761 })?; 762 } 763 } 764 } 765 } 766 767 pci_dev.move_bar(old_base, new_base) 768 } 769 } 770 771 #[derive(Serialize, Deserialize)] 772 struct DeviceManagerState { 773 device_tree: DeviceTree, 774 device_id_cnt: Wrapping<usize>, 775 } 776 777 #[derive(Debug)] 778 pub struct PtyPair { 779 pub main: File, 780 pub path: PathBuf, 781 } 782 783 impl Clone for PtyPair { 784 fn clone(&self) -> Self { 785 PtyPair { 786 main: self.main.try_clone().unwrap(), 787 path: self.path.clone(), 788 } 789 } 790 } 791 792 #[derive(Clone)] 793 pub enum PciDeviceHandle { 794 Vfio(Arc<Mutex<VfioPciDevice>>), 795 Virtio(Arc<Mutex<VirtioPciDevice>>), 796 VfioUser(Arc<Mutex<VfioUserPciDevice>>), 797 } 798 799 #[derive(Clone)] 800 struct MetaVirtioDevice { 801 virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 802 iommu: bool, 803 id: String, 804 pci_segment: u16, 805 dma_handler: Option<Arc<dyn ExternalDmaMapping>>, 806 } 807 808 #[derive(Default)] 809 pub struct AcpiPlatformAddresses { 810 pub pm_timer_address: Option<GenericAddress>, 811 pub reset_reg_address: Option<GenericAddress>, 812 pub 
sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

/// Owns and manages every device attached to the VM: creation at boot,
/// hotplug/unplug, address-space placement, snapshot/restore bookkeeping,
/// and the data needed to build the ACPI tables.
pub struct DeviceManager {
    // The underlying hypervisor
    hypervisor_type: HypervisorType,

    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // console PTY
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // serial PTY
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // debug-console PTY
    debug_console_pty: Option<Arc<Mutex<PtyPair>>>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // pty foreground status,
    console_resize_pipe: Option<Arc<File>>,

    // To restore on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    exit_evt: EventFd,
    reset_evt: EventFd,

    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    acpi_address: GuestAddress,

    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // aio availability if detected
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    snapshot: Option<Snapshot>,

    rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,

    mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
}

impl DeviceManager {
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
        mmio_bus: Arc<Bus>,
        hypervisor_type: HypervisorType,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        cpu_manager: Arc<Mutex<CpuManager>>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
        snapshot: Option<Snapshot>,
        dynamic: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        // When restoring, rebuild the device tree and ID counter from the
        // snapshot; otherwise start from an empty tree.
        let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
            let state: DeviceManagerState = snapshot.to_state().unwrap();
            (
                Arc::new(Mutex::new(state.device_tree.clone())),
                state.device_id_cnt,
            )
        } else {
            (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
        };

        let num_pci_segments =
            if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
                platform_config.num_pci_segments
            } else {
                1
            };

        let create_mmio_allocators = |start, end, num_pci_segments, alignment| {
            // Start each PCI segment mmio range on an aligned boundary
            let pci_segment_mmio_size =
                (end - start + 1) / (alignment * num_pci_segments as u64) * alignment;

            let mut mmio_allocators = vec![];
            for i in 0..num_pci_segments as u64 {
                let mmio_start = start + i * pci_segment_mmio_size;
                let allocator = Arc::new(Mutex::new(
                    AddressAllocator::new(GuestAddress(mmio_start), pci_segment_mmio_size).unwrap(),
1022 )); 1023 mmio_allocators.push(allocator) 1024 } 1025 1026 mmio_allocators 1027 }; 1028 1029 let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0; 1030 let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE; 1031 let pci_mmio32_allocators = create_mmio_allocators( 1032 start_of_mmio32_area, 1033 end_of_mmio32_area, 1034 num_pci_segments, 1035 4 << 10, 1036 ); 1037 1038 let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0; 1039 let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0; 1040 let pci_mmio64_allocators = create_mmio_allocators( 1041 start_of_mmio64_area, 1042 end_of_mmio64_area, 1043 num_pci_segments, 1044 4 << 30, 1045 ); 1046 1047 let address_manager = Arc::new(AddressManager { 1048 allocator: memory_manager.lock().unwrap().allocator(), 1049 #[cfg(target_arch = "x86_64")] 1050 io_bus, 1051 mmio_bus, 1052 vm: vm.clone(), 1053 device_tree: Arc::clone(&device_tree), 1054 pci_mmio32_allocators, 1055 pci_mmio64_allocators, 1056 }); 1057 1058 // First we create the MSI interrupt manager, the legacy one is created 1059 // later, after the IOAPIC device creation. 1060 // The reason we create the MSI one first is because the IOAPIC needs it, 1061 // and then the legacy interrupt manager needs an IOAPIC. So we're 1062 // handling a linear dependency chain: 1063 // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager. 
1064 let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> = 1065 Arc::new(MsiInterruptManager::new( 1066 Arc::clone(&address_manager.allocator), 1067 vm, 1068 )); 1069 1070 let acpi_address = address_manager 1071 .allocator 1072 .lock() 1073 .unwrap() 1074 .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None) 1075 .ok_or(DeviceManagerError::AllocateIoPort)?; 1076 1077 let mut pci_irq_slots = [0; 32]; 1078 PciSegment::reserve_legacy_interrupts_for_pci_devices( 1079 &address_manager, 1080 &mut pci_irq_slots, 1081 )?; 1082 1083 let mut pci_segments = vec![PciSegment::new_default_segment( 1084 &address_manager, 1085 Arc::clone(&address_manager.pci_mmio32_allocators[0]), 1086 Arc::clone(&address_manager.pci_mmio64_allocators[0]), 1087 &pci_irq_slots, 1088 )?]; 1089 1090 for i in 1..num_pci_segments as usize { 1091 pci_segments.push(PciSegment::new( 1092 i as u16, 1093 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16), 1094 &address_manager, 1095 Arc::clone(&address_manager.pci_mmio32_allocators[i]), 1096 Arc::clone(&address_manager.pci_mmio64_allocators[i]), 1097 &pci_irq_slots, 1098 )?); 1099 } 1100 1101 if dynamic { 1102 let acpi_address = address_manager 1103 .allocator 1104 .lock() 1105 .unwrap() 1106 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None) 1107 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 1108 1109 address_manager 1110 .mmio_bus 1111 .insert( 1112 cpu_manager.clone(), 1113 acpi_address.0, 1114 CPU_MANAGER_ACPI_SIZE as u64, 1115 ) 1116 .map_err(DeviceManagerError::BusError)?; 1117 1118 cpu_manager.lock().unwrap().set_acpi_address(acpi_address); 1119 } 1120 1121 let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new(); 1122 if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() { 1123 for rate_limit_group_cfg in rate_limit_groups_cfg { 1124 let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config; 1125 let bw 
= rate_limit_cfg.bandwidth.unwrap_or_default(); 1126 let ops = rate_limit_cfg.ops.unwrap_or_default(); 1127 let mut rate_limit_group = RateLimiterGroup::new( 1128 &rate_limit_group_cfg.id, 1129 bw.size, 1130 bw.one_time_burst.unwrap_or(0), 1131 bw.refill_time, 1132 ops.size, 1133 ops.one_time_burst.unwrap_or(0), 1134 ops.refill_time, 1135 ) 1136 .map_err(DeviceManagerError::RateLimiterGroupCreate)?; 1137 1138 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?; 1139 1140 rate_limit_group.start_thread(exit_evt).unwrap(); 1141 rate_limit_groups 1142 .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group)); 1143 } 1144 } 1145 1146 let device_manager = DeviceManager { 1147 hypervisor_type, 1148 address_manager: Arc::clone(&address_manager), 1149 console: Arc::new(Console::default()), 1150 interrupt_controller: None, 1151 #[cfg(target_arch = "aarch64")] 1152 cmdline_additions: Vec::new(), 1153 ged_notification_device: None, 1154 config, 1155 memory_manager, 1156 cpu_manager, 1157 virtio_devices: Vec::new(), 1158 bus_devices: Vec::new(), 1159 device_id_cnt, 1160 msi_interrupt_manager, 1161 legacy_interrupt_manager: None, 1162 passthrough_device: None, 1163 vfio_container: None, 1164 iommu_device: None, 1165 iommu_mapping: None, 1166 iommu_attached_devices: None, 1167 pci_segments, 1168 device_tree, 1169 exit_evt, 1170 reset_evt, 1171 #[cfg(target_arch = "aarch64")] 1172 id_to_dev_info: HashMap::new(), 1173 seccomp_action, 1174 numa_nodes, 1175 balloon: None, 1176 activate_evt: activate_evt 1177 .try_clone() 1178 .map_err(DeviceManagerError::EventFd)?, 1179 acpi_address, 1180 selected_segment: 0, 1181 serial_pty: None, 1182 serial_manager: None, 1183 console_pty: None, 1184 debug_console_pty: None, 1185 console_resize_pipe: None, 1186 original_termios_opt: Arc::new(Mutex::new(None)), 1187 virtio_mem_devices: Vec::new(), 1188 #[cfg(target_arch = "aarch64")] 1189 gpio_device: None, 1190 pvpanic_device: None, 1191 force_iommu, 1192 
io_uring_supported: None, 1193 aio_supported: None, 1194 boot_id_list, 1195 timestamp, 1196 pending_activations: Arc::new(Mutex::new(Vec::default())), 1197 acpi_platform_addresses: AcpiPlatformAddresses::default(), 1198 snapshot, 1199 rate_limit_groups, 1200 mmio_regions: Arc::new(Mutex::new(Vec::new())), 1201 }; 1202 1203 let device_manager = Arc::new(Mutex::new(device_manager)); 1204 1205 address_manager 1206 .mmio_bus 1207 .insert( 1208 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>, 1209 acpi_address.0, 1210 DEVICE_MANAGER_ACPI_SIZE as u64, 1211 ) 1212 .map_err(DeviceManagerError::BusError)?; 1213 1214 Ok(device_manager) 1215 } 1216 1217 pub fn serial_pty(&self) -> Option<PtyPair> { 1218 self.serial_pty 1219 .as_ref() 1220 .map(|pty| pty.lock().unwrap().clone()) 1221 } 1222 1223 pub fn console_pty(&self) -> Option<PtyPair> { 1224 self.console_pty 1225 .as_ref() 1226 .map(|pty| pty.lock().unwrap().clone()) 1227 } 1228 1229 pub fn debug_console_pty(&self) -> Option<PtyPair> { 1230 self.debug_console_pty 1231 .as_ref() 1232 .map(|pty| pty.lock().unwrap().clone()) 1233 } 1234 1235 pub fn console_resize_pipe(&self) -> Option<Arc<File>> { 1236 self.console_resize_pipe.clone() 1237 } 1238 1239 pub fn create_devices( 1240 &mut self, 1241 serial_pty: Option<PtyPair>, 1242 console_pty: Option<PtyPair>, 1243 debug_console_pty: Option<PtyPair>, 1244 console_resize_pipe: Option<File>, 1245 original_termios_opt: Arc<Mutex<Option<termios>>>, 1246 ) -> DeviceManagerResult<()> { 1247 trace_scoped!("create_devices"); 1248 1249 let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new(); 1250 1251 let interrupt_controller = self.add_interrupt_controller()?; 1252 1253 self.cpu_manager 1254 .lock() 1255 .unwrap() 1256 .set_interrupt_controller(interrupt_controller.clone()); 1257 1258 // Now we can create the legacy interrupt manager, which needs the freshly 1259 // formed IOAPIC device. 
1260 let legacy_interrupt_manager: Arc< 1261 dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>, 1262 > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone( 1263 &interrupt_controller, 1264 ))); 1265 1266 { 1267 if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() { 1268 self.address_manager 1269 .mmio_bus 1270 .insert( 1271 Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>, 1272 acpi_address.0, 1273 MEMORY_MANAGER_ACPI_SIZE as u64, 1274 ) 1275 .map_err(DeviceManagerError::BusError)?; 1276 } 1277 } 1278 1279 #[cfg(target_arch = "x86_64")] 1280 self.add_legacy_devices( 1281 self.reset_evt 1282 .try_clone() 1283 .map_err(DeviceManagerError::EventFd)?, 1284 )?; 1285 1286 #[cfg(target_arch = "aarch64")] 1287 self.add_legacy_devices(&legacy_interrupt_manager)?; 1288 1289 { 1290 self.ged_notification_device = self.add_acpi_devices( 1291 &legacy_interrupt_manager, 1292 self.reset_evt 1293 .try_clone() 1294 .map_err(DeviceManagerError::EventFd)?, 1295 self.exit_evt 1296 .try_clone() 1297 .map_err(DeviceManagerError::EventFd)?, 1298 )?; 1299 } 1300 1301 self.original_termios_opt = original_termios_opt; 1302 1303 self.console = self.add_console_devices( 1304 &legacy_interrupt_manager, 1305 &mut virtio_devices, 1306 serial_pty, 1307 console_pty, 1308 debug_console_pty, 1309 console_resize_pipe, 1310 )?; 1311 1312 if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() { 1313 let tpm_dev = self.add_tpm_device(tpm.socket.clone())?; 1314 self.bus_devices 1315 .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>) 1316 } 1317 self.legacy_interrupt_manager = Some(legacy_interrupt_manager); 1318 1319 virtio_devices.append(&mut self.make_virtio_devices()?); 1320 1321 self.add_pci_devices(virtio_devices.clone())?; 1322 1323 self.virtio_devices = virtio_devices; 1324 1325 if self.config.clone().lock().unwrap().pvpanic { 1326 self.pvpanic_device = self.add_pvpanic_device()?; 1327 } 1328 1329 Ok(()) 1330 } 1331 1332 fn 
state(&self) -> DeviceManagerState { 1333 DeviceManagerState { 1334 device_tree: self.device_tree.lock().unwrap().clone(), 1335 device_id_cnt: self.device_id_cnt, 1336 } 1337 } 1338 1339 fn get_msi_iova_space(&mut self) -> (u64, u64) { 1340 #[cfg(target_arch = "aarch64")] 1341 { 1342 let vcpus = self.config.lock().unwrap().cpus.boot_vcpus; 1343 let vgic_config = gic::Gic::create_default_config(vcpus.into()); 1344 ( 1345 vgic_config.msi_addr, 1346 vgic_config.msi_addr + vgic_config.msi_size - 1, 1347 ) 1348 } 1349 #[cfg(target_arch = "x86_64")] 1350 (0xfee0_0000, 0xfeef_ffff) 1351 } 1352 1353 #[cfg(target_arch = "aarch64")] 1354 /// Gets the information of the devices registered up to some point in time. 1355 pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> { 1356 &self.id_to_dev_info 1357 } 1358 1359 #[allow(unused_variables)] 1360 fn add_pci_devices( 1361 &mut self, 1362 virtio_devices: Vec<MetaVirtioDevice>, 1363 ) -> DeviceManagerResult<()> { 1364 let iommu_id = String::from(IOMMU_DEVICE_NAME); 1365 1366 let iommu_device = if self.config.lock().unwrap().iommu { 1367 let (device, mapping) = virtio_devices::Iommu::new( 1368 iommu_id.clone(), 1369 self.seccomp_action.clone(), 1370 self.exit_evt 1371 .try_clone() 1372 .map_err(DeviceManagerError::EventFd)?, 1373 self.get_msi_iova_space(), 1374 versioned_state_from_id(self.snapshot.as_ref(), iommu_id.as_str()) 1375 .map_err(DeviceManagerError::RestoreGetState)?, 1376 ) 1377 .map_err(DeviceManagerError::CreateVirtioIommu)?; 1378 let device = Arc::new(Mutex::new(device)); 1379 self.iommu_device = Some(Arc::clone(&device)); 1380 self.iommu_mapping = Some(mapping); 1381 1382 // Fill the device tree with a new node. In case of restore, we 1383 // know there is nothing to do, so we can simply override the 1384 // existing entry. 
1385 self.device_tree 1386 .lock() 1387 .unwrap() 1388 .insert(iommu_id.clone(), device_node!(iommu_id, device)); 1389 1390 Some(device) 1391 } else { 1392 None 1393 }; 1394 1395 let mut iommu_attached_devices = Vec::new(); 1396 { 1397 for handle in virtio_devices { 1398 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 1399 self.iommu_mapping.clone() 1400 } else { 1401 None 1402 }; 1403 1404 let dev_id = self.add_virtio_pci_device( 1405 handle.virtio_device, 1406 &mapping, 1407 handle.id, 1408 handle.pci_segment, 1409 handle.dma_handler, 1410 )?; 1411 1412 if handle.iommu { 1413 iommu_attached_devices.push(dev_id); 1414 } 1415 } 1416 1417 let mut vfio_iommu_device_ids = self.add_vfio_devices()?; 1418 iommu_attached_devices.append(&mut vfio_iommu_device_ids); 1419 1420 let mut vfio_user_iommu_device_ids = self.add_user_devices()?; 1421 iommu_attached_devices.append(&mut vfio_user_iommu_device_ids); 1422 1423 // Add all devices from forced iommu segments 1424 if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() { 1425 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { 1426 for segment in iommu_segments { 1427 for device in 0..32 { 1428 let bdf = PciBdf::new(*segment, 0, device, 0); 1429 if !iommu_attached_devices.contains(&bdf) { 1430 iommu_attached_devices.push(bdf); 1431 } 1432 } 1433 } 1434 } 1435 } 1436 1437 if let Some(iommu_device) = iommu_device { 1438 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?; 1439 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices)); 1440 } 1441 } 1442 1443 for segment in &self.pci_segments { 1444 #[cfg(target_arch = "x86_64")] 1445 if let Some(pci_config_io) = segment.pci_config_io.as_ref() { 1446 self.bus_devices 1447 .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>); 1448 } 1449 1450 self.bus_devices 1451 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>); 1452 } 1453 1454 Ok(()) 1455 } 1456 1457 
#[cfg(target_arch = "aarch64")] 1458 fn add_interrupt_controller( 1459 &mut self, 1460 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1461 let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new( 1462 gic::Gic::new( 1463 self.config.lock().unwrap().cpus.boot_vcpus, 1464 Arc::clone(&self.msi_interrupt_manager), 1465 self.address_manager.vm.clone(), 1466 ) 1467 .map_err(DeviceManagerError::CreateInterruptController)?, 1468 )); 1469 1470 self.interrupt_controller = Some(interrupt_controller.clone()); 1471 1472 // Restore the vGic if this is in the process of restoration 1473 let id = String::from(gic::GIC_SNAPSHOT_ID); 1474 if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) { 1475 // PMU support is optional. Nothing should be impacted if the PMU initialization failed. 1476 if self 1477 .cpu_manager 1478 .lock() 1479 .unwrap() 1480 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16) 1481 .is_err() 1482 { 1483 info!("Failed to initialize PMU"); 1484 } 1485 1486 let vgic_state = vgic_snapshot 1487 .to_state() 1488 .map_err(DeviceManagerError::RestoreGetState)?; 1489 let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states(); 1490 interrupt_controller 1491 .lock() 1492 .unwrap() 1493 .restore_vgic(vgic_state, &saved_vcpu_states) 1494 .unwrap(); 1495 } 1496 1497 self.device_tree 1498 .lock() 1499 .unwrap() 1500 .insert(id.clone(), device_node!(id, interrupt_controller)); 1501 1502 Ok(interrupt_controller) 1503 } 1504 1505 #[cfg(target_arch = "aarch64")] 1506 pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> { 1507 self.interrupt_controller.as_ref() 1508 } 1509 1510 #[cfg(target_arch = "x86_64")] 1511 fn add_interrupt_controller( 1512 &mut self, 1513 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1514 let id = String::from(IOAPIC_DEVICE_NAME); 1515 1516 // Create IOAPIC 1517 let interrupt_controller = Arc::new(Mutex::new( 1518 ioapic::Ioapic::new( 1519 
id.clone(), 1520 APIC_START, 1521 Arc::clone(&self.msi_interrupt_manager), 1522 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1523 .map_err(DeviceManagerError::RestoreGetState)?, 1524 ) 1525 .map_err(DeviceManagerError::CreateInterruptController)?, 1526 )); 1527 1528 self.interrupt_controller = Some(interrupt_controller.clone()); 1529 1530 self.address_manager 1531 .mmio_bus 1532 .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE) 1533 .map_err(DeviceManagerError::BusError)?; 1534 1535 self.bus_devices 1536 .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>); 1537 1538 // Fill the device tree with a new node. In case of restore, we 1539 // know there is nothing to do, so we can simply override the 1540 // existing entry. 1541 self.device_tree 1542 .lock() 1543 .unwrap() 1544 .insert(id.clone(), device_node!(id, interrupt_controller)); 1545 1546 Ok(interrupt_controller) 1547 } 1548 1549 fn add_acpi_devices( 1550 &mut self, 1551 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1552 reset_evt: EventFd, 1553 exit_evt: EventFd, 1554 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> { 1555 let vcpus_kill_signalled = self 1556 .cpu_manager 1557 .lock() 1558 .unwrap() 1559 .vcpus_kill_signalled() 1560 .clone(); 1561 let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new( 1562 exit_evt, 1563 reset_evt, 1564 vcpus_kill_signalled, 1565 ))); 1566 1567 self.bus_devices 1568 .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>); 1569 1570 #[cfg(target_arch = "x86_64")] 1571 { 1572 let shutdown_pio_address: u16 = 0x600; 1573 1574 self.address_manager 1575 .allocator 1576 .lock() 1577 .unwrap() 1578 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None) 1579 .ok_or(DeviceManagerError::AllocateIoPort)?; 1580 1581 self.address_manager 1582 .io_bus 1583 .insert(shutdown_device, shutdown_pio_address.into(), 0x4) 1584 
.map_err(DeviceManagerError::BusError)?; 1585 1586 self.acpi_platform_addresses.sleep_control_reg_address = 1587 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1588 self.acpi_platform_addresses.sleep_status_reg_address = 1589 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1590 self.acpi_platform_addresses.reset_reg_address = 1591 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1592 } 1593 1594 let ged_irq = self 1595 .address_manager 1596 .allocator 1597 .lock() 1598 .unwrap() 1599 .allocate_irq() 1600 .unwrap(); 1601 let interrupt_group = interrupt_manager 1602 .create_group(LegacyIrqGroupConfig { 1603 irq: ged_irq as InterruptIndex, 1604 }) 1605 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1606 let ged_address = self 1607 .address_manager 1608 .allocator 1609 .lock() 1610 .unwrap() 1611 .allocate_platform_mmio_addresses( 1612 None, 1613 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1614 None, 1615 ) 1616 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 1617 let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new( 1618 interrupt_group, 1619 ged_irq, 1620 ged_address, 1621 ))); 1622 self.address_manager 1623 .mmio_bus 1624 .insert( 1625 ged_device.clone(), 1626 ged_address.0, 1627 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1628 ) 1629 .map_err(DeviceManagerError::BusError)?; 1630 self.bus_devices 1631 .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>); 1632 1633 let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new())); 1634 1635 self.bus_devices 1636 .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>); 1637 1638 #[cfg(target_arch = "x86_64")] 1639 { 1640 let pm_timer_pio_address: u16 = 0x608; 1641 1642 self.address_manager 1643 .allocator 1644 .lock() 1645 .unwrap() 1646 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None) 1647 .ok_or(DeviceManagerError::AllocateIoPort)?; 1648 1649 self.address_manager 1650 .io_bus 
1651 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4) 1652 .map_err(DeviceManagerError::BusError)?; 1653 1654 self.acpi_platform_addresses.pm_timer_address = 1655 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address)); 1656 } 1657 1658 Ok(Some(ged_device)) 1659 } 1660 1661 #[cfg(target_arch = "x86_64")] 1662 fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> { 1663 let vcpus_kill_signalled = self 1664 .cpu_manager 1665 .lock() 1666 .unwrap() 1667 .vcpus_kill_signalled() 1668 .clone(); 1669 // Add a shutdown device (i8042) 1670 let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new( 1671 reset_evt.try_clone().unwrap(), 1672 vcpus_kill_signalled.clone(), 1673 ))); 1674 1675 self.bus_devices 1676 .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>); 1677 1678 self.address_manager 1679 .io_bus 1680 .insert(i8042, 0x61, 0x4) 1681 .map_err(DeviceManagerError::BusError)?; 1682 { 1683 // Add a CMOS emulated device 1684 let mem_size = self 1685 .memory_manager 1686 .lock() 1687 .unwrap() 1688 .guest_memory() 1689 .memory() 1690 .last_addr() 1691 .0 1692 + 1; 1693 let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size); 1694 let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0); 1695 1696 let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new( 1697 mem_below_4g, 1698 mem_above_4g, 1699 reset_evt, 1700 Some(vcpus_kill_signalled), 1701 ))); 1702 1703 self.bus_devices 1704 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>); 1705 1706 self.address_manager 1707 .io_bus 1708 .insert(cmos, 0x70, 0x2) 1709 .map_err(DeviceManagerError::BusError)?; 1710 1711 let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new())); 1712 1713 self.bus_devices 1714 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>); 1715 1716 self.address_manager 1717 .io_bus 1718 .insert(fwdebug, 0x402, 0x1) 1719 .map_err(DeviceManagerError::BusError)?; 1720 } 1721 1722 // 0x80 
debug port 1723 let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp))); 1724 self.bus_devices 1725 .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>); 1726 self.address_manager 1727 .io_bus 1728 .insert(debug_port, 0x80, 0x1) 1729 .map_err(DeviceManagerError::BusError)?; 1730 1731 Ok(()) 1732 } 1733 1734 #[cfg(target_arch = "aarch64")] 1735 fn add_legacy_devices( 1736 &mut self, 1737 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1738 ) -> DeviceManagerResult<()> { 1739 // Add a RTC device 1740 let rtc_irq = self 1741 .address_manager 1742 .allocator 1743 .lock() 1744 .unwrap() 1745 .allocate_irq() 1746 .unwrap(); 1747 1748 let interrupt_group = interrupt_manager 1749 .create_group(LegacyIrqGroupConfig { 1750 irq: rtc_irq as InterruptIndex, 1751 }) 1752 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1753 1754 let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group))); 1755 1756 self.bus_devices 1757 .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>); 1758 1759 let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START; 1760 1761 self.address_manager 1762 .mmio_bus 1763 .insert(rtc_device, addr.0, MMIO_LEN) 1764 .map_err(DeviceManagerError::BusError)?; 1765 1766 self.id_to_dev_info.insert( 1767 (DeviceType::Rtc, "rtc".to_string()), 1768 MmioDeviceInfo { 1769 addr: addr.0, 1770 len: MMIO_LEN, 1771 irq: rtc_irq, 1772 }, 1773 ); 1774 1775 // Add a GPIO device 1776 let id = String::from(GPIO_DEVICE_NAME); 1777 let gpio_irq = self 1778 .address_manager 1779 .allocator 1780 .lock() 1781 .unwrap() 1782 .allocate_irq() 1783 .unwrap(); 1784 1785 let interrupt_group = interrupt_manager 1786 .create_group(LegacyIrqGroupConfig { 1787 irq: gpio_irq as InterruptIndex, 1788 }) 1789 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1790 1791 let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new( 1792 id.clone(), 1793 interrupt_group, 1794 
versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1795 .map_err(DeviceManagerError::RestoreGetState)?, 1796 ))); 1797 1798 self.bus_devices 1799 .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>); 1800 1801 let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START; 1802 1803 self.address_manager 1804 .mmio_bus 1805 .insert(gpio_device.clone(), addr.0, MMIO_LEN) 1806 .map_err(DeviceManagerError::BusError)?; 1807 1808 self.gpio_device = Some(gpio_device.clone()); 1809 1810 self.id_to_dev_info.insert( 1811 (DeviceType::Gpio, "gpio".to_string()), 1812 MmioDeviceInfo { 1813 addr: addr.0, 1814 len: MMIO_LEN, 1815 irq: gpio_irq, 1816 }, 1817 ); 1818 1819 self.device_tree 1820 .lock() 1821 .unwrap() 1822 .insert(id.clone(), device_node!(id, gpio_device)); 1823 1824 Ok(()) 1825 } 1826 1827 #[cfg(target_arch = "x86_64")] 1828 fn add_debug_console_device( 1829 &mut self, 1830 debug_console_writer: Box<dyn io::Write + Send>, 1831 ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> { 1832 let id = String::from(DEBUGCON_DEVICE_NAME); 1833 let debug_console = Arc::new(Mutex::new(DebugConsole::new( 1834 id.clone(), 1835 debug_console_writer, 1836 ))); 1837 1838 let port = self 1839 .config 1840 .lock() 1841 .unwrap() 1842 .debug_console 1843 .clone() 1844 .iobase 1845 .map(|port| port as u64) 1846 .unwrap_or(debug_console::DEFAULT_PORT); 1847 1848 self.bus_devices 1849 .push(Arc::clone(&debug_console) as Arc<Mutex<dyn BusDevice>>); 1850 1851 self.address_manager 1852 .allocator 1853 .lock() 1854 .unwrap() 1855 .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None) 1856 .ok_or(DeviceManagerError::AllocateIoPort)?; 1857 1858 self.address_manager 1859 .io_bus 1860 .insert(debug_console.clone(), port, 0x1) 1861 .map_err(DeviceManagerError::BusError)?; 1862 1863 // Fill the device tree with a new node. In case of restore, we 1864 // know there is nothing to do, so we can simply override the 1865 // existing entry. 
1866 self.device_tree 1867 .lock() 1868 .unwrap() 1869 .insert(id.clone(), device_node!(id, debug_console)); 1870 1871 Ok(debug_console) 1872 } 1873 1874 #[cfg(target_arch = "x86_64")] 1875 fn add_serial_device( 1876 &mut self, 1877 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1878 serial_writer: Option<Box<dyn io::Write + Send>>, 1879 ) -> DeviceManagerResult<Arc<Mutex<Serial>>> { 1880 // Serial is tied to IRQ #4 1881 let serial_irq = 4; 1882 1883 let id = String::from(SERIAL_DEVICE_NAME); 1884 1885 let interrupt_group = interrupt_manager 1886 .create_group(LegacyIrqGroupConfig { 1887 irq: serial_irq as InterruptIndex, 1888 }) 1889 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1890 1891 let serial = Arc::new(Mutex::new(Serial::new( 1892 id.clone(), 1893 interrupt_group, 1894 serial_writer, 1895 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1896 .map_err(DeviceManagerError::RestoreGetState)?, 1897 ))); 1898 1899 self.bus_devices 1900 .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>); 1901 1902 self.address_manager 1903 .allocator 1904 .lock() 1905 .unwrap() 1906 .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None) 1907 .ok_or(DeviceManagerError::AllocateIoPort)?; 1908 1909 self.address_manager 1910 .io_bus 1911 .insert(serial.clone(), 0x3f8, 0x8) 1912 .map_err(DeviceManagerError::BusError)?; 1913 1914 // Fill the device tree with a new node. In case of restore, we 1915 // know there is nothing to do, so we can simply override the 1916 // existing entry. 
1917 self.device_tree 1918 .lock() 1919 .unwrap() 1920 .insert(id.clone(), device_node!(id, serial)); 1921 1922 Ok(serial) 1923 } 1924 1925 #[cfg(target_arch = "aarch64")] 1926 fn add_serial_device( 1927 &mut self, 1928 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1929 serial_writer: Option<Box<dyn io::Write + Send>>, 1930 ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> { 1931 let id = String::from(SERIAL_DEVICE_NAME); 1932 1933 let serial_irq = self 1934 .address_manager 1935 .allocator 1936 .lock() 1937 .unwrap() 1938 .allocate_irq() 1939 .unwrap(); 1940 1941 let interrupt_group = interrupt_manager 1942 .create_group(LegacyIrqGroupConfig { 1943 irq: serial_irq as InterruptIndex, 1944 }) 1945 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1946 1947 let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new( 1948 id.clone(), 1949 interrupt_group, 1950 serial_writer, 1951 self.timestamp, 1952 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1953 .map_err(DeviceManagerError::RestoreGetState)?, 1954 ))); 1955 1956 self.bus_devices 1957 .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>); 1958 1959 let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START; 1960 1961 self.address_manager 1962 .mmio_bus 1963 .insert(serial.clone(), addr.0, MMIO_LEN) 1964 .map_err(DeviceManagerError::BusError)?; 1965 1966 self.id_to_dev_info.insert( 1967 (DeviceType::Serial, DeviceType::Serial.to_string()), 1968 MmioDeviceInfo { 1969 addr: addr.0, 1970 len: MMIO_LEN, 1971 irq: serial_irq, 1972 }, 1973 ); 1974 1975 self.cmdline_additions 1976 .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0)); 1977 1978 // Fill the device tree with a new node. In case of restore, we 1979 // know there is nothing to do, so we can simply override the 1980 // existing entry. 
1981 self.device_tree 1982 .lock() 1983 .unwrap() 1984 .insert(id.clone(), device_node!(id, serial)); 1985 1986 Ok(serial) 1987 } 1988 1989 fn modify_mode<F: FnOnce(&mut termios)>( 1990 &mut self, 1991 fd: RawFd, 1992 f: F, 1993 ) -> vmm_sys_util::errno::Result<()> { 1994 // SAFETY: safe because we check the return value of isatty. 1995 if unsafe { isatty(fd) } != 1 { 1996 return Ok(()); 1997 } 1998 1999 // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr 2000 // and we check the return result. 2001 let mut termios: termios = unsafe { zeroed() }; 2002 // SAFETY: see above 2003 let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) }; 2004 if ret < 0 { 2005 return vmm_sys_util::errno::errno_result(); 2006 } 2007 let mut original_termios_opt = self.original_termios_opt.lock().unwrap(); 2008 if original_termios_opt.is_none() { 2009 *original_termios_opt = Some(termios); 2010 } 2011 f(&mut termios); 2012 // SAFETY: Safe because the syscall will only read the extent of termios and we check 2013 // the return result. 2014 let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) }; 2015 if ret < 0 { 2016 return vmm_sys_util::errno::errno_result(); 2017 } 2018 2019 Ok(()) 2020 } 2021 2022 fn set_raw_mode(&mut self, f: &dyn AsRawFd) -> vmm_sys_util::errno::Result<()> { 2023 // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode. 
2024 self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) }) 2025 } 2026 2027 fn listen_for_sigwinch_on_tty(&mut self, pty_sub: File) -> std::io::Result<()> { 2028 let seccomp_filter = get_seccomp_filter( 2029 &self.seccomp_action, 2030 Thread::PtyForeground, 2031 self.hypervisor_type, 2032 ) 2033 .unwrap(); 2034 2035 self.console_resize_pipe = 2036 Some(Arc::new(start_sigwinch_listener(seccomp_filter, pty_sub)?)); 2037 2038 Ok(()) 2039 } 2040 2041 fn add_virtio_console_device( 2042 &mut self, 2043 virtio_devices: &mut Vec<MetaVirtioDevice>, 2044 console_pty: Option<PtyPair>, 2045 resize_pipe: Option<File>, 2046 ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> { 2047 let console_config = self.config.lock().unwrap().console.clone(); 2048 let endpoint = match console_config.mode { 2049 ConsoleOutputMode::File => { 2050 let file = File::create(console_config.file.as_ref().unwrap()) 2051 .map_err(DeviceManagerError::ConsoleOutputFileOpen)?; 2052 Endpoint::File(file) 2053 } 2054 ConsoleOutputMode::Pty => { 2055 if let Some(pty) = console_pty { 2056 self.config.lock().unwrap().console.file = Some(pty.path.clone()); 2057 let file = pty.main.try_clone().unwrap(); 2058 self.console_pty = Some(Arc::new(Mutex::new(pty))); 2059 self.console_resize_pipe = resize_pipe.map(Arc::new); 2060 Endpoint::PtyPair(file.try_clone().unwrap(), file) 2061 } else { 2062 let (main, sub, path) = 2063 create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?; 2064 self.set_raw_mode(&sub) 2065 .map_err(DeviceManagerError::SetPtyRaw)?; 2066 self.config.lock().unwrap().console.file = Some(path.clone()); 2067 let file = main.try_clone().unwrap(); 2068 assert!(resize_pipe.is_none()); 2069 self.listen_for_sigwinch_on_tty(sub).unwrap(); 2070 self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path }))); 2071 Endpoint::PtyPair(file.try_clone().unwrap(), file) 2072 } 2073 } 2074 ConsoleOutputMode::Tty => { 2075 // Duplicating the file descriptors like this is needed as 
otherwise 2076 // they will be closed on a reboot and the numbers reused 2077 2078 // SAFETY: FFI call to dup. Trivially safe. 2079 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) }; 2080 if stdout == -1 { 2081 return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd); 2082 } 2083 // SAFETY: stdout is valid and owned solely by us. 2084 let stdout = unsafe { File::from_raw_fd(stdout) }; 2085 2086 // Make sure stdout is in raw mode, if it's a terminal. 2087 let _ = self.set_raw_mode(&stdout); 2088 2089 // SAFETY: FFI call. Trivially safe. 2090 if unsafe { libc::isatty(libc::STDOUT_FILENO) } == 1 { 2091 self.listen_for_sigwinch_on_tty(stdout.try_clone().unwrap()) 2092 .unwrap(); 2093 } 2094 2095 // If an interactive TTY then we can accept input 2096 // SAFETY: FFI call. Trivially safe. 2097 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } { 2098 // SAFETY: FFI call to dup. Trivially safe. 2099 let stdin = unsafe { libc::dup(libc::STDIN_FILENO) }; 2100 if stdin == -1 { 2101 return vmm_sys_util::errno::errno_result() 2102 .map_err(DeviceManagerError::DupFd); 2103 } 2104 // SAFETY: stdin is valid and owned solely by us. 
2105 let stdin = unsafe { File::from_raw_fd(stdin) }; 2106 2107 Endpoint::FilePair(stdout, stdin) 2108 } else { 2109 Endpoint::File(stdout) 2110 } 2111 } 2112 ConsoleOutputMode::Socket => { 2113 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice); 2114 } 2115 ConsoleOutputMode::Null => Endpoint::Null, 2116 ConsoleOutputMode::Off => return Ok(None), 2117 }; 2118 let id = String::from(CONSOLE_DEVICE_NAME); 2119 2120 let (virtio_console_device, console_resizer) = virtio_devices::Console::new( 2121 id.clone(), 2122 endpoint, 2123 self.console_resize_pipe 2124 .as_ref() 2125 .map(|p| p.try_clone().unwrap()), 2126 self.force_iommu | console_config.iommu, 2127 self.seccomp_action.clone(), 2128 self.exit_evt 2129 .try_clone() 2130 .map_err(DeviceManagerError::EventFd)?, 2131 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 2132 .map_err(DeviceManagerError::RestoreGetState)?, 2133 ) 2134 .map_err(DeviceManagerError::CreateVirtioConsole)?; 2135 let virtio_console_device = Arc::new(Mutex::new(virtio_console_device)); 2136 virtio_devices.push(MetaVirtioDevice { 2137 virtio_device: Arc::clone(&virtio_console_device) 2138 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2139 iommu: console_config.iommu, 2140 id: id.clone(), 2141 pci_segment: 0, 2142 dma_handler: None, 2143 }); 2144 2145 // Fill the device tree with a new node. In case of restore, we 2146 // know there is nothing to do, so we can simply override the 2147 // existing entry. 2148 self.device_tree 2149 .lock() 2150 .unwrap() 2151 .insert(id.clone(), device_node!(id, virtio_console_device)); 2152 2153 // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY 2154 Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) { 2155 Some(console_resizer) 2156 } else { 2157 None 2158 }) 2159 } 2160 2161 /// Adds all devices that behave like a console with respect to the VM 2162 /// configuration. 
    /// This includes:
    /// - debug-console
    /// - serial-console
    /// - virtio-console
    fn add_console_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        #[cfg(target_arch = "x86_64")] debug_console_pty: Option<PtyPair>,
        #[cfg(not(target_arch = "x86_64"))] _: Option<PtyPair>,
        console_resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        // Pick the sink for serial output based on the configured mode. The
        // Pty arm produces no writer here: the SerialManager drives the PTY.
        let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
            ConsoleOutputMode::File => Some(Box::new(
                File::create(serial_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::SerialOutputFileOpen)?,
            )),
            ConsoleOutputMode::Pty => {
                if let Some(pty) = serial_pty.clone() {
                    // Reuse the PTY handed to us (e.g. across restore).
                    self.config.lock().unwrap().serial.file = Some(pty.path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(pty)));
                } else {
                    let (main, sub, path) =
                        create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
                    self.set_raw_mode(&sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().serial.file = Some(path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
                }
                None
            }
            ConsoleOutputMode::Tty => {
                let out = stdout();
                // Best-effort: ignore failure if stdout is not a terminal.
                let _ = self.set_raw_mode(&out);
                Some(Box::new(out))
            }
            ConsoleOutputMode::Off | ConsoleOutputMode::Null | ConsoleOutputMode::Socket => None,
        };
        if serial_config.mode != ConsoleOutputMode::Off {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            // Only Pty/Tty/Socket modes need a SerialManager thread for input.
            self.serial_manager = match serial_config.mode {
                ConsoleOutputMode::Pty | ConsoleOutputMode::Tty | ConsoleOutputMode::Socket => {
                    let serial_manager = SerialManager::new(
                        serial,
                        self.serial_pty.clone(),
                        serial_config.mode,
                        serial_config.socket,
                    )
                    .map_err(DeviceManagerError::CreateSerialManager)?;
                    if let Some(mut serial_manager) = serial_manager {
                        serial_manager
                            .start_thread(
                                self.exit_evt
                                    .try_clone()
                                    .map_err(DeviceManagerError::EventFd)?,
                            )
                            .map_err(DeviceManagerError::SpawnSerialManager)?;
                        Some(Arc::new(serial_manager))
                    } else {
                        None
                    }
                }
                _ => None,
            };
        }

        #[cfg(target_arch = "x86_64")]
        {
            // Same mode handling as the serial console above, for the
            // x86-only debug console (debugcon).
            let debug_console_config = self.config.lock().unwrap().debug_console.clone();
            let debug_console_writer: Option<Box<dyn io::Write + Send>> = match debug_console_config
                .mode
            {
                ConsoleOutputMode::File => Some(Box::new(
                    File::create(debug_console_config.file.as_ref().unwrap())
                        .map_err(DeviceManagerError::DebugconOutputFileOpen)?,
                )),
                ConsoleOutputMode::Pty => {
                    if let Some(pty) = debug_console_pty {
                        self.config.lock().unwrap().debug_console.file = Some(pty.path.clone());
                        self.debug_console_pty = Some(Arc::new(Mutex::new(pty)));
                    } else {
                        let (main, sub, path) =
                            create_pty().map_err(DeviceManagerError::DebugconPtyOpen)?;
                        self.set_raw_mode(&sub)
                            .map_err(DeviceManagerError::SetPtyRaw)?;
                        self.config.lock().unwrap().debug_console.file = Some(path.clone());
                        self.debug_console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
                    }
                    None
                }
                ConsoleOutputMode::Tty => {
                    let out = stdout();
                    let _ = self.set_raw_mode(&out);
                    Some(Box::new(out))
                }
                ConsoleOutputMode::Off | ConsoleOutputMode::Null | ConsoleOutputMode::Socket => {
                    None
                }
            };
            if let Some(writer) = debug_console_writer {
                let _ = self.add_debug_console_device(writer)?;
            }
        }

        let console_resizer =
            self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;

        Ok(Arc::new(Console { console_resizer }))
    }

    /// Create the TPM device and map it onto the MMIO bus at the
    /// architecture-defined TPM address range.
    fn add_tpm_device(
        &mut self,
        tpm_path: PathBuf,
    ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
        // Create TPM Device
        let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
            DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
        })?;
        let tpm = Arc::new(Mutex::new(tpm));

        // Add TPM Device to mmio
        self.address_manager
            .mmio_bus
            .insert(
                tpm.clone(),
                arch::layout::TPM_START.0,
                arch::layout::TPM_SIZE,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(tpm)
    }

    /// Build the full list of virtio devices requested by the VM
    /// configuration, in a fixed creation order.
    fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices: Vec<MetaVirtioDevice> = Vec::new();

        // Create "standard" virtio devices (net/block/rng)
        devices.append(&mut self.make_virtio_block_devices()?);
        devices.append(&mut self.make_virtio_net_devices()?);
        devices.append(&mut self.make_virtio_rng_devices()?);

        // Add virtio-fs if required
        devices.append(&mut self.make_virtio_fs_devices()?);

        // Add virtio-pmem if required
        devices.append(&mut self.make_virtio_pmem_devices()?);

        // Add virtio-vsock if required
        devices.append(&mut self.make_virtio_vsock_devices()?);

        devices.append(&mut self.make_virtio_mem_devices()?);

        // Add virtio-balloon if required
        devices.append(&mut self.make_virtio_balloon_devices()?);

        // Add virtio-watchdog device
        devices.append(&mut self.make_virtio_watchdog_devices()?);

        // Add vDPA devices if required
        devices.append(&mut self.make_vdpa_devices()?);

        Ok(devices)
    }

    // Cache whether aio is supported to avoid checking for every block device
    fn aio_is_supported(&mut self) -> bool {
        if let Some(supported) = self.aio_supported {
            return supported;
        }
        let supported = block_aio_is_supported();
        self.aio_supported = Some(supported);
        supported
    }

    // Cache whether io_uring is supported to avoid probing for every block device
    fn io_uring_is_supported(&mut self) -> bool {
        if let Some(supported) = self.io_uring_supported {
            return supported;
        }

        let supported = block_io_uring_is_supported();
        self.io_uring_supported = Some(supported);
        supported
    }

    /// Create a single virtio-block (or vhost-user-block) device from its
    /// configuration and register it in the device tree.
    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate (and record) the next free one.
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
            // vhost-user backend: the disk is served by an external process
            // over the configured socket.
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let vhost_user_block = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(
                    id.clone(),
                    vu_cfg,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            // Select the disk backend from the detected image format and the
            // host's async I/O capabilities.
            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");

                        // The cfg! above guarantees this arm is only reached
                        // with the feature enabled; the cfg attributes keep
                        // the build working either way.
                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(
                                FixedVhdDiskAsync::new(file)
                                    .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                            ) as Box<dyn DiskFile>
                        }
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous RAW disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                        }
                    } else if !disk_cfg.disable_aio && self.aio_is_supported() {
                        info!("Using asynchronous RAW disk file (aio)");
                        Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            let rate_limit_group =
                if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
                    // Create an anonymous RateLimiterGroup that is dropped when the Disk
                    // is dropped.
                    let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
                    let ops = rate_limiter_cfg.ops.unwrap_or_default();
                    let mut rate_limit_group = RateLimiterGroup::new(
                        disk_cfg.id.as_ref().unwrap(),
                        bw.size,
                        bw.one_time_burst.unwrap_or(0),
                        bw.refill_time,
                        ops.size,
                        ops.one_time_burst.unwrap_or(0),
                        ops.refill_time,
                    )
                    .map_err(DeviceManagerError::RateLimiterGroupCreate)?;

                    rate_limit_group
                        .start_thread(
                            self.exit_evt
                                .try_clone()
                                .map_err(DeviceManagerError::EventFd)?,
                        )
                        .unwrap();

                    Some(Arc::new(rate_limit_group))
                } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
                    // Shared, named rate-limit group defined elsewhere.
                    self.rate_limit_groups.get(rate_limit_group).cloned()
                } else {
                    None
                };

            // Optional per-queue host CPU affinity mapping.
            let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() {
                queue_affinity
                    .iter()
                    .map(|a| (a.queue_index, a.host_cpus.clone()))
                    .collect()
            } else {
                BTreeMap::new()
            };

            let virtio_block = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    disk_cfg.serial.clone(),
                    self.seccomp_action.clone(),
                    rate_limit_group,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    queue_affinity,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Create all configured disks; writes generated ids back into the config.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }

    /// Create a single virtio-net (or vhost-user-net) device from its
    /// configuration and register it in the device tree.
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    net_cfg.mtu,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                ) {
                    Ok(vun_device) => vun_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserNet(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_net as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let state = snapshot
                .map(|s| s.to_versioned_state())
                .transpose()
                .map_err(DeviceManagerError::RestoreGetState)?;

            // Three in-process variants: named TAP interface, pre-opened TAP
            // fds, or a TAP created from ip/mask.
            let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        Some(tap_if_name),
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else if let Some(fds) = &net_cfg.fds {
                let net = virtio_devices::Net::from_tap_fds(
                    id.clone(),
                    fds,
                    Some(net_cfg.mac),
                    net_cfg.mtu,
                    self.force_iommu | net_cfg.iommu,
                    net_cfg.queue_size,
                    self.seccomp_action.clone(),
                    net_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                )
                .map_err(DeviceManagerError::CreateVirtioNet)?;

                // SAFETY: 'fds' are valid because TAP devices are created successfully
                unsafe {
                    self.config.lock().unwrap().add_preserved_fds(fds.clone());
                }

                Arc::new(Mutex::new(net))
            } else {
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        None,
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            };

            (
                Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_net as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: net_cfg.iommu,
            id,
            pci_segment: net_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add virtio-net and vhost-user-net devices
    fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        let mut net_devices = self.config.lock().unwrap().net.clone();
        if let Some(net_list_cfg) = &mut net_devices {
            for net_cfg in net_list_cfg.iter_mut() {
                devices.push(self.make_virtio_net_device(net_cfg)?);
            }
        }
        // Write back so generated ids/host MACs persist in the config.
        self.config.lock().unwrap().net = net_devices;

        Ok(devices)
    }

    /// Create the (single, fixed-name) virtio-rng device if the configured
    /// entropy source path is valid UTF-8.
    fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        if let Some(rng_path) = rng_config.src.to_str() {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_rng_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: rng_config.iommu,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Fill the device tree with a new node.
            // In case of restore, we know there is nothing to do, so we can
            // simply override the existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_rng_device));
        }

        Ok(devices)
    }

    /// Create a single vhost-user virtio-fs device from its configuration.
    /// Fails with `NoVirtioFsSock` when the socket path is not valid UTF-8.
    fn make_virtio_fs_device(
        &mut self,
        fs_cfg: &mut FsConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &fs_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
            fs_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-fs device: {:?}", fs_cfg);

        let mut node = device_node!(id);

        if let Some(fs_socket) = fs_cfg.socket.to_str() {
            let virtio_fs_device = Arc::new(Mutex::new(
                virtio_devices::vhost_user::Fs::new(
                    id.clone(),
                    fs_socket,
                    &fs_cfg.tag,
                    fs_cfg.num_queues,
                    fs_cfg.queue_size,
                    None,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioFs)?,
            ));

            // Update the device tree with the migratable device.
            node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_fs_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id,
                pci_segment: fs_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            Err(DeviceManagerError::NoVirtioFsSock)
        }
    }

    /// Create all configured virtio-fs devices; writes generated ids back.
    fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut fs_devices = self.config.lock().unwrap().fs.clone();
        if let Some(fs_list_cfg) = &mut fs_devices {
            for fs_cfg in fs_list_cfg.iter_mut() {
                devices.push(self.make_virtio_fs_device(fs_cfg)?);
            }
        }
        self.config.lock().unwrap().fs = fs_devices;

        Ok(devices)
    }

    /// Create a single virtio-pmem device: open/map the backing file, place
    /// the region in guest address space and register the device.
    fn make_virtio_pmem_device(
        &mut self,
        pmem_cfg: &mut PmemConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &pmem_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
            pmem_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-pmem device: {:?}", pmem_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        // On restore, recover the previously-used MMIO range from the node's
        // recorded resources (exactly one MmioAddressRange is expected).
        let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
            info!("Restoring virtio-pmem {} resources", id);

            let mut region_range: Option<(u64, u64)> = None;
            for resource in node.resources.iter() {
                match resource {
                    Resource::MmioAddressRange { base, size } => {
                        if region_range.is_some() {
                            return Err(DeviceManagerError::ResourceAlreadyExists);
                        }

                        region_range = Some((*base, *size));
                    }
                    _ => {
                        error!("Unexpected resource {:?} for {}", resource, id);
                    }
                }
            }

            if region_range.is_none() {
                return Err(DeviceManagerError::MissingVirtioPmemResources);
            }

            region_range
        } else {
            None
        };

        // Backing file is a directory: create an anonymous temp file inside
        // it (O_TMPFILE), which requires an explicit size.
        let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
            if pmem_cfg.size.is_none() {
                return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
            }
            (O_TMPFILE, true)
        } else {
            (0, false)
        };

        let mut file = OpenOptions::new()
            .read(true)
            .write(!pmem_cfg.discard_writes)
            .custom_flags(custom_flags)
            .open(&pmem_cfg.file)
            .map_err(DeviceManagerError::PmemFileOpen)?;

        let size = if let Some(size) = pmem_cfg.size {
            if set_len {
                file.set_len(size)
                    .map_err(DeviceManagerError::PmemFileSetLen)?;
            }
            size
        } else {
            // No size configured: use the file length (seek to end).
            // NOTE(review): a seek failure is reported as PmemFileSetLen,
            // which reads oddly for a seek — confirm intent.
            file.seek(SeekFrom::End(0))
                .map_err(DeviceManagerError::PmemFileSetLen)?
        };

        if size % 0x20_0000 != 0 {
            return Err(DeviceManagerError::PmemSizeNotAligned);
        }

        let (region_base, region_size) = if let Some((base, size)) = region_range {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(
                    Some(GuestAddress(base)),
                    size as GuestUsize,
                    Some(0x0020_0000),
                )
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base, size)
        } else {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            let base = self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(None, size as GuestUsize, Some(0x0020_0000))
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base.raw_value(), size)
        };

        let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
        // discard_writes maps the file privately so guest writes never reach
        // the backing file.
        let mmap_region = MmapRegion::build(
            Some(FileOffset::new(cloned_file, 0)),
            region_size as usize,
            PROT_READ | PROT_WRITE,
            MAP_NORESERVE
                | if pmem_cfg.discard_writes {
                    MAP_PRIVATE
                } else {
                    MAP_SHARED
                },
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;
        let host_addr: u64 = mmap_region.as_ptr() as u64;

        let mem_slot = self
            .memory_manager
            .lock()
            .unwrap()
            .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
            .map_err(DeviceManagerError::MemoryManager)?;

        let mapping = virtio_devices::UserspaceMapping {
            host_addr,
            mem_slot,
            addr: GuestAddress(region_base),
            len: region_size,
            mergeable: false,
        };

        let virtio_pmem_device = Arc::new(Mutex::new(
            virtio_devices::Pmem::new(
                id.clone(),
                file,
                GuestAddress(region_base),
                mapping,
                mmap_region,
                self.force_iommu | pmem_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioPmem)?,
        ));

        // Update the device tree with correct resource information and with
        // the migratable device.
        node.resources.push(Resource::MmioAddressRange {
            base: region_base,
            size: region_size,
        });
        node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_pmem_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: pmem_cfg.iommu,
            id,
            pci_segment: pmem_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Create all configured virtio-pmem devices; writes generated ids back.
    fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add virtio-pmem if required
        let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
        if let Some(pmem_list_cfg) = &mut pmem_devices {
            for pmem_cfg in pmem_list_cfg.iter_mut() {
                devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
            }
        }
        self.config.lock().unwrap().pmem = pmem_devices;

        Ok(devices)
    }

    /// Create a single virtio-vsock device backed by a Unix socket.
    fn make_virtio_vsock_device(
        &mut self,
        vsock_cfg: &mut VsockConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vsock_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
            vsock_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-vsock device: {:?}", vsock_cfg);

        let socket_path = vsock_cfg
            .socket
            .to_str()
            .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
        let backend =
            virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                .map_err(DeviceManagerError::CreateVsockBackend)?;

        let vsock_device = Arc::new(Mutex::new(
            virtio_devices::Vsock::new(
                id.clone(),
                vsock_cfg.cid,
3092 vsock_cfg.socket.clone(), 3093 backend, 3094 self.force_iommu | vsock_cfg.iommu, 3095 self.seccomp_action.clone(), 3096 self.exit_evt 3097 .try_clone() 3098 .map_err(DeviceManagerError::EventFd)?, 3099 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3100 .map_err(DeviceManagerError::RestoreGetState)?, 3101 ) 3102 .map_err(DeviceManagerError::CreateVirtioVsock)?, 3103 )); 3104 3105 // Fill the device tree with a new node. In case of restore, we 3106 // know there is nothing to do, so we can simply override the 3107 // existing entry. 3108 self.device_tree 3109 .lock() 3110 .unwrap() 3111 .insert(id.clone(), device_node!(id, vsock_device)); 3112 3113 Ok(MetaVirtioDevice { 3114 virtio_device: Arc::clone(&vsock_device) 3115 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3116 iommu: vsock_cfg.iommu, 3117 id, 3118 pci_segment: vsock_cfg.pci_segment, 3119 dma_handler: None, 3120 }) 3121 } 3122 3123 fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3124 let mut devices = Vec::new(); 3125 3126 let mut vsock = self.config.lock().unwrap().vsock.clone(); 3127 if let Some(ref mut vsock_cfg) = &mut vsock { 3128 devices.push(self.make_virtio_vsock_device(vsock_cfg)?); 3129 } 3130 self.config.lock().unwrap().vsock = vsock; 3131 3132 Ok(devices) 3133 } 3134 3135 fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3136 let mut devices = Vec::new(); 3137 3138 let mm = self.memory_manager.clone(); 3139 let mut mm = mm.lock().unwrap(); 3140 for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() { 3141 if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() { 3142 info!("Creating virtio-mem device: id = {}", memory_zone_id); 3143 3144 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id) 3145 .map(|i| i as u16); 3146 3147 let virtio_mem_device = Arc::new(Mutex::new( 3148 virtio_devices::Mem::new( 3149 memory_zone_id.clone(), 3150 
virtio_mem_zone.region(), 3151 self.seccomp_action.clone(), 3152 node_id, 3153 virtio_mem_zone.hotplugged_size(), 3154 virtio_mem_zone.hugepages(), 3155 self.exit_evt 3156 .try_clone() 3157 .map_err(DeviceManagerError::EventFd)?, 3158 virtio_mem_zone.blocks_state().clone(), 3159 versioned_state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str()) 3160 .map_err(DeviceManagerError::RestoreGetState)?, 3161 ) 3162 .map_err(DeviceManagerError::CreateVirtioMem)?, 3163 )); 3164 3165 // Update the virtio-mem zone so that it has a handle onto the 3166 // virtio-mem device, which will be used for triggering a resize 3167 // if needed. 3168 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device)); 3169 3170 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device)); 3171 3172 devices.push(MetaVirtioDevice { 3173 virtio_device: Arc::clone(&virtio_mem_device) 3174 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3175 iommu: false, 3176 id: memory_zone_id.clone(), 3177 pci_segment: 0, 3178 dma_handler: None, 3179 }); 3180 3181 // Fill the device tree with a new node. In case of restore, we 3182 // know there is nothing to do, so we can simply override the 3183 // existing entry. 
3184 self.device_tree.lock().unwrap().insert( 3185 memory_zone_id.clone(), 3186 device_node!(memory_zone_id, virtio_mem_device), 3187 ); 3188 } 3189 } 3190 3191 Ok(devices) 3192 } 3193 3194 fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3195 let mut devices = Vec::new(); 3196 3197 if let Some(balloon_config) = &self.config.lock().unwrap().balloon { 3198 let id = String::from(BALLOON_DEVICE_NAME); 3199 info!("Creating virtio-balloon device: id = {}", id); 3200 3201 let virtio_balloon_device = Arc::new(Mutex::new( 3202 virtio_devices::Balloon::new( 3203 id.clone(), 3204 balloon_config.size, 3205 balloon_config.deflate_on_oom, 3206 balloon_config.free_page_reporting, 3207 self.seccomp_action.clone(), 3208 self.exit_evt 3209 .try_clone() 3210 .map_err(DeviceManagerError::EventFd)?, 3211 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3212 .map_err(DeviceManagerError::RestoreGetState)?, 3213 ) 3214 .map_err(DeviceManagerError::CreateVirtioBalloon)?, 3215 )); 3216 3217 self.balloon = Some(virtio_balloon_device.clone()); 3218 3219 devices.push(MetaVirtioDevice { 3220 virtio_device: Arc::clone(&virtio_balloon_device) 3221 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3222 iommu: false, 3223 id: id.clone(), 3224 pci_segment: 0, 3225 dma_handler: None, 3226 }); 3227 3228 self.device_tree 3229 .lock() 3230 .unwrap() 3231 .insert(id.clone(), device_node!(id, virtio_balloon_device)); 3232 } 3233 3234 Ok(devices) 3235 } 3236 3237 fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3238 let mut devices = Vec::new(); 3239 3240 if !self.config.lock().unwrap().watchdog { 3241 return Ok(devices); 3242 } 3243 3244 let id = String::from(WATCHDOG_DEVICE_NAME); 3245 info!("Creating virtio-watchdog device: id = {}", id); 3246 3247 let virtio_watchdog_device = Arc::new(Mutex::new( 3248 virtio_devices::Watchdog::new( 3249 id.clone(), 3250 self.reset_evt.try_clone().unwrap(), 3251 
self.seccomp_action.clone(), 3252 self.exit_evt 3253 .try_clone() 3254 .map_err(DeviceManagerError::EventFd)?, 3255 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3256 .map_err(DeviceManagerError::RestoreGetState)?, 3257 ) 3258 .map_err(DeviceManagerError::CreateVirtioWatchdog)?, 3259 )); 3260 devices.push(MetaVirtioDevice { 3261 virtio_device: Arc::clone(&virtio_watchdog_device) 3262 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3263 iommu: false, 3264 id: id.clone(), 3265 pci_segment: 0, 3266 dma_handler: None, 3267 }); 3268 3269 self.device_tree 3270 .lock() 3271 .unwrap() 3272 .insert(id.clone(), device_node!(id, virtio_watchdog_device)); 3273 3274 Ok(devices) 3275 } 3276 3277 fn make_vdpa_device( 3278 &mut self, 3279 vdpa_cfg: &mut VdpaConfig, 3280 ) -> DeviceManagerResult<MetaVirtioDevice> { 3281 let id = if let Some(id) = &vdpa_cfg.id { 3282 id.clone() 3283 } else { 3284 let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?; 3285 vdpa_cfg.id = Some(id.clone()); 3286 id 3287 }; 3288 3289 info!("Creating vDPA device: {:?}", vdpa_cfg); 3290 3291 let device_path = vdpa_cfg 3292 .path 3293 .to_str() 3294 .ok_or(DeviceManagerError::CreateVdpaConvertPath)?; 3295 3296 let vdpa_device = Arc::new(Mutex::new( 3297 virtio_devices::Vdpa::new( 3298 id.clone(), 3299 device_path, 3300 self.memory_manager.lock().unwrap().guest_memory(), 3301 vdpa_cfg.num_queues as u16, 3302 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3303 .map_err(DeviceManagerError::RestoreGetState)?, 3304 ) 3305 .map_err(DeviceManagerError::CreateVdpa)?, 3306 )); 3307 3308 // Create the DMA handler that is required by the vDPA device 3309 let vdpa_mapping = Arc::new(VdpaDmaMapping::new( 3310 Arc::clone(&vdpa_device), 3311 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3312 )); 3313 3314 self.device_tree 3315 .lock() 3316 .unwrap() 3317 .insert(id.clone(), device_node!(id, vdpa_device)); 3318 3319 Ok(MetaVirtioDevice { 3320 virtio_device: vdpa_device as 
Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3321 iommu: vdpa_cfg.iommu, 3322 id, 3323 pci_segment: vdpa_cfg.pci_segment, 3324 dma_handler: Some(vdpa_mapping), 3325 }) 3326 } 3327 3328 fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3329 let mut devices = Vec::new(); 3330 // Add vdpa if required 3331 let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone(); 3332 if let Some(vdpa_list_cfg) = &mut vdpa_devices { 3333 for vdpa_cfg in vdpa_list_cfg.iter_mut() { 3334 devices.push(self.make_vdpa_device(vdpa_cfg)?); 3335 } 3336 } 3337 self.config.lock().unwrap().vdpa = vdpa_devices; 3338 3339 Ok(devices) 3340 } 3341 3342 fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> { 3343 let start_id = self.device_id_cnt; 3344 loop { 3345 // Generate the temporary name. 3346 let name = format!("{}{}", prefix, self.device_id_cnt); 3347 // Increment the counter. 3348 self.device_id_cnt += Wrapping(1); 3349 // Check if the name is already in use. 3350 if !self.boot_id_list.contains(&name) 3351 && !self.device_tree.lock().unwrap().contains_key(&name) 3352 { 3353 return Ok(name); 3354 } 3355 3356 if self.device_id_cnt == start_id { 3357 // We went through a full loop and there's nothing else we can 3358 // do. 3359 break; 3360 } 3361 } 3362 Err(DeviceManagerError::NoAvailableDeviceName) 3363 } 3364 3365 fn add_passthrough_device( 3366 &mut self, 3367 device_cfg: &mut DeviceConfig, 3368 ) -> DeviceManagerResult<(PciBdf, String)> { 3369 // If the passthrough device has not been created yet, it is created 3370 // here and stored in the DeviceManager structure for future needs. 
3371 if self.passthrough_device.is_none() { 3372 self.passthrough_device = Some( 3373 self.address_manager 3374 .vm 3375 .create_passthrough_device() 3376 .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?, 3377 ); 3378 } 3379 3380 self.add_vfio_device(device_cfg) 3381 } 3382 3383 fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> { 3384 let passthrough_device = self 3385 .passthrough_device 3386 .as_ref() 3387 .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?; 3388 3389 let dup = passthrough_device 3390 .try_clone() 3391 .map_err(DeviceManagerError::VfioCreate)?; 3392 3393 Ok(Arc::new( 3394 VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?, 3395 )) 3396 } 3397 3398 fn add_vfio_device( 3399 &mut self, 3400 device_cfg: &mut DeviceConfig, 3401 ) -> DeviceManagerResult<(PciBdf, String)> { 3402 let vfio_name = if let Some(id) = &device_cfg.id { 3403 id.clone() 3404 } else { 3405 let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?; 3406 device_cfg.id = Some(id.clone()); 3407 id 3408 }; 3409 3410 let (pci_segment_id, pci_device_bdf, resources) = 3411 self.pci_resources(&vfio_name, device_cfg.pci_segment)?; 3412 3413 let mut needs_dma_mapping = false; 3414 3415 // Here we create a new VFIO container for two reasons. Either this is 3416 // the first VFIO device, meaning we need a new VFIO container, which 3417 // will be shared with other VFIO devices. Or the new VFIO device is 3418 // attached to a vIOMMU, meaning we must create a dedicated VFIO 3419 // container. In the vIOMMU use case, we can't let all devices under 3420 // the same VFIO container since we couldn't map/unmap memory for each 3421 // device. That's simply because the map/unmap operations happen at the 3422 // VFIO container level. 
3423 let vfio_container = if device_cfg.iommu { 3424 let vfio_container = self.create_vfio_container()?; 3425 3426 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3427 Arc::clone(&vfio_container), 3428 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3429 Arc::clone(&self.mmio_regions), 3430 )); 3431 3432 if let Some(iommu) = &self.iommu_device { 3433 iommu 3434 .lock() 3435 .unwrap() 3436 .add_external_mapping(pci_device_bdf.into(), vfio_mapping); 3437 } else { 3438 return Err(DeviceManagerError::MissingVirtualIommu); 3439 } 3440 3441 vfio_container 3442 } else if let Some(vfio_container) = &self.vfio_container { 3443 Arc::clone(vfio_container) 3444 } else { 3445 let vfio_container = self.create_vfio_container()?; 3446 needs_dma_mapping = true; 3447 self.vfio_container = Some(Arc::clone(&vfio_container)); 3448 3449 vfio_container 3450 }; 3451 3452 let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container)) 3453 .map_err(DeviceManagerError::VfioCreate)?; 3454 3455 if needs_dma_mapping { 3456 // Register DMA mapping in IOMMU. 3457 // Do not register virtio-mem regions, as they are handled directly by 3458 // virtio-mem device itself. 
3459 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3460 for region in zone.regions() { 3461 vfio_container 3462 .vfio_dma_map( 3463 region.start_addr().raw_value(), 3464 region.len(), 3465 region.as_ptr() as u64, 3466 ) 3467 .map_err(DeviceManagerError::VfioDmaMap)?; 3468 } 3469 } 3470 3471 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3472 Arc::clone(&vfio_container), 3473 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3474 Arc::clone(&self.mmio_regions), 3475 )); 3476 3477 for virtio_mem_device in self.virtio_mem_devices.iter() { 3478 virtio_mem_device 3479 .lock() 3480 .unwrap() 3481 .add_dma_mapping_handler( 3482 VirtioMemMappingSource::Container, 3483 vfio_mapping.clone(), 3484 ) 3485 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3486 } 3487 } 3488 3489 let legacy_interrupt_group = 3490 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3491 Some( 3492 legacy_interrupt_manager 3493 .create_group(LegacyIrqGroupConfig { 3494 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3495 [pci_device_bdf.device() as usize] 3496 as InterruptIndex, 3497 }) 3498 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3499 ) 3500 } else { 3501 None 3502 }; 3503 3504 let memory_manager = self.memory_manager.clone(); 3505 3506 let vfio_pci_device = VfioPciDevice::new( 3507 vfio_name.clone(), 3508 &self.address_manager.vm, 3509 vfio_device, 3510 vfio_container, 3511 self.msi_interrupt_manager.clone(), 3512 legacy_interrupt_group, 3513 device_cfg.iommu, 3514 pci_device_bdf, 3515 Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()), 3516 vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()), 3517 device_cfg.x_nv_gpudirect_clique, 3518 ) 3519 .map_err(DeviceManagerError::VfioPciCreate)?; 3520 3521 let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device)); 3522 3523 let new_resources = self.add_pci_device( 3524 vfio_pci_device.clone(), 3525 
vfio_pci_device.clone(), 3526 pci_segment_id, 3527 pci_device_bdf, 3528 resources, 3529 )?; 3530 3531 vfio_pci_device 3532 .lock() 3533 .unwrap() 3534 .map_mmio_regions() 3535 .map_err(DeviceManagerError::VfioMapRegion)?; 3536 3537 for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() { 3538 self.mmio_regions.lock().unwrap().push(mmio_region); 3539 } 3540 3541 let mut node = device_node!(vfio_name, vfio_pci_device); 3542 3543 // Update the device tree with correct resource information. 3544 node.resources = new_resources; 3545 node.pci_bdf = Some(pci_device_bdf); 3546 node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device)); 3547 3548 self.device_tree 3549 .lock() 3550 .unwrap() 3551 .insert(vfio_name.clone(), node); 3552 3553 Ok((pci_device_bdf, vfio_name)) 3554 } 3555 3556 fn add_pci_device( 3557 &mut self, 3558 bus_device: Arc<Mutex<dyn BusDevice>>, 3559 pci_device: Arc<Mutex<dyn PciDevice>>, 3560 segment_id: u16, 3561 bdf: PciBdf, 3562 resources: Option<Vec<Resource>>, 3563 ) -> DeviceManagerResult<Vec<Resource>> { 3564 let bars = pci_device 3565 .lock() 3566 .unwrap() 3567 .allocate_bars( 3568 &self.address_manager.allocator, 3569 &mut self.pci_segments[segment_id as usize] 3570 .mem32_allocator 3571 .lock() 3572 .unwrap(), 3573 &mut self.pci_segments[segment_id as usize] 3574 .mem64_allocator 3575 .lock() 3576 .unwrap(), 3577 resources, 3578 ) 3579 .map_err(DeviceManagerError::AllocateBars)?; 3580 3581 let mut pci_bus = self.pci_segments[segment_id as usize] 3582 .pci_bus 3583 .lock() 3584 .unwrap(); 3585 3586 pci_bus 3587 .add_device(bdf.device() as u32, pci_device) 3588 .map_err(DeviceManagerError::AddPciDevice)?; 3589 3590 self.bus_devices.push(Arc::clone(&bus_device)); 3591 3592 pci_bus 3593 .register_mapping( 3594 bus_device, 3595 #[cfg(target_arch = "x86_64")] 3596 self.address_manager.io_bus.as_ref(), 3597 self.address_manager.mmio_bus.as_ref(), 3598 bars.clone(), 3599 ) 3600 .map_err(DeviceManagerError::AddPciDevice)?; 3601 
3602 let mut new_resources = Vec::new(); 3603 for bar in bars { 3604 new_resources.push(Resource::PciBar { 3605 index: bar.idx(), 3606 base: bar.addr(), 3607 size: bar.size(), 3608 type_: bar.region_type().into(), 3609 prefetchable: bar.prefetchable().into(), 3610 }); 3611 } 3612 3613 Ok(new_resources) 3614 } 3615 3616 fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3617 let mut iommu_attached_device_ids = Vec::new(); 3618 let mut devices = self.config.lock().unwrap().devices.clone(); 3619 3620 if let Some(device_list_cfg) = &mut devices { 3621 for device_cfg in device_list_cfg.iter_mut() { 3622 let (device_id, _) = self.add_passthrough_device(device_cfg)?; 3623 if device_cfg.iommu && self.iommu_device.is_some() { 3624 iommu_attached_device_ids.push(device_id); 3625 } 3626 } 3627 } 3628 3629 // Update the list of devices 3630 self.config.lock().unwrap().devices = devices; 3631 3632 Ok(iommu_attached_device_ids) 3633 } 3634 3635 fn add_vfio_user_device( 3636 &mut self, 3637 device_cfg: &mut UserDeviceConfig, 3638 ) -> DeviceManagerResult<(PciBdf, String)> { 3639 let vfio_user_name = if let Some(id) = &device_cfg.id { 3640 id.clone() 3641 } else { 3642 let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?; 3643 device_cfg.id = Some(id.clone()); 3644 id 3645 }; 3646 3647 let (pci_segment_id, pci_device_bdf, resources) = 3648 self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?; 3649 3650 let legacy_interrupt_group = 3651 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3652 Some( 3653 legacy_interrupt_manager 3654 .create_group(LegacyIrqGroupConfig { 3655 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3656 [pci_device_bdf.device() as usize] 3657 as InterruptIndex, 3658 }) 3659 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3660 ) 3661 } else { 3662 None 3663 }; 3664 3665 let client = Arc::new(Mutex::new( 3666 vfio_user::Client::new(&device_cfg.socket) 3667 
.map_err(DeviceManagerError::VfioUserCreateClient)?, 3668 )); 3669 3670 let memory_manager = self.memory_manager.clone(); 3671 3672 let mut vfio_user_pci_device = VfioUserPciDevice::new( 3673 vfio_user_name.clone(), 3674 &self.address_manager.vm, 3675 client.clone(), 3676 self.msi_interrupt_manager.clone(), 3677 legacy_interrupt_group, 3678 pci_device_bdf, 3679 Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()), 3680 vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()), 3681 ) 3682 .map_err(DeviceManagerError::VfioUserCreate)?; 3683 3684 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3685 let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory))); 3686 for virtio_mem_device in self.virtio_mem_devices.iter() { 3687 virtio_mem_device 3688 .lock() 3689 .unwrap() 3690 .add_dma_mapping_handler( 3691 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3692 vfio_user_mapping.clone(), 3693 ) 3694 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3695 } 3696 3697 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3698 for region in zone.regions() { 3699 vfio_user_pci_device 3700 .dma_map(region) 3701 .map_err(DeviceManagerError::VfioUserDmaMap)?; 3702 } 3703 } 3704 3705 let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device)); 3706 3707 let new_resources = self.add_pci_device( 3708 vfio_user_pci_device.clone(), 3709 vfio_user_pci_device.clone(), 3710 pci_segment_id, 3711 pci_device_bdf, 3712 resources, 3713 )?; 3714 3715 // Note it is required to call 'add_pci_device()' in advance to have the list of 3716 // mmio regions provisioned correctly 3717 vfio_user_pci_device 3718 .lock() 3719 .unwrap() 3720 .map_mmio_regions() 3721 .map_err(DeviceManagerError::VfioUserMapRegion)?; 3722 3723 let mut node = device_node!(vfio_user_name, vfio_user_pci_device); 3724 3725 // Update the device tree with correct resource information. 
3726 node.resources = new_resources; 3727 node.pci_bdf = Some(pci_device_bdf); 3728 node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device)); 3729 3730 self.device_tree 3731 .lock() 3732 .unwrap() 3733 .insert(vfio_user_name.clone(), node); 3734 3735 Ok((pci_device_bdf, vfio_user_name)) 3736 } 3737 3738 fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3739 let mut user_devices = self.config.lock().unwrap().user_devices.clone(); 3740 3741 if let Some(device_list_cfg) = &mut user_devices { 3742 for device_cfg in device_list_cfg.iter_mut() { 3743 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?; 3744 } 3745 } 3746 3747 // Update the list of devices 3748 self.config.lock().unwrap().user_devices = user_devices; 3749 3750 Ok(vec![]) 3751 } 3752 3753 fn add_virtio_pci_device( 3754 &mut self, 3755 virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3756 iommu_mapping: &Option<Arc<IommuMapping>>, 3757 virtio_device_id: String, 3758 pci_segment_id: u16, 3759 dma_handler: Option<Arc<dyn ExternalDmaMapping>>, 3760 ) -> DeviceManagerResult<PciBdf> { 3761 let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}"); 3762 3763 // Add the new virtio-pci node to the device tree. 3764 let mut node = device_node!(id); 3765 node.children = vec![virtio_device_id.clone()]; 3766 3767 let (pci_segment_id, pci_device_bdf, resources) = 3768 self.pci_resources(&id, pci_segment_id)?; 3769 3770 // Update the existing virtio node by setting the parent. 3771 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) { 3772 node.parent = Some(id.clone()); 3773 } else { 3774 return Err(DeviceManagerError::MissingNode); 3775 } 3776 3777 // Allows support for one MSI-X vector per queue. It also adds 1 3778 // as we need to take into account the dedicated vector to notify 3779 // about a virtio config change. 
3780 let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16; 3781 3782 // Create the AccessPlatform trait from the implementation IommuMapping. 3783 // This will provide address translation for any virtio device sitting 3784 // behind a vIOMMU. 3785 let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping 3786 { 3787 Some(Arc::new(AccessPlatformMapping::new( 3788 pci_device_bdf.into(), 3789 mapping.clone(), 3790 ))) 3791 } else { 3792 None 3793 }; 3794 3795 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3796 3797 // Map DMA ranges if a DMA handler is available and if the device is 3798 // not attached to a virtual IOMMU. 3799 if let Some(dma_handler) = &dma_handler { 3800 if iommu_mapping.is_some() { 3801 if let Some(iommu) = &self.iommu_device { 3802 iommu 3803 .lock() 3804 .unwrap() 3805 .add_external_mapping(pci_device_bdf.into(), dma_handler.clone()); 3806 } else { 3807 return Err(DeviceManagerError::MissingVirtualIommu); 3808 } 3809 } else { 3810 // Let every virtio-mem device handle the DMA map/unmap through the 3811 // DMA handler provided. 3812 for virtio_mem_device in self.virtio_mem_devices.iter() { 3813 virtio_mem_device 3814 .lock() 3815 .unwrap() 3816 .add_dma_mapping_handler( 3817 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3818 dma_handler.clone(), 3819 ) 3820 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3821 } 3822 3823 // Do not register virtio-mem regions, as they are handled directly by 3824 // virtio-mem devices. 
3825 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3826 for region in zone.regions() { 3827 let gpa = region.start_addr().0; 3828 let size = region.len(); 3829 dma_handler 3830 .map(gpa, gpa, size) 3831 .map_err(DeviceManagerError::VirtioDmaMap)?; 3832 } 3833 } 3834 } 3835 } 3836 3837 let device_type = virtio_device.lock().unwrap().device_type(); 3838 let virtio_pci_device = Arc::new(Mutex::new( 3839 VirtioPciDevice::new( 3840 id.clone(), 3841 memory, 3842 virtio_device, 3843 msix_num, 3844 access_platform, 3845 &self.msi_interrupt_manager, 3846 pci_device_bdf.into(), 3847 self.activate_evt 3848 .try_clone() 3849 .map_err(DeviceManagerError::EventFd)?, 3850 // All device types *except* virtio block devices should be allocated a 64-bit bar 3851 // The block devices should be given a 32-bit BAR so that they are easily accessible 3852 // to firmware without requiring excessive identity mapping. 3853 // The exception being if not on the default PCI segment. 3854 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32, 3855 dma_handler, 3856 self.pending_activations.clone(), 3857 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()), 3858 ) 3859 .map_err(DeviceManagerError::VirtioDevice)?, 3860 )); 3861 3862 let new_resources = self.add_pci_device( 3863 virtio_pci_device.clone(), 3864 virtio_pci_device.clone(), 3865 pci_segment_id, 3866 pci_device_bdf, 3867 resources, 3868 )?; 3869 3870 let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr(); 3871 for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) { 3872 let io_addr = IoEventAddress::Mmio(addr); 3873 self.address_manager 3874 .vm 3875 .register_ioevent(event, &io_addr, None) 3876 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?; 3877 } 3878 3879 // Update the device tree with correct resource information. 
3880 node.resources = new_resources; 3881 node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>); 3882 node.pci_bdf = Some(pci_device_bdf); 3883 node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device)); 3884 self.device_tree.lock().unwrap().insert(id, node); 3885 3886 Ok(pci_device_bdf) 3887 } 3888 3889 fn add_pvpanic_device( 3890 &mut self, 3891 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> { 3892 let id = String::from(PVPANIC_DEVICE_NAME); 3893 let pci_segment_id = 0x0_u16; 3894 3895 info!("Creating pvpanic device {}", id); 3896 3897 let (pci_segment_id, pci_device_bdf, resources) = 3898 self.pci_resources(&id, pci_segment_id)?; 3899 3900 let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); 3901 3902 let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot) 3903 .map_err(DeviceManagerError::PvPanicCreate)?; 3904 3905 let pvpanic_device = Arc::new(Mutex::new(pvpanic_device)); 3906 3907 let new_resources = self.add_pci_device( 3908 pvpanic_device.clone(), 3909 pvpanic_device.clone(), 3910 pci_segment_id, 3911 pci_device_bdf, 3912 resources, 3913 )?; 3914 3915 let mut node = device_node!(id, pvpanic_device); 3916 3917 node.resources = new_resources; 3918 node.pci_bdf = Some(pci_device_bdf); 3919 node.pci_device_handle = None; 3920 3921 self.device_tree.lock().unwrap().insert(id, node); 3922 3923 Ok(Some(pvpanic_device)) 3924 } 3925 3926 fn pci_resources( 3927 &self, 3928 id: &str, 3929 pci_segment_id: u16, 3930 ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> { 3931 // Look for the id in the device tree. If it can be found, that means 3932 // the device is being restored, otherwise it's created from scratch. 
3933 Ok( 3934 if let Some(node) = self.device_tree.lock().unwrap().get(id) { 3935 info!("Restoring virtio-pci {} resources", id); 3936 let pci_device_bdf: PciBdf = node 3937 .pci_bdf 3938 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3939 let pci_segment_id = pci_device_bdf.segment(); 3940 3941 self.pci_segments[pci_segment_id as usize] 3942 .pci_bus 3943 .lock() 3944 .unwrap() 3945 .get_device_id(pci_device_bdf.device() as usize) 3946 .map_err(DeviceManagerError::GetPciDeviceId)?; 3947 3948 (pci_segment_id, pci_device_bdf, Some(node.resources.clone())) 3949 } else { 3950 let pci_device_bdf = 3951 self.pci_segments[pci_segment_id as usize].next_device_bdf()?; 3952 3953 (pci_segment_id, pci_device_bdf, None) 3954 }, 3955 ) 3956 } 3957 3958 #[cfg(target_arch = "x86_64")] 3959 pub fn io_bus(&self) -> &Arc<Bus> { 3960 &self.address_manager.io_bus 3961 } 3962 3963 pub fn mmio_bus(&self) -> &Arc<Bus> { 3964 &self.address_manager.mmio_bus 3965 } 3966 3967 pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> { 3968 &self.address_manager.allocator 3969 } 3970 3971 pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> { 3972 self.interrupt_controller 3973 .as_ref() 3974 .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>) 3975 } 3976 3977 pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> { 3978 &self.pci_segments 3979 } 3980 3981 pub fn console(&self) -> &Arc<Console> { 3982 &self.console 3983 } 3984 3985 #[cfg(target_arch = "aarch64")] 3986 pub fn cmdline_additions(&self) -> &[String] { 3987 self.cmdline_additions.as_slice() 3988 } 3989 3990 pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> { 3991 for handle in self.virtio_devices.iter() { 3992 handle 3993 .virtio_device 3994 .lock() 3995 .unwrap() 3996 .add_memory_region(new_region) 3997 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?; 3998 3999 if let Some(dma_handler) = &handle.dma_handler { 4000 if !handle.iommu { 4001 
let gpa = new_region.start_addr().0; 4002 let size = new_region.len(); 4003 dma_handler 4004 .map(gpa, gpa, size) 4005 .map_err(DeviceManagerError::VirtioDmaMap)?; 4006 } 4007 } 4008 } 4009 4010 // Take care of updating the memory for VFIO PCI devices. 4011 if let Some(vfio_container) = &self.vfio_container { 4012 vfio_container 4013 .vfio_dma_map( 4014 new_region.start_addr().raw_value(), 4015 new_region.len(), 4016 new_region.as_ptr() as u64, 4017 ) 4018 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?; 4019 } 4020 4021 // Take care of updating the memory for vfio-user devices. 4022 { 4023 let device_tree = self.device_tree.lock().unwrap(); 4024 for pci_device_node in device_tree.pci_devices() { 4025 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node 4026 .pci_device_handle 4027 .as_ref() 4028 .ok_or(DeviceManagerError::MissingPciDevice)? 4029 { 4030 vfio_user_pci_device 4031 .lock() 4032 .unwrap() 4033 .dma_map(new_region) 4034 .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?; 4035 } 4036 } 4037 } 4038 4039 Ok(()) 4040 } 4041 4042 pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> { 4043 for mut activator in self.pending_activations.lock().unwrap().drain(..) 
{ 4044 activator 4045 .activate() 4046 .map_err(DeviceManagerError::VirtioActivate)?; 4047 } 4048 Ok(()) 4049 } 4050 4051 pub fn notify_hotplug( 4052 &self, 4053 _notification_type: AcpiNotificationFlags, 4054 ) -> DeviceManagerResult<()> { 4055 return self 4056 .ged_notification_device 4057 .as_ref() 4058 .unwrap() 4059 .lock() 4060 .unwrap() 4061 .notify(_notification_type) 4062 .map_err(DeviceManagerError::HotPlugNotification); 4063 } 4064 4065 pub fn add_device( 4066 &mut self, 4067 device_cfg: &mut DeviceConfig, 4068 ) -> DeviceManagerResult<PciDeviceInfo> { 4069 self.validate_identifier(&device_cfg.id)?; 4070 4071 if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) { 4072 return Err(DeviceManagerError::InvalidIommuHotplug); 4073 } 4074 4075 let (bdf, device_name) = self.add_passthrough_device(device_cfg)?; 4076 4077 // Update the PCIU bitmap 4078 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 4079 4080 Ok(PciDeviceInfo { 4081 id: device_name, 4082 bdf, 4083 }) 4084 } 4085 4086 pub fn add_user_device( 4087 &mut self, 4088 device_cfg: &mut UserDeviceConfig, 4089 ) -> DeviceManagerResult<PciDeviceInfo> { 4090 self.validate_identifier(&device_cfg.id)?; 4091 4092 let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?; 4093 4094 // Update the PCIU bitmap 4095 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 4096 4097 Ok(PciDeviceInfo { 4098 id: device_name, 4099 bdf, 4100 }) 4101 } 4102 4103 pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> { 4104 // The node can be directly a PCI node in case the 'id' refers to a 4105 // VFIO device or a virtio-pci one. 4106 // In case the 'id' refers to a virtio device, we must find the PCI 4107 // node by looking at the parent. 
4108 let device_tree = self.device_tree.lock().unwrap(); 4109 let node = device_tree 4110 .get(&id) 4111 .ok_or(DeviceManagerError::UnknownDeviceId(id))?; 4112 4113 let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() { 4114 node 4115 } else { 4116 let parent = node 4117 .parent 4118 .as_ref() 4119 .ok_or(DeviceManagerError::MissingNode)?; 4120 device_tree 4121 .get(parent) 4122 .ok_or(DeviceManagerError::MissingNode)? 4123 }; 4124 4125 let pci_device_bdf: PciBdf = pci_device_node 4126 .pci_bdf 4127 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 4128 let pci_segment_id = pci_device_bdf.segment(); 4129 4130 let pci_device_handle = pci_device_node 4131 .pci_device_handle 4132 .as_ref() 4133 .ok_or(DeviceManagerError::MissingPciDevice)?; 4134 #[allow(irrefutable_let_patterns)] 4135 if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle { 4136 let device_type = VirtioDeviceType::from( 4137 virtio_pci_device 4138 .lock() 4139 .unwrap() 4140 .virtio_device() 4141 .lock() 4142 .unwrap() 4143 .device_type(), 4144 ); 4145 match device_type { 4146 VirtioDeviceType::Net 4147 | VirtioDeviceType::Block 4148 | VirtioDeviceType::Pmem 4149 | VirtioDeviceType::Fs 4150 | VirtioDeviceType::Vsock => {} 4151 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)), 4152 } 4153 } 4154 4155 // Update the PCID bitmap 4156 self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device(); 4157 4158 Ok(()) 4159 } 4160 4161 pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> { 4162 info!( 4163 "Ejecting device_id = {} on segment_id={}", 4164 device_id, pci_segment_id 4165 ); 4166 4167 // Convert the device ID into the corresponding b/d/f. 4168 let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0); 4169 4170 // Give the PCI device ID back to the PCI bus. 
        // NOTE(review): this is the continuation of the PCI device removal
        // path — the function header and the derivation of `pci_segment_id`,
        // `device_id` and `pci_device_bdf` are above this chunk; confirm there.

        // Give the device id back to the per-segment PCI bus so the slot can
        // be reused by a future hotplug.
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .put_device_id(device_id as usize)
            .map_err(DeviceManagerError::PutPciDeviceId)?;

        // Remove the device from the device tree along with its children.
        let mut device_tree = self.device_tree.lock().unwrap();
        let pci_device_node = device_tree
            .remove_node_by_pci_bdf(pci_device_bdf)
            .ok_or(DeviceManagerError::MissingPciDevice)?;

        // For VFIO and vfio-user the PCI device id is the id.
        // For virtio we overwrite it later as we want the id of the
        // underlying device.
        let mut id = pci_device_node.id;
        let pci_device_handle = pci_device_node
            .pci_device_handle
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
            // The virtio-pci device has a single child
            if !pci_device_node.children.is_empty() {
                assert_eq!(pci_device_node.children.len(), 1);
                let child_id = &pci_device_node.children[0];
                id.clone_from(child_id);
            }
        }
        for child in pci_device_node.children.iter() {
            device_tree.remove(child);
        }

        // Determine whether this device sits behind the virtual IOMMU; the
        // DMA-unmapping behavior below differs in that case.
        let mut iommu_attached = false;
        if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
            if iommu_attached_devices.contains(&pci_device_bdf) {
                iommu_attached = true;
            }
        }

        // Per-backend teardown: each arm yields the PciDevice/BusDevice trait
        // objects, an optional underlying virtio device, and whether a DMA
        // mapping handler must also be removed from virtio-mem devices.
        let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
            // No need to remove any virtio-mem mapping here as the container outlives all devices
            PciDeviceHandle::Vfio(vfio_pci_device) => {
                // Drop the bookkeeping for this device's MMIO regions,
                // matched by start address.
                for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
                    self.mmio_regions
                        .lock()
                        .unwrap()
                        .retain(|x| x.start != mmio_region.start)
                }

                (
                    Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    false,
                )
            }
            PciDeviceHandle::Virtio(virtio_pci_device) => {
                let dev = virtio_pci_device.lock().unwrap();
                // Unregister every ioeventfd that was registered against the
                // device's config BAR when it was added.
                let bar_addr = dev.config_bar_addr();
                for (event, addr) in dev.ioeventfds(bar_addr) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.address_manager
                        .vm
                        .unregister_ioevent(event, &io_addr)
                        .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
                }

                // If the device did its own DMA mappings (and is not behind
                // the IOMMU), unmap every guest memory region it had mapped.
                if let Some(dma_handler) = dev.dma_handler() {
                    if !iommu_attached {
                        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                            for region in zone.regions() {
                                let iova = region.start_addr().0;
                                let size = region.len();
                                dma_handler
                                    .unmap(iova, size)
                                    .map_err(DeviceManagerError::VirtioDmaUnmap)?;
                            }
                        }
                    }
                }

                (
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    Some(dev.virtio_device()),
                    dev.dma_handler().is_some() && !iommu_attached,
                )
            }
            PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
                let mut dev = vfio_user_pci_device.lock().unwrap();
                // vfio-user devices always have their guest memory regions
                // DMA-unmapped on removal.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        dev.dma_unmap(region)
                            .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
                    }
                }

                (
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    true,
                )
            }
        };

        // Detach this device's DMA mapping handler from every virtio-mem
        // device so future memory plug/unplug no longer notifies it.
        if remove_dma_handler {
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
                        pci_device_bdf.into(),
                    ))
                    .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
            }
        }

        // Free the allocated BARs
        pci_device
            .lock()
            .unwrap()
            .free_bars(
                &mut self.address_manager.allocator.lock().unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .mem32_allocator
                    .lock()
                    .unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .mem64_allocator
                    .lock()
                    .unwrap(),
            )
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            // First tear down any userspace memory mappings the device had
            // registered with the memory manager.
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the list and
        // buses where it was stored. At the end of this function, after
        // any_device, bus_device and pci_device are released, the actual
        // device will be dropped.
        Ok(())
    }

    /// Plugs an already-built virtio device (wrapped in a `MetaVirtioDevice`)
    /// behind a PCI transport and reports the resulting id/BDF pair to the
    /// caller. Also flags the new function in the PCIU (devices-up) bitmap so
    /// the guest picks it up on the next ACPI PCI rescan.
    fn hotplug_virtio_pci_device(
        &mut self,
        handle: MetaVirtioDevice,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        // Add the virtio device to the device manager list. This is important
        // as the list is used to notify virtio devices about memory updates
        // for instance.
        self.virtio_devices.push(handle.clone());

        // Only wire the device through the virtual IOMMU mapping when the
        // config asked for it.
        let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
            self.iommu_mapping.clone()
        } else {
            None
        };

        let bdf = self.add_virtio_pci_device(
            handle.virtio_device,
            &mapping,
            handle.id.clone(),
            handle.pci_segment,
            handle.dma_handler,
        )?;

        // Update the PCIU bitmap
        self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo { id: handle.id, bdf })
    }

    /// Returns true when `pci_segment_id` is listed in the platform
    /// configuration's `iommu_segments`; false when either the platform
    /// config or the segment list is absent.
    fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
        self.config
            .lock()
            .as_ref()
            .unwrap()
            .platform
            .as_ref()
            .map(|pc| {
                pc.iommu_segments
                    .as_ref()
                    .map(|v| v.contains(&pci_segment_id))
                    .unwrap_or_default()
            })
            .unwrap_or_default()
    }

    /// Hotplugs a virtio-block device. Rejects an `iommu` request when the
    /// target PCI segment is not an IOMMU segment.
    pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&disk_cfg.id)?;

        if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_block_device(disk_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-fs device.
    // NOTE(review): unlike the other add_* methods there is no iommu/segment
    // check here — presumably FsConfig has no iommu flag; confirm.
    pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&fs_cfg.id)?;

        let device = self.make_virtio_fs_device(fs_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-pmem device. Rejects an `iommu` request when the
    /// target PCI segment is not an IOMMU segment.
    pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&pmem_cfg.id)?;

        if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_pmem_device(pmem_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-net device. Rejects an `iommu` request when the
    /// target PCI segment is not an IOMMU segment.
    pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&net_cfg.id)?;

        if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_net_device(net_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a vDPA device. Rejects an `iommu` request when the target
    /// PCI segment is not an IOMMU segment.
    pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vdpa_cfg.id)?;

        if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_vdpa_device(vdpa_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-vsock device. Rejects an `iommu` request when the
    /// target PCI segment is not an IOMMU segment.
    pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vsock_cfg.id)?;

        if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_vsock_device(vsock_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Collects counters from every virtio device that exposes them, keyed
    /// by the device id.
    pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
        let mut counters = HashMap::new();

        for handle in &self.virtio_devices {
            let virtio_device = handle.virtio_device.lock().unwrap();
            if let Some(device_counters) = virtio_device.counters() {
                counters.insert(handle.id.clone(), device_counters.clone());
            }
        }

        counters
    }

    /// Requests the balloon device to resize to `size`; fails with
    /// `MissingVirtioBalloon` when no balloon device was configured.
    pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
        if let Some(balloon) = &self.balloon {
            return balloon
                .lock()
                .unwrap()
                .resize(size)
                .map_err(DeviceManagerError::VirtioBalloonResize);
        }

        warn!("No balloon setup: Can't resize the balloon");
        Err(DeviceManagerError::MissingVirtioBalloon)
    }

    /// Actual balloon size as reported by the balloon device, or 0 when no
    /// balloon device is present.
    pub fn balloon_size(&self) -> u64 {
        if let Some(balloon) = &self.balloon {
            return balloon.lock().unwrap().get_actual();
        }

        0
    }

    /// Shared handle to the device tree.
    pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
        self.device_tree.clone()
    }

    /// Signals a power-button press to the guest via the ACPI GED
    /// notification device.
    #[cfg(target_arch = "x86_64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    /// Signals a power-button press to the guest via both the GPIO device
    /// (device-tree boot) and the ACPI GED device (ACPI+UEFI boot).
    #[cfg(target_arch = "aarch64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // There are two use cases:
        // 1. Users will use direct kernel boot with device tree.
        // 2. Users will use ACPI+UEFI boot.

        // Trigger a GPIO pin 3 event to satisfy use case 1.
        self.gpio_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .trigger_key(3)
            .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
        // Trigger a GED power button event to satisfy use case 2.
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification);
    }

    /// The IOMMU's own BDF plus the BDFs of devices attached behind it,
    /// when a virtual IOMMU is present.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }

    /// Checks a user-supplied device identifier: ids starting with "__" are
    /// reserved (internally generated) and rejected, and the id must not
    /// already exist in the device tree. `None` is always accepted.
    fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
        if let Some(id) = id {
            if id.starts_with("__") {
                return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
            }

            if self.device_tree.lock().unwrap().contains_key(id) {
                return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
            }
        }

        Ok(())
    }

    /// ACPI platform addresses collected while building the devices.
    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
}

/// Finds the NUMA node whose memory zones contain `memory_zone_id`;
/// `None` when no node references that zone.
fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
            return Some(*numa_node_id);
        }
    }

    None
}

/// Finds the NUMA node owning `pci_segment_id`, defaulting to node 0 when
/// no node claims the segment.
fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.pci_segments.contains(&pci_segment_id) {
            return *numa_node_id;
        }
    }

    0
}

// Marker type used only to emit the TPM2 ACPI device node below.
struct TpmDevice {}

impl Aml for TpmDevice {
    // Emits a "TPM2" device with HID "MSFT0101", _STA 0xF (present/enabled),
    // and a fixed 32-bit memory resource covering the TPM MMIO window.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        aml::Device::new(
            "TPM2".into(),
            vec![
                &aml::Name::new("_HID".into(), &"MSFT0101"),
                &aml::Name::new("_STA".into(), &(0xF_usize)),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                        true,
                        layout::TPM_START.0 as u32,
                        layout::TPM_SIZE as u32,
                    )]),
                ),
            ],
        )
        .to_aml_bytes(sink)
    }
}

impl Aml for DeviceManager {
    /// Emits the DeviceManager's ACPI objects: the PCI hotplug controller
    /// (\_SB_.PHPR) with its MMIO-backed PCIU/PCID/B0EJ/PSEG fields and the
    /// PCEJ/PSCN methods, each PCI segment, the motherboard resource node
    /// (MBRD), an optional COM1 serial device, the _S5_ package, the power
    /// button (PWRB), an optional TPM2 node and the GED notification device.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // One PCNT (rescan) method call per PCI segment; PSCN below chains
        // them all.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                // The controller's register window is the DeviceManager's
                // own MMIO range (see the BusDevice impl further down).
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                // PCEJ(device, segment): ejects a device by writing its slot
                // bit into B0EJ after selecting the segment via PSEG.
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write PCI bus number (in first argument) to I/O port via field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        // Motherboard resources: one fixed memory descriptor per segment's
        // MMIO config space.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}

impl Pausable for DeviceManager {
    /// Pauses every migratable device registered in the device tree.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    /// Resumes every migratable device registered in the device tree.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    /// Snapshots the DeviceManager's own state plus a child snapshot for
    /// every migratable device in the device tree.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let mut migratable = migratable.lock().unwrap();
                snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
            }
        }

        Ok(snapshot)
    }
}

impl Transportable for DeviceManager {}

// Each Migratable hook simply fans out to every migratable device found in
// the device tree, short-circuiting on the first error.
impl Migratable for DeviceManager {
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    /// Merges every device's dirty-page ranges into a single table.
    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_migration()?;
            }
        }
        Ok(())
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}

// Register layout of the PCI hotplug controller MMIO window (matches the
// PCST field declarations in the AML above): four little-endian u32s.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;

// MMIO handler for the PCI hotplug controller registers. PCIU/PCID are
// read-to-clear bitmaps of devices that came up/down on the currently
// selected segment; B0EJ ejects on write; PSEG selects the segment.
impl BusDevice for DeviceManager {
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_up
                        .to_le_bytes(),
                );
                // Clear the PCIU bitmap
                self.pci_segments[self.selected_segment].pci_devices_up = 0;
            }
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_down
                        .to_le_bytes(),
                );
                // Clear the PCID bitmap
                self.pci_segments[self.selected_segment].pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty bitmap since the eject is always
                // taken care of right away during a write access.
                data.fill(0);
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        )
    }

    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
        match offset {
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let mut slot_bitmap = u32::from_le_bytes(data_array);

                // Eject every device whose slot bit is set; errors are
                // logged but do not stop the remaining ejections.
                while slot_bitmap > 0 {
                    let slot_id = slot_bitmap.trailing_zeros();
                    if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
                        error!("Failed ejecting device {}: {:?}", slot_id, e);
                    }
                    slot_bitmap &= !(1 << slot_id);
                }
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let selected_segment = u32::from_le_bytes(data_array) as usize;
                // An out-of-range segment selection is ignored (logged only),
                // leaving the previous selection in place.
                if selected_segment >= self.pci_segments.len() {
                    error!(
                        "Segment selection out of range: {} >= {}",
                        selected_segment,
                        self.pci_segments.len()
                    );
                    return None;
                }
                self.selected_segment = selected_segment;
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        );

        None
    }
}

impl Drop for DeviceManager {
    /// Resumes paused devices, shuts down every virtio device and restores
    /// the host terminal's original settings when they had been changed.
    fn drop(&mut self) {
        // Wake up the DeviceManager threads (mainly virtio device workers),
        // to avoid deadlock on waiting for paused/parked worker threads.
        if let Err(e) = self.resume() {
            error!("Error resuming DeviceManager: {:?}", e);
        }

        for handle in self.virtio_devices.drain(..) {
            handle.virtio_device.lock().unwrap().shutdown();
        }

        if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
            // SAFETY: FFI call
            let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
        }
    }
}