1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use crate::config::{ 13 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 14 VdpaConfig, VhostMode, VmConfig, VsockConfig, 15 }; 16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; 17 use crate::device_tree::{DeviceNode, DeviceTree}; 18 use crate::interrupt::LegacyUserspaceInterruptManager; 19 use crate::interrupt::MsiInterruptManager; 20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 21 use crate::pci_segment::PciSegment; 22 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 23 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 24 use crate::sigwinch_listener::start_sigwinch_listener; 25 use crate::GuestRegionMmap; 26 use crate::PciDeviceInfo; 27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; 28 use acpi_tables::sdt::GenericAddress; 29 use acpi_tables::{aml, Aml}; 30 use anyhow::anyhow; 31 use arch::layout; 32 #[cfg(target_arch = "x86_64")] 33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 34 use arch::NumaNodes; 35 #[cfg(target_arch = "aarch64")] 36 use arch::{DeviceType, MmioDeviceInfo}; 37 use block::{ 38 async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type, 39 fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio, 40 raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType, 41 }; 42 #[cfg(feature = "io_uring")] 43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; 44 #[cfg(target_arch = "x86_64")] 45 use devices::debug_console::DebugConsole; 46 #[cfg(target_arch = "aarch64")] 47 use devices::gic; 48 #[cfg(target_arch = "x86_64")] 49 use devices::ioapic; 50 #[cfg(target_arch = "aarch64")] 51 use devices::legacy::Pl011; 52 use devices::{ 53 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 54 }; 55 use hypervisor::{HypervisorType, IoEventAddress}; 56 use libc::{ 57 cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, 58 O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, 59 }; 60 use pci::{ 61 DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping, 62 VfioUserPciDevice, VfioUserPciDeviceError, 63 }; 64 use rate_limiter::group::RateLimiterGroup; 65 use seccompiler::SeccompAction; 66 use serde::{Deserialize, Serialize}; 67 use std::collections::{BTreeSet, HashMap}; 68 use std::fs::{read_link, File, OpenOptions}; 69 use std::io::{self, stdout, Seek, SeekFrom}; 70 use std::mem::zeroed; 71 use std::num::Wrapping; 72 use std::os::unix::fs::OpenOptionsExt; 73 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 74 use std::path::PathBuf; 75 use std::result; 76 use std::sync::{Arc, Mutex}; 77 use std::time::Instant; 78 use tracer::trace_scoped; 79 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 80 use virtio_devices::transport::VirtioTransport; 81 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 82 use virtio_devices::vhost_user::VhostUserConfig; 83 use virtio_devices::{ 84 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 85 }; 86 use virtio_devices::{Endpoint, IommuMapping}; 87 use vm_allocator::{AddressAllocator, SystemAllocator}; 88 use vm_device::dma_mapping::vfio::VfioDmaMapping; 89 use vm_device::dma_mapping::ExternalDmaMapping; 90 use vm_device::interrupt::{ 91 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 92 }; 93 use vm_device::{Bus, BusDevice, Resource}; 94 use vm_memory::guest_memory::FileOffset; 95 use vm_memory::GuestMemoryRegion; 96 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion}; 97 #[cfg(target_arch = "x86_64")] 98 use vm_memory::{GuestAddressSpace, GuestMemory}; 99 use vm_migration::{ 100 protocol::MemoryRangeTable, snapshot_from_id, versioned_state_from_id, Migratable, 101 MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, Transportable, 102 }; 103 use vm_virtio::AccessPlatform; 104 use vm_virtio::VirtioDeviceType; 105 use vmm_sys_util::eventfd::EventFd; 106 #[cfg(target_arch = "x86_64")] 107 use {devices::debug_console, devices::legacy::Serial}; 108 109 #[cfg(target_arch = "aarch64")] 110 const MMIO_LEN: u64 = 0x1000; 111 112 // Singleton devices / devices the user cannot name 113 #[cfg(target_arch = "x86_64")] 114 const IOAPIC_DEVICE_NAME: &str = "__ioapic"; 115 const SERIAL_DEVICE_NAME: &str = "__serial"; 116 #[cfg(target_arch = "x86_64")] 117 const DEBUGCON_DEVICE_NAME: &str = "__debug_console"; 118 #[cfg(target_arch = "aarch64")] 119 const GPIO_DEVICE_NAME: &str = "__gpio"; 120 const RNG_DEVICE_NAME: &str = "__rng"; 121 const IOMMU_DEVICE_NAME: &str = "__iommu"; 122 const BALLOON_DEVICE_NAME: &str = "__balloon"; 123 const CONSOLE_DEVICE_NAME: &str = "__console"; 124 const PVPANIC_DEVICE_NAME: &str = "__pvpanic"; 125 126 // Devices that the user may name and for which we generate 127 // identifiers if the user doesn't give one 128 const DISK_DEVICE_NAME_PREFIX: &str = "_disk"; 129 const FS_DEVICE_NAME_PREFIX: &str = "_fs"; 130 const NET_DEVICE_NAME_PREFIX: &str = "_net"; 131 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem"; 132 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa"; 133 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock"; 134 const WATCHDOG_DEVICE_NAME: &str = "__watchdog"; 135 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio"; 136 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user"; 137 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci"; 138 139 /// Errors associated with device manager 140 #[derive(Debug)] 141 pub enum DeviceManagerError { 142 /// Cannot create EventFd. 143 EventFd(io::Error), 144 145 /// Cannot open disk path 146 Disk(io::Error), 147 148 /// Cannot create vhost-user-net device 149 CreateVhostUserNet(virtio_devices::vhost_user::Error), 150 151 /// Cannot create virtio-blk device 152 CreateVirtioBlock(io::Error), 153 154 /// Cannot create virtio-net device 155 CreateVirtioNet(virtio_devices::net::Error), 156 157 /// Cannot create virtio-console device 158 CreateVirtioConsole(io::Error), 159 160 /// Cannot create virtio-rng device 161 CreateVirtioRng(io::Error), 162 163 /// Cannot create virtio-fs device 164 CreateVirtioFs(virtio_devices::vhost_user::Error), 165 166 /// Virtio-fs device was created without a socket. 167 NoVirtioFsSock, 168 169 /// Cannot create vhost-user-blk device 170 CreateVhostUserBlk(virtio_devices::vhost_user::Error), 171 172 /// Cannot create virtio-pmem device 173 CreateVirtioPmem(io::Error), 174 175 /// Cannot create vDPA device 176 CreateVdpa(virtio_devices::vdpa::Error), 177 178 /// Cannot create virtio-vsock device 179 CreateVirtioVsock(io::Error), 180 181 /// Cannot create tpm device 182 CreateTpmDevice(anyhow::Error), 183 184 /// Failed to convert Path to &str for the vDPA device. 185 CreateVdpaConvertPath, 186 187 /// Failed to convert Path to &str for the virtio-vsock device. 188 CreateVsockConvertPath, 189 190 /// Cannot create virtio-vsock backend 191 CreateVsockBackend(virtio_devices::vsock::VsockUnixError), 192 193 /// Cannot create virtio-iommu device 194 CreateVirtioIommu(io::Error), 195 196 /// Cannot create virtio-balloon device 197 CreateVirtioBalloon(io::Error), 198 199 /// Cannot create virtio-watchdog device 200 CreateVirtioWatchdog(io::Error), 201 202 /// Failed to parse disk image format 203 DetectImageType(io::Error), 204 205 /// Cannot open qcow disk path 206 QcowDeviceCreate(qcow::Error), 207 208 /// Cannot create serial manager 209 CreateSerialManager(SerialManagerError), 210 211 /// Cannot spawn the serial manager thread 212 SpawnSerialManager(SerialManagerError), 213 214 /// Cannot open tap interface 215 OpenTap(net_util::TapError), 216 217 /// Cannot allocate IRQ. 218 AllocateIrq, 219 220 /// Cannot configure the IRQ. 221 Irq(vmm_sys_util::errno::Error), 222 223 /// Cannot allocate PCI BARs 224 AllocateBars(pci::PciDeviceError), 225 226 /// Could not free the BARs associated with a PCI device. 227 FreePciBars(pci::PciDeviceError), 228 229 /// Cannot register ioevent. 230 RegisterIoevent(anyhow::Error), 231 232 /// Cannot unregister ioevent. 233 UnRegisterIoevent(anyhow::Error), 234 235 /// Cannot create virtio device 236 VirtioDevice(virtio_devices::transport::VirtioPciDeviceError), 237 238 /// Cannot add PCI device 239 AddPciDevice(pci::PciRootError), 240 241 /// Cannot open persistent memory file 242 PmemFileOpen(io::Error), 243 244 /// Cannot set persistent memory file size 245 PmemFileSetLen(io::Error), 246 247 /// Cannot find a memory range for persistent memory 248 PmemRangeAllocation, 249 250 /// Cannot find a memory range for virtio-fs 251 FsRangeAllocation, 252 253 /// Error creating serial output file 254 SerialOutputFileOpen(io::Error), 255 256 #[cfg(target_arch = "x86_64")] 257 /// Error creating debug-console output file 258 DebugconOutputFileOpen(io::Error), 259 260 /// Error creating console output file 261 ConsoleOutputFileOpen(io::Error), 262 263 /// Error creating serial pty 264 SerialPtyOpen(io::Error), 265 266 /// Error creating console pty 267 ConsolePtyOpen(io::Error), 268 269 /// Error creating console pty 270 DebugconPtyOpen(io::Error), 271 272 /// Error setting pty raw mode 273 SetPtyRaw(vmm_sys_util::errno::Error), 274 275 /// Error getting pty peer 276 GetPtyPeer(vmm_sys_util::errno::Error), 277 278 /// Cannot create a VFIO device 279 VfioCreate(vfio_ioctls::VfioError), 280 281 /// Cannot create a VFIO PCI device 282 VfioPciCreate(pci::VfioPciError), 283 284 /// Failed to map VFIO MMIO region. 285 VfioMapRegion(pci::VfioPciError), 286 287 /// Failed to DMA map VFIO device. 288 VfioDmaMap(vfio_ioctls::VfioError), 289 290 /// Failed to DMA unmap VFIO device. 291 VfioDmaUnmap(pci::VfioPciError), 292 293 /// Failed to create the passthrough device. 294 CreatePassthroughDevice(anyhow::Error), 295 296 /// Failed to memory map. 297 Mmap(io::Error), 298 299 /// Cannot add legacy device to Bus. 300 BusError(vm_device::BusError), 301 302 /// Failed to allocate IO port 303 AllocateIoPort, 304 305 /// Failed to allocate MMIO address 306 AllocateMmioAddress, 307 308 /// Failed to make hotplug notification 309 HotPlugNotification(io::Error), 310 311 /// Error from a memory manager operation 312 MemoryManager(MemoryManagerError), 313 314 /// Failed to create new interrupt source group. 315 CreateInterruptGroup(io::Error), 316 317 /// Failed to update interrupt source group. 318 UpdateInterruptGroup(io::Error), 319 320 /// Failed to create interrupt controller. 321 CreateInterruptController(interrupt_controller::Error), 322 323 /// Failed to create a new MmapRegion instance. 324 NewMmapRegion(vm_memory::mmap::MmapRegionError), 325 326 /// Failed to clone a File. 327 CloneFile(io::Error), 328 329 /// Failed to create socket file 330 CreateSocketFile(io::Error), 331 332 /// Failed to spawn the network backend 333 SpawnNetBackend(io::Error), 334 335 /// Failed to spawn the block backend 336 SpawnBlockBackend(io::Error), 337 338 /// Missing PCI bus. 339 NoPciBus, 340 341 /// Could not find an available device name. 342 NoAvailableDeviceName, 343 344 /// Missing PCI device. 345 MissingPciDevice, 346 347 /// Failed to remove a PCI device from the PCI bus. 348 RemoveDeviceFromPciBus(pci::PciRootError), 349 350 /// Failed to remove a bus device from the IO bus. 351 RemoveDeviceFromIoBus(vm_device::BusError), 352 353 /// Failed to remove a bus device from the MMIO bus. 354 RemoveDeviceFromMmioBus(vm_device::BusError), 355 356 /// Failed to find the device corresponding to a specific PCI b/d/f. 357 UnknownPciBdf(u32), 358 359 /// Not allowed to remove this type of device from the VM. 360 RemovalNotAllowed(vm_virtio::VirtioDeviceType), 361 362 /// Failed to find device corresponding to the given identifier. 363 UnknownDeviceId(String), 364 365 /// Failed to find an available PCI device ID. 366 NextPciDeviceId(pci::PciRootError), 367 368 /// Could not reserve the PCI device ID. 369 GetPciDeviceId(pci::PciRootError), 370 371 /// Could not give the PCI device ID back. 372 PutPciDeviceId(pci::PciRootError), 373 374 /// No disk path was specified when one was expected 375 NoDiskPath, 376 377 /// Failed to update guest memory for virtio device. 378 UpdateMemoryForVirtioDevice(virtio_devices::Error), 379 380 /// Cannot create virtio-mem device 381 CreateVirtioMem(io::Error), 382 383 /// Cannot find a memory range for virtio-mem memory 384 VirtioMemRangeAllocation, 385 386 /// Failed to update guest memory for VFIO PCI device. 387 UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError), 388 389 /// Trying to use a directory for pmem but no size specified 390 PmemWithDirectorySizeMissing, 391 392 /// Trying to use a size that is not multiple of 2MiB 393 PmemSizeNotAligned, 394 395 /// Could not find the node in the device tree. 396 MissingNode, 397 398 /// Resource was already found. 399 ResourceAlreadyExists, 400 401 /// Expected resources for virtio-pmem could not be found. 402 MissingVirtioPmemResources, 403 404 /// Missing PCI b/d/f from the DeviceNode. 405 MissingDeviceNodePciBdf, 406 407 /// No support for device passthrough 408 NoDevicePassthroughSupport, 409 410 /// No socket option support for console device 411 NoSocketOptionSupportForConsoleDevice, 412 413 /// Failed to resize virtio-balloon 414 VirtioBalloonResize(virtio_devices::balloon::Error), 415 416 /// Missing virtio-balloon, can't proceed as expected. 417 MissingVirtioBalloon, 418 419 /// Missing virtual IOMMU device 420 MissingVirtualIommu, 421 422 /// Failed to do power button notification 423 PowerButtonNotification(io::Error), 424 425 /// Failed to do AArch64 GPIO power button notification 426 #[cfg(target_arch = "aarch64")] 427 AArch64PowerButtonNotification(devices::legacy::GpioDeviceError), 428 429 /// Failed to set O_DIRECT flag to file descriptor 430 SetDirectIo, 431 432 /// Failed to create FixedVhdDiskAsync 433 CreateFixedVhdDiskAsync(io::Error), 434 435 /// Failed to create FixedVhdDiskSync 436 CreateFixedVhdDiskSync(io::Error), 437 438 /// Failed to create QcowDiskSync 439 CreateQcowDiskSync(qcow::Error), 440 441 /// Failed to create FixedVhdxDiskSync 442 CreateFixedVhdxDiskSync(vhdx::VhdxError), 443 444 /// Failed to add DMA mapping handler to virtio-mem device. 445 AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error), 446 447 /// Failed to remove DMA mapping handler from virtio-mem device. 448 RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error), 449 450 /// Failed to create vfio-user client 451 VfioUserCreateClient(vfio_user::Error), 452 453 /// Failed to create VFIO user device 454 VfioUserCreate(VfioUserPciDeviceError), 455 456 /// Failed to map region from VFIO user device into guest 457 VfioUserMapRegion(VfioUserPciDeviceError), 458 459 /// Failed to DMA map VFIO user device. 460 VfioUserDmaMap(VfioUserPciDeviceError), 461 462 /// Failed to DMA unmap VFIO user device. 463 VfioUserDmaUnmap(VfioUserPciDeviceError), 464 465 /// Failed to update memory mappings for VFIO user device 466 UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError), 467 468 /// Cannot duplicate file descriptor 469 DupFd(vmm_sys_util::errno::Error), 470 471 /// Failed to DMA map virtio device. 472 VirtioDmaMap(std::io::Error), 473 474 /// Failed to DMA unmap virtio device. 475 VirtioDmaUnmap(std::io::Error), 476 477 /// Cannot hotplug device behind vIOMMU 478 InvalidIommuHotplug, 479 480 /// Invalid identifier as it is not unique. 481 IdentifierNotUnique(String), 482 483 /// Invalid identifier 484 InvalidIdentifier(String), 485 486 /// Error activating virtio device 487 VirtioActivate(ActivateError), 488 489 /// Failed retrieving device state from snapshot 490 RestoreGetState(MigratableError), 491 492 /// Cannot create a PvPanic device 493 PvPanicCreate(devices::pvpanic::PvPanicError), 494 495 /// Cannot create a RateLimiterGroup 496 RateLimiterGroupCreate(rate_limiter::group::Error), 497 } 498 499 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>; 500 501 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10; 502 503 const TIOCSPTLCK: libc::c_int = 0x4004_5431; 504 const TIOCGTPEER: libc::c_int = 0x5441; 505 506 pub fn create_pty() -> io::Result<(File, File, PathBuf)> { 507 // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx 508 // This is done to try and use the devpts filesystem that 509 // could be available for use in the process's namespace first. 510 // Ideally these are all the same file though but different 511 // kernels could have things setup differently. 512 // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt 513 // for further details. 514 515 let custom_flags = libc::O_NONBLOCK; 516 let main = match OpenOptions::new() 517 .read(true) 518 .write(true) 519 .custom_flags(custom_flags) 520 .open("/dev/pts/ptmx") 521 { 522 Ok(f) => f, 523 _ => OpenOptions::new() 524 .read(true) 525 .write(true) 526 .custom_flags(custom_flags) 527 .open("/dev/ptmx")?, 528 }; 529 let mut unlock: libc::c_ulong = 0; 530 // SAFETY: FFI call into libc, trivially safe 531 unsafe { libc::ioctl(main.as_raw_fd(), TIOCSPTLCK as _, &mut unlock) }; 532 533 // SAFETY: FFI call into libc, trivially safe 534 let sub_fd = unsafe { 535 libc::ioctl( 536 main.as_raw_fd(), 537 TIOCGTPEER as _, 538 libc::O_NOCTTY | libc::O_RDWR, 539 ) 540 }; 541 if sub_fd == -1 { 542 return vmm_sys_util::errno::errno_result().map_err(|e| e.into()); 543 } 544 545 let proc_path = PathBuf::from(format!("/proc/self/fd/{sub_fd}")); 546 let path = read_link(proc_path)?; 547 548 // SAFETY: sub_fd is checked to be valid before being wrapped in File 549 Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path)) 550 } 551 552 #[derive(Default)] 553 pub struct Console { 554 console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>, 555 } 556 557 impl Console { 558 pub fn need_resize(&self) -> bool { 559 if let Some(_resizer) = self.console_resizer.as_ref() { 560 return true; 561 } 562 563 false 564 } 565 566 pub fn update_console_size(&self) { 567 if let Some(resizer) = self.console_resizer.as_ref() { 568 resizer.update_console_size() 569 } 570 } 571 } 572 573 pub(crate) struct AddressManager { 574 pub(crate) allocator: Arc<Mutex<SystemAllocator>>, 575 #[cfg(target_arch = "x86_64")] 576 pub(crate) io_bus: Arc<Bus>, 577 pub(crate) mmio_bus: Arc<Bus>, 578 pub(crate) vm: Arc<dyn hypervisor::Vm>, 579 device_tree: Arc<Mutex<DeviceTree>>, 580 pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 581 pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>, 582 } 583 584 impl DeviceRelocation for AddressManager { 585 fn move_bar( 586 &self, 587 old_base: u64, 588 new_base: u64, 589 len: u64, 590 pci_dev: &mut dyn PciDevice, 591 region_type: PciBarRegionType, 592 ) -> std::result::Result<(), std::io::Error> { 593 match region_type { 594 PciBarRegionType::IoRegion => { 595 #[cfg(target_arch = "x86_64")] 596 { 597 // Update system allocator 598 self.allocator 599 .lock() 600 .unwrap() 601 .free_io_addresses(GuestAddress(old_base), len as GuestUsize); 602 603 self.allocator 604 .lock() 605 .unwrap() 606 .allocate_io_addresses( 607 Some(GuestAddress(new_base)), 608 len as GuestUsize, 609 None, 610 ) 611 .ok_or_else(|| { 612 io::Error::new(io::ErrorKind::Other, "failed allocating new IO range") 613 })?; 614 615 // Update PIO bus 616 self.io_bus 617 .update_range(old_base, len, new_base, len) 618 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 619 } 620 #[cfg(target_arch = "aarch64")] 621 error!("I/O region is not supported"); 622 } 623 PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => { 624 let allocators = if region_type == PciBarRegionType::Memory32BitRegion { 625 &self.pci_mmio32_allocators 626 } else { 627 &self.pci_mmio64_allocators 628 }; 629 630 // Find the specific allocator that this BAR was allocated from and use it for new one 631 for allocator in allocators { 632 let allocator_base = allocator.lock().unwrap().base(); 633 let allocator_end = allocator.lock().unwrap().end(); 634 635 if old_base >= allocator_base.0 && old_base <= allocator_end.0 { 636 allocator 637 .lock() 638 .unwrap() 639 .free(GuestAddress(old_base), len as GuestUsize); 640 641 allocator 642 .lock() 643 .unwrap() 644 .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len)) 645 .ok_or_else(|| { 646 io::Error::new( 647 io::ErrorKind::Other, 648 "failed allocating new MMIO range", 649 ) 650 })?; 651 652 break; 653 } 654 } 655 656 // Update MMIO bus 657 self.mmio_bus 658 .update_range(old_base, len, new_base, len) 659 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 660 } 661 } 662 663 // Update the device_tree resources associated with the device 664 if let Some(id) = pci_dev.id() { 665 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) { 666 let mut resource_updated = false; 667 for resource in node.resources.iter_mut() { 668 if let Resource::PciBar { base, type_, .. } = resource { 669 if PciBarRegionType::from(*type_) == region_type && *base == old_base { 670 *base = new_base; 671 resource_updated = true; 672 break; 673 } 674 } 675 } 676 677 if !resource_updated { 678 return Err(io::Error::new( 679 io::ErrorKind::Other, 680 format!( 681 "Couldn't find a resource with base 0x{old_base:x} for device {id}" 682 ), 683 )); 684 } 685 } else { 686 return Err(io::Error::new( 687 io::ErrorKind::Other, 688 format!("Couldn't find device {id} from device tree"), 689 )); 690 } 691 } 692 693 let any_dev = pci_dev.as_any(); 694 if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() { 695 let bar_addr = virtio_pci_dev.config_bar_addr(); 696 if bar_addr == new_base { 697 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) { 698 let io_addr = IoEventAddress::Mmio(addr); 699 self.vm.unregister_ioevent(event, &io_addr).map_err(|e| { 700 io::Error::new( 701 io::ErrorKind::Other, 702 format!("failed to unregister ioevent: {e:?}"), 703 ) 704 })?; 705 } 706 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) { 707 let io_addr = IoEventAddress::Mmio(addr); 708 self.vm 709 .register_ioevent(event, &io_addr, None) 710 .map_err(|e| { 711 io::Error::new( 712 io::ErrorKind::Other, 713 format!("failed to register ioevent: {e:?}"), 714 ) 715 })?; 716 } 717 } else { 718 let virtio_dev = virtio_pci_dev.virtio_device(); 719 let mut virtio_dev = virtio_dev.lock().unwrap(); 720 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() { 721 if shm_regions.addr.raw_value() == old_base { 722 let mem_region = self.vm.make_user_memory_region( 723 shm_regions.mem_slot, 724 old_base, 725 shm_regions.len, 726 shm_regions.host_addr, 727 false, 728 false, 729 ); 730 731 self.vm.remove_user_memory_region(mem_region).map_err(|e| { 732 io::Error::new( 733 io::ErrorKind::Other, 734 format!("failed to remove user memory region: {e:?}"), 735 ) 736 })?; 737 738 // Create new mapping by inserting new region to KVM. 739 let mem_region = self.vm.make_user_memory_region( 740 shm_regions.mem_slot, 741 new_base, 742 shm_regions.len, 743 shm_regions.host_addr, 744 false, 745 false, 746 ); 747 748 self.vm.create_user_memory_region(mem_region).map_err(|e| { 749 io::Error::new( 750 io::ErrorKind::Other, 751 format!("failed to create user memory regions: {e:?}"), 752 ) 753 })?; 754 755 // Update shared memory regions to reflect the new mapping. 756 shm_regions.addr = GuestAddress(new_base); 757 virtio_dev.set_shm_regions(shm_regions).map_err(|e| { 758 io::Error::new( 759 io::ErrorKind::Other, 760 format!("failed to update shared memory regions: {e:?}"), 761 ) 762 })?; 763 } 764 } 765 } 766 } 767 768 pci_dev.move_bar(old_base, new_base) 769 } 770 } 771 772 #[derive(Serialize, Deserialize)] 773 struct DeviceManagerState { 774 device_tree: DeviceTree, 775 device_id_cnt: Wrapping<usize>, 776 } 777 778 #[derive(Debug)] 779 pub struct PtyPair { 780 pub main: File, 781 pub path: PathBuf, 782 } 783 784 impl Clone for PtyPair { 785 fn clone(&self) -> Self { 786 PtyPair { 787 main: self.main.try_clone().unwrap(), 788 path: self.path.clone(), 789 } 790 } 791 } 792 793 #[derive(Clone)] 794 pub enum PciDeviceHandle { 795 Vfio(Arc<Mutex<VfioPciDevice>>), 796 Virtio(Arc<Mutex<VirtioPciDevice>>), 797 VfioUser(Arc<Mutex<VfioUserPciDevice>>), 798 } 799 800 #[derive(Clone)] 801 struct MetaVirtioDevice { 802 virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 803 iommu: bool, 804 id: String, 805 pci_segment: u16, 806 dma_handler: Option<Arc<dyn ExternalDmaMapping>>, 807 } 808 809 #[derive(Default)] 810 pub struct AcpiPlatformAddresses { 811 pub pm_timer_address: Option<GenericAddress>, 812 pub reset_reg_address: Option<GenericAddress>, 813 pub sleep_control_reg_address: Option<GenericAddress>, 814 pub sleep_status_reg_address: Option<GenericAddress>, 815 } 816 817 pub struct DeviceManager { 818 // The underlying hypervisor 819 hypervisor_type: HypervisorType, 820 821 // Manage address space related to devices 822 address_manager: Arc<AddressManager>, 823 824 // Console abstraction 825 console: Arc<Console>, 826 827 // console PTY 828 console_pty: Option<Arc<Mutex<PtyPair>>>, 829 830 // serial PTY 831 serial_pty: Option<Arc<Mutex<PtyPair>>>, 832 833 // debug-console PTY 834 debug_console_pty: Option<Arc<Mutex<PtyPair>>>, 835 836 // Serial Manager 837 serial_manager: Option<Arc<SerialManager>>, 838 839 // pty foreground status, 840 console_resize_pipe: Option<Arc<File>>, 841 842 // To restore on exit. 843 original_termios_opt: Arc<Mutex<Option<termios>>>, 844 845 // Interrupt controller 846 #[cfg(target_arch = "x86_64")] 847 interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>, 848 #[cfg(target_arch = "aarch64")] 849 interrupt_controller: Option<Arc<Mutex<gic::Gic>>>, 850 851 // Things to be added to the commandline (e.g. aarch64 early console) 852 #[cfg(target_arch = "aarch64")] 853 cmdline_additions: Vec<String>, 854 855 // ACPI GED notification device 856 ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>, 857 858 // VM configuration 859 config: Arc<Mutex<VmConfig>>, 860 861 // Memory Manager 862 memory_manager: Arc<Mutex<MemoryManager>>, 863 864 // CPU Manager 865 cpu_manager: Arc<Mutex<CpuManager>>, 866 867 // The virtio devices on the system 868 virtio_devices: Vec<MetaVirtioDevice>, 869 870 // List of bus devices 871 // Let the DeviceManager keep strong references to the BusDevice devices. 872 // This allows the IO and MMIO buses to be provided with Weak references, 873 // which prevents cyclic dependencies. 874 bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>, 875 876 // Counter to keep track of the consumed device IDs. 877 device_id_cnt: Wrapping<usize>, 878 879 pci_segments: Vec<PciSegment>, 880 881 #[cfg_attr(target_arch = "aarch64", allow(dead_code))] 882 // MSI Interrupt Manager 883 msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>, 884 885 #[cfg_attr(feature = "mshv", allow(dead_code))] 886 // Legacy Interrupt Manager 887 legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>, 888 889 // Passthrough device handle 890 passthrough_device: Option<VfioDeviceFd>, 891 892 // VFIO container 893 // Only one container can be created, therefore it is stored as part of the 894 // DeviceManager to be reused. 895 vfio_container: Option<Arc<VfioContainer>>, 896 897 // Paravirtualized IOMMU 898 iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>, 899 iommu_mapping: Option<Arc<IommuMapping>>, 900 901 // PCI information about devices attached to the paravirtualized IOMMU 902 // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF 903 // representing the devices attached to the virtual IOMMU. This is useful 904 // information for filling the ACPI VIOT table. 905 iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>, 906 907 // Tree of devices, representing the dependencies between devices. 908 // Useful for introspection, snapshot and restore. 909 device_tree: Arc<Mutex<DeviceTree>>, 910 911 // Exit event 912 exit_evt: EventFd, 913 reset_evt: EventFd, 914 915 #[cfg(target_arch = "aarch64")] 916 id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>, 917 918 // seccomp action 919 seccomp_action: SeccompAction, 920 921 // List of guest NUMA nodes. 922 numa_nodes: NumaNodes, 923 924 // Possible handle to the virtio-balloon device 925 balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>, 926 927 // Virtio Device activation EventFd to allow the VMM thread to trigger device 928 // activation and thus start the threads from the VMM thread 929 activate_evt: EventFd, 930 931 acpi_address: GuestAddress, 932 933 selected_segment: usize, 934 935 // Possible handle to the virtio-mem device 936 virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>, 937 938 #[cfg(target_arch = "aarch64")] 939 // GPIO device for AArch64 940 gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>, 941 942 // pvpanic device 943 pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>, 944 945 // Flag to force setting the iommu on virtio devices 946 force_iommu: bool, 947 948 // io_uring availability if detected 949 io_uring_supported: Option<bool>, 950 951 // aio availability if detected 952 aio_supported: Option<bool>, 953 954 // List of unique identifiers provided at boot through the configuration. 955 boot_id_list: BTreeSet<String>, 956 957 // Start time of the VM 958 timestamp: Instant, 959 960 // Pending activations 961 pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>, 962 963 // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers 964 acpi_platform_addresses: AcpiPlatformAddresses, 965 966 snapshot: Option<Snapshot>, 967 968 rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>, 969 } 970 971 impl DeviceManager { 972 #[allow(clippy::too_many_arguments)] 973 pub fn new( 974 #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>, 975 mmio_bus: Arc<Bus>, 976 hypervisor_type: HypervisorType, 977 vm: Arc<dyn hypervisor::Vm>, 978 config: Arc<Mutex<VmConfig>>, 979 memory_manager: Arc<Mutex<MemoryManager>>, 980 cpu_manager: Arc<Mutex<CpuManager>>, 981 exit_evt: EventFd, 982 reset_evt: EventFd, 983 seccomp_action: SeccompAction, 984 numa_nodes: NumaNodes, 985 activate_evt: &EventFd, 986 force_iommu: bool, 987 boot_id_list: BTreeSet<String>, 988 timestamp: Instant, 989 snapshot: Option<Snapshot>, 990 dynamic: bool, 991 ) -> DeviceManagerResult<Arc<Mutex<Self>>> { 992 trace_scoped!("DeviceManager::new"); 993 994 let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() { 995 let state: DeviceManagerState = snapshot.to_state().unwrap(); 996 ( 997 Arc::new(Mutex::new(state.device_tree.clone())), 998 state.device_id_cnt, 999 ) 1000 } else { 1001 (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0)) 1002 }; 1003 1004 let num_pci_segments = 1005 if let Some(platform_config) = config.lock().unwrap().platform.as_ref() { 1006 platform_config.num_pci_segments 1007 } else { 1008 1 1009 }; 1010 1011 let create_mmio_allocators = |start, end, num_pci_segments, alignment| { 1012 // Start each PCI segment mmio range on an aligned boundary 1013 let pci_segment_mmio_size = 1014 (end - start + 1) / (alignment * num_pci_segments as u64) * alignment; 1015 1016 let mut mmio_allocators = vec![]; 1017 for i in 0..num_pci_segments as u64 { 1018 let mmio_start = start + i * pci_segment_mmio_size; 1019 let allocator = Arc::new(Mutex::new( 1020 AddressAllocator::new(GuestAddress(mmio_start), pci_segment_mmio_size).unwrap(), 1021 )); 1022 mmio_allocators.push(allocator) 1023 } 1024 1025 mmio_allocators 1026 }; 1027 1028 let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0; 1029 let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE; 1030 let pci_mmio32_allocators = create_mmio_allocators( 1031 start_of_mmio32_area, 1032 end_of_mmio32_area, 1033 num_pci_segments, 1034 4 << 10, 1035 ); 1036 1037 let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0; 1038 let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0; 1039 let pci_mmio64_allocators = create_mmio_allocators( 1040 start_of_mmio64_area, 1041 end_of_mmio64_area, 1042 num_pci_segments, 1043 4 << 30, 1044 ); 1045 1046 let address_manager = Arc::new(AddressManager { 1047 allocator: memory_manager.lock().unwrap().allocator(), 1048 #[cfg(target_arch = "x86_64")] 1049 io_bus, 1050 mmio_bus, 1051 vm: vm.clone(), 1052 device_tree: Arc::clone(&device_tree), 1053 pci_mmio32_allocators, 1054 pci_mmio64_allocators, 1055 }); 1056 1057 // First we create the MSI interrupt manager, the legacy one is created 1058 // later, after the IOAPIC device creation. 1059 // The reason we create the MSI one first is because the IOAPIC needs it, 1060 // and then the legacy interrupt manager needs an IOAPIC. So we're 1061 // handling a linear dependency chain: 1062 // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager. 1063 let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> = 1064 Arc::new(MsiInterruptManager::new( 1065 Arc::clone(&address_manager.allocator), 1066 vm, 1067 )); 1068 1069 let acpi_address = address_manager 1070 .allocator 1071 .lock() 1072 .unwrap() 1073 .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None) 1074 .ok_or(DeviceManagerError::AllocateIoPort)?; 1075 1076 let mut pci_irq_slots = [0; 32]; 1077 PciSegment::reserve_legacy_interrupts_for_pci_devices( 1078 &address_manager, 1079 &mut pci_irq_slots, 1080 )?; 1081 1082 let mut pci_segments = vec![PciSegment::new_default_segment( 1083 &address_manager, 1084 Arc::clone(&address_manager.pci_mmio32_allocators[0]), 1085 Arc::clone(&address_manager.pci_mmio64_allocators[0]), 1086 &pci_irq_slots, 1087 )?]; 1088 1089 for i in 1..num_pci_segments as usize { 1090 pci_segments.push(PciSegment::new( 1091 i as u16, 1092 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16), 1093 &address_manager, 1094 Arc::clone(&address_manager.pci_mmio32_allocators[i]), 1095 Arc::clone(&address_manager.pci_mmio64_allocators[i]), 1096 &pci_irq_slots, 1097 )?); 1098 } 1099 1100 if dynamic { 1101 let acpi_address = address_manager 1102 .allocator 1103 .lock() 1104 .unwrap() 1105 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None) 1106 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 1107 1108 address_manager 1109 .mmio_bus 1110 .insert( 1111 cpu_manager.clone(), 1112 acpi_address.0, 1113 CPU_MANAGER_ACPI_SIZE as u64, 1114 ) 1115 .map_err(DeviceManagerError::BusError)?; 1116 1117 cpu_manager.lock().unwrap().set_acpi_address(acpi_address); 1118 } 1119 1120 let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new(); 1121 if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() { 1122 for rate_limit_group_cfg in rate_limit_groups_cfg { 1123 let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config; 1124 let bw = rate_limit_cfg.bandwidth.unwrap_or_default(); 1125 let ops = rate_limit_cfg.ops.unwrap_or_default(); 1126 let mut rate_limit_group = RateLimiterGroup::new( 1127 &rate_limit_group_cfg.id, 1128 bw.size, 1129 bw.one_time_burst.unwrap_or(0), 1130 bw.refill_time, 1131 ops.size, 1132 ops.one_time_burst.unwrap_or(0), 1133 ops.refill_time, 1134 ) 1135 .map_err(DeviceManagerError::RateLimiterGroupCreate)?; 1136 1137 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?; 1138 1139 rate_limit_group.start_thread(exit_evt).unwrap(); 1140 rate_limit_groups 1141 .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group)); 1142 } 1143 } 1144 1145 let device_manager = DeviceManager { 1146 hypervisor_type, 1147 address_manager: Arc::clone(&address_manager), 1148 console: Arc::new(Console::default()), 1149 interrupt_controller: None, 1150 #[cfg(target_arch = "aarch64")] 1151 cmdline_additions: Vec::new(), 1152 ged_notification_device: None, 1153 config, 1154 memory_manager, 1155 cpu_manager, 1156 virtio_devices: Vec::new(), 1157 bus_devices: Vec::new(), 1158 device_id_cnt, 1159 msi_interrupt_manager, 1160 legacy_interrupt_manager: None, 1161 passthrough_device: None, 1162 vfio_container: None, 1163 iommu_device: None, 1164 iommu_mapping: None, 1165 iommu_attached_devices: None, 1166 pci_segments, 1167 device_tree, 1168 exit_evt, 1169 reset_evt, 1170 #[cfg(target_arch = "aarch64")] 1171 id_to_dev_info: HashMap::new(), 1172 seccomp_action, 1173 numa_nodes, 1174 balloon: None, 1175 activate_evt: activate_evt 1176 .try_clone() 1177 .map_err(DeviceManagerError::EventFd)?, 1178 acpi_address, 1179 selected_segment: 0, 1180 serial_pty: None, 1181 serial_manager: None, 1182 console_pty: None, 1183 debug_console_pty: None, 1184 console_resize_pipe: None, 1185 original_termios_opt: Arc::new(Mutex::new(None)), 1186 virtio_mem_devices: Vec::new(), 1187 #[cfg(target_arch = "aarch64")] 1188 gpio_device: None, 1189 pvpanic_device: None, 1190 force_iommu, 1191 io_uring_supported: None, 1192 aio_supported: None, 1193 boot_id_list, 1194 timestamp, 1195 pending_activations: Arc::new(Mutex::new(Vec::default())), 1196 acpi_platform_addresses: AcpiPlatformAddresses::default(), 1197 snapshot, 1198 rate_limit_groups, 1199 }; 1200 1201 let device_manager = Arc::new(Mutex::new(device_manager)); 1202 1203 address_manager 1204 .mmio_bus 1205 .insert( 1206 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>, 1207 acpi_address.0, 1208 DEVICE_MANAGER_ACPI_SIZE as u64, 1209 ) 1210 .map_err(DeviceManagerError::BusError)?; 1211 1212 Ok(device_manager) 1213 } 1214 1215 pub fn serial_pty(&self) -> Option<PtyPair> { 1216 self.serial_pty 1217 .as_ref() 1218 .map(|pty| pty.lock().unwrap().clone()) 1219 } 1220 1221 pub fn console_pty(&self) -> Option<PtyPair> { 1222 self.console_pty 1223 .as_ref() 1224 .map(|pty| pty.lock().unwrap().clone()) 1225 } 1226 1227 pub fn debug_console_pty(&self) -> Option<PtyPair> { 1228 self.debug_console_pty 1229 .as_ref() 1230 .map(|pty| pty.lock().unwrap().clone()) 1231 } 1232 1233 pub fn console_resize_pipe(&self) -> Option<Arc<File>> { 1234 self.console_resize_pipe.clone() 1235 } 1236 1237 pub fn create_devices( 1238 &mut self, 1239 serial_pty: Option<PtyPair>, 1240 console_pty: Option<PtyPair>, 1241 debug_console_pty: Option<PtyPair>, 1242 console_resize_pipe: Option<File>, 1243 original_termios_opt: Arc<Mutex<Option<termios>>>, 1244 ) -> DeviceManagerResult<()> { 1245 trace_scoped!("create_devices"); 1246 1247 let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new(); 1248 1249 let interrupt_controller = self.add_interrupt_controller()?; 1250 1251 self.cpu_manager 1252 .lock() 1253 .unwrap() 1254 .set_interrupt_controller(interrupt_controller.clone()); 1255 1256 // Now we can create the legacy interrupt manager, which needs the freshly 1257 // formed IOAPIC device. 1258 let legacy_interrupt_manager: Arc< 1259 dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>, 1260 > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone( 1261 &interrupt_controller, 1262 ))); 1263 1264 { 1265 if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() { 1266 self.address_manager 1267 .mmio_bus 1268 .insert( 1269 Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>, 1270 acpi_address.0, 1271 MEMORY_MANAGER_ACPI_SIZE as u64, 1272 ) 1273 .map_err(DeviceManagerError::BusError)?; 1274 } 1275 } 1276 1277 #[cfg(target_arch = "x86_64")] 1278 self.add_legacy_devices( 1279 self.reset_evt 1280 .try_clone() 1281 .map_err(DeviceManagerError::EventFd)?, 1282 )?; 1283 1284 #[cfg(target_arch = "aarch64")] 1285 self.add_legacy_devices(&legacy_interrupt_manager)?; 1286 1287 { 1288 self.ged_notification_device = self.add_acpi_devices( 1289 &legacy_interrupt_manager, 1290 self.reset_evt 1291 .try_clone() 1292 .map_err(DeviceManagerError::EventFd)?, 1293 self.exit_evt 1294 .try_clone() 1295 .map_err(DeviceManagerError::EventFd)?, 1296 )?; 1297 } 1298 1299 self.original_termios_opt = original_termios_opt; 1300 1301 self.console = self.add_console_devices( 1302 &legacy_interrupt_manager, 1303 &mut virtio_devices, 1304 serial_pty, 1305 console_pty, 1306 debug_console_pty, 1307 console_resize_pipe, 1308 )?; 1309 1310 if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() { 1311 let tpm_dev = self.add_tpm_device(tpm.socket.clone())?; 1312 self.bus_devices 1313 .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>) 1314 } 1315 self.legacy_interrupt_manager = Some(legacy_interrupt_manager); 1316 1317 virtio_devices.append(&mut self.make_virtio_devices()?); 1318 1319 self.add_pci_devices(virtio_devices.clone())?; 1320 1321 self.virtio_devices = virtio_devices; 1322 1323 if self.config.clone().lock().unwrap().pvpanic { 1324 self.pvpanic_device = self.add_pvpanic_device()?; 1325 } 1326 1327 Ok(()) 1328 } 1329 1330 fn state(&self) -> DeviceManagerState { 1331 DeviceManagerState { 1332 device_tree: self.device_tree.lock().unwrap().clone(), 1333 device_id_cnt: self.device_id_cnt, 1334 } 1335 } 1336 1337 fn get_msi_iova_space(&mut self) -> (u64, u64) { 1338 #[cfg(target_arch = "aarch64")] 1339 { 1340 let vcpus = self.config.lock().unwrap().cpus.boot_vcpus; 1341 let vgic_config = gic::Gic::create_default_config(vcpus.into()); 1342 ( 1343 vgic_config.msi_addr, 1344 vgic_config.msi_addr + vgic_config.msi_size - 1, 1345 ) 1346 } 1347 #[cfg(target_arch = "x86_64")] 1348 (0xfee0_0000, 0xfeef_ffff) 1349 } 1350 1351 #[cfg(target_arch = "aarch64")] 1352 /// Gets the information of the devices registered up to some point in time. 1353 pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> { 1354 &self.id_to_dev_info 1355 } 1356 1357 #[allow(unused_variables)] 1358 fn add_pci_devices( 1359 &mut self, 1360 virtio_devices: Vec<MetaVirtioDevice>, 1361 ) -> DeviceManagerResult<()> { 1362 let iommu_id = String::from(IOMMU_DEVICE_NAME); 1363 1364 let iommu_device = if self.config.lock().unwrap().iommu { 1365 let (device, mapping) = virtio_devices::Iommu::new( 1366 iommu_id.clone(), 1367 self.seccomp_action.clone(), 1368 self.exit_evt 1369 .try_clone() 1370 .map_err(DeviceManagerError::EventFd)?, 1371 self.get_msi_iova_space(), 1372 versioned_state_from_id(self.snapshot.as_ref(), iommu_id.as_str()) 1373 .map_err(DeviceManagerError::RestoreGetState)?, 1374 ) 1375 .map_err(DeviceManagerError::CreateVirtioIommu)?; 1376 let device = Arc::new(Mutex::new(device)); 1377 self.iommu_device = Some(Arc::clone(&device)); 1378 self.iommu_mapping = Some(mapping); 1379 1380 // Fill the device tree with a new node. In case of restore, we 1381 // know there is nothing to do, so we can simply override the 1382 // existing entry. 1383 self.device_tree 1384 .lock() 1385 .unwrap() 1386 .insert(iommu_id.clone(), device_node!(iommu_id, device)); 1387 1388 Some(device) 1389 } else { 1390 None 1391 }; 1392 1393 let mut iommu_attached_devices = Vec::new(); 1394 { 1395 for handle in virtio_devices { 1396 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 1397 self.iommu_mapping.clone() 1398 } else { 1399 None 1400 }; 1401 1402 let dev_id = self.add_virtio_pci_device( 1403 handle.virtio_device, 1404 &mapping, 1405 handle.id, 1406 handle.pci_segment, 1407 handle.dma_handler, 1408 )?; 1409 1410 if handle.iommu { 1411 iommu_attached_devices.push(dev_id); 1412 } 1413 } 1414 1415 let mut vfio_iommu_device_ids = self.add_vfio_devices()?; 1416 iommu_attached_devices.append(&mut vfio_iommu_device_ids); 1417 1418 let mut vfio_user_iommu_device_ids = self.add_user_devices()?; 1419 iommu_attached_devices.append(&mut vfio_user_iommu_device_ids); 1420 1421 // Add all devices from forced iommu segments 1422 if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() { 1423 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { 1424 for segment in iommu_segments { 1425 for device in 0..32 { 1426 let bdf = PciBdf::new(*segment, 0, device, 0); 1427 if !iommu_attached_devices.contains(&bdf) { 1428 iommu_attached_devices.push(bdf); 1429 } 1430 } 1431 } 1432 } 1433 } 1434 1435 if let Some(iommu_device) = iommu_device { 1436 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?; 1437 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices)); 1438 } 1439 } 1440 1441 for segment in &self.pci_segments { 1442 #[cfg(target_arch = "x86_64")] 1443 if let Some(pci_config_io) = segment.pci_config_io.as_ref() { 1444 self.bus_devices 1445 .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>); 1446 } 1447 1448 self.bus_devices 1449 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>); 1450 } 1451 1452 Ok(()) 1453 } 1454 1455 #[cfg(target_arch = "aarch64")] 1456 fn add_interrupt_controller( 1457 &mut self, 1458 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1459 let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new( 1460 gic::Gic::new( 1461 self.config.lock().unwrap().cpus.boot_vcpus, 1462 Arc::clone(&self.msi_interrupt_manager), 1463 self.address_manager.vm.clone(), 1464 ) 1465 .map_err(DeviceManagerError::CreateInterruptController)?, 1466 )); 1467 1468 self.interrupt_controller = Some(interrupt_controller.clone()); 1469 1470 // Restore the vGic if this is in the process of restoration 1471 let id = String::from(gic::GIC_SNAPSHOT_ID); 1472 if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) { 1473 // PMU support is optional. Nothing should be impacted if the PMU initialization failed. 1474 if self 1475 .cpu_manager 1476 .lock() 1477 .unwrap() 1478 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16) 1479 .is_err() 1480 { 1481 info!("Failed to initialize PMU"); 1482 } 1483 1484 let vgic_state = vgic_snapshot 1485 .to_state() 1486 .map_err(DeviceManagerError::RestoreGetState)?; 1487 let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states(); 1488 interrupt_controller 1489 .lock() 1490 .unwrap() 1491 .restore_vgic(vgic_state, &saved_vcpu_states) 1492 .unwrap(); 1493 } 1494 1495 self.device_tree 1496 .lock() 1497 .unwrap() 1498 .insert(id.clone(), device_node!(id, interrupt_controller)); 1499 1500 Ok(interrupt_controller) 1501 } 1502 1503 #[cfg(target_arch = "aarch64")] 1504 pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> { 1505 self.interrupt_controller.as_ref() 1506 } 1507 1508 #[cfg(target_arch = "x86_64")] 1509 fn add_interrupt_controller( 1510 &mut self, 1511 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1512 let id = String::from(IOAPIC_DEVICE_NAME); 1513 1514 // Create IOAPIC 1515 let interrupt_controller = Arc::new(Mutex::new( 1516 ioapic::Ioapic::new( 1517 id.clone(), 1518 APIC_START, 1519 Arc::clone(&self.msi_interrupt_manager), 1520 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1521 .map_err(DeviceManagerError::RestoreGetState)?, 1522 ) 1523 .map_err(DeviceManagerError::CreateInterruptController)?, 1524 )); 1525 1526 self.interrupt_controller = Some(interrupt_controller.clone()); 1527 1528 self.address_manager 1529 .mmio_bus 1530 .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE) 1531 .map_err(DeviceManagerError::BusError)?; 1532 1533 self.bus_devices 1534 .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>); 1535 1536 // Fill the device tree with a new node. In case of restore, we 1537 // know there is nothing to do, so we can simply override the 1538 // existing entry. 1539 self.device_tree 1540 .lock() 1541 .unwrap() 1542 .insert(id.clone(), device_node!(id, interrupt_controller)); 1543 1544 Ok(interrupt_controller) 1545 } 1546 1547 fn add_acpi_devices( 1548 &mut self, 1549 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1550 reset_evt: EventFd, 1551 exit_evt: EventFd, 1552 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> { 1553 let vcpus_kill_signalled = self 1554 .cpu_manager 1555 .lock() 1556 .unwrap() 1557 .vcpus_kill_signalled() 1558 .clone(); 1559 let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new( 1560 exit_evt, 1561 reset_evt, 1562 vcpus_kill_signalled, 1563 ))); 1564 1565 self.bus_devices 1566 .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>); 1567 1568 #[cfg(target_arch = "x86_64")] 1569 { 1570 let shutdown_pio_address: u16 = 0x600; 1571 1572 self.address_manager 1573 .allocator 1574 .lock() 1575 .unwrap() 1576 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None) 1577 .ok_or(DeviceManagerError::AllocateIoPort)?; 1578 1579 self.address_manager 1580 .io_bus 1581 .insert(shutdown_device, shutdown_pio_address.into(), 0x4) 1582 .map_err(DeviceManagerError::BusError)?; 1583 1584 self.acpi_platform_addresses.sleep_control_reg_address = 1585 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1586 self.acpi_platform_addresses.sleep_status_reg_address = 1587 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1588 self.acpi_platform_addresses.reset_reg_address = 1589 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address)); 1590 } 1591 1592 let ged_irq = self 1593 .address_manager 1594 .allocator 1595 .lock() 1596 .unwrap() 1597 .allocate_irq() 1598 .unwrap(); 1599 let interrupt_group = interrupt_manager 1600 .create_group(LegacyIrqGroupConfig { 1601 irq: ged_irq as InterruptIndex, 1602 }) 1603 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1604 let ged_address = self 1605 .address_manager 1606 .allocator 1607 .lock() 1608 .unwrap() 1609 .allocate_platform_mmio_addresses( 1610 None, 1611 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1612 None, 1613 ) 1614 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 1615 let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new( 1616 interrupt_group, 1617 ged_irq, 1618 ged_address, 1619 ))); 1620 self.address_manager 1621 .mmio_bus 1622 .insert( 1623 ged_device.clone(), 1624 ged_address.0, 1625 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1626 ) 1627 .map_err(DeviceManagerError::BusError)?; 1628 self.bus_devices 1629 .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>); 1630 1631 let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new())); 1632 1633 self.bus_devices 1634 .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>); 1635 1636 #[cfg(target_arch = "x86_64")] 1637 { 1638 let pm_timer_pio_address: u16 = 0x608; 1639 1640 self.address_manager 1641 .allocator 1642 .lock() 1643 .unwrap() 1644 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None) 1645 .ok_or(DeviceManagerError::AllocateIoPort)?; 1646 1647 self.address_manager 1648 .io_bus 1649 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4) 1650 .map_err(DeviceManagerError::BusError)?; 1651 1652 self.acpi_platform_addresses.pm_timer_address = 1653 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address)); 1654 } 1655 1656 Ok(Some(ged_device)) 1657 } 1658 1659 #[cfg(target_arch = "x86_64")] 1660 fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> { 1661 let vcpus_kill_signalled = self 1662 .cpu_manager 1663 .lock() 1664 .unwrap() 1665 .vcpus_kill_signalled() 1666 .clone(); 1667 // Add a shutdown device (i8042) 1668 let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new( 1669 reset_evt.try_clone().unwrap(), 1670 vcpus_kill_signalled.clone(), 1671 ))); 1672 1673 self.bus_devices 1674 .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>); 1675 1676 self.address_manager 1677 .io_bus 1678 .insert(i8042, 0x61, 0x4) 1679 .map_err(DeviceManagerError::BusError)?; 1680 { 1681 // Add a CMOS emulated device 1682 let mem_size = self 1683 .memory_manager 1684 .lock() 1685 .unwrap() 1686 .guest_memory() 1687 .memory() 1688 .last_addr() 1689 .0 1690 + 1; 1691 let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size); 1692 let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0); 1693 1694 let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new( 1695 mem_below_4g, 1696 mem_above_4g, 1697 reset_evt, 1698 Some(vcpus_kill_signalled), 1699 ))); 1700 1701 self.bus_devices 1702 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>); 1703 1704 self.address_manager 1705 .io_bus 1706 .insert(cmos, 0x70, 0x2) 1707 .map_err(DeviceManagerError::BusError)?; 1708 1709 let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new())); 1710 1711 self.bus_devices 1712 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>); 1713 1714 self.address_manager 1715 .io_bus 1716 .insert(fwdebug, 0x402, 0x1) 1717 .map_err(DeviceManagerError::BusError)?; 1718 } 1719 1720 // 0x80 debug port 1721 let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp))); 1722 self.bus_devices 1723 .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>); 1724 self.address_manager 1725 .io_bus 1726 .insert(debug_port, 0x80, 0x1) 1727 .map_err(DeviceManagerError::BusError)?; 1728 1729 Ok(()) 1730 } 1731 1732 #[cfg(target_arch = "aarch64")] 1733 fn add_legacy_devices( 1734 &mut self, 1735 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1736 ) -> DeviceManagerResult<()> { 1737 // Add a RTC device 1738 let rtc_irq = self 1739 .address_manager 1740 .allocator 1741 .lock() 1742 .unwrap() 1743 .allocate_irq() 1744 .unwrap(); 1745 1746 let interrupt_group = interrupt_manager 1747 .create_group(LegacyIrqGroupConfig { 1748 irq: rtc_irq as InterruptIndex, 1749 }) 1750 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1751 1752 let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group))); 1753 1754 self.bus_devices 1755 .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>); 1756 1757 let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START; 1758 1759 self.address_manager 1760 .mmio_bus 1761 .insert(rtc_device, addr.0, MMIO_LEN) 1762 .map_err(DeviceManagerError::BusError)?; 1763 1764 self.id_to_dev_info.insert( 1765 (DeviceType::Rtc, "rtc".to_string()), 1766 MmioDeviceInfo { 1767 addr: addr.0, 1768 len: MMIO_LEN, 1769 irq: rtc_irq, 1770 }, 1771 ); 1772 1773 // Add a GPIO device 1774 let id = String::from(GPIO_DEVICE_NAME); 1775 let gpio_irq = self 1776 .address_manager 1777 .allocator 1778 .lock() 1779 .unwrap() 1780 .allocate_irq() 1781 .unwrap(); 1782 1783 let interrupt_group = interrupt_manager 1784 .create_group(LegacyIrqGroupConfig { 1785 irq: gpio_irq as InterruptIndex, 1786 }) 1787 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1788 1789 let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new( 1790 id.clone(), 1791 interrupt_group, 1792 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1793 .map_err(DeviceManagerError::RestoreGetState)?, 1794 ))); 1795 1796 self.bus_devices 1797 .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>); 1798 1799 let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START; 1800 1801 self.address_manager 1802 .mmio_bus 1803 .insert(gpio_device.clone(), addr.0, MMIO_LEN) 1804 .map_err(DeviceManagerError::BusError)?; 1805 1806 self.gpio_device = Some(gpio_device.clone()); 1807 1808 self.id_to_dev_info.insert( 1809 (DeviceType::Gpio, "gpio".to_string()), 1810 MmioDeviceInfo { 1811 addr: addr.0, 1812 len: MMIO_LEN, 1813 irq: gpio_irq, 1814 }, 1815 ); 1816 1817 self.device_tree 1818 .lock() 1819 .unwrap() 1820 .insert(id.clone(), device_node!(id, gpio_device)); 1821 1822 Ok(()) 1823 } 1824 1825 #[cfg(target_arch = "x86_64")] 1826 fn add_debug_console_device( 1827 &mut self, 1828 debug_console_writer: Box<dyn io::Write + Send>, 1829 ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> { 1830 let id = String::from(DEBUGCON_DEVICE_NAME); 1831 let debug_console = Arc::new(Mutex::new(DebugConsole::new( 1832 id.clone(), 1833 debug_console_writer, 1834 ))); 1835 1836 let port = self 1837 .config 1838 .lock() 1839 .unwrap() 1840 .debug_console 1841 .clone() 1842 .iobase 1843 .map(|port| port as u64) 1844 .unwrap_or(debug_console::DEFAULT_PORT); 1845 1846 self.bus_devices 1847 .push(Arc::clone(&debug_console) as Arc<Mutex<dyn BusDevice>>); 1848 1849 self.address_manager 1850 .allocator 1851 .lock() 1852 .unwrap() 1853 .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None) 1854 .ok_or(DeviceManagerError::AllocateIoPort)?; 1855 1856 self.address_manager 1857 .io_bus 1858 .insert(debug_console.clone(), port, 0x1) 1859 .map_err(DeviceManagerError::BusError)?; 1860 1861 // Fill the device tree with a new node. In case of restore, we 1862 // know there is nothing to do, so we can simply override the 1863 // existing entry. 1864 self.device_tree 1865 .lock() 1866 .unwrap() 1867 .insert(id.clone(), device_node!(id, debug_console)); 1868 1869 Ok(debug_console) 1870 } 1871 1872 #[cfg(target_arch = "x86_64")] 1873 fn add_serial_device( 1874 &mut self, 1875 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1876 serial_writer: Option<Box<dyn io::Write + Send>>, 1877 ) -> DeviceManagerResult<Arc<Mutex<Serial>>> { 1878 // Serial is tied to IRQ #4 1879 let serial_irq = 4; 1880 1881 let id = String::from(SERIAL_DEVICE_NAME); 1882 1883 let interrupt_group = interrupt_manager 1884 .create_group(LegacyIrqGroupConfig { 1885 irq: serial_irq as InterruptIndex, 1886 }) 1887 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1888 1889 let serial = Arc::new(Mutex::new(Serial::new( 1890 id.clone(), 1891 interrupt_group, 1892 serial_writer, 1893 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1894 .map_err(DeviceManagerError::RestoreGetState)?, 1895 ))); 1896 1897 self.bus_devices 1898 .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>); 1899 1900 self.address_manager 1901 .allocator 1902 .lock() 1903 .unwrap() 1904 .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None) 1905 .ok_or(DeviceManagerError::AllocateIoPort)?; 1906 1907 self.address_manager 1908 .io_bus 1909 .insert(serial.clone(), 0x3f8, 0x8) 1910 .map_err(DeviceManagerError::BusError)?; 1911 1912 // Fill the device tree with a new node. In case of restore, we 1913 // know there is nothing to do, so we can simply override the 1914 // existing entry. 1915 self.device_tree 1916 .lock() 1917 .unwrap() 1918 .insert(id.clone(), device_node!(id, serial)); 1919 1920 Ok(serial) 1921 } 1922 1923 #[cfg(target_arch = "aarch64")] 1924 fn add_serial_device( 1925 &mut self, 1926 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1927 serial_writer: Option<Box<dyn io::Write + Send>>, 1928 ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> { 1929 let id = String::from(SERIAL_DEVICE_NAME); 1930 1931 let serial_irq = self 1932 .address_manager 1933 .allocator 1934 .lock() 1935 .unwrap() 1936 .allocate_irq() 1937 .unwrap(); 1938 1939 let interrupt_group = interrupt_manager 1940 .create_group(LegacyIrqGroupConfig { 1941 irq: serial_irq as InterruptIndex, 1942 }) 1943 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1944 1945 let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new( 1946 id.clone(), 1947 interrupt_group, 1948 serial_writer, 1949 self.timestamp, 1950 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 1951 .map_err(DeviceManagerError::RestoreGetState)?, 1952 ))); 1953 1954 self.bus_devices 1955 .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>); 1956 1957 let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START; 1958 1959 self.address_manager 1960 .mmio_bus 1961 .insert(serial.clone(), addr.0, MMIO_LEN) 1962 .map_err(DeviceManagerError::BusError)?; 1963 1964 self.id_to_dev_info.insert( 1965 (DeviceType::Serial, DeviceType::Serial.to_string()), 1966 MmioDeviceInfo { 1967 addr: addr.0, 1968 len: MMIO_LEN, 1969 irq: serial_irq, 1970 }, 1971 ); 1972 1973 self.cmdline_additions 1974 .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0)); 1975 1976 // Fill the device tree with a new node. In case of restore, we 1977 // know there is nothing to do, so we can simply override the 1978 // existing entry. 1979 self.device_tree 1980 .lock() 1981 .unwrap() 1982 .insert(id.clone(), device_node!(id, serial)); 1983 1984 Ok(serial) 1985 } 1986 1987 fn modify_mode<F: FnOnce(&mut termios)>( 1988 &mut self, 1989 fd: RawFd, 1990 f: F, 1991 ) -> vmm_sys_util::errno::Result<()> { 1992 // SAFETY: safe because we check the return value of isatty. 1993 if unsafe { isatty(fd) } != 1 { 1994 return Ok(()); 1995 } 1996 1997 // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr 1998 // and we check the return result. 1999 let mut termios: termios = unsafe { zeroed() }; 2000 // SAFETY: see above 2001 let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) }; 2002 if ret < 0 { 2003 return vmm_sys_util::errno::errno_result(); 2004 } 2005 let mut original_termios_opt = self.original_termios_opt.lock().unwrap(); 2006 if original_termios_opt.is_none() { 2007 *original_termios_opt = Some(termios); 2008 } 2009 f(&mut termios); 2010 // SAFETY: Safe because the syscall will only read the extent of termios and we check 2011 // the return result. 2012 let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) }; 2013 if ret < 0 { 2014 return vmm_sys_util::errno::errno_result(); 2015 } 2016 2017 Ok(()) 2018 } 2019 2020 fn set_raw_mode(&mut self, f: &dyn AsRawFd) -> vmm_sys_util::errno::Result<()> { 2021 // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode. 2022 self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) }) 2023 } 2024 2025 fn listen_for_sigwinch_on_tty(&mut self, pty_sub: File) -> std::io::Result<()> { 2026 let seccomp_filter = get_seccomp_filter( 2027 &self.seccomp_action, 2028 Thread::PtyForeground, 2029 self.hypervisor_type, 2030 ) 2031 .unwrap(); 2032 2033 self.console_resize_pipe = 2034 Some(Arc::new(start_sigwinch_listener(seccomp_filter, pty_sub)?)); 2035 2036 Ok(()) 2037 } 2038 2039 fn add_virtio_console_device( 2040 &mut self, 2041 virtio_devices: &mut Vec<MetaVirtioDevice>, 2042 console_pty: Option<PtyPair>, 2043 resize_pipe: Option<File>, 2044 ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> { 2045 let console_config = self.config.lock().unwrap().console.clone(); 2046 let endpoint = match console_config.mode { 2047 ConsoleOutputMode::File => { 2048 let file = File::create(console_config.file.as_ref().unwrap()) 2049 .map_err(DeviceManagerError::ConsoleOutputFileOpen)?; 2050 Endpoint::File(file) 2051 } 2052 ConsoleOutputMode::Pty => { 2053 if let Some(pty) = console_pty { 2054 self.config.lock().unwrap().console.file = Some(pty.path.clone()); 2055 let file = pty.main.try_clone().unwrap(); 2056 self.console_pty = Some(Arc::new(Mutex::new(pty))); 2057 self.console_resize_pipe = resize_pipe.map(Arc::new); 2058 Endpoint::PtyPair(file.try_clone().unwrap(), file) 2059 } else { 2060 let (main, sub, path) = 2061 create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?; 2062 self.set_raw_mode(&sub) 2063 .map_err(DeviceManagerError::SetPtyRaw)?; 2064 self.config.lock().unwrap().console.file = Some(path.clone()); 2065 let file = main.try_clone().unwrap(); 2066 assert!(resize_pipe.is_none()); 2067 self.listen_for_sigwinch_on_tty(sub).unwrap(); 2068 self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path }))); 2069 Endpoint::PtyPair(file.try_clone().unwrap(), file) 2070 } 2071 } 2072 ConsoleOutputMode::Tty => { 2073 // Duplicating the file descriptors like this is needed as otherwise 2074 // they will be closed on a reboot and the numbers reused 2075 2076 // SAFETY: FFI call to dup. Trivially safe. 2077 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) }; 2078 if stdout == -1 { 2079 return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd); 2080 } 2081 // SAFETY: stdout is valid and owned solely by us. 2082 let stdout = unsafe { File::from_raw_fd(stdout) }; 2083 2084 // Make sure stdout is in raw mode, if it's a terminal. 2085 let _ = self.set_raw_mode(&stdout); 2086 2087 // SAFETY: FFI call. Trivially safe. 2088 if unsafe { libc::isatty(libc::STDOUT_FILENO) } == 1 { 2089 self.listen_for_sigwinch_on_tty(stdout.try_clone().unwrap()) 2090 .unwrap(); 2091 } 2092 2093 // If an interactive TTY then we can accept input 2094 // SAFETY: FFI call. Trivially safe. 2095 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } { 2096 // SAFETY: FFI call to dup. Trivially safe. 2097 let stdin = unsafe { libc::dup(libc::STDIN_FILENO) }; 2098 if stdin == -1 { 2099 return vmm_sys_util::errno::errno_result() 2100 .map_err(DeviceManagerError::DupFd); 2101 } 2102 // SAFETY: stdin is valid and owned solely by us. 2103 let stdin = unsafe { File::from_raw_fd(stdin) }; 2104 2105 Endpoint::FilePair(stdout, stdin) 2106 } else { 2107 Endpoint::File(stdout) 2108 } 2109 } 2110 ConsoleOutputMode::Socket => { 2111 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice); 2112 } 2113 ConsoleOutputMode::Null => Endpoint::Null, 2114 ConsoleOutputMode::Off => return Ok(None), 2115 }; 2116 let id = String::from(CONSOLE_DEVICE_NAME); 2117 2118 let (virtio_console_device, console_resizer) = virtio_devices::Console::new( 2119 id.clone(), 2120 endpoint, 2121 self.console_resize_pipe 2122 .as_ref() 2123 .map(|p| p.try_clone().unwrap()), 2124 self.force_iommu | console_config.iommu, 2125 self.seccomp_action.clone(), 2126 self.exit_evt 2127 .try_clone() 2128 .map_err(DeviceManagerError::EventFd)?, 2129 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 2130 .map_err(DeviceManagerError::RestoreGetState)?, 2131 ) 2132 .map_err(DeviceManagerError::CreateVirtioConsole)?; 2133 let virtio_console_device = Arc::new(Mutex::new(virtio_console_device)); 2134 virtio_devices.push(MetaVirtioDevice { 2135 virtio_device: Arc::clone(&virtio_console_device) 2136 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2137 iommu: console_config.iommu, 2138 id: id.clone(), 2139 pci_segment: 0, 2140 dma_handler: None, 2141 }); 2142 2143 // Fill the device tree with a new node. In case of restore, we 2144 // know there is nothing to do, so we can simply override the 2145 // existing entry. 2146 self.device_tree 2147 .lock() 2148 .unwrap() 2149 .insert(id.clone(), device_node!(id, virtio_console_device)); 2150 2151 // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY 2152 Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) { 2153 Some(console_resizer) 2154 } else { 2155 None 2156 }) 2157 } 2158 2159 /// Adds all devices that behave like a console with respect to the VM 2160 /// configuration. This includes: 2161 /// - debug-console 2162 /// - serial-console 2163 /// - virtio-console 2164 fn add_console_devices( 2165 &mut self, 2166 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 2167 virtio_devices: &mut Vec<MetaVirtioDevice>, 2168 serial_pty: Option<PtyPair>, 2169 console_pty: Option<PtyPair>, 2170 #[cfg(target_arch = "x86_64")] debug_console_pty: Option<PtyPair>, 2171 #[cfg(not(target_arch = "x86_64"))] _: Option<PtyPair>, 2172 console_resize_pipe: Option<File>, 2173 ) -> DeviceManagerResult<Arc<Console>> { 2174 let serial_config = self.config.lock().unwrap().serial.clone(); 2175 let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode { 2176 ConsoleOutputMode::File => Some(Box::new( 2177 File::create(serial_config.file.as_ref().unwrap()) 2178 .map_err(DeviceManagerError::SerialOutputFileOpen)?, 2179 )), 2180 ConsoleOutputMode::Pty => { 2181 if let Some(pty) = serial_pty.clone() { 2182 self.config.lock().unwrap().serial.file = Some(pty.path.clone()); 2183 self.serial_pty = Some(Arc::new(Mutex::new(pty))); 2184 } else { 2185 let (main, sub, path) = 2186 create_pty().map_err(DeviceManagerError::SerialPtyOpen)?; 2187 self.set_raw_mode(&sub) 2188 .map_err(DeviceManagerError::SetPtyRaw)?; 2189 self.config.lock().unwrap().serial.file = Some(path.clone()); 2190 self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path }))); 2191 } 2192 None 2193 } 2194 ConsoleOutputMode::Tty => { 2195 let out = stdout(); 2196 let _ = self.set_raw_mode(&out); 2197 Some(Box::new(out)) 2198 } 2199 ConsoleOutputMode::Off | ConsoleOutputMode::Null | ConsoleOutputMode::Socket => None, 2200 }; 2201 if serial_config.mode != ConsoleOutputMode::Off { 2202 let serial = self.add_serial_device(interrupt_manager, serial_writer)?; 2203 self.serial_manager = match serial_config.mode { 2204 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty | ConsoleOutputMode::Socket => { 2205 let serial_manager = SerialManager::new( 2206 serial, 2207 self.serial_pty.clone(), 2208 serial_config.mode, 2209 serial_config.socket, 2210 ) 2211 .map_err(DeviceManagerError::CreateSerialManager)?; 2212 if let Some(mut serial_manager) = serial_manager { 2213 serial_manager 2214 .start_thread( 2215 self.exit_evt 2216 .try_clone() 2217 .map_err(DeviceManagerError::EventFd)?, 2218 ) 2219 .map_err(DeviceManagerError::SpawnSerialManager)?; 2220 Some(Arc::new(serial_manager)) 2221 } else { 2222 None 2223 } 2224 } 2225 _ => None, 2226 }; 2227 } 2228 2229 #[cfg(target_arch = "x86_64")] 2230 { 2231 let debug_console_config = self.config.lock().unwrap().debug_console.clone(); 2232 let debug_console_writer: Option<Box<dyn io::Write + Send>> = match debug_console_config 2233 .mode 2234 { 2235 ConsoleOutputMode::File => Some(Box::new( 2236 File::create(debug_console_config.file.as_ref().unwrap()) 2237 .map_err(DeviceManagerError::DebugconOutputFileOpen)?, 2238 )), 2239 ConsoleOutputMode::Pty => { 2240 if let Some(pty) = debug_console_pty { 2241 self.config.lock().unwrap().debug_console.file = Some(pty.path.clone()); 2242 self.debug_console_pty = Some(Arc::new(Mutex::new(pty))); 2243 } else { 2244 let (main, sub, path) = 2245 create_pty().map_err(DeviceManagerError::DebugconPtyOpen)?; 2246 self.set_raw_mode(&sub) 2247 .map_err(DeviceManagerError::SetPtyRaw)?; 2248 self.config.lock().unwrap().debug_console.file = Some(path.clone()); 2249 self.debug_console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path }))); 2250 } 2251 None 2252 } 2253 ConsoleOutputMode::Tty => { 2254 let out = stdout(); 2255 let _ = self.set_raw_mode(&out); 2256 Some(Box::new(out)) 2257 } 2258 ConsoleOutputMode::Off | ConsoleOutputMode::Null | ConsoleOutputMode::Socket => { 2259 None 2260 } 2261 }; 2262 if let Some(writer) = debug_console_writer { 2263 let _ = self.add_debug_console_device(writer)?; 2264 } 2265 } 2266 2267 let console_resizer = 2268 self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?; 2269 2270 Ok(Arc::new(Console { console_resizer })) 2271 } 2272 2273 fn add_tpm_device( 2274 &mut self, 2275 tpm_path: PathBuf, 2276 ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> { 2277 // Create TPM Device 2278 let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| { 2279 DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e)) 2280 })?; 2281 let tpm = Arc::new(Mutex::new(tpm)); 2282 2283 // Add TPM Device to mmio 2284 self.address_manager 2285 .mmio_bus 2286 .insert( 2287 tpm.clone(), 2288 arch::layout::TPM_START.0, 2289 arch::layout::TPM_SIZE, 2290 ) 2291 .map_err(DeviceManagerError::BusError)?; 2292 2293 Ok(tpm) 2294 } 2295 2296 fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2297 let mut devices: Vec<MetaVirtioDevice> = Vec::new(); 2298 2299 // Create "standard" virtio devices (net/block/rng) 2300 devices.append(&mut self.make_virtio_block_devices()?); 2301 devices.append(&mut self.make_virtio_net_devices()?); 2302 devices.append(&mut self.make_virtio_rng_devices()?); 2303 2304 // Add virtio-fs if required 2305 devices.append(&mut self.make_virtio_fs_devices()?); 2306 2307 // Add virtio-pmem if required 2308 devices.append(&mut self.make_virtio_pmem_devices()?); 2309 2310 // Add virtio-vsock if required 2311 devices.append(&mut self.make_virtio_vsock_devices()?); 2312 2313 devices.append(&mut self.make_virtio_mem_devices()?); 2314 2315 // Add virtio-balloon if required 2316 devices.append(&mut self.make_virtio_balloon_devices()?); 2317 2318 // Add virtio-watchdog device 2319 devices.append(&mut self.make_virtio_watchdog_devices()?); 2320 2321 // Add vDPA devices if required 2322 devices.append(&mut self.make_vdpa_devices()?); 2323 2324 Ok(devices) 2325 } 2326 2327 // Cache whether aio is supported to avoid checking for very block device 2328 fn aio_is_supported(&mut self) -> bool { 2329 if let Some(supported) = self.aio_supported { 2330 return supported; 2331 } 2332 2333 let supported = block_aio_is_supported(); 2334 self.aio_supported = Some(supported); 2335 supported 2336 } 2337 2338 // Cache whether io_uring is supported to avoid probing for very block device 2339 fn io_uring_is_supported(&mut self) -> bool { 2340 if let Some(supported) = self.io_uring_supported { 2341 return supported; 2342 } 2343 2344 let supported = block_io_uring_is_supported(); 2345 self.io_uring_supported = Some(supported); 2346 supported 2347 } 2348 2349 fn make_virtio_block_device( 2350 &mut self, 2351 disk_cfg: &mut DiskConfig, 2352 ) -> DeviceManagerResult<MetaVirtioDevice> { 2353 let id = if let Some(id) = &disk_cfg.id { 2354 id.clone() 2355 } else { 2356 let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?; 2357 disk_cfg.id = Some(id.clone()); 2358 id 2359 }; 2360 2361 info!("Creating virtio-block device: {:?}", disk_cfg); 2362 2363 let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); 2364 2365 let (virtio_device, migratable_device) = if disk_cfg.vhost_user { 2366 let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone(); 2367 let vu_cfg = VhostUserConfig { 2368 socket, 2369 num_queues: disk_cfg.num_queues, 2370 queue_size: disk_cfg.queue_size, 2371 }; 2372 let vhost_user_block = Arc::new(Mutex::new( 2373 match virtio_devices::vhost_user::Blk::new( 2374 id.clone(), 2375 vu_cfg, 2376 self.seccomp_action.clone(), 2377 self.exit_evt 2378 .try_clone() 2379 .map_err(DeviceManagerError::EventFd)?, 2380 self.force_iommu, 2381 snapshot 2382 .map(|s| s.to_versioned_state()) 2383 .transpose() 2384 .map_err(DeviceManagerError::RestoreGetState)?, 2385 ) { 2386 Ok(vub_device) => vub_device, 2387 Err(e) => { 2388 return Err(DeviceManagerError::CreateVhostUserBlk(e)); 2389 } 2390 }, 2391 )); 2392 2393 ( 2394 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2395 vhost_user_block as Arc<Mutex<dyn Migratable>>, 2396 ) 2397 } else { 2398 let mut options = OpenOptions::new(); 2399 options.read(true); 2400 options.write(!disk_cfg.readonly); 2401 if disk_cfg.direct { 2402 options.custom_flags(libc::O_DIRECT); 2403 } 2404 // Open block device path 2405 let mut file: File = options 2406 .open( 2407 disk_cfg 2408 .path 2409 .as_ref() 2410 .ok_or(DeviceManagerError::NoDiskPath)? 2411 .clone(), 2412 ) 2413 .map_err(DeviceManagerError::Disk)?; 2414 let image_type = 2415 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?; 2416 2417 let image = match image_type { 2418 ImageType::FixedVhd => { 2419 // Use asynchronous backend relying on io_uring if the 2420 // syscalls are supported. 2421 if cfg!(feature = "io_uring") 2422 && !disk_cfg.disable_io_uring 2423 && self.io_uring_is_supported() 2424 { 2425 info!("Using asynchronous fixed VHD disk file (io_uring)"); 2426 2427 #[cfg(not(feature = "io_uring"))] 2428 unreachable!("Checked in if statement above"); 2429 #[cfg(feature = "io_uring")] 2430 { 2431 Box::new( 2432 FixedVhdDiskAsync::new(file) 2433 .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?, 2434 ) as Box<dyn DiskFile> 2435 } 2436 } else { 2437 info!("Using synchronous fixed VHD disk file"); 2438 Box::new( 2439 FixedVhdDiskSync::new(file) 2440 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?, 2441 ) as Box<dyn DiskFile> 2442 } 2443 } 2444 ImageType::Raw => { 2445 // Use asynchronous backend relying on io_uring if the 2446 // syscalls are supported. 2447 if cfg!(feature = "io_uring") 2448 && !disk_cfg.disable_io_uring 2449 && self.io_uring_is_supported() 2450 { 2451 info!("Using asynchronous RAW disk file (io_uring)"); 2452 2453 #[cfg(not(feature = "io_uring"))] 2454 unreachable!("Checked in if statement above"); 2455 #[cfg(feature = "io_uring")] 2456 { 2457 Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile> 2458 } 2459 } else if !disk_cfg.disable_aio && self.aio_is_supported() { 2460 info!("Using asynchronous RAW disk file (aio)"); 2461 Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile> 2462 } else { 2463 info!("Using synchronous RAW disk file"); 2464 Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile> 2465 } 2466 } 2467 ImageType::Qcow2 => { 2468 info!("Using synchronous QCOW disk file"); 2469 Box::new( 2470 QcowDiskSync::new(file, disk_cfg.direct) 2471 .map_err(DeviceManagerError::CreateQcowDiskSync)?, 2472 ) as Box<dyn DiskFile> 2473 } 2474 ImageType::Vhdx => { 2475 info!("Using synchronous VHDX disk file"); 2476 Box::new( 2477 VhdxDiskSync::new(file) 2478 .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?, 2479 ) as Box<dyn DiskFile> 2480 } 2481 }; 2482 2483 let rate_limit_group = 2484 if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() { 2485 // Create an anonymous RateLimiterGroup that is dropped when the Disk 2486 // is dropped. 2487 let bw = rate_limiter_cfg.bandwidth.unwrap_or_default(); 2488 let ops = rate_limiter_cfg.ops.unwrap_or_default(); 2489 let mut rate_limit_group = RateLimiterGroup::new( 2490 disk_cfg.id.as_ref().unwrap(), 2491 bw.size, 2492 bw.one_time_burst.unwrap_or(0), 2493 bw.refill_time, 2494 ops.size, 2495 ops.one_time_burst.unwrap_or(0), 2496 ops.refill_time, 2497 ) 2498 .map_err(DeviceManagerError::RateLimiterGroupCreate)?; 2499 2500 rate_limit_group 2501 .start_thread( 2502 self.exit_evt 2503 .try_clone() 2504 .map_err(DeviceManagerError::EventFd)?, 2505 ) 2506 .unwrap(); 2507 2508 Some(Arc::new(rate_limit_group)) 2509 } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() { 2510 self.rate_limit_groups.get(rate_limit_group).cloned() 2511 } else { 2512 None 2513 }; 2514 2515 let virtio_block = Arc::new(Mutex::new( 2516 virtio_devices::Block::new( 2517 id.clone(), 2518 image, 2519 disk_cfg 2520 .path 2521 .as_ref() 2522 .ok_or(DeviceManagerError::NoDiskPath)? 2523 .clone(), 2524 disk_cfg.readonly, 2525 self.force_iommu | disk_cfg.iommu, 2526 disk_cfg.num_queues, 2527 disk_cfg.queue_size, 2528 disk_cfg.serial.clone(), 2529 self.seccomp_action.clone(), 2530 rate_limit_group, 2531 self.exit_evt 2532 .try_clone() 2533 .map_err(DeviceManagerError::EventFd)?, 2534 snapshot 2535 .map(|s| s.to_versioned_state()) 2536 .transpose() 2537 .map_err(DeviceManagerError::RestoreGetState)?, 2538 ) 2539 .map_err(DeviceManagerError::CreateVirtioBlock)?, 2540 )); 2541 2542 ( 2543 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2544 virtio_block as Arc<Mutex<dyn Migratable>>, 2545 ) 2546 }; 2547 2548 // Fill the device tree with a new node. In case of restore, we 2549 // know there is nothing to do, so we can simply override the 2550 // existing entry. 2551 self.device_tree 2552 .lock() 2553 .unwrap() 2554 .insert(id.clone(), device_node!(id, migratable_device)); 2555 2556 Ok(MetaVirtioDevice { 2557 virtio_device, 2558 iommu: disk_cfg.iommu, 2559 id, 2560 pci_segment: disk_cfg.pci_segment, 2561 dma_handler: None, 2562 }) 2563 } 2564 2565 fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2566 let mut devices = Vec::new(); 2567 2568 let mut block_devices = self.config.lock().unwrap().disks.clone(); 2569 if let Some(disk_list_cfg) = &mut block_devices { 2570 for disk_cfg in disk_list_cfg.iter_mut() { 2571 devices.push(self.make_virtio_block_device(disk_cfg)?); 2572 } 2573 } 2574 self.config.lock().unwrap().disks = block_devices; 2575 2576 Ok(devices) 2577 } 2578 2579 fn make_virtio_net_device( 2580 &mut self, 2581 net_cfg: &mut NetConfig, 2582 ) -> DeviceManagerResult<MetaVirtioDevice> { 2583 let id = if let Some(id) = &net_cfg.id { 2584 id.clone() 2585 } else { 2586 let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?; 2587 net_cfg.id = Some(id.clone()); 2588 id 2589 }; 2590 info!("Creating virtio-net device: {:?}", net_cfg); 2591 2592 let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); 2593 2594 let (virtio_device, migratable_device) = if net_cfg.vhost_user { 2595 let socket = net_cfg.vhost_socket.as_ref().unwrap().clone(); 2596 let vu_cfg = VhostUserConfig { 2597 socket, 2598 num_queues: net_cfg.num_queues, 2599 queue_size: net_cfg.queue_size, 2600 }; 2601 let server = match net_cfg.vhost_mode { 2602 VhostMode::Client => false, 2603 VhostMode::Server => true, 2604 }; 2605 let vhost_user_net = Arc::new(Mutex::new( 2606 match virtio_devices::vhost_user::Net::new( 2607 id.clone(), 2608 net_cfg.mac, 2609 net_cfg.mtu, 2610 vu_cfg, 2611 server, 2612 self.seccomp_action.clone(), 2613 self.exit_evt 2614 .try_clone() 2615 .map_err(DeviceManagerError::EventFd)?, 2616 self.force_iommu, 2617 snapshot 2618 .map(|s| s.to_versioned_state()) 2619 .transpose() 2620 .map_err(DeviceManagerError::RestoreGetState)?, 2621 net_cfg.offload_tso, 2622 net_cfg.offload_ufo, 2623 net_cfg.offload_csum, 2624 ) { 2625 Ok(vun_device) => vun_device, 2626 Err(e) => { 2627 return Err(DeviceManagerError::CreateVhostUserNet(e)); 2628 } 2629 }, 2630 )); 2631 2632 ( 2633 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2634 vhost_user_net as Arc<Mutex<dyn Migratable>>, 2635 ) 2636 } else { 2637 let state = snapshot 2638 .map(|s| s.to_versioned_state()) 2639 .transpose() 2640 .map_err(DeviceManagerError::RestoreGetState)?; 2641 2642 let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap { 2643 Arc::new(Mutex::new( 2644 virtio_devices::Net::new( 2645 id.clone(), 2646 Some(tap_if_name), 2647 Some(net_cfg.ip), 2648 Some(net_cfg.mask), 2649 Some(net_cfg.mac), 2650 &mut net_cfg.host_mac, 2651 net_cfg.mtu, 2652 self.force_iommu | net_cfg.iommu, 2653 net_cfg.num_queues, 2654 net_cfg.queue_size, 2655 self.seccomp_action.clone(), 2656 net_cfg.rate_limiter_config, 2657 self.exit_evt 2658 .try_clone() 2659 .map_err(DeviceManagerError::EventFd)?, 2660 state, 2661 net_cfg.offload_tso, 2662 net_cfg.offload_ufo, 2663 net_cfg.offload_csum, 2664 ) 2665 .map_err(DeviceManagerError::CreateVirtioNet)?, 2666 )) 2667 } else if let Some(fds) = &net_cfg.fds { 2668 let net = virtio_devices::Net::from_tap_fds( 2669 id.clone(), 2670 fds, 2671 Some(net_cfg.mac), 2672 net_cfg.mtu, 2673 self.force_iommu | net_cfg.iommu, 2674 net_cfg.queue_size, 2675 self.seccomp_action.clone(), 2676 net_cfg.rate_limiter_config, 2677 self.exit_evt 2678 .try_clone() 2679 .map_err(DeviceManagerError::EventFd)?, 2680 state, 2681 net_cfg.offload_tso, 2682 net_cfg.offload_ufo, 2683 net_cfg.offload_csum, 2684 ) 2685 .map_err(DeviceManagerError::CreateVirtioNet)?; 2686 2687 // SAFETY: 'fds' are valid because TAP devices are created successfully 2688 unsafe { 2689 self.config.lock().unwrap().add_preserved_fds(fds.clone()); 2690 } 2691 2692 Arc::new(Mutex::new(net)) 2693 } else { 2694 Arc::new(Mutex::new( 2695 virtio_devices::Net::new( 2696 id.clone(), 2697 None, 2698 Some(net_cfg.ip), 2699 Some(net_cfg.mask), 2700 Some(net_cfg.mac), 2701 &mut net_cfg.host_mac, 2702 net_cfg.mtu, 2703 self.force_iommu | net_cfg.iommu, 2704 net_cfg.num_queues, 2705 net_cfg.queue_size, 2706 self.seccomp_action.clone(), 2707 net_cfg.rate_limiter_config, 2708 self.exit_evt 2709 .try_clone() 2710 .map_err(DeviceManagerError::EventFd)?, 2711 state, 2712 net_cfg.offload_tso, 2713 net_cfg.offload_ufo, 2714 net_cfg.offload_csum, 2715 ) 2716 .map_err(DeviceManagerError::CreateVirtioNet)?, 2717 )) 2718 }; 2719 2720 ( 2721 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2722 virtio_net as Arc<Mutex<dyn Migratable>>, 2723 ) 2724 }; 2725 2726 // Fill the device tree with a new node. In case of restore, we 2727 // know there is nothing to do, so we can simply override the 2728 // existing entry. 2729 self.device_tree 2730 .lock() 2731 .unwrap() 2732 .insert(id.clone(), device_node!(id, migratable_device)); 2733 2734 Ok(MetaVirtioDevice { 2735 virtio_device, 2736 iommu: net_cfg.iommu, 2737 id, 2738 pci_segment: net_cfg.pci_segment, 2739 dma_handler: None, 2740 }) 2741 } 2742 2743 /// Add virto-net and vhost-user-net devices 2744 fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2745 let mut devices = Vec::new(); 2746 let mut net_devices = self.config.lock().unwrap().net.clone(); 2747 if let Some(net_list_cfg) = &mut net_devices { 2748 for net_cfg in net_list_cfg.iter_mut() { 2749 devices.push(self.make_virtio_net_device(net_cfg)?); 2750 } 2751 } 2752 self.config.lock().unwrap().net = net_devices; 2753 2754 Ok(devices) 2755 } 2756 2757 fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2758 let mut devices = Vec::new(); 2759 2760 // Add virtio-rng if required 2761 let rng_config = self.config.lock().unwrap().rng.clone(); 2762 if let Some(rng_path) = rng_config.src.to_str() { 2763 info!("Creating virtio-rng device: {:?}", rng_config); 2764 let id = String::from(RNG_DEVICE_NAME); 2765 2766 let virtio_rng_device = Arc::new(Mutex::new( 2767 virtio_devices::Rng::new( 2768 id.clone(), 2769 rng_path, 2770 self.force_iommu | rng_config.iommu, 2771 self.seccomp_action.clone(), 2772 self.exit_evt 2773 .try_clone() 2774 .map_err(DeviceManagerError::EventFd)?, 2775 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 2776 .map_err(DeviceManagerError::RestoreGetState)?, 2777 ) 2778 .map_err(DeviceManagerError::CreateVirtioRng)?, 2779 )); 2780 devices.push(MetaVirtioDevice { 2781 virtio_device: Arc::clone(&virtio_rng_device) 2782 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2783 iommu: rng_config.iommu, 2784 id: id.clone(), 2785 pci_segment: 0, 2786 dma_handler: None, 2787 }); 2788 2789 // Fill the device tree with a new node. In case of restore, we 2790 // know there is nothing to do, so we can simply override the 2791 // existing entry. 2792 self.device_tree 2793 .lock() 2794 .unwrap() 2795 .insert(id.clone(), device_node!(id, virtio_rng_device)); 2796 } 2797 2798 Ok(devices) 2799 } 2800 2801 fn make_virtio_fs_device( 2802 &mut self, 2803 fs_cfg: &mut FsConfig, 2804 ) -> DeviceManagerResult<MetaVirtioDevice> { 2805 let id = if let Some(id) = &fs_cfg.id { 2806 id.clone() 2807 } else { 2808 let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?; 2809 fs_cfg.id = Some(id.clone()); 2810 id 2811 }; 2812 2813 info!("Creating virtio-fs device: {:?}", fs_cfg); 2814 2815 let mut node = device_node!(id); 2816 2817 if let Some(fs_socket) = fs_cfg.socket.to_str() { 2818 let virtio_fs_device = Arc::new(Mutex::new( 2819 virtio_devices::vhost_user::Fs::new( 2820 id.clone(), 2821 fs_socket, 2822 &fs_cfg.tag, 2823 fs_cfg.num_queues, 2824 fs_cfg.queue_size, 2825 None, 2826 self.seccomp_action.clone(), 2827 self.exit_evt 2828 .try_clone() 2829 .map_err(DeviceManagerError::EventFd)?, 2830 self.force_iommu, 2831 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 2832 .map_err(DeviceManagerError::RestoreGetState)?, 2833 ) 2834 .map_err(DeviceManagerError::CreateVirtioFs)?, 2835 )); 2836 2837 // Update the device tree with the migratable device. 2838 node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>); 2839 self.device_tree.lock().unwrap().insert(id.clone(), node); 2840 2841 Ok(MetaVirtioDevice { 2842 virtio_device: Arc::clone(&virtio_fs_device) 2843 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2844 iommu: false, 2845 id, 2846 pci_segment: fs_cfg.pci_segment, 2847 dma_handler: None, 2848 }) 2849 } else { 2850 Err(DeviceManagerError::NoVirtioFsSock) 2851 } 2852 } 2853 2854 fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2855 let mut devices = Vec::new(); 2856 2857 let mut fs_devices = self.config.lock().unwrap().fs.clone(); 2858 if let Some(fs_list_cfg) = &mut fs_devices { 2859 for fs_cfg in fs_list_cfg.iter_mut() { 2860 devices.push(self.make_virtio_fs_device(fs_cfg)?); 2861 } 2862 } 2863 self.config.lock().unwrap().fs = fs_devices; 2864 2865 Ok(devices) 2866 } 2867 2868 fn make_virtio_pmem_device( 2869 &mut self, 2870 pmem_cfg: &mut PmemConfig, 2871 ) -> DeviceManagerResult<MetaVirtioDevice> { 2872 let id = if let Some(id) = &pmem_cfg.id { 2873 id.clone() 2874 } else { 2875 let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?; 2876 pmem_cfg.id = Some(id.clone()); 2877 id 2878 }; 2879 2880 info!("Creating virtio-pmem device: {:?}", pmem_cfg); 2881 2882 let mut node = device_node!(id); 2883 2884 // Look for the id in the device tree. If it can be found, that means 2885 // the device is being restored, otherwise it's created from scratch. 2886 let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) { 2887 info!("Restoring virtio-pmem {} resources", id); 2888 2889 let mut region_range: Option<(u64, u64)> = None; 2890 for resource in node.resources.iter() { 2891 match resource { 2892 Resource::MmioAddressRange { base, size } => { 2893 if region_range.is_some() { 2894 return Err(DeviceManagerError::ResourceAlreadyExists); 2895 } 2896 2897 region_range = Some((*base, *size)); 2898 } 2899 _ => { 2900 error!("Unexpected resource {:?} for {}", resource, id); 2901 } 2902 } 2903 } 2904 2905 if region_range.is_none() { 2906 return Err(DeviceManagerError::MissingVirtioPmemResources); 2907 } 2908 2909 region_range 2910 } else { 2911 None 2912 }; 2913 2914 let (custom_flags, set_len) = if pmem_cfg.file.is_dir() { 2915 if pmem_cfg.size.is_none() { 2916 return Err(DeviceManagerError::PmemWithDirectorySizeMissing); 2917 } 2918 (O_TMPFILE, true) 2919 } else { 2920 (0, false) 2921 }; 2922 2923 let mut file = OpenOptions::new() 2924 .read(true) 2925 .write(!pmem_cfg.discard_writes) 2926 .custom_flags(custom_flags) 2927 .open(&pmem_cfg.file) 2928 .map_err(DeviceManagerError::PmemFileOpen)?; 2929 2930 let size = if let Some(size) = pmem_cfg.size { 2931 if set_len { 2932 file.set_len(size) 2933 .map_err(DeviceManagerError::PmemFileSetLen)?; 2934 } 2935 size 2936 } else { 2937 file.seek(SeekFrom::End(0)) 2938 .map_err(DeviceManagerError::PmemFileSetLen)? 2939 }; 2940 2941 if size % 0x20_0000 != 0 { 2942 return Err(DeviceManagerError::PmemSizeNotAligned); 2943 } 2944 2945 let (region_base, region_size) = if let Some((base, size)) = region_range { 2946 // The memory needs to be 2MiB aligned in order to support 2947 // hugepages. 2948 self.pci_segments[pmem_cfg.pci_segment as usize] 2949 .mem64_allocator 2950 .lock() 2951 .unwrap() 2952 .allocate( 2953 Some(GuestAddress(base)), 2954 size as GuestUsize, 2955 Some(0x0020_0000), 2956 ) 2957 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2958 2959 (base, size) 2960 } else { 2961 // The memory needs to be 2MiB aligned in order to support 2962 // hugepages. 2963 let base = self.pci_segments[pmem_cfg.pci_segment as usize] 2964 .mem64_allocator 2965 .lock() 2966 .unwrap() 2967 .allocate(None, size as GuestUsize, Some(0x0020_0000)) 2968 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2969 2970 (base.raw_value(), size) 2971 }; 2972 2973 let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?; 2974 let mmap_region = MmapRegion::build( 2975 Some(FileOffset::new(cloned_file, 0)), 2976 region_size as usize, 2977 PROT_READ | PROT_WRITE, 2978 MAP_NORESERVE 2979 | if pmem_cfg.discard_writes { 2980 MAP_PRIVATE 2981 } else { 2982 MAP_SHARED 2983 }, 2984 ) 2985 .map_err(DeviceManagerError::NewMmapRegion)?; 2986 let host_addr: u64 = mmap_region.as_ptr() as u64; 2987 2988 let mem_slot = self 2989 .memory_manager 2990 .lock() 2991 .unwrap() 2992 .create_userspace_mapping(region_base, region_size, host_addr, false, false, false) 2993 .map_err(DeviceManagerError::MemoryManager)?; 2994 2995 let mapping = virtio_devices::UserspaceMapping { 2996 host_addr, 2997 mem_slot, 2998 addr: GuestAddress(region_base), 2999 len: region_size, 3000 mergeable: false, 3001 }; 3002 3003 let virtio_pmem_device = Arc::new(Mutex::new( 3004 virtio_devices::Pmem::new( 3005 id.clone(), 3006 file, 3007 GuestAddress(region_base), 3008 mapping, 3009 mmap_region, 3010 self.force_iommu | pmem_cfg.iommu, 3011 self.seccomp_action.clone(), 3012 self.exit_evt 3013 .try_clone() 3014 .map_err(DeviceManagerError::EventFd)?, 3015 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3016 .map_err(DeviceManagerError::RestoreGetState)?, 3017 ) 3018 .map_err(DeviceManagerError::CreateVirtioPmem)?, 3019 )); 3020 3021 // Update the device tree with correct resource information and with 3022 // the migratable device. 3023 node.resources.push(Resource::MmioAddressRange { 3024 base: region_base, 3025 size: region_size, 3026 }); 3027 node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>); 3028 self.device_tree.lock().unwrap().insert(id.clone(), node); 3029 3030 Ok(MetaVirtioDevice { 3031 virtio_device: Arc::clone(&virtio_pmem_device) 3032 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3033 iommu: pmem_cfg.iommu, 3034 id, 3035 pci_segment: pmem_cfg.pci_segment, 3036 dma_handler: None, 3037 }) 3038 } 3039 3040 fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3041 let mut devices = Vec::new(); 3042 // Add virtio-pmem if required 3043 let mut pmem_devices = self.config.lock().unwrap().pmem.clone(); 3044 if let Some(pmem_list_cfg) = &mut pmem_devices { 3045 for pmem_cfg in pmem_list_cfg.iter_mut() { 3046 devices.push(self.make_virtio_pmem_device(pmem_cfg)?); 3047 } 3048 } 3049 self.config.lock().unwrap().pmem = pmem_devices; 3050 3051 Ok(devices) 3052 } 3053 3054 fn make_virtio_vsock_device( 3055 &mut self, 3056 vsock_cfg: &mut VsockConfig, 3057 ) -> DeviceManagerResult<MetaVirtioDevice> { 3058 let id = if let Some(id) = &vsock_cfg.id { 3059 id.clone() 3060 } else { 3061 let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?; 3062 vsock_cfg.id = Some(id.clone()); 3063 id 3064 }; 3065 3066 info!("Creating virtio-vsock device: {:?}", vsock_cfg); 3067 3068 let socket_path = vsock_cfg 3069 .socket 3070 .to_str() 3071 .ok_or(DeviceManagerError::CreateVsockConvertPath)?; 3072 let backend = 3073 virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string()) 3074 .map_err(DeviceManagerError::CreateVsockBackend)?; 3075 3076 let vsock_device = Arc::new(Mutex::new( 3077 virtio_devices::Vsock::new( 3078 id.clone(), 3079 vsock_cfg.cid, 3080 vsock_cfg.socket.clone(), 3081 backend, 3082 self.force_iommu | vsock_cfg.iommu, 3083 self.seccomp_action.clone(), 3084 self.exit_evt 3085 .try_clone() 3086 .map_err(DeviceManagerError::EventFd)?, 3087 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3088 .map_err(DeviceManagerError::RestoreGetState)?, 3089 ) 3090 .map_err(DeviceManagerError::CreateVirtioVsock)?, 3091 )); 3092 3093 // Fill the device tree with a new node. In case of restore, we 3094 // know there is nothing to do, so we can simply override the 3095 // existing entry. 3096 self.device_tree 3097 .lock() 3098 .unwrap() 3099 .insert(id.clone(), device_node!(id, vsock_device)); 3100 3101 Ok(MetaVirtioDevice { 3102 virtio_device: Arc::clone(&vsock_device) 3103 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3104 iommu: vsock_cfg.iommu, 3105 id, 3106 pci_segment: vsock_cfg.pci_segment, 3107 dma_handler: None, 3108 }) 3109 } 3110 3111 fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3112 let mut devices = Vec::new(); 3113 3114 let mut vsock = self.config.lock().unwrap().vsock.clone(); 3115 if let Some(ref mut vsock_cfg) = &mut vsock { 3116 devices.push(self.make_virtio_vsock_device(vsock_cfg)?); 3117 } 3118 self.config.lock().unwrap().vsock = vsock; 3119 3120 Ok(devices) 3121 } 3122 3123 fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3124 let mut devices = Vec::new(); 3125 3126 let mm = self.memory_manager.clone(); 3127 let mut mm = mm.lock().unwrap(); 3128 for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() { 3129 if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() { 3130 info!("Creating virtio-mem device: id = {}", memory_zone_id); 3131 3132 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id) 3133 .map(|i| i as u16); 3134 3135 let virtio_mem_device = Arc::new(Mutex::new( 3136 virtio_devices::Mem::new( 3137 memory_zone_id.clone(), 3138 virtio_mem_zone.region(), 3139 self.seccomp_action.clone(), 3140 node_id, 3141 virtio_mem_zone.hotplugged_size(), 3142 virtio_mem_zone.hugepages(), 3143 self.exit_evt 3144 .try_clone() 3145 .map_err(DeviceManagerError::EventFd)?, 3146 virtio_mem_zone.blocks_state().clone(), 3147 versioned_state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str()) 3148 .map_err(DeviceManagerError::RestoreGetState)?, 3149 ) 3150 .map_err(DeviceManagerError::CreateVirtioMem)?, 3151 )); 3152 3153 // Update the virtio-mem zone so that it has a handle onto the 3154 // virtio-mem device, which will be used for triggering a resize 3155 // if needed. 3156 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device)); 3157 3158 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device)); 3159 3160 devices.push(MetaVirtioDevice { 3161 virtio_device: Arc::clone(&virtio_mem_device) 3162 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3163 iommu: false, 3164 id: memory_zone_id.clone(), 3165 pci_segment: 0, 3166 dma_handler: None, 3167 }); 3168 3169 // Fill the device tree with a new node. In case of restore, we 3170 // know there is nothing to do, so we can simply override the 3171 // existing entry. 3172 self.device_tree.lock().unwrap().insert( 3173 memory_zone_id.clone(), 3174 device_node!(memory_zone_id, virtio_mem_device), 3175 ); 3176 } 3177 } 3178 3179 Ok(devices) 3180 } 3181 3182 fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3183 let mut devices = Vec::new(); 3184 3185 if let Some(balloon_config) = &self.config.lock().unwrap().balloon { 3186 let id = String::from(BALLOON_DEVICE_NAME); 3187 info!("Creating virtio-balloon device: id = {}", id); 3188 3189 let virtio_balloon_device = Arc::new(Mutex::new( 3190 virtio_devices::Balloon::new( 3191 id.clone(), 3192 balloon_config.size, 3193 balloon_config.deflate_on_oom, 3194 balloon_config.free_page_reporting, 3195 self.seccomp_action.clone(), 3196 self.exit_evt 3197 .try_clone() 3198 .map_err(DeviceManagerError::EventFd)?, 3199 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3200 .map_err(DeviceManagerError::RestoreGetState)?, 3201 ) 3202 .map_err(DeviceManagerError::CreateVirtioBalloon)?, 3203 )); 3204 3205 self.balloon = Some(virtio_balloon_device.clone()); 3206 3207 devices.push(MetaVirtioDevice { 3208 virtio_device: Arc::clone(&virtio_balloon_device) 3209 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3210 iommu: false, 3211 id: id.clone(), 3212 pci_segment: 0, 3213 dma_handler: None, 3214 }); 3215 3216 self.device_tree 3217 .lock() 3218 .unwrap() 3219 .insert(id.clone(), device_node!(id, virtio_balloon_device)); 3220 } 3221 3222 Ok(devices) 3223 } 3224 3225 fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3226 let mut devices = Vec::new(); 3227 3228 if !self.config.lock().unwrap().watchdog { 3229 return Ok(devices); 3230 } 3231 3232 let id = String::from(WATCHDOG_DEVICE_NAME); 3233 info!("Creating virtio-watchdog device: id = {}", id); 3234 3235 let virtio_watchdog_device = Arc::new(Mutex::new( 3236 virtio_devices::Watchdog::new( 3237 id.clone(), 3238 self.reset_evt.try_clone().unwrap(), 3239 self.seccomp_action.clone(), 3240 self.exit_evt 3241 .try_clone() 3242 .map_err(DeviceManagerError::EventFd)?, 3243 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3244 .map_err(DeviceManagerError::RestoreGetState)?, 3245 ) 3246 .map_err(DeviceManagerError::CreateVirtioWatchdog)?, 3247 )); 3248 devices.push(MetaVirtioDevice { 3249 virtio_device: Arc::clone(&virtio_watchdog_device) 3250 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3251 iommu: false, 3252 id: id.clone(), 3253 pci_segment: 0, 3254 dma_handler: None, 3255 }); 3256 3257 self.device_tree 3258 .lock() 3259 .unwrap() 3260 .insert(id.clone(), device_node!(id, virtio_watchdog_device)); 3261 3262 Ok(devices) 3263 } 3264 3265 fn make_vdpa_device( 3266 &mut self, 3267 vdpa_cfg: &mut VdpaConfig, 3268 ) -> DeviceManagerResult<MetaVirtioDevice> { 3269 let id = if let Some(id) = &vdpa_cfg.id { 3270 id.clone() 3271 } else { 3272 let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?; 3273 vdpa_cfg.id = Some(id.clone()); 3274 id 3275 }; 3276 3277 info!("Creating vDPA device: {:?}", vdpa_cfg); 3278 3279 let device_path = vdpa_cfg 3280 .path 3281 .to_str() 3282 .ok_or(DeviceManagerError::CreateVdpaConvertPath)?; 3283 3284 let vdpa_device = Arc::new(Mutex::new( 3285 virtio_devices::Vdpa::new( 3286 id.clone(), 3287 device_path, 3288 self.memory_manager.lock().unwrap().guest_memory(), 3289 vdpa_cfg.num_queues as u16, 3290 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 3291 .map_err(DeviceManagerError::RestoreGetState)?, 3292 ) 3293 .map_err(DeviceManagerError::CreateVdpa)?, 3294 )); 3295 3296 // Create the DMA handler that is required by the vDPA device 3297 let vdpa_mapping = Arc::new(VdpaDmaMapping::new( 3298 Arc::clone(&vdpa_device), 3299 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3300 )); 3301 3302 self.device_tree 3303 .lock() 3304 .unwrap() 3305 .insert(id.clone(), device_node!(id, vdpa_device)); 3306 3307 Ok(MetaVirtioDevice { 3308 virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3309 iommu: vdpa_cfg.iommu, 3310 id, 3311 pci_segment: vdpa_cfg.pci_segment, 3312 dma_handler: Some(vdpa_mapping), 3313 }) 3314 } 3315 3316 fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 3317 let mut devices = Vec::new(); 3318 // Add vdpa if required 3319 let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone(); 3320 if let Some(vdpa_list_cfg) = &mut vdpa_devices { 3321 for vdpa_cfg in vdpa_list_cfg.iter_mut() { 3322 devices.push(self.make_vdpa_device(vdpa_cfg)?); 3323 } 3324 } 3325 self.config.lock().unwrap().vdpa = vdpa_devices; 3326 3327 Ok(devices) 3328 } 3329 3330 fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> { 3331 let start_id = self.device_id_cnt; 3332 loop { 3333 // Generate the temporary name. 3334 let name = format!("{}{}", prefix, self.device_id_cnt); 3335 // Increment the counter. 3336 self.device_id_cnt += Wrapping(1); 3337 // Check if the name is already in use. 3338 if !self.boot_id_list.contains(&name) 3339 && !self.device_tree.lock().unwrap().contains_key(&name) 3340 { 3341 return Ok(name); 3342 } 3343 3344 if self.device_id_cnt == start_id { 3345 // We went through a full loop and there's nothing else we can 3346 // do. 3347 break; 3348 } 3349 } 3350 Err(DeviceManagerError::NoAvailableDeviceName) 3351 } 3352 3353 fn add_passthrough_device( 3354 &mut self, 3355 device_cfg: &mut DeviceConfig, 3356 ) -> DeviceManagerResult<(PciBdf, String)> { 3357 // If the passthrough device has not been created yet, it is created 3358 // here and stored in the DeviceManager structure for future needs. 3359 if self.passthrough_device.is_none() { 3360 self.passthrough_device = Some( 3361 self.address_manager 3362 .vm 3363 .create_passthrough_device() 3364 .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?, 3365 ); 3366 } 3367 3368 self.add_vfio_device(device_cfg) 3369 } 3370 3371 fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> { 3372 let passthrough_device = self 3373 .passthrough_device 3374 .as_ref() 3375 .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?; 3376 3377 let dup = passthrough_device 3378 .try_clone() 3379 .map_err(DeviceManagerError::VfioCreate)?; 3380 3381 Ok(Arc::new( 3382 VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?, 3383 )) 3384 } 3385 3386 fn add_vfio_device( 3387 &mut self, 3388 device_cfg: &mut DeviceConfig, 3389 ) -> DeviceManagerResult<(PciBdf, String)> { 3390 let vfio_name = if let Some(id) = &device_cfg.id { 3391 id.clone() 3392 } else { 3393 let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?; 3394 device_cfg.id = Some(id.clone()); 3395 id 3396 }; 3397 3398 let (pci_segment_id, pci_device_bdf, resources) = 3399 self.pci_resources(&vfio_name, device_cfg.pci_segment)?; 3400 3401 let mut needs_dma_mapping = false; 3402 3403 // Here we create a new VFIO container for two reasons. Either this is 3404 // the first VFIO device, meaning we need a new VFIO container, which 3405 // will be shared with other VFIO devices. Or the new VFIO device is 3406 // attached to a vIOMMU, meaning we must create a dedicated VFIO 3407 // container. In the vIOMMU use case, we can't let all devices under 3408 // the same VFIO container since we couldn't map/unmap memory for each 3409 // device. That's simply because the map/unmap operations happen at the 3410 // VFIO container level. 3411 let vfio_container = if device_cfg.iommu { 3412 let vfio_container = self.create_vfio_container()?; 3413 3414 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3415 Arc::clone(&vfio_container), 3416 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3417 )); 3418 3419 if let Some(iommu) = &self.iommu_device { 3420 iommu 3421 .lock() 3422 .unwrap() 3423 .add_external_mapping(pci_device_bdf.into(), vfio_mapping); 3424 } else { 3425 return Err(DeviceManagerError::MissingVirtualIommu); 3426 } 3427 3428 vfio_container 3429 } else if let Some(vfio_container) = &self.vfio_container { 3430 Arc::clone(vfio_container) 3431 } else { 3432 let vfio_container = self.create_vfio_container()?; 3433 needs_dma_mapping = true; 3434 self.vfio_container = Some(Arc::clone(&vfio_container)); 3435 3436 vfio_container 3437 }; 3438 3439 let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container)) 3440 .map_err(DeviceManagerError::VfioCreate)?; 3441 3442 if needs_dma_mapping { 3443 // Register DMA mapping in IOMMU. 3444 // Do not register virtio-mem regions, as they are handled directly by 3445 // virtio-mem device itself. 3446 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3447 for region in zone.regions() { 3448 vfio_container 3449 .vfio_dma_map( 3450 region.start_addr().raw_value(), 3451 region.len(), 3452 region.as_ptr() as u64, 3453 ) 3454 .map_err(DeviceManagerError::VfioDmaMap)?; 3455 } 3456 } 3457 3458 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3459 Arc::clone(&vfio_container), 3460 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3461 )); 3462 3463 for virtio_mem_device in self.virtio_mem_devices.iter() { 3464 virtio_mem_device 3465 .lock() 3466 .unwrap() 3467 .add_dma_mapping_handler( 3468 VirtioMemMappingSource::Container, 3469 vfio_mapping.clone(), 3470 ) 3471 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3472 } 3473 } 3474 3475 let legacy_interrupt_group = 3476 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3477 Some( 3478 legacy_interrupt_manager 3479 .create_group(LegacyIrqGroupConfig { 3480 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3481 [pci_device_bdf.device() as usize] 3482 as InterruptIndex, 3483 }) 3484 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3485 ) 3486 } else { 3487 None 3488 }; 3489 3490 let memory_manager = self.memory_manager.clone(); 3491 3492 let vfio_pci_device = VfioPciDevice::new( 3493 vfio_name.clone(), 3494 &self.address_manager.vm, 3495 vfio_device, 3496 vfio_container, 3497 self.msi_interrupt_manager.clone(), 3498 legacy_interrupt_group, 3499 device_cfg.iommu, 3500 pci_device_bdf, 3501 Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()), 3502 vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()), 3503 ) 3504 .map_err(DeviceManagerError::VfioPciCreate)?; 3505 3506 let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device)); 3507 3508 let new_resources = self.add_pci_device( 3509 vfio_pci_device.clone(), 3510 vfio_pci_device.clone(), 3511 pci_segment_id, 3512 pci_device_bdf, 3513 resources, 3514 )?; 3515 3516 vfio_pci_device 3517 .lock() 3518 .unwrap() 3519 .map_mmio_regions() 3520 .map_err(DeviceManagerError::VfioMapRegion)?; 3521 3522 let mut node = device_node!(vfio_name, vfio_pci_device); 3523 3524 // Update the device tree with correct resource information. 3525 node.resources = new_resources; 3526 node.pci_bdf = Some(pci_device_bdf); 3527 node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device)); 3528 3529 self.device_tree 3530 .lock() 3531 .unwrap() 3532 .insert(vfio_name.clone(), node); 3533 3534 Ok((pci_device_bdf, vfio_name)) 3535 } 3536 3537 fn add_pci_device( 3538 &mut self, 3539 bus_device: Arc<Mutex<dyn BusDevice>>, 3540 pci_device: Arc<Mutex<dyn PciDevice>>, 3541 segment_id: u16, 3542 bdf: PciBdf, 3543 resources: Option<Vec<Resource>>, 3544 ) -> DeviceManagerResult<Vec<Resource>> { 3545 let bars = pci_device 3546 .lock() 3547 .unwrap() 3548 .allocate_bars( 3549 &self.address_manager.allocator, 3550 &mut self.pci_segments[segment_id as usize] 3551 .mem32_allocator 3552 .lock() 3553 .unwrap(), 3554 &mut self.pci_segments[segment_id as usize] 3555 .mem64_allocator 3556 .lock() 3557 .unwrap(), 3558 resources, 3559 ) 3560 .map_err(DeviceManagerError::AllocateBars)?; 3561 3562 let mut pci_bus = self.pci_segments[segment_id as usize] 3563 .pci_bus 3564 .lock() 3565 .unwrap(); 3566 3567 pci_bus 3568 .add_device(bdf.device() as u32, pci_device) 3569 .map_err(DeviceManagerError::AddPciDevice)?; 3570 3571 self.bus_devices.push(Arc::clone(&bus_device)); 3572 3573 pci_bus 3574 .register_mapping( 3575 bus_device, 3576 #[cfg(target_arch = "x86_64")] 3577 self.address_manager.io_bus.as_ref(), 3578 self.address_manager.mmio_bus.as_ref(), 3579 bars.clone(), 3580 ) 3581 .map_err(DeviceManagerError::AddPciDevice)?; 3582 3583 let mut new_resources = Vec::new(); 3584 for bar in bars { 3585 new_resources.push(Resource::PciBar { 3586 index: bar.idx(), 3587 base: bar.addr(), 3588 size: bar.size(), 3589 type_: bar.region_type().into(), 3590 prefetchable: bar.prefetchable().into(), 3591 }); 3592 } 3593 3594 Ok(new_resources) 3595 } 3596 3597 fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3598 let mut iommu_attached_device_ids = Vec::new(); 3599 let mut devices = self.config.lock().unwrap().devices.clone(); 3600 3601 if let Some(device_list_cfg) = &mut devices { 3602 for device_cfg in device_list_cfg.iter_mut() { 3603 let (device_id, _) = self.add_passthrough_device(device_cfg)?; 3604 if device_cfg.iommu && self.iommu_device.is_some() { 3605 iommu_attached_device_ids.push(device_id); 3606 } 3607 } 3608 } 3609 3610 // Update the list of devices 3611 self.config.lock().unwrap().devices = devices; 3612 3613 Ok(iommu_attached_device_ids) 3614 } 3615 3616 fn add_vfio_user_device( 3617 &mut self, 3618 device_cfg: &mut UserDeviceConfig, 3619 ) -> DeviceManagerResult<(PciBdf, String)> { 3620 let vfio_user_name = if let Some(id) = &device_cfg.id { 3621 id.clone() 3622 } else { 3623 let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?; 3624 device_cfg.id = Some(id.clone()); 3625 id 3626 }; 3627 3628 let (pci_segment_id, pci_device_bdf, resources) = 3629 self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?; 3630 3631 let legacy_interrupt_group = 3632 if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager { 3633 Some( 3634 legacy_interrupt_manager 3635 .create_group(LegacyIrqGroupConfig { 3636 irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots 3637 [pci_device_bdf.device() as usize] 3638 as InterruptIndex, 3639 }) 3640 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3641 ) 3642 } else { 3643 None 3644 }; 3645 3646 let client = Arc::new(Mutex::new( 3647 vfio_user::Client::new(&device_cfg.socket) 3648 .map_err(DeviceManagerError::VfioUserCreateClient)?, 3649 )); 3650 3651 let memory_manager = self.memory_manager.clone(); 3652 3653 let mut vfio_user_pci_device = VfioUserPciDevice::new( 3654 vfio_user_name.clone(), 3655 &self.address_manager.vm, 3656 client.clone(), 3657 self.msi_interrupt_manager.clone(), 3658 legacy_interrupt_group, 3659 pci_device_bdf, 3660 Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()), 3661 vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()), 3662 ) 3663 .map_err(DeviceManagerError::VfioUserCreate)?; 3664 3665 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3666 let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory))); 3667 for virtio_mem_device in self.virtio_mem_devices.iter() { 3668 virtio_mem_device 3669 .lock() 3670 .unwrap() 3671 .add_dma_mapping_handler( 3672 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3673 vfio_user_mapping.clone(), 3674 ) 3675 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3676 } 3677 3678 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3679 for region in zone.regions() { 3680 vfio_user_pci_device 3681 .dma_map(region) 3682 .map_err(DeviceManagerError::VfioUserDmaMap)?; 3683 } 3684 } 3685 3686 let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device)); 3687 3688 let new_resources = self.add_pci_device( 3689 vfio_user_pci_device.clone(), 3690 vfio_user_pci_device.clone(), 3691 pci_segment_id, 3692 pci_device_bdf, 3693 resources, 3694 )?; 3695 3696 // Note it is required to call 'add_pci_device()' in advance to have the list of 3697 // mmio regions provisioned correctly 3698 vfio_user_pci_device 3699 .lock() 3700 .unwrap() 3701 .map_mmio_regions() 3702 .map_err(DeviceManagerError::VfioUserMapRegion)?; 3703 3704 let mut node = device_node!(vfio_user_name, vfio_user_pci_device); 3705 3706 // Update the device tree with correct resource information. 3707 node.resources = new_resources; 3708 node.pci_bdf = Some(pci_device_bdf); 3709 node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device)); 3710 3711 self.device_tree 3712 .lock() 3713 .unwrap() 3714 .insert(vfio_user_name.clone(), node); 3715 3716 Ok((pci_device_bdf, vfio_user_name)) 3717 } 3718 3719 fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> { 3720 let mut user_devices = self.config.lock().unwrap().user_devices.clone(); 3721 3722 if let Some(device_list_cfg) = &mut user_devices { 3723 for device_cfg in device_list_cfg.iter_mut() { 3724 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?; 3725 } 3726 } 3727 3728 // Update the list of devices 3729 self.config.lock().unwrap().user_devices = user_devices; 3730 3731 Ok(vec![]) 3732 } 3733 3734 fn add_virtio_pci_device( 3735 &mut self, 3736 virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 3737 iommu_mapping: &Option<Arc<IommuMapping>>, 3738 virtio_device_id: String, 3739 pci_segment_id: u16, 3740 dma_handler: Option<Arc<dyn ExternalDmaMapping>>, 3741 ) -> DeviceManagerResult<PciBdf> { 3742 let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}"); 3743 3744 // Add the new virtio-pci node to the device tree. 3745 let mut node = device_node!(id); 3746 node.children = vec![virtio_device_id.clone()]; 3747 3748 let (pci_segment_id, pci_device_bdf, resources) = 3749 self.pci_resources(&id, pci_segment_id)?; 3750 3751 // Update the existing virtio node by setting the parent. 3752 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) { 3753 node.parent = Some(id.clone()); 3754 } else { 3755 return Err(DeviceManagerError::MissingNode); 3756 } 3757 3758 // Allows support for one MSI-X vector per queue. It also adds 1 3759 // as we need to take into account the dedicated vector to notify 3760 // about a virtio config change. 3761 let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16; 3762 3763 // Create the AccessPlatform trait from the implementation IommuMapping. 3764 // This will provide address translation for any virtio device sitting 3765 // behind a vIOMMU. 3766 let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping 3767 { 3768 Some(Arc::new(AccessPlatformMapping::new( 3769 pci_device_bdf.into(), 3770 mapping.clone(), 3771 ))) 3772 } else { 3773 None 3774 }; 3775 3776 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3777 3778 // Map DMA ranges if a DMA handler is available and if the device is 3779 // not attached to a virtual IOMMU. 3780 if let Some(dma_handler) = &dma_handler { 3781 if iommu_mapping.is_some() { 3782 if let Some(iommu) = &self.iommu_device { 3783 iommu 3784 .lock() 3785 .unwrap() 3786 .add_external_mapping(pci_device_bdf.into(), dma_handler.clone()); 3787 } else { 3788 return Err(DeviceManagerError::MissingVirtualIommu); 3789 } 3790 } else { 3791 // Let every virtio-mem device handle the DMA map/unmap through the 3792 // DMA handler provided. 3793 for virtio_mem_device in self.virtio_mem_devices.iter() { 3794 virtio_mem_device 3795 .lock() 3796 .unwrap() 3797 .add_dma_mapping_handler( 3798 VirtioMemMappingSource::Device(pci_device_bdf.into()), 3799 dma_handler.clone(), 3800 ) 3801 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 3802 } 3803 3804 // Do not register virtio-mem regions, as they are handled directly by 3805 // virtio-mem devices. 3806 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3807 for region in zone.regions() { 3808 let gpa = region.start_addr().0; 3809 let size = region.len(); 3810 dma_handler 3811 .map(gpa, gpa, size) 3812 .map_err(DeviceManagerError::VirtioDmaMap)?; 3813 } 3814 } 3815 } 3816 } 3817 3818 let device_type = virtio_device.lock().unwrap().device_type(); 3819 let virtio_pci_device = Arc::new(Mutex::new( 3820 VirtioPciDevice::new( 3821 id.clone(), 3822 memory, 3823 virtio_device, 3824 msix_num, 3825 access_platform, 3826 &self.msi_interrupt_manager, 3827 pci_device_bdf.into(), 3828 self.activate_evt 3829 .try_clone() 3830 .map_err(DeviceManagerError::EventFd)?, 3831 // All device types *except* virtio block devices should be allocated a 64-bit bar 3832 // The block devices should be given a 32-bit BAR so that they are easily accessible 3833 // to firmware without requiring excessive identity mapping. 3834 // The exception being if not on the default PCI segment. 3835 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32, 3836 dma_handler, 3837 self.pending_activations.clone(), 3838 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()), 3839 ) 3840 .map_err(DeviceManagerError::VirtioDevice)?, 3841 )); 3842 3843 let new_resources = self.add_pci_device( 3844 virtio_pci_device.clone(), 3845 virtio_pci_device.clone(), 3846 pci_segment_id, 3847 pci_device_bdf, 3848 resources, 3849 )?; 3850 3851 let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr(); 3852 for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) { 3853 let io_addr = IoEventAddress::Mmio(addr); 3854 self.address_manager 3855 .vm 3856 .register_ioevent(event, &io_addr, None) 3857 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?; 3858 } 3859 3860 // Update the device tree with correct resource information. 3861 node.resources = new_resources; 3862 node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>); 3863 node.pci_bdf = Some(pci_device_bdf); 3864 node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device)); 3865 self.device_tree.lock().unwrap().insert(id, node); 3866 3867 Ok(pci_device_bdf) 3868 } 3869 3870 fn add_pvpanic_device( 3871 &mut self, 3872 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> { 3873 let id = String::from(PVPANIC_DEVICE_NAME); 3874 let pci_segment_id = 0x0_u16; 3875 3876 info!("Creating pvpanic device {}", id); 3877 3878 let (pci_segment_id, pci_device_bdf, resources) = 3879 self.pci_resources(&id, pci_segment_id)?; 3880 3881 let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str()); 3882 3883 let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot) 3884 .map_err(DeviceManagerError::PvPanicCreate)?; 3885 3886 let pvpanic_device = Arc::new(Mutex::new(pvpanic_device)); 3887 3888 let new_resources = self.add_pci_device( 3889 pvpanic_device.clone(), 3890 pvpanic_device.clone(), 3891 pci_segment_id, 3892 pci_device_bdf, 3893 resources, 3894 )?; 3895 3896 let mut node = device_node!(id, pvpanic_device); 3897 3898 node.resources = new_resources; 3899 node.pci_bdf = Some(pci_device_bdf); 3900 node.pci_device_handle = None; 3901 3902 self.device_tree.lock().unwrap().insert(id, node); 3903 3904 Ok(Some(pvpanic_device)) 3905 } 3906 3907 fn pci_resources( 3908 &self, 3909 id: &str, 3910 pci_segment_id: u16, 3911 ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> { 3912 // Look for the id in the device tree. If it can be found, that means 3913 // the device is being restored, otherwise it's created from scratch. 3914 Ok( 3915 if let Some(node) = self.device_tree.lock().unwrap().get(id) { 3916 info!("Restoring virtio-pci {} resources", id); 3917 let pci_device_bdf: PciBdf = node 3918 .pci_bdf 3919 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3920 let pci_segment_id = pci_device_bdf.segment(); 3921 3922 self.pci_segments[pci_segment_id as usize] 3923 .pci_bus 3924 .lock() 3925 .unwrap() 3926 .get_device_id(pci_device_bdf.device() as usize) 3927 .map_err(DeviceManagerError::GetPciDeviceId)?; 3928 3929 (pci_segment_id, pci_device_bdf, Some(node.resources.clone())) 3930 } else { 3931 let pci_device_bdf = 3932 self.pci_segments[pci_segment_id as usize].next_device_bdf()?; 3933 3934 (pci_segment_id, pci_device_bdf, None) 3935 }, 3936 ) 3937 } 3938 3939 #[cfg(target_arch = "x86_64")] 3940 pub fn io_bus(&self) -> &Arc<Bus> { 3941 &self.address_manager.io_bus 3942 } 3943 3944 pub fn mmio_bus(&self) -> &Arc<Bus> { 3945 &self.address_manager.mmio_bus 3946 } 3947 3948 pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> { 3949 &self.address_manager.allocator 3950 } 3951 3952 pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> { 3953 self.interrupt_controller 3954 .as_ref() 3955 .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>) 3956 } 3957 3958 pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> { 3959 &self.pci_segments 3960 } 3961 3962 pub fn console(&self) -> &Arc<Console> { 3963 &self.console 3964 } 3965 3966 #[cfg(target_arch = "aarch64")] 3967 pub fn cmdline_additions(&self) -> &[String] { 3968 self.cmdline_additions.as_slice() 3969 } 3970 3971 pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> { 3972 for handle in self.virtio_devices.iter() { 3973 handle 3974 .virtio_device 3975 .lock() 3976 .unwrap() 3977 .add_memory_region(new_region) 3978 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?; 3979 3980 if let Some(dma_handler) = &handle.dma_handler { 3981 if !handle.iommu { 3982 let gpa = new_region.start_addr().0; 3983 let size = new_region.len(); 3984 dma_handler 3985 .map(gpa, gpa, size) 3986 .map_err(DeviceManagerError::VirtioDmaMap)?; 3987 } 3988 } 3989 } 3990 3991 // Take care of updating the memory for VFIO PCI devices. 3992 if let Some(vfio_container) = &self.vfio_container { 3993 vfio_container 3994 .vfio_dma_map( 3995 new_region.start_addr().raw_value(), 3996 new_region.len(), 3997 new_region.as_ptr() as u64, 3998 ) 3999 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?; 4000 } 4001 4002 // Take care of updating the memory for vfio-user devices. 4003 { 4004 let device_tree = self.device_tree.lock().unwrap(); 4005 for pci_device_node in device_tree.pci_devices() { 4006 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node 4007 .pci_device_handle 4008 .as_ref() 4009 .ok_or(DeviceManagerError::MissingPciDevice)? 4010 { 4011 vfio_user_pci_device 4012 .lock() 4013 .unwrap() 4014 .dma_map(new_region) 4015 .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?; 4016 } 4017 } 4018 } 4019 4020 Ok(()) 4021 } 4022 4023 pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> { 4024 for mut activator in self.pending_activations.lock().unwrap().drain(..) { 4025 activator 4026 .activate() 4027 .map_err(DeviceManagerError::VirtioActivate)?; 4028 } 4029 Ok(()) 4030 } 4031 4032 pub fn notify_hotplug( 4033 &self, 4034 _notification_type: AcpiNotificationFlags, 4035 ) -> DeviceManagerResult<()> { 4036 return self 4037 .ged_notification_device 4038 .as_ref() 4039 .unwrap() 4040 .lock() 4041 .unwrap() 4042 .notify(_notification_type) 4043 .map_err(DeviceManagerError::HotPlugNotification); 4044 } 4045 4046 pub fn add_device( 4047 &mut self, 4048 device_cfg: &mut DeviceConfig, 4049 ) -> DeviceManagerResult<PciDeviceInfo> { 4050 self.validate_identifier(&device_cfg.id)?; 4051 4052 if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) { 4053 return Err(DeviceManagerError::InvalidIommuHotplug); 4054 } 4055 4056 let (bdf, device_name) = self.add_passthrough_device(device_cfg)?; 4057 4058 // Update the PCIU bitmap 4059 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 4060 4061 Ok(PciDeviceInfo { 4062 id: device_name, 4063 bdf, 4064 }) 4065 } 4066 4067 pub fn add_user_device( 4068 &mut self, 4069 device_cfg: &mut UserDeviceConfig, 4070 ) -> DeviceManagerResult<PciDeviceInfo> { 4071 self.validate_identifier(&device_cfg.id)?; 4072 4073 let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?; 4074 4075 // Update the PCIU bitmap 4076 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 4077 4078 Ok(PciDeviceInfo { 4079 id: device_name, 4080 bdf, 4081 }) 4082 } 4083 4084 pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> { 4085 // The node can be directly a PCI node in case the 'id' refers to a 4086 // VFIO device or a virtio-pci one. 4087 // In case the 'id' refers to a virtio device, we must find the PCI 4088 // node by looking at the parent. 4089 let device_tree = self.device_tree.lock().unwrap(); 4090 let node = device_tree 4091 .get(&id) 4092 .ok_or(DeviceManagerError::UnknownDeviceId(id))?; 4093 4094 let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() { 4095 node 4096 } else { 4097 let parent = node 4098 .parent 4099 .as_ref() 4100 .ok_or(DeviceManagerError::MissingNode)?; 4101 device_tree 4102 .get(parent) 4103 .ok_or(DeviceManagerError::MissingNode)? 4104 }; 4105 4106 let pci_device_bdf: PciBdf = pci_device_node 4107 .pci_bdf 4108 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 4109 let pci_segment_id = pci_device_bdf.segment(); 4110 4111 let pci_device_handle = pci_device_node 4112 .pci_device_handle 4113 .as_ref() 4114 .ok_or(DeviceManagerError::MissingPciDevice)?; 4115 #[allow(irrefutable_let_patterns)] 4116 if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle { 4117 let device_type = VirtioDeviceType::from( 4118 virtio_pci_device 4119 .lock() 4120 .unwrap() 4121 .virtio_device() 4122 .lock() 4123 .unwrap() 4124 .device_type(), 4125 ); 4126 match device_type { 4127 VirtioDeviceType::Net 4128 | VirtioDeviceType::Block 4129 | VirtioDeviceType::Pmem 4130 | VirtioDeviceType::Fs 4131 | VirtioDeviceType::Vsock => {} 4132 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)), 4133 } 4134 } 4135 4136 // Update the PCID bitmap 4137 self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device(); 4138 4139 Ok(()) 4140 } 4141 4142 pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> { 4143 info!( 4144 "Ejecting device_id = {} on segment_id={}", 4145 device_id, pci_segment_id 4146 ); 4147 4148 // Convert the device ID into the corresponding b/d/f. 4149 let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0); 4150 4151 // Give the PCI device ID back to the PCI bus. 4152 self.pci_segments[pci_segment_id as usize] 4153 .pci_bus 4154 .lock() 4155 .unwrap() 4156 .put_device_id(device_id as usize) 4157 .map_err(DeviceManagerError::PutPciDeviceId)?; 4158 4159 // Remove the device from the device tree along with its children. 4160 let mut device_tree = self.device_tree.lock().unwrap(); 4161 let pci_device_node = device_tree 4162 .remove_node_by_pci_bdf(pci_device_bdf) 4163 .ok_or(DeviceManagerError::MissingPciDevice)?; 4164 4165 // For VFIO and vfio-user the PCI device id is the id. 4166 // For virtio we overwrite it later as we want the id of the 4167 // underlying device. 4168 let mut id = pci_device_node.id; 4169 let pci_device_handle = pci_device_node 4170 .pci_device_handle 4171 .ok_or(DeviceManagerError::MissingPciDevice)?; 4172 if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) { 4173 // The virtio-pci device has a single child 4174 if !pci_device_node.children.is_empty() { 4175 assert_eq!(pci_device_node.children.len(), 1); 4176 let child_id = &pci_device_node.children[0]; 4177 id = child_id.clone(); 4178 } 4179 } 4180 for child in pci_device_node.children.iter() { 4181 device_tree.remove(child); 4182 } 4183 4184 let mut iommu_attached = false; 4185 if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices { 4186 if iommu_attached_devices.contains(&pci_device_bdf) { 4187 iommu_attached = true; 4188 } 4189 } 4190 4191 let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle { 4192 // No need to remove any virtio-mem mapping here as the container outlives all devices 4193 PciDeviceHandle::Vfio(vfio_pci_device) => ( 4194 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4195 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>, 4196 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 4197 false, 4198 ), 4199 PciDeviceHandle::Virtio(virtio_pci_device) => { 4200 let dev = virtio_pci_device.lock().unwrap(); 4201 let bar_addr = dev.config_bar_addr(); 4202 for (event, addr) in dev.ioeventfds(bar_addr) { 4203 let io_addr = IoEventAddress::Mmio(addr); 4204 self.address_manager 4205 .vm 4206 .unregister_ioevent(event, &io_addr) 4207 .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; 4208 } 4209 4210 if let Some(dma_handler) = dev.dma_handler() { 4211 if !iommu_attached { 4212 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4213 for region in zone.regions() { 4214 let iova = region.start_addr().0; 4215 let size = region.len(); 4216 dma_handler 4217 .unmap(iova, size) 4218 .map_err(DeviceManagerError::VirtioDmaUnmap)?; 4219 } 4220 } 4221 } 4222 } 4223 4224 ( 4225 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>, 4226 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>, 4227 Some(dev.virtio_device()), 4228 dev.dma_handler().is_some() && !iommu_attached, 4229 ) 4230 } 4231 PciDeviceHandle::VfioUser(vfio_user_pci_device) => { 4232 let mut dev = vfio_user_pci_device.lock().unwrap(); 4233 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 4234 for region in zone.regions() { 4235 dev.dma_unmap(region) 4236 .map_err(DeviceManagerError::VfioUserDmaUnmap)?; 4237 } 4238 } 4239 4240 ( 4241 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>, 4242 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>, 4243 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>, 4244 true, 4245 ) 4246 } 4247 }; 4248 4249 if remove_dma_handler { 4250 for virtio_mem_device in self.virtio_mem_devices.iter() { 4251 virtio_mem_device 4252 .lock() 4253 .unwrap() 4254 .remove_dma_mapping_handler(VirtioMemMappingSource::Device( 4255 pci_device_bdf.into(), 4256 )) 4257 .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?; 4258 } 4259 } 4260 4261 // Free the allocated BARs 4262 pci_device 4263 .lock() 4264 .unwrap() 4265 .free_bars( 4266 &mut self.address_manager.allocator.lock().unwrap(), 4267 &mut self.pci_segments[pci_segment_id as usize] 4268 .mem32_allocator 4269 .lock() 4270 .unwrap(), 4271 &mut self.pci_segments[pci_segment_id as usize] 4272 .mem64_allocator 4273 .lock() 4274 .unwrap(), 4275 ) 4276 .map_err(DeviceManagerError::FreePciBars)?; 4277 4278 // Remove the device from the PCI bus 4279 self.pci_segments[pci_segment_id as usize] 4280 .pci_bus 4281 .lock() 4282 .unwrap() 4283 .remove_by_device(&pci_device) 4284 .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?; 4285 4286 #[cfg(target_arch = "x86_64")] 4287 // Remove the device from the IO bus 4288 self.io_bus() 4289 .remove_by_device(&bus_device) 4290 .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?; 4291 4292 // Remove the device from the MMIO bus 4293 self.mmio_bus() 4294 .remove_by_device(&bus_device) 4295 .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?; 4296 4297 // Remove the device from the list of BusDevice held by the 4298 // DeviceManager. 4299 self.bus_devices 4300 .retain(|dev| !Arc::ptr_eq(dev, &bus_device)); 4301 4302 // Shutdown and remove the underlying virtio-device if present 4303 if let Some(virtio_device) = virtio_device { 4304 for mapping in virtio_device.lock().unwrap().userspace_mappings() { 4305 self.memory_manager 4306 .lock() 4307 .unwrap() 4308 .remove_userspace_mapping( 4309 mapping.addr.raw_value(), 4310 mapping.len, 4311 mapping.host_addr, 4312 mapping.mergeable, 4313 mapping.mem_slot, 4314 ) 4315 .map_err(DeviceManagerError::MemoryManager)?; 4316 } 4317 4318 virtio_device.lock().unwrap().shutdown(); 4319 4320 self.virtio_devices 4321 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device)); 4322 } 4323 4324 event!( 4325 "vm", 4326 "device-removed", 4327 "id", 4328 &id, 4329 "bdf", 4330 pci_device_bdf.to_string() 4331 ); 4332 4333 // At this point, the device has been removed from all the list and 4334 // buses where it was stored. At the end of this function, after 4335 // any_device, bus_device and pci_device are released, the actual 4336 // device will be dropped. 4337 Ok(()) 4338 } 4339 4340 fn hotplug_virtio_pci_device( 4341 &mut self, 4342 handle: MetaVirtioDevice, 4343 ) -> DeviceManagerResult<PciDeviceInfo> { 4344 // Add the virtio device to the device manager list. This is important 4345 // as the list is used to notify virtio devices about memory updates 4346 // for instance. 4347 self.virtio_devices.push(handle.clone()); 4348 4349 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 4350 self.iommu_mapping.clone() 4351 } else { 4352 None 4353 }; 4354 4355 let bdf = self.add_virtio_pci_device( 4356 handle.virtio_device, 4357 &mapping, 4358 handle.id.clone(), 4359 handle.pci_segment, 4360 handle.dma_handler, 4361 )?; 4362 4363 // Update the PCIU bitmap 4364 self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device(); 4365 4366 Ok(PciDeviceInfo { id: handle.id, bdf }) 4367 } 4368 4369 fn is_iommu_segment(&self, pci_segment_id: u16) -> bool { 4370 self.config 4371 .lock() 4372 .as_ref() 4373 .unwrap() 4374 .platform 4375 .as_ref() 4376 .map(|pc| { 4377 pc.iommu_segments 4378 .as_ref() 4379 .map(|v| v.contains(&pci_segment_id)) 4380 .unwrap_or_default() 4381 }) 4382 .unwrap_or_default() 4383 } 4384 4385 pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> { 4386 self.validate_identifier(&disk_cfg.id)?; 4387 4388 if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) { 4389 return Err(DeviceManagerError::InvalidIommuHotplug); 4390 } 4391 4392 let device = self.make_virtio_block_device(disk_cfg)?; 4393 self.hotplug_virtio_pci_device(device) 4394 } 4395 4396 pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> { 4397 self.validate_identifier(&fs_cfg.id)?; 4398 4399 let device = self.make_virtio_fs_device(fs_cfg)?; 4400 self.hotplug_virtio_pci_device(device) 4401 } 4402 4403 pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> { 4404 self.validate_identifier(&pmem_cfg.id)?; 4405 4406 if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) { 4407 return Err(DeviceManagerError::InvalidIommuHotplug); 4408 } 4409 4410 let device = self.make_virtio_pmem_device(pmem_cfg)?; 4411 self.hotplug_virtio_pci_device(device) 4412 } 4413 4414 pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> { 4415 self.validate_identifier(&net_cfg.id)?; 4416 4417 if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) { 4418 return Err(DeviceManagerError::InvalidIommuHotplug); 4419 } 4420 4421 let device = self.make_virtio_net_device(net_cfg)?; 4422 self.hotplug_virtio_pci_device(device) 4423 } 4424 4425 pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> { 4426 self.validate_identifier(&vdpa_cfg.id)?; 4427 4428 if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) { 4429 return Err(DeviceManagerError::InvalidIommuHotplug); 4430 } 4431 4432 let device = self.make_vdpa_device(vdpa_cfg)?; 4433 self.hotplug_virtio_pci_device(device) 4434 } 4435 4436 pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> { 4437 self.validate_identifier(&vsock_cfg.id)?; 4438 4439 if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) { 4440 return Err(DeviceManagerError::InvalidIommuHotplug); 4441 } 4442 4443 let device = self.make_virtio_vsock_device(vsock_cfg)?; 4444 self.hotplug_virtio_pci_device(device) 4445 } 4446 4447 pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> { 4448 let mut counters = HashMap::new(); 4449 4450 for handle in &self.virtio_devices { 4451 let virtio_device = handle.virtio_device.lock().unwrap(); 4452 if let Some(device_counters) = virtio_device.counters() { 4453 counters.insert(handle.id.clone(), device_counters.clone()); 4454 } 4455 } 4456 4457 counters 4458 } 4459 4460 pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> { 4461 if let Some(balloon) = &self.balloon { 4462 return balloon 4463 .lock() 4464 .unwrap() 4465 .resize(size) 4466 .map_err(DeviceManagerError::VirtioBalloonResize); 4467 } 4468 4469 warn!("No balloon setup: Can't resize the balloon"); 4470 Err(DeviceManagerError::MissingVirtioBalloon) 4471 } 4472 4473 pub fn balloon_size(&self) -> u64 { 4474 if let Some(balloon) = &self.balloon { 4475 return balloon.lock().unwrap().get_actual(); 4476 } 4477 4478 0 4479 } 4480 4481 pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> { 4482 self.device_tree.clone() 4483 } 4484 4485 #[cfg(target_arch = "x86_64")] 4486 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 4487 self.ged_notification_device 4488 .as_ref() 4489 .unwrap() 4490 .lock() 4491 .unwrap() 4492 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4493 .map_err(DeviceManagerError::PowerButtonNotification) 4494 } 4495 4496 #[cfg(target_arch = "aarch64")] 4497 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 4498 // There are two use cases: 4499 // 1. Users will use direct kernel boot with device tree. 4500 // 2. Users will use ACPI+UEFI boot. 4501 4502 // Trigger a GPIO pin 3 event to satisfy use case 1. 4503 self.gpio_device 4504 .as_ref() 4505 .unwrap() 4506 .lock() 4507 .unwrap() 4508 .trigger_key(3) 4509 .map_err(DeviceManagerError::AArch64PowerButtonNotification)?; 4510 // Trigger a GED power button event to satisfy use case 2. 4511 return self 4512 .ged_notification_device 4513 .as_ref() 4514 .unwrap() 4515 .lock() 4516 .unwrap() 4517 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 4518 .map_err(DeviceManagerError::PowerButtonNotification); 4519 } 4520 4521 pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> { 4522 &self.iommu_attached_devices 4523 } 4524 4525 fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> { 4526 if let Some(id) = id { 4527 if id.starts_with("__") { 4528 return Err(DeviceManagerError::InvalidIdentifier(id.clone())); 4529 } 4530 4531 if self.device_tree.lock().unwrap().contains_key(id) { 4532 return Err(DeviceManagerError::IdentifierNotUnique(id.clone())); 4533 } 4534 } 4535 4536 Ok(()) 4537 } 4538 4539 pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses { 4540 &self.acpi_platform_addresses 4541 } 4542 } 4543 4544 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> { 4545 for (numa_node_id, numa_node) in numa_nodes.iter() { 4546 if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) { 4547 return Some(*numa_node_id); 4548 } 4549 } 4550 4551 None 4552 } 4553 4554 fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 { 4555 for (numa_node_id, numa_node) in numa_nodes.iter() { 4556 if numa_node.pci_segments.contains(&pci_segment_id) { 4557 return *numa_node_id; 4558 } 4559 } 4560 4561 0 4562 } 4563 4564 struct TpmDevice {} 4565 4566 impl Aml for TpmDevice { 4567 fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) { 4568 aml::Device::new( 4569 "TPM2".into(), 4570 vec![ 4571 &aml::Name::new("_HID".into(), &"MSFT0101"), 4572 &aml::Name::new("_STA".into(), &(0xF_usize)), 4573 &aml::Name::new( 4574 "_CRS".into(), 4575 &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new( 4576 true, 4577 layout::TPM_START.0 as u32, 4578 layout::TPM_SIZE as u32, 4579 )]), 4580 ), 4581 ], 4582 ) 4583 .to_aml_bytes(sink) 4584 } 4585 } 4586 4587 impl Aml for DeviceManager { 4588 fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) { 4589 #[cfg(target_arch = "aarch64")] 4590 use arch::aarch64::DeviceInfoForFdt; 4591 4592 let mut pci_scan_methods = Vec::new(); 4593 for i in 0..self.pci_segments.len() { 4594 pci_scan_methods.push(aml::MethodCall::new( 4595 format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(), 4596 vec![], 4597 )); 4598 } 4599 let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new(); 4600 for method in &pci_scan_methods { 4601 pci_scan_inner.push(method) 4602 } 4603 4604 // PCI hotplug controller 4605 aml::Device::new( 4606 "_SB_.PHPR".into(), 4607 vec![ 4608 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")), 4609 &aml::Name::new("_STA".into(), &0x0bu8), 4610 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"), 4611 &aml::Mutex::new("BLCK".into(), 0), 4612 &aml::Name::new( 4613 "_CRS".into(), 4614 &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory( 4615 aml::AddressSpaceCacheable::NotCacheable, 4616 true, 4617 self.acpi_address.0, 4618 self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1, 4619 None, 4620 )]), 4621 ), 4622 // OpRegion and Fields map MMIO range into individual field values 4623 &aml::OpRegion::new( 4624 "PCST".into(), 4625 aml::OpRegionSpace::SystemMemory, 4626 &(self.acpi_address.0 as usize), 4627 &DEVICE_MANAGER_ACPI_SIZE, 4628 ), 4629 &aml::Field::new( 4630 "PCST".into(), 4631 aml::FieldAccessType::DWord, 4632 aml::FieldLockRule::NoLock, 4633 aml::FieldUpdateRule::WriteAsZeroes, 4634 vec![ 4635 aml::FieldEntry::Named(*b"PCIU", 32), 4636 aml::FieldEntry::Named(*b"PCID", 32), 4637 aml::FieldEntry::Named(*b"B0EJ", 32), 4638 aml::FieldEntry::Named(*b"PSEG", 32), 4639 ], 4640 ), 4641 &aml::Method::new( 4642 "PCEJ".into(), 4643 2, 4644 true, 4645 vec![ 4646 // Take lock defined above 4647 &aml::Acquire::new("BLCK".into(), 0xffff), 4648 // Choose the current segment 4649 &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)), 4650 // Write PCI bus number (in first argument) to I/O port via field 4651 &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)), 4652 // Release lock 4653 &aml::Release::new("BLCK".into()), 4654 // Return 0 4655 &aml::Return::new(&aml::ZERO), 4656 ], 4657 ), 4658 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner), 4659 ], 4660 ) 4661 .to_aml_bytes(sink); 4662 4663 for segment in &self.pci_segments { 4664 segment.to_aml_bytes(sink); 4665 } 4666 4667 let mut mbrd_memory = Vec::new(); 4668 4669 for segment in &self.pci_segments { 4670 mbrd_memory.push(aml::Memory32Fixed::new( 4671 true, 4672 segment.mmio_config_address as u32, 4673 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32, 4674 )) 4675 } 4676 4677 let mut mbrd_memory_refs = Vec::new(); 4678 for mbrd_memory_ref in &mbrd_memory { 4679 mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml); 4680 } 4681 4682 aml::Device::new( 4683 "_SB_.MBRD".into(), 4684 vec![ 4685 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")), 4686 &aml::Name::new("_UID".into(), &aml::ZERO), 4687 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)), 4688 ], 4689 ) 4690 .to_aml_bytes(sink); 4691 4692 // Serial device 4693 #[cfg(target_arch = "x86_64")] 4694 let serial_irq = 4; 4695 #[cfg(target_arch = "aarch64")] 4696 let serial_irq = 4697 if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off { 4698 self.get_device_info() 4699 .clone() 4700 .get(&(DeviceType::Serial, DeviceType::Serial.to_string())) 4701 .unwrap() 4702 .irq() 4703 } else { 4704 // If serial is turned off, add a fake device with invalid irq. 4705 31 4706 }; 4707 if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off { 4708 aml::Device::new( 4709 "_SB_.COM1".into(), 4710 vec![ 4711 &aml::Name::new( 4712 "_HID".into(), 4713 #[cfg(target_arch = "x86_64")] 4714 &aml::EISAName::new("PNP0501"), 4715 #[cfg(target_arch = "aarch64")] 4716 &"ARMH0011", 4717 ), 4718 &aml::Name::new("_UID".into(), &aml::ZERO), 4719 &aml::Name::new("_DDN".into(), &"COM1"), 4720 &aml::Name::new( 4721 "_CRS".into(), 4722 &aml::ResourceTemplate::new(vec![ 4723 &aml::Interrupt::new(true, true, false, false, serial_irq), 4724 #[cfg(target_arch = "x86_64")] 4725 &aml::IO::new(0x3f8, 0x3f8, 0, 0x8), 4726 #[cfg(target_arch = "aarch64")] 4727 &aml::Memory32Fixed::new( 4728 true, 4729 arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32, 4730 MMIO_LEN as u32, 4731 ), 4732 ]), 4733 ), 4734 ], 4735 ) 4736 .to_aml_bytes(sink); 4737 } 4738 4739 aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink); 4740 4741 aml::Device::new( 4742 "_SB_.PWRB".into(), 4743 vec![ 4744 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")), 4745 &aml::Name::new("_UID".into(), &aml::ZERO), 4746 ], 4747 ) 4748 .to_aml_bytes(sink); 4749 4750 if self.config.lock().unwrap().tpm.is_some() { 4751 // Add tpm device 4752 TpmDevice {}.to_aml_bytes(sink); 4753 } 4754 4755 self.ged_notification_device 4756 .as_ref() 4757 .unwrap() 4758 .lock() 4759 .unwrap() 4760 .to_aml_bytes(sink) 4761 } 4762 } 4763 4764 impl Pausable for DeviceManager { 4765 fn pause(&mut self) -> result::Result<(), MigratableError> { 4766 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4767 if let Some(migratable) = &device_node.migratable { 4768 migratable.lock().unwrap().pause()?; 4769 } 4770 } 4771 // On AArch64, the pause of device manager needs to trigger 4772 // a "pause" of GIC, which will flush the GIC pending tables 4773 // and ITS tables to guest RAM. 4774 #[cfg(target_arch = "aarch64")] 4775 { 4776 self.get_interrupt_controller() 4777 .unwrap() 4778 .lock() 4779 .unwrap() 4780 .pause()?; 4781 }; 4782 4783 Ok(()) 4784 } 4785 4786 fn resume(&mut self) -> result::Result<(), MigratableError> { 4787 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4788 if let Some(migratable) = &device_node.migratable { 4789 migratable.lock().unwrap().resume()?; 4790 } 4791 } 4792 4793 Ok(()) 4794 } 4795 } 4796 4797 impl Snapshottable for DeviceManager { 4798 fn id(&self) -> String { 4799 DEVICE_MANAGER_SNAPSHOT_ID.to_string() 4800 } 4801 4802 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 4803 let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?); 4804 4805 // We aggregate all devices snapshots. 4806 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4807 if let Some(migratable) = &device_node.migratable { 4808 let mut migratable = migratable.lock().unwrap(); 4809 snapshot.add_snapshot(migratable.id(), migratable.snapshot()?); 4810 } 4811 } 4812 4813 Ok(snapshot) 4814 } 4815 } 4816 4817 impl Transportable for DeviceManager {} 4818 4819 impl Migratable for DeviceManager { 4820 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 4821 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4822 if let Some(migratable) = &device_node.migratable { 4823 migratable.lock().unwrap().start_dirty_log()?; 4824 } 4825 } 4826 Ok(()) 4827 } 4828 4829 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 4830 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4831 if let Some(migratable) = &device_node.migratable { 4832 migratable.lock().unwrap().stop_dirty_log()?; 4833 } 4834 } 4835 Ok(()) 4836 } 4837 4838 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 4839 let mut tables = Vec::new(); 4840 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4841 if let Some(migratable) = &device_node.migratable { 4842 tables.push(migratable.lock().unwrap().dirty_log()?); 4843 } 4844 } 4845 Ok(MemoryRangeTable::new_from_tables(tables)) 4846 } 4847 4848 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 4849 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4850 if let Some(migratable) = &device_node.migratable { 4851 migratable.lock().unwrap().start_migration()?; 4852 } 4853 } 4854 Ok(()) 4855 } 4856 4857 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 4858 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4859 if let Some(migratable) = &device_node.migratable { 4860 migratable.lock().unwrap().complete_migration()?; 4861 } 4862 } 4863 Ok(()) 4864 } 4865 } 4866 4867 const PCIU_FIELD_OFFSET: u64 = 0; 4868 const PCID_FIELD_OFFSET: u64 = 4; 4869 const B0EJ_FIELD_OFFSET: u64 = 8; 4870 const PSEG_FIELD_OFFSET: u64 = 12; 4871 const PCIU_FIELD_SIZE: usize = 4; 4872 const PCID_FIELD_SIZE: usize = 4; 4873 const B0EJ_FIELD_SIZE: usize = 4; 4874 const PSEG_FIELD_SIZE: usize = 4; 4875 4876 impl BusDevice for DeviceManager { 4877 fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { 4878 match offset { 4879 PCIU_FIELD_OFFSET => { 4880 assert!(data.len() == PCIU_FIELD_SIZE); 4881 data.copy_from_slice( 4882 &self.pci_segments[self.selected_segment] 4883 .pci_devices_up 4884 .to_le_bytes(), 4885 ); 4886 // Clear the PCIU bitmap 4887 self.pci_segments[self.selected_segment].pci_devices_up = 0; 4888 } 4889 PCID_FIELD_OFFSET => { 4890 assert!(data.len() == PCID_FIELD_SIZE); 4891 data.copy_from_slice( 4892 &self.pci_segments[self.selected_segment] 4893 .pci_devices_down 4894 .to_le_bytes(), 4895 ); 4896 // Clear the PCID bitmap 4897 self.pci_segments[self.selected_segment].pci_devices_down = 0; 4898 } 4899 B0EJ_FIELD_OFFSET => { 4900 assert!(data.len() == B0EJ_FIELD_SIZE); 4901 // Always return an empty bitmap since the eject is always 4902 // taken care of right away during a write access. 4903 data.fill(0); 4904 } 4905 PSEG_FIELD_OFFSET => { 4906 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4907 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes()); 4908 } 4909 _ => error!( 4910 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4911 base, offset 4912 ), 4913 } 4914 4915 debug!( 4916 "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}", 4917 base, offset, data 4918 ) 4919 } 4920 4921 fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> { 4922 match offset { 4923 B0EJ_FIELD_OFFSET => { 4924 assert!(data.len() == B0EJ_FIELD_SIZE); 4925 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4926 data_array.copy_from_slice(data); 4927 let mut slot_bitmap = u32::from_le_bytes(data_array); 4928 4929 while slot_bitmap > 0 { 4930 let slot_id = slot_bitmap.trailing_zeros(); 4931 if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) { 4932 error!("Failed ejecting device {}: {:?}", slot_id, e); 4933 } 4934 slot_bitmap &= !(1 << slot_id); 4935 } 4936 } 4937 PSEG_FIELD_OFFSET => { 4938 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4939 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4940 data_array.copy_from_slice(data); 4941 let selected_segment = u32::from_le_bytes(data_array) as usize; 4942 if selected_segment >= self.pci_segments.len() { 4943 error!( 4944 "Segment selection out of range: {} >= {}", 4945 selected_segment, 4946 self.pci_segments.len() 4947 ); 4948 return None; 4949 } 4950 self.selected_segment = selected_segment; 4951 } 4952 _ => error!( 4953 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4954 base, offset 4955 ), 4956 } 4957 4958 debug!( 4959 "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}", 4960 base, offset, data 4961 ); 4962 4963 None 4964 } 4965 } 4966 4967 impl Drop for DeviceManager { 4968 fn drop(&mut self) { 4969 for handle in self.virtio_devices.drain(..) { 4970 handle.virtio_device.lock().unwrap().shutdown(); 4971 } 4972 4973 if let Some(termios) = *self.original_termios_opt.lock().unwrap() { 4974 // SAFETY: FFI call 4975 let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) }; 4976 } 4977 } 4978 } 4979