1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use crate::config::{ 13 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 14 VdpaConfig, VhostMode, VmConfig, VsockConfig, 15 }; 16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; 17 use crate::device_tree::{DeviceNode, DeviceTree}; 18 use crate::interrupt::LegacyUserspaceInterruptManager; 19 use crate::interrupt::MsiInterruptManager; 20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 21 use crate::pci_segment::PciSegment; 22 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 23 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 24 use crate::sigwinch_listener::start_sigwinch_listener; 25 use crate::GuestRegionMmap; 26 use crate::PciDeviceInfo; 27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; 28 use acpi_tables::sdt::GenericAddress; 29 use acpi_tables::{aml, Aml}; 30 use anyhow::anyhow; 31 use arch::layout; 32 #[cfg(target_arch = "x86_64")] 33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 34 use arch::NumaNodes; 35 #[cfg(target_arch = "aarch64")] 36 use arch::{DeviceType, MmioDeviceInfo}; 37 use block::{ 38 async_io::DiskFile, block_io_uring_is_supported, detect_image_type, 39 fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_sync::RawFileDiskSync, 40 vhdx, vhdx_sync::VhdxDiskSync, ImageType, 41 }; 42 #[cfg(feature = "io_uring")] 43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk}; 44 #[cfg(target_arch = "aarch64")] 45 use devices::gic; 46 #[cfg(target_arch = "x86_64")] 47 use 
devices::ioapic; 48 #[cfg(target_arch = "aarch64")] 49 use devices::legacy::Pl011; 50 #[cfg(target_arch = "x86_64")] 51 use devices::legacy::Serial; 52 use devices::{ 53 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 54 }; 55 use hypervisor::{HypervisorType, IoEventAddress}; 56 use libc::{ 57 cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, 58 O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, 59 }; 60 use pci::{ 61 DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping, 62 VfioUserPciDevice, VfioUserPciDeviceError, 63 }; 64 use seccompiler::SeccompAction; 65 use serde::{Deserialize, Serialize}; 66 use std::collections::{BTreeSet, HashMap}; 67 use std::fs::{read_link, File, OpenOptions}; 68 use std::io::{self, stdout, Seek, SeekFrom}; 69 use std::mem::zeroed; 70 use std::num::Wrapping; 71 use std::os::unix::fs::OpenOptionsExt; 72 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 73 use std::path::PathBuf; 74 use std::result; 75 use std::sync::{Arc, Mutex}; 76 use std::time::Instant; 77 use tracer::trace_scoped; 78 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 79 use virtio_devices::transport::VirtioTransport; 80 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 81 use virtio_devices::vhost_user::VhostUserConfig; 82 use virtio_devices::{ 83 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 84 }; 85 use virtio_devices::{Endpoint, IommuMapping}; 86 use vm_allocator::{AddressAllocator, SystemAllocator}; 87 use vm_device::dma_mapping::vfio::VfioDmaMapping; 88 use vm_device::dma_mapping::ExternalDmaMapping; 89 use vm_device::interrupt::{ 90 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 91 }; 92 use vm_device::{Bus, BusDevice, Resource}; 93 use vm_memory::guest_memory::FileOffset; 94 use vm_memory::GuestMemoryRegion; 95 use vm_memory::{Address, GuestAddress, 
GuestUsize, MmapRegion};
#[cfg(target_arch = "x86_64")]
use vm_memory::{GuestAddressSpace, GuestMemory};
use vm_migration::{
    protocol::MemoryRangeTable, snapshot_from_id, versioned_state_from_id, Migratable,
    MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, Transportable,
};
use vm_virtio::AccessPlatform;
use vm_virtio::VirtioDeviceType;
use vmm_sys_util::eventfd::EventFd;

#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
const PVPANIC_DEVICE_NAME: &str = "__pvpanic";

// Devices that the user may name and for which we generate
// identifiers if the user doesn't give one
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";

/// Errors associated with device manager
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Cannot create tpm device
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// No socket option support for console device
    NoSocketOptionSupportForConsoleDevice,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier
    InvalidIdentifier(String),

    /// Error activating virtio device
    VirtioActivate(ActivateError),

    /// Failed retrieving device state from snapshot
    RestoreGetState(MigratableError),

    /// Cannot create a PvPanic device
    PvPanicCreate(devices::pvpanic::PvPanicError),
}

/// Convenience alias: all DeviceManager operations report DeviceManagerError.
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// Size of the MMIO window claimed by the DeviceManager itself; the
// DeviceManager is inserted on the MMIO bus over a region of this size
// in DeviceManager::new().
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

// ioctl request codes used by create_pty() below: TIOCSPTLCK (un)locks
// the pty main device, TIOCGTPEER opens the peer (sub) end.
// NOTE(review): values assumed to match the Linux UAPI definitions —
// confirm against ioctl_tty(2) / <linux/tty.h> when touching these.
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
const TIOCGTPEER: libc::c_int = 0x5441;

/// Allocate a new pseudo-terminal (PTY) pair.
///
/// Returns the main (controller) end, the sub (peer) end, and the
/// filesystem path of the sub end as resolved through /proc/self/fd.
pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
    // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
    // This is done to try and use the devpts filesystem that
    // could be available for use in the process's namespace first.
    // Ideally these are all the same file though but different
    // kernels could have things setup differently.
    // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
    // for further details.

    // Open non-blocking so reads/writes on the main end never stall the VMM.
    let custom_flags = libc::O_NONBLOCK;
    let main = match OpenOptions::new()
        .read(true)
        .write(true)
        .custom_flags(custom_flags)
        .open("/dev/pts/ptmx")
    {
        Ok(f) => f,
        _ => OpenOptions::new()
            .read(true)
            .write(true)
            .custom_flags(custom_flags)
            .open("/dev/ptmx")?,
    };
    // Unlock the pty (0 = unlocked) so the peer end can be opened.
    let mut unlock: libc::c_ulong = 0;
    // SAFETY: FFI call into libc, trivially safe
    // NOTE(review): the return value of this ioctl is ignored; a failure
    // here would only surface via the TIOCGTPEER call below.
    unsafe { libc::ioctl(main.as_raw_fd(), TIOCSPTLCK as _, &mut unlock) };

    // SAFETY: FFI call into libc, trivially safe
    let sub_fd = unsafe {
        libc::ioctl(
            main.as_raw_fd(),
            TIOCGTPEER as _,
            libc::O_NOCTTY | libc::O_RDWR,
        )
    };
    if sub_fd == -1 {
        return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
    }

    // Recover the sub end's path from the fd via the /proc symlink.
    let proc_path = PathBuf::from(format!("/proc/self/fd/{sub_fd}"));
    let path = read_link(proc_path)?;

    // SAFETY: sub_fd is checked to be valid before being wrapped in File
    Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
}

/// Console abstraction: wraps the optional virtio-console resizer so
/// callers can check for and propagate terminal size changes.
#[derive(Default)]
pub struct Console {
    console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
}

impl Console {
    // True when a resizer is attached, i.e. console resizing is supported.
    pub fn need_resize(&self) -> bool {
        if let Some(_resizer) = self.console_resizer.as_ref() {
            return true;
        }

        false
    }

    // Forward the current terminal size to the guest console, if any.
    pub fn update_console_size(&self) {
        if let Some(resizer) = self.console_resizer.as_ref() {
            resizer.update_console_size()
        }
    }
}

/// Groups the allocators and buses needed to (re)assign device address
/// ranges; also implements DeviceRelocation for PCI BAR reprogramming.
pub(crate) struct AddressManager {
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    #[cfg(target_arch = "x86_64")]
    pub(crate) io_bus: Arc<Bus>,
    pub(crate) mmio_bus: Arc<Bus>,
    pub(crate) vm: Arc<dyn hypervisor::Vm>,
    // Shared with DeviceManager; BAR moves patch resources in here.
    device_tree: Arc<Mutex<DeviceTree>>,
    // One 64-bit MMIO allocator per PCI segment.
    pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}

impl DeviceRelocation for AddressManager {
    /// Move a PCI BAR from `old_base` to `new_base`:
    /// 1. free the old range and reserve the new one in the matching
    ///    allocator (PIO, 32-bit MMIO hole, or per-segment 64-bit MMIO),
    /// 2. update the corresponding bus mapping,
    /// 3. patch the device_tree resource entry for the device,
    /// 4. for virtio-pci devices, re-register ioeventfds (config BAR) or
    ///    remap the shared-memory user memory region (other BARs).
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                // Update system allocator
                if region_type == PciBarRegionType::Memory32BitRegion {
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_mmio_hole_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            Some(len),
                        )
                        .ok_or_else(|| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                "failed allocating new 32 bits MMIO range",
                            )
                        })?;
                } else {
                    // Find the specific allocator that this BAR was allocated from and use it for new one
                    for allocator in &self.pci_mmio_allocators {
                        let allocator_base = allocator.lock().unwrap().base();
                        let allocator_end = allocator.lock().unwrap().end();

                        if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                            allocator
                                .lock()
                                .unwrap()
                                .free(GuestAddress(old_base), len as GuestUsize);

                            allocator
                                .lock()
                                .unwrap()
                                .allocate(
                                    Some(GuestAddress(new_base)),
                                    len as GuestUsize,
                                    Some(len),
                                )
                                .ok_or_else(|| {
                                    io::Error::new(
                                        io::ErrorKind::Other,
                                        "failed allocating new 64 bits MMIO range",
                                    )
                                })?;

                            break;
                        }
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {id} from device tree"),
                ));
            }
        }

        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The moved BAR is the config BAR: the ioeventfds registered
                // against the old addresses must be re-registered at the new ones.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {e:?}"),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {e:?}"),
                            )
                        })?;
                }
            } else {
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        // The moved BAR backs shared memory: tear down the old
                        // user memory region before mapping it at the new base.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {e:?}"),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {e:?}"),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {e:?}"),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the PCI device itself record the new BAR address.
        pci_dev.move_bar(old_base, new_base)
    }
}

/// Serialized DeviceManager state captured in a snapshot and restored
/// in DeviceManager::new().
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

/// The main (controller) end of a PTY together with the path of its
/// sub (peer) end, as produced by create_pty().
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    // Clones duplicate the underlying file descriptor via try_clone().
    // NOTE(review): unwrap() panics if fd duplication fails (e.g. fd
    // exhaustion) — callers treat that as unrecoverable.
    fn clone(&self) -> Self {
        PtyPair {
            main: self.main.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

/// Handle to a hot-pluggable PCI device, one variant per supported
/// device backend.
#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}

/// A virtio device plus the placement/IOMMU metadata the DeviceManager
/// needs when attaching it to a PCI segment.
#[derive(Clone)]
struct MetaVirtioDevice {
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // Whether the device should sit behind the virtual IOMMU.
    iommu: bool,
    id: String,
    pci_segment: u16,
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}

/// Addresses of ACPI platform registers (PM timer, reset, sleep),
/// collected here so the ACPI tables can reference them.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

/// Owns and tracks every device attached to the VM: buses, interrupt
/// managers, PCI segments, virtio/VFIO devices and their device tree.
pub struct DeviceManager {
    // The underlying hypervisor
    hypervisor_type: HypervisorType,

    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // console PTY
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // serial PTY
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // pty foreground status,
    console_resize_pipe: Option<Arc<File>>,

    // To restore on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    exit_evt: EventFd,
    reset_evt: EventFd,

    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    acpi_address: GuestAddress,

    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    // Unique identifiers of devices present at boot (used to tell boot-time
    // devices apart from hotplugged ones).
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    // Snapshot this manager is being restored from, if any.
    snapshot: Option<Snapshot>,
}

impl DeviceManager {
    /// Builds the `DeviceManager`, carves up the guest device area into
    /// per-PCI-segment MMIO allocators, creates the MSI interrupt manager
    /// and the PCI segments, and registers the manager's own ACPI MMIO
    /// region on the MMIO bus. When `snapshot` is provided, the device tree
    /// and device-id counter are restored from it instead of starting empty.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
        mmio_bus: Arc<Bus>,
        hypervisor_type: HypervisorType,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        cpu_manager: Arc<Mutex<CpuManager>>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
        snapshot: Option<Snapshot>,
        dynamic: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        // On restore, resume the device tree and id counter exactly where the
        // snapshotted VM left them; otherwise start fresh.
        let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
            let state: DeviceManagerState = snapshot.to_state().unwrap();
            (
                Arc::new(Mutex::new(state.device_tree.clone())),
                state.device_id_cnt,
            )
        } else {
            (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
        };

        // Default to a single PCI segment unless the platform config says otherwise.
        let num_pci_segments =
            if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
                platform_config.num_pci_segments
            } else {
                1
            };

        let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
        let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;

        // Start each PCI segment range on a 4GiB boundary
        // (divide-then-multiply rounds each segment's size down to a 4GiB multiple).
        let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
            / ((4 << 30) * num_pci_segments as u64)
            * (4 << 30);

        // One MMIO address allocator per PCI segment, each owning its slice of
        // the device area.
        let mut pci_mmio_allocators = vec![];
        for i in 0..num_pci_segments as u64 {
            let mmio_start = start_of_device_area + i * pci_segment_size;
            let allocator = Arc::new(Mutex::new(
                AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
            ));
            pci_mmio_allocators.push(allocator)
        }

        let address_manager = Arc::new(AddressManager {
            allocator: memory_manager.lock().unwrap().allocator(),
            #[cfg(target_arch = "x86_64")]
            io_bus,
            mmio_bus,
            vm: vm.clone(),
            device_tree: Arc::clone(&device_tree),
            pci_mmio_allocators,
        });

        // First we create the MSI interrupt manager, the legacy one is created
        // later, after the IOAPIC device creation.
        // The reason we create the MSI one first is because the IOAPIC needs it,
        // and then the legacy interrupt manager needs an IOAPIC. So we're
        // handling a linear dependency chain:
        // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
        let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
            Arc::new(MsiInterruptManager::new(
                Arc::clone(&address_manager.allocator),
                vm,
            ));

        // Reserve the MMIO window through which the guest reaches this
        // DeviceManager's ACPI interface (registered on the bus at the end).
        // NOTE(review): this allocates platform MMIO but maps the failure to
        // AllocateIoPort — AllocateMmioAddress looks like the intended
        // variant; confirm before changing the error path.
        let acpi_address = address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        let mut pci_irq_slots = [0; 32];
        PciSegment::reserve_legacy_interrupts_for_pci_devices(
            &address_manager,
            &mut pci_irq_slots,
        )?;

        // Segment 0 is special-cased; the remaining segments get an explicit
        // id and NUMA node.
        let mut pci_segments = vec![PciSegment::new_default_segment(
            &address_manager,
            Arc::clone(&address_manager.pci_mmio_allocators[0]),
            &pci_irq_slots,
        )?];

        for i in 1..num_pci_segments as usize {
            pci_segments.push(PciSegment::new(
                i as u16,
                numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
                &address_manager,
                Arc::clone(&address_manager.pci_mmio_allocators[i]),
                &pci_irq_slots,
            )?);
        }

        // With CPU hotplug (dynamic) enabled, give the CpuManager its own
        // ACPI MMIO window and expose it on the MMIO bus.
        if dynamic {
            let acpi_address = address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
                .ok_or(DeviceManagerError::AllocateMmioAddress)?;

            address_manager
                .mmio_bus
                .insert(
                    cpu_manager.clone(),
                    acpi_address.0,
                    CPU_MANAGER_ACPI_SIZE as u64,
                )
                .map_err(DeviceManagerError::BusError)?;

            cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
        }

        let device_manager = DeviceManager {
            hypervisor_type,
            address_manager: Arc::clone(&address_manager),
            console: Arc::new(Console::default()),
            interrupt_controller: None,
            #[cfg(target_arch = "aarch64")]
            cmdline_additions: Vec::new(),
            ged_notification_device: None,
            config,
            memory_manager,
            cpu_manager,
            virtio_devices: Vec::new(),
            bus_devices: Vec::new(),
            device_id_cnt,
            msi_interrupt_manager,
            legacy_interrupt_manager: None,
            passthrough_device: None,
            vfio_container: None,
            iommu_device: None,
            iommu_mapping: None,
            iommu_attached_devices: None,
            pci_segments,
            device_tree,
            exit_evt,
            reset_evt,
            #[cfg(target_arch = "aarch64")]
            id_to_dev_info: HashMap::new(),
            seccomp_action,
            numa_nodes,
            balloon: None,
            activate_evt: activate_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            acpi_address,
            selected_segment: 0,
            serial_pty: None,
            serial_manager: None,
            console_pty: None,
            console_resize_pipe: None,
            original_termios_opt: Arc::new(Mutex::new(None)),
            virtio_mem_devices: Vec::new(),
            #[cfg(target_arch = "aarch64")]
            gpio_device: None,
            pvpanic_device: None,
            force_iommu,
            io_uring_supported: None,
            boot_id_list,
            timestamp,
            pending_activations: Arc::new(Mutex::new(Vec::default())),
            acpi_platform_addresses: AcpiPlatformAddresses::default(),
            snapshot,
        };

        let device_manager = Arc::new(Mutex::new(device_manager));

        // The manager itself is a BusDevice: the guest talks to it through the
        // ACPI MMIO region reserved above.
        address_manager
            .mmio_bus
            .insert(
                Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
                acpi_address.0,
                DEVICE_MANAGER_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(device_manager)
    }

    /// Returns a clone of the serial PTY pair, if one has been created.
    pub fn serial_pty(&self) -> Option<PtyPair> {
        self.serial_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    /// Returns a clone of the virtio-console PTY pair, if one has been created.
    pub fn console_pty(&self) -> Option<PtyPair> {
        self.console_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    /// Returns the pipe used to forward console resize (SIGWINCH) events, if any.
    pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
        self.console_resize_pipe.as_ref().map(Arc::clone)
    }

    /// Creates every guest device, in dependency order: interrupt controller,
    /// legacy interrupt manager, memory-manager ACPI region, legacy devices,
    /// ACPI (GED/shutdown/PM-timer) devices, console/serial, optional TPM,
    /// all virtio devices, and finally the PCI topology carrying them.
    pub fn create_devices(
        &mut self,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        console_resize_pipe: Option<File>,
        original_termios_opt: Arc<Mutex<Option<termios>>>,
    ) -> DeviceManagerResult<()> {
        trace_scoped!("create_devices");

        let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();

        let interrupt_controller = self.add_interrupt_controller()?;

        self.cpu_manager
            .lock()
            .unwrap()
            .set_interrupt_controller(interrupt_controller.clone());

        // Now we can create the legacy interrupt manager, which needs the freshly
        // formed IOAPIC device.
        let legacy_interrupt_manager: Arc<
            dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
        > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
            &interrupt_controller,
        )));

        {
            // Expose the MemoryManager's ACPI region (hotplug interface) on
            // the MMIO bus when it has one.
            if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
                self.address_manager
                    .mmio_bus
                    .insert(
                        Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
                        acpi_address.0,
                        MEMORY_MANAGER_ACPI_SIZE as u64,
                    )
                    .map_err(DeviceManagerError::BusError)?;
            }
        }

        #[cfg(target_arch = "x86_64")]
        self.add_legacy_devices(
            self.reset_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )?;

        #[cfg(target_arch = "aarch64")]
        self.add_legacy_devices(&legacy_interrupt_manager)?;

        {
            self.ged_notification_device = self.add_acpi_devices(
                &legacy_interrupt_manager,
                self.reset_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )?;
        }

        self.original_termios_opt = original_termios_opt;

        self.console = self.add_console_device(
            &legacy_interrupt_manager,
            &mut virtio_devices,
            serial_pty,
            console_pty,
            console_resize_pipe,
        )?;

        if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
            let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
            self.bus_devices
                .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>)
        }
        self.legacy_interrupt_manager = Some(legacy_interrupt_manager);

        virtio_devices.append(&mut self.make_virtio_devices()?);

        self.add_pci_devices(virtio_devices.clone())?;

        self.virtio_devices = virtio_devices;

        if self.config.clone().lock().unwrap().pvpanic {
            self.pvpanic_device = self.add_pvpanic_device()?;
        }

        Ok(())
    }

    /// Captures the current device tree and device-id counter for snapshotting.
    fn state(&self) -> DeviceManagerState {
        DeviceManagerState {
            device_tree: self.device_tree.lock().unwrap().clone(),
            device_id_cnt: self.device_id_cnt,
        }
    }

    /// Returns the inclusive guest-address range usable for MSI doorbells:
    /// derived from the vGIC configuration on aarch64, the fixed APIC window
    /// on x86_64.
    fn get_msi_iova_space(&mut self) -> (u64, u64) {
        #[cfg(target_arch = "aarch64")]
        {
            let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
            let vgic_config = gic::Gic::create_default_config(vcpus.into());
            (
                vgic_config.msi_addr,
                vgic_config.msi_addr + vgic_config.msi_size - 1,
            )
        }
        #[cfg(target_arch = "x86_64")]
        (0xfee0_0000, 0xfeef_ffff)
    }

    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }

    /// Plugs every device onto the PCI bus(es): creates the optional
    /// virtio-iommu, adds all virtio devices, then VFIO and vfio-user
    /// devices, records which BDFs sit behind the IOMMU, and finally
    /// registers the per-segment PCI config spaces as bus devices.
    #[allow(unused_variables)]
    fn add_pci_devices(
        &mut self,
        virtio_devices: Vec<MetaVirtioDevice>,
    ) -> DeviceManagerResult<()> {
        let iommu_id = String::from(IOMMU_DEVICE_NAME);

        let iommu_device = if self.config.lock().unwrap().iommu {
            let (device, mapping) = virtio_devices::Iommu::new(
                iommu_id.clone(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.get_msi_iova_space(),
                versioned_state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioIommu)?;
            let device = Arc::new(Mutex::new(device));
            self.iommu_device = Some(Arc::clone(&device));
            self.iommu_mapping = Some(mapping);

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(iommu_id.clone(), device_node!(iommu_id, device));

            Some(device)
        } else {
            None
        };

        let mut iommu_attached_devices = Vec::new();
        {
            for handle in virtio_devices {
                // Only devices flagged for IOMMU get the DMA mapping.
                let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
                    self.iommu_mapping.clone()
                } else {
                    None
                };

                let dev_id = self.add_virtio_pci_device(
                    handle.virtio_device,
                    &mapping,
                    handle.id,
                    handle.pci_segment,
                    handle.dma_handler,
                )?;

                if handle.iommu {
                    iommu_attached_devices.push(dev_id);
                }
            }

            let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
            iommu_attached_devices.append(&mut vfio_iommu_device_ids);

            let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
            iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);

            // Add all devices from forced iommu segments
            if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
                if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
                    for segment in iommu_segments {
                        // Every possible device slot (0..32) on the segment is
                        // attached, whether populated or not.
                        for device in 0..32 {
                            let bdf = PciBdf::new(*segment, 0, device, 0);
                            if !iommu_attached_devices.contains(&bdf) {
                                iommu_attached_devices.push(bdf);
                            }
                        }
                    }
                }
            }

            if let Some(iommu_device) = iommu_device {
                // The virtio-iommu device itself is never behind the IOMMU,
                // and always lives on segment 0.
                let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
                self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
            }
        }

        for segment in &self.pci_segments {
            #[cfg(target_arch = "x86_64")]
            if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
                self.bus_devices
                    .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
            }

            self.bus_devices
                .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
        }

        Ok(())
    }

    /// aarch64: creates the vGIC interrupt controller, optionally restoring
    /// its state (and initializing the PMU) from a snapshot, and records it
    /// in the device tree.
    #[cfg(target_arch = "aarch64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
            gic::Gic::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
                self.address_manager.vm.clone(),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        // Restore the vGic if this is in the process of restoration
        let id = String::from(gic::GIC_SNAPSHOT_ID);
        if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
            // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
            if self
                .cpu_manager
                .lock()
                .unwrap()
                .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
                .is_err()
            {
                info!("Failed to initialize PMU");
            }

            let vgic_state = vgic_snapshot
                .to_state()
                .map_err(DeviceManagerError::RestoreGetState)?;
            let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
            interrupt_controller
                .lock()
                .unwrap()
                .restore_vgic(vgic_state, &saved_vcpu_states)
                .unwrap();
        }

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// aarch64: returns the vGIC created by `add_interrupt_controller`, if any.
    #[cfg(target_arch = "aarch64")]
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }

    /// x86_64: creates the IOAPIC (restoring its state from a snapshot when
    /// present), maps it at the fixed IOAPIC MMIO window, and records it in
    /// the device tree.
    #[cfg(target_arch = "x86_64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let id = String::from(IOAPIC_DEVICE_NAME);

        // Create IOAPIC
        let interrupt_controller = Arc::new(Mutex::new(
            ioapic::Ioapic::new(
                id.clone(),
                APIC_START,
                Arc::clone(&self.msi_interrupt_manager),
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        self.address_manager
            .mmio_bus
            .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
            .map_err(DeviceManagerError::BusError)?;

        self.bus_devices
            .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    /// Creates the ACPI platform devices: the shutdown/reset device (on I/O
    /// port 0x600 on x86_64), the GED notification device (returned so
    /// hotplug events can be signalled through it), and the ACPI PM timer
    /// (I/O port 0x608 on x86_64). Also records the register addresses
    /// advertised in the ACPI tables.
    fn add_acpi_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        reset_evt: EventFd,
        exit_evt: EventFd,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
            exit_evt,
            reset_evt,
            vcpus_kill_signalled,
        )));

        self.bus_devices
            .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            let shutdown_pio_address: u16 = 0x600;

            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            // Sleep control/status and reset all share the same port; the
            // device decodes the access.
            self.acpi_platform_addresses.sleep_control_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.sleep_status_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.reset_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
        }

        let ged_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();
        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: ged_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;
        let ged_address = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(
                None,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
                None,
            )
            .ok_or(DeviceManagerError::AllocateMmioAddress)?;
        let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
            interrupt_group,
            ged_irq,
            ged_address,
        )));
        self.address_manager
            .mmio_bus
            .insert(
                ged_device.clone(),
                ged_address.0,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;
        self.bus_devices
            .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);

        let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));

        self.bus_devices
            .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            let pm_timer_pio_address: u16 = 0x608;

            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            self.acpi_platform_addresses.pm_timer_address =
                Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
        }

        Ok(Some(ged_device))
    }

    /// x86_64: creates the legacy platform devices — i8042 (reset), CMOS
    /// (memory size reporting), firmware debug port (0x402), and the 0x80
    /// debug port.
    #[cfg(target_arch = "x86_64")]
    fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        // Add a shutdown device (i8042)
        let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
            reset_evt.try_clone().unwrap(),
            vcpus_kill_signalled.clone(),
        )));

        self.bus_devices
            .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .io_bus
            .insert(i8042, 0x61, 0x4)
            .map_err(DeviceManagerError::BusError)?;
        {
            // Add a CMOS emulated device
            let mem_size = self
                .memory_manager
                .lock()
                .unwrap()
                .guest_memory()
                .memory()
                .last_addr()
                .0
                + 1;
            let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
            let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);

            let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
                mem_below_4g,
                mem_above_4g,
                reset_evt,
                Some(vcpus_kill_signalled),
            )));

            self.bus_devices
                .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(cmos, 0x70, 0x2)
                .map_err(DeviceManagerError::BusError)?;

            let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));

            self.bus_devices
                .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(fwdebug, 0x402, 0x1)
                .map_err(DeviceManagerError::BusError)?;
        }

        // 0x80 debug port
        let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
        self.bus_devices
            .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
        self.address_manager
            .io_bus
            .insert(debug_port, 0x80, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        Ok(())
    }

    /// aarch64: creates the legacy MMIO devices — an RTC and a GPIO
    /// controller — each with a dynamically allocated IRQ, and records
    /// them for FDT generation via `id_to_dev_info`.
    #[cfg(target_arch = "aarch64")]
    fn add_legacy_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
    ) -> DeviceManagerResult<()> {
        // Add a RTC device
        let rtc_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: rtc_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));

        self.bus_devices
            .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(rtc_device, addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Rtc, "rtc".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: rtc_irq,
            },
        );

        // Add a GPIO device
        let id = String::from(GPIO_DEVICE_NAME);
        let gpio_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: gpio_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
            id.clone(),
            interrupt_group,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(gpio_device.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.gpio_device = Some(gpio_device.clone());

        self.id_to_dev_info.insert(
            (DeviceType::Gpio, "gpio".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: gpio_irq,
            },
        );

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, gpio_device));

        Ok(())
    }

    /// x86_64: creates the 16550-style serial device on the conventional
    /// COM1 resources (I/O port 0x3f8, IRQ 4), restoring its state from a
    /// snapshot when present.
    #[cfg(target_arch = "x86_64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
        // Serial is tied to IRQ #4
        let serial_irq = 4;

        let id = String::from(SERIAL_DEVICE_NAME);

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(Serial::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(serial.clone(), 0x3f8, 0x8)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// aarch64: creates the PL011 serial device with a dynamically allocated
    /// IRQ at the legacy serial MMIO window, records it for FDT generation,
    /// and appends the matching `earlycon` kernel command-line hint.
    #[cfg(target_arch = "aarch64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
        let id = String::from(SERIAL_DEVICE_NAME);

        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            self.timestamp,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: serial_irq,
            },
        );

        self.cmdline_additions
            .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    /// Applies `f` to the termios settings of `fd` when it is a TTY (no-op
    /// otherwise), saving the original settings the first time so they can
    /// be restored on shutdown.
    fn modify_mode<F: FnOnce(&mut termios)>(
        &mut self,
        fd: RawFd,
        f: F,
    ) -> vmm_sys_util::errno::Result<()> {
        // SAFETY: safe because we check the return value of isatty.
        if unsafe { isatty(fd) } != 1 {
            return Ok(());
        }

        // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr
        // and we check the return result.
        let mut termios: termios = unsafe { zeroed() };
        // SAFETY: see above
        let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }
        // Keep only the very first settings seen, so a later restore brings
        // the terminal back to its pre-VMM state.
        let mut original_termios_opt = self.original_termios_opt.lock().unwrap();
        if original_termios_opt.is_none() {
            *original_termios_opt = Some(termios);
        }
        f(&mut termios);
        // SAFETY: Safe because the syscall will only read the extent of termios and we check
        // the return result.
        let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }

        Ok(())
    }

    /// Puts the terminal behind `f` into raw mode (if it is a TTY).
    fn set_raw_mode(&mut self, f: &dyn AsRawFd) -> vmm_sys_util::errno::Result<()> {
        // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
        self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
    }

    /// Spawns the SIGWINCH listener thread (sandboxed with its own seccomp
    /// filter) watching `pty_sub`, and keeps the resulting resize pipe for
    /// forwarding terminal resize events to the console.
    fn listen_for_sigwinch_on_tty(&mut self, pty_sub: File) -> std::io::Result<()> {
        let seccomp_filter = get_seccomp_filter(
            &self.seccomp_action,
            Thread::PtyForeground,
            self.hypervisor_type,
        )
        .unwrap();

        self.console_resize_pipe =
            Some(Arc::new(start_sigwinch_listener(seccomp_filter, pty_sub)?));

        Ok(())
    }

    /// Creates the virtio-console device, wiring its endpoint according to
    /// the configured console mode (file, PTY, TTY, null, …). Returns the
    /// resizer only in TTY mode, where SIGWINCH events can be forwarded.
    fn add_virtio_console_device(
        &mut self,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_pty: Option<PtyPair>,
        resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
        let console_config = self.config.lock().unwrap().console.clone();
        let endpoint = match console_config.mode {
            ConsoleOutputMode::File => {
                let file = File::create(console_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
                Endpoint::File(file)
            }
            ConsoleOutputMode::Pty => {
                // Reuse the PTY handed over across a restore/reboot, or
                // create a fresh one and start listening for resizes on it.
                if let Some(pty) = console_pty {
                    self.config.lock().unwrap().console.file = Some(pty.path.clone());
                    let file = pty.main.try_clone().unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(pty)));
                    self.console_resize_pipe = resize_pipe.map(Arc::new);
                    Endpoint::PtyPair(file.try_clone().unwrap(), file)
                } else {
                    let (main, sub, path) =
                        create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?;
                    self.set_raw_mode(&sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().console.file = Some(path.clone());
                    let file = main.try_clone().unwrap();
                    assert!(resize_pipe.is_none());
                    self.listen_for_sigwinch_on_tty(sub).unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
                    Endpoint::PtyPair(file.try_clone().unwrap(), file)
                }
            }
            ConsoleOutputMode::Tty => {
                // Duplicating the file descriptors like this is needed as otherwise
                // they will be closed on a reboot and the numbers reused

                // SAFETY: FFI call to dup. Trivially safe.
                let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
                if stdout == -1 {
                    return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
                }
                // SAFETY: stdout is valid and owned solely by us.
                let stdout = unsafe { File::from_raw_fd(stdout) };

                // Make sure stdout is in raw mode, if it's a terminal.
                let _ = self.set_raw_mode(&stdout);

                // SAFETY: FFI call. Trivially safe.
                if unsafe { libc::isatty(libc::STDOUT_FILENO) } == 1 {
                    self.listen_for_sigwinch_on_tty(stdout.try_clone().unwrap())
                        .unwrap();
                }

                // If an interactive TTY then we can accept input
                // SAFETY: FFI call. Trivially safe.
                if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
                    // SAFETY: FFI call to dup. Trivially safe.
                    let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
                    if stdin == -1 {
                        return vmm_sys_util::errno::errno_result()
                            .map_err(DeviceManagerError::DupFd);
                    }
                    // SAFETY: stdin is valid and owned solely by us.
                    let stdin = unsafe { File::from_raw_fd(stdin) };

                    Endpoint::FilePair(stdout, stdin)
                } else {
                    Endpoint::File(stdout)
                }
            }
            ConsoleOutputMode::Socket => {
                return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
            }
            ConsoleOutputMode::Null => Endpoint::Null,
            ConsoleOutputMode::Off => return Ok(None),
        };
        let id = String::from(CONSOLE_DEVICE_NAME);

        let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
            id.clone(),
            endpoint,
            self.console_resize_pipe
                .as_ref()
                .map(|p| p.try_clone().unwrap()),
            self.force_iommu | console_config.iommu,
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateVirtioConsole)?;
        let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
        virtio_devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_console_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: console_config.iommu,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_console_device));

        // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
        Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
            Some(console_resizer)
        } else {
            None
        })
    }

    /// Creates the serial device (with a writer chosen from the configured
    /// serial mode) plus the virtio-console, and returns the combined
    /// `Console` handle.
    fn add_console_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        console_resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
            ConsoleOutputMode::File => Some(Box::new(
                File::create(serial_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::SerialOutputFileOpen)?,
            )),
            ConsoleOutputMode::Pty => {
                // Reuse the PTY handed over across a restore/reboot, or
                // create a fresh one; the SerialManager does the writing in
                // PTY mode, hence no writer here.
                if let Some(pty) = serial_pty {
                    self.config.lock().unwrap().serial.file = Some(pty.path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(pty)));
                } else {
                    let (main, sub, path) =
                        create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
                    self.set_raw_mode(&sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().serial.file = Some(path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
                }
                None
            }
            ConsoleOutputMode::Tty => {
                let out = stdout();
                let _ = self.set_raw_mode(&out);
                Some(Box::new(out))
            }
            ConsoleOutputMode::Off | ConsoleOutputMode::Null | ConsoleOutputMode::Socket => None,
        };
        if serial_config.mode != ConsoleOutputMode::Off {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            self.serial_manager = match serial_config.mode {
ConsoleOutputMode::Pty | ConsoleOutputMode::Tty | ConsoleOutputMode::Socket => { 2090 let serial_manager = SerialManager::new( 2091 serial, 2092 self.serial_pty.clone(), 2093 serial_config.mode, 2094 serial_config.socket, 2095 ) 2096 .map_err(DeviceManagerError::CreateSerialManager)?; 2097 if let Some(mut serial_manager) = serial_manager { 2098 serial_manager 2099 .start_thread( 2100 self.exit_evt 2101 .try_clone() 2102 .map_err(DeviceManagerError::EventFd)?, 2103 ) 2104 .map_err(DeviceManagerError::SpawnSerialManager)?; 2105 Some(Arc::new(serial_manager)) 2106 } else { 2107 None 2108 } 2109 } 2110 _ => None, 2111 }; 2112 } 2113 2114 let console_resizer = 2115 self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?; 2116 2117 Ok(Arc::new(Console { console_resizer })) 2118 } 2119 2120 fn add_tpm_device( 2121 &mut self, 2122 tpm_path: PathBuf, 2123 ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> { 2124 // Create TPM Device 2125 let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| { 2126 DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e)) 2127 })?; 2128 let tpm = Arc::new(Mutex::new(tpm)); 2129 2130 // Add TPM Device to mmio 2131 self.address_manager 2132 .mmio_bus 2133 .insert( 2134 tpm.clone(), 2135 arch::layout::TPM_START.0, 2136 arch::layout::TPM_SIZE, 2137 ) 2138 .map_err(DeviceManagerError::BusError)?; 2139 2140 Ok(tpm) 2141 } 2142 2143 fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> { 2144 let mut devices: Vec<MetaVirtioDevice> = Vec::new(); 2145 2146 // Create "standard" virtio devices (net/block/rng) 2147 devices.append(&mut self.make_virtio_block_devices()?); 2148 devices.append(&mut self.make_virtio_net_devices()?); 2149 devices.append(&mut self.make_virtio_rng_devices()?); 2150 2151 // Add virtio-fs if required 2152 devices.append(&mut self.make_virtio_fs_devices()?); 2153 2154 // Add virtio-pmem if required 2155 
        devices.append(&mut self.make_virtio_pmem_devices()?);

        // Add virtio-vsock if required
        devices.append(&mut self.make_virtio_vsock_devices()?);

        devices.append(&mut self.make_virtio_mem_devices()?);

        // Add virtio-balloon if required
        devices.append(&mut self.make_virtio_balloon_devices()?);

        // Add virtio-watchdog device
        devices.append(&mut self.make_virtio_watchdog_devices()?);

        // Add vDPA devices if required
        devices.append(&mut self.make_vdpa_devices()?);

        Ok(devices)
    }

    // Cache whether io_uring is supported to avoid probing for every block device
    fn io_uring_is_supported(&mut self) -> bool {
        if let Some(supported) = self.io_uring_supported {
            return supported;
        }

        let supported = block_io_uring_is_supported();
        self.io_uring_supported = Some(supported);
        supported
    }

    /// Build a single virtio-block device from its configuration.
    ///
    /// Depending on `disk_cfg` this is either a vhost-user-blk device backed
    /// by an external backend socket, or an in-process virtio-block device
    /// whose backend is picked from the detected image type
    /// (raw/qcow2/fixed-VHD/VHDX) and io_uring availability. Assigns a fresh
    /// device id when the config has none.
    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        // Per-device snapshot (if any), used on the restore path below.
        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let vhost_user_block = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(
                    id.clone(),
                    vu_cfg,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(
                                FixedVhdDiskAsync::new(file)
                                    .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                            ) as Box<dyn DiskFile>
                        }
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous RAW disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                        }
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            let virtio_block = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    disk_cfg.serial.clone(),
                    self.seccomp_action.clone(),
                    disk_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Build every virtio-block device listed in the VM configuration and
    /// write the (possibly id-updated) disk configs back.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }

    /// Build a single virtio-net device from its configuration: vhost-user,
    /// a named TAP interface, pre-opened TAP fds, or a TAP described by
    /// ip/mask.
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        // Per-device snapshot (if any), used on the restore path below.
        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    net_cfg.mtu,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    snapshot
                        .map(|s| s.to_versioned_state())
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                ) {
                    Ok(vun_device) => vun_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserNet(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_net as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            // Restored device state (if any), shared by the in-process
            // virtio-net variants below.
            let state = snapshot
                .map(|s| s.to_versioned_state())
                .transpose()
                .map_err(DeviceManagerError::RestoreGetState)?;

            let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
                // A TAP interface name was provided: hand it to Net::new.
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        Some(tap_if_name),
                        None,
                        None,
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else if let Some(fds) = &net_cfg.fds {
                // Build the device on top of pre-opened TAP file descriptors.
                let net = virtio_devices::Net::from_tap_fds(
                    id.clone(),
                    fds,
                    Some(net_cfg.mac),
                    net_cfg.mtu,
                    self.force_iommu | net_cfg.iommu,
                    net_cfg.queue_size,
                    self.seccomp_action.clone(),
                    net_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                )
                .map_err(DeviceManagerError::CreateVirtioNet)?;

                // SAFETY: 'fds' are valid because TAP devices are created successfully
                unsafe {
                    self.config.lock().unwrap().add_preserved_fds(fds.clone());
                }

                Arc::new(Mutex::new(net))
            } else {
                // Neither a TAP name nor fds were given: pass ip/mask to
                // Net::new instead.
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        None,
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            };

            (
                Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_net as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: net_cfg.iommu,
            id,
            pci_segment: net_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Add virtio-net and vhost-user-net devices
    fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        let mut net_devices = self.config.lock().unwrap().net.clone();
        if let Some(net_list_cfg) = &mut net_devices {
            for net_cfg in net_list_cfg.iter_mut() {
                devices.push(self.make_virtio_net_device(net_cfg)?);
            }
        }
        self.config.lock().unwrap().net = net_devices;

        Ok(devices)
    }

    /// Build the virtio-rng device when the configured entropy source path is
    /// valid UTF-8.
    fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        if let Some(rng_path) = rng_config.src.to_str() {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_rng_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: rng_config.iommu,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_rng_device));
        }

        Ok(devices)
    }

    /// Build a single vhost-user-fs (virtio-fs) device; errors out when the
    /// configured backend socket path is not valid UTF-8.
    fn make_virtio_fs_device(
        &mut self,
        fs_cfg: &mut FsConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &fs_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
            fs_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-fs device: {:?}", fs_cfg);

        let mut node = device_node!(id);

        if let Some(fs_socket) = fs_cfg.socket.to_str() {
            let virtio_fs_device = Arc::new(Mutex::new(
                virtio_devices::vhost_user::Fs::new(
                    id.clone(),
                    fs_socket,
                    &fs_cfg.tag,
                    fs_cfg.num_queues,
                    fs_cfg.queue_size,
                    None,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioFs)?,
            ));

            // Update the device tree with the migratable device.
            node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_fs_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id,
                pci_segment: fs_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            Err(DeviceManagerError::NoVirtioFsSock)
        }
    }

    /// Build every configured virtio-fs device and write the (possibly
    /// id-updated) configs back.
    fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut fs_devices = self.config.lock().unwrap().fs.clone();
        if let Some(fs_list_cfg) = &mut fs_devices {
            for fs_cfg in fs_list_cfg.iter_mut() {
                devices.push(self.make_virtio_fs_device(fs_cfg)?);
            }
        }
        self.config.lock().unwrap().fs = fs_devices;

        Ok(devices)
    }

    /// Build a single virtio-pmem device backed by a file, or by an
    /// anonymous temporary file when the configured path is a directory (in
    /// which case an explicit size is mandatory).
    fn make_virtio_pmem_device(
        &mut self,
        pmem_cfg: &mut PmemConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &pmem_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
            pmem_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-pmem device: {:?}", pmem_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
            info!("Restoring virtio-pmem {} resources", id);

            // On restore, exactly one MMIO address range resource is expected.
            let mut region_range: Option<(u64, u64)> = None;
            for resource in node.resources.iter() {
                match resource {
                    Resource::MmioAddressRange { base, size } => {
                        if region_range.is_some() {
                            return Err(DeviceManagerError::ResourceAlreadyExists);
                        }

                        region_range = Some((*base, *size));
                    }
                    _ => {
                        error!("Unexpected resource {:?} for {}", resource, id);
                    }
                }
            }

            if region_range.is_none() {
                return Err(DeviceManagerError::MissingVirtioPmemResources);
            }

            region_range
        } else {
            None
        };

        // Directory backing requires an explicit size and opens an anonymous
        // temporary file (O_TMPFILE) inside that directory.
        let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
            if pmem_cfg.size.is_none() {
                return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
            }
            (O_TMPFILE, true)
        } else {
            (0, false)
        };

        let mut file = OpenOptions::new()
            .read(true)
            .write(!pmem_cfg.discard_writes)
            .custom_flags(custom_flags)
            .open(&pmem_cfg.file)
            .map_err(DeviceManagerError::PmemFileOpen)?;

        let size = if let Some(size) = pmem_cfg.size {
            if set_len {
                file.set_len(size)
                    .map_err(DeviceManagerError::PmemFileSetLen)?;
            }
            size
        } else {
            // No explicit size: take the backing file's current length.
            file.seek(SeekFrom::End(0))
                .map_err(DeviceManagerError::PmemFileSetLen)?
        };

        // The size must be a multiple of 2MiB (0x20_0000).
        if size % 0x20_0000 != 0 {
            return Err(DeviceManagerError::PmemSizeNotAligned);
        }

        let (region_base, region_size) = if let Some((base, size)) = region_range {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            self.pci_segments[pmem_cfg.pci_segment as usize]
                .allocator
                .lock()
                .unwrap()
                .allocate(
                    Some(GuestAddress(base)),
                    size as GuestUsize,
                    Some(0x0020_0000),
                )
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base, size)
        } else {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            let base = self.pci_segments[pmem_cfg.pci_segment as usize]
                .allocator
                .lock()
                .unwrap()
                .allocate(None, size as GuestUsize, Some(0x0020_0000))
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base.raw_value(), size)
        };

        // Map the backing file; discard_writes uses a private (CoW) mapping
        // so guest writes never reach the file.
        let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
        let mmap_region = MmapRegion::build(
            Some(FileOffset::new(cloned_file, 0)),
            region_size as usize,
            PROT_READ | PROT_WRITE,
            MAP_NORESERVE
                | if pmem_cfg.discard_writes {
                    MAP_PRIVATE
                } else {
                    MAP_SHARED
                },
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;
        let host_addr: u64 = mmap_region.as_ptr() as u64;

        let mem_slot = self
            .memory_manager
            .lock()
            .unwrap()
            .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
            .map_err(DeviceManagerError::MemoryManager)?;

        let mapping = virtio_devices::UserspaceMapping {
            host_addr,
            mem_slot,
            addr: GuestAddress(region_base),
            len: region_size,
            mergeable: false,
        };

        let virtio_pmem_device = Arc::new(Mutex::new(
            virtio_devices::Pmem::new(
                id.clone(),
                file,
                GuestAddress(region_base),
                mapping,
                mmap_region,
                self.force_iommu | pmem_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioPmem)?,
        ));

        // Update the device tree with correct resource information and with
        // the migratable device.
        node.resources.push(Resource::MmioAddressRange {
            base: region_base,
            size: region_size,
        });
        node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_pmem_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: pmem_cfg.iommu,
            id,
            pci_segment: pmem_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Build every configured virtio-pmem device and write the (possibly
    /// id-updated) configs back.
    fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add virtio-pmem if required
        let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
        if let Some(pmem_list_cfg) = &mut pmem_devices {
            for pmem_cfg in pmem_list_cfg.iter_mut() {
                devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
            }
        }
        self.config.lock().unwrap().pmem = pmem_devices;

        Ok(devices)
    }

    /// Build the virtio-vsock device from its configuration, backed by a
    /// Unix-socket vsock backend.
    fn make_virtio_vsock_device(
        &mut self,
        vsock_cfg: &mut VsockConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vsock_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
            vsock_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-vsock device: {:?}", vsock_cfg);

        let socket_path = vsock_cfg
            .socket
            .to_str()
            .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
        let backend =
            virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                .map_err(DeviceManagerError::CreateVsockBackend)?;

        let vsock_device = Arc::new(Mutex::new(
            virtio_devices::Vsock::new(
                id.clone(),
                vsock_cfg.cid,
                vsock_cfg.socket.clone(),
                backend,
                self.force_iommu | vsock_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioVsock)?,
        ));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vsock_device));

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&vsock_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vsock_cfg.iommu,
            id,
            pci_segment: vsock_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Build the (at most one) configured virtio-vsock device and write the
    /// (possibly id-updated) config back.
    fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mut vsock = self.config.lock().unwrap().vsock.clone();
        if let Some(ref mut vsock_cfg) = &mut vsock {
            devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
        }
        self.config.lock().unwrap().vsock = vsock;

        Ok(devices)
    }

    /// Build a virtio-mem device for every memory zone that carries a
    /// virtio-mem region, and hand each device back to its zone so resizes
    /// can be driven through it.
    fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Clone the Arc first so the lock guard can borrow from a local.
        let mm = self.memory_manager.clone();
        let mut mm = mm.lock().unwrap();
        for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
            if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
                info!("Creating virtio-mem device: id = {}", memory_zone_id);

                // Resolve the zone's NUMA node id (if any) to pass to the
                // device.
                let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
                    .map(|i| i as u16);

                let virtio_mem_device = Arc::new(Mutex::new(
                    virtio_devices::Mem::new(
                        memory_zone_id.clone(),
                        virtio_mem_zone.region(),
                        self.seccomp_action.clone(),
                        node_id,
                        virtio_mem_zone.hotplugged_size(),
                        virtio_mem_zone.hugepages(),
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        virtio_mem_zone.blocks_state().clone(),
                        versioned_state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
                            .map_err(DeviceManagerError::RestoreGetState)?,
                    )
                    .map_err(DeviceManagerError::CreateVirtioMem)?,
                ));

                // Update the virtio-mem zone so that it has a handle onto the
                // virtio-mem device, which will be used for triggering a resize
                // if needed.
                virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));

                self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));

                devices.push(MetaVirtioDevice {
                    virtio_device: Arc::clone(&virtio_mem_device)
                        as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                    iommu: false,
                    id: memory_zone_id.clone(),
                    pci_segment: 0,
                    dma_handler: None,
                });

                // Fill the device tree with a new node. In case of restore, we
                // know there is nothing to do, so we can simply override the
                // existing entry.
                self.device_tree.lock().unwrap().insert(
                    memory_zone_id.clone(),
                    device_node!(memory_zone_id, virtio_mem_device),
                );
            }
        }

        Ok(devices)
    }

    /// Build the virtio-balloon device when balloon support is configured.
    fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
            let id = String::from(BALLOON_DEVICE_NAME);
            info!("Creating virtio-balloon device: id = {}", id);

            let virtio_balloon_device = Arc::new(Mutex::new(
                virtio_devices::Balloon::new(
                    id.clone(),
                    balloon_config.size,
                    balloon_config.deflate_on_oom,
                    balloon_config.free_page_reporting,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBalloon)?,
            ));

            // Stash the balloon device so the DeviceManager can reach it
            // later.
            self.balloon = Some(virtio_balloon_device.clone());

            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_balloon_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_balloon_device));
        }

        Ok(devices)
    }

    /// Build the virtio-watchdog device when enabled; the device is handed a
    /// clone of the VM reset event.
    fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if !self.config.lock().unwrap().watchdog {
            return Ok(devices);
        }

        let id = String::from(WATCHDOG_DEVICE_NAME);
        info!("Creating virtio-watchdog device: id = {}", id);

        let virtio_watchdog_device = Arc::new(Mutex::new(
            virtio_devices::Watchdog::new(
                id.clone(),
                self.reset_evt.try_clone().unwrap(),
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
        ));
        devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_watchdog_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: false,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_watchdog_device));

        Ok(devices)
    }

    /// Build a single vDPA device from its configuration, along with the DMA
    /// mapping handler the device requires.
    fn make_vdpa_device(
        &mut self,
        vdpa_cfg: &mut VdpaConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &vdpa_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
            vdpa_cfg.id = Some(id.clone());
            id
        };

        info!("Creating vDPA device: {:?}", vdpa_cfg);

        let device_path = vdpa_cfg
            .path
            .to_str()
            .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;

        let vdpa_device = Arc::new(Mutex::new(
            virtio_devices::Vdpa::new(
                id.clone(),
                device_path,
                self.memory_manager.lock().unwrap().guest_memory(),
                vdpa_cfg.num_queues as u16,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVdpa)?,
        ));

        // Create the DMA handler that is required by the vDPA device
        let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
            Arc::clone(&vdpa_device),
            Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
        ));

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vdpa_device));

        Ok(MetaVirtioDevice {
            virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vdpa_cfg.iommu,
            id,
            pci_segment: vdpa_cfg.pci_segment,
            dma_handler: Some(vdpa_mapping),
        })
    }

    /// Build every configured vDPA device and write the (possibly
    /// id-updated) configs back.
    fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add vdpa if required
        let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
        if let Some(vdpa_list_cfg) = &mut vdpa_devices {
            for vdpa_cfg in vdpa_list_cfg.iter_mut() {
                devices.push(self.make_vdpa_device(vdpa_cfg)?);
            }
        }
        self.config.lock().unwrap().vdpa = vdpa_devices;

        Ok(devices)
    }

    /// Generate the next unused device name of the form `<prefix><counter>`.
    ///
    /// Names already present in the boot id list or the device tree are
    /// skipped; the counter wraps, and completing a full cycle without
    /// finding a free name is an error.
    fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
        let start_id = self.device_id_cnt;
        loop {
            // Generate the temporary name.
            let name = format!("{}{}", prefix, self.device_id_cnt);
            // Increment the counter.
            self.device_id_cnt += Wrapping(1);
            // Check if the name is already in use.
            if !self.boot_id_list.contains(&name)
                && !self.device_tree.lock().unwrap().contains_key(&name)
            {
                return Ok(name);
            }

            if self.device_id_cnt == start_id {
                // We went through a full loop and there's nothing else we can
                // do.
                break;
            }
        }
        Err(DeviceManagerError::NoAvailableDeviceName)
    }

    /// Pass a host device through to the guest as a VFIO device, lazily
    /// creating the hypervisor passthrough device on first use.
    fn add_passthrough_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // If the passthrough device has not been created yet, it is created
        // here and stored in the DeviceManager structure for future needs.
        if self.passthrough_device.is_none() {
            self.passthrough_device = Some(
                self.address_manager
                    .vm
                    .create_passthrough_device()
                    .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
            );
        }

        self.add_vfio_device(device_cfg)
    }

    /// Create a new VFIO container tied to a duplicate of the hypervisor
    /// passthrough device fd; fails if device passthrough is unsupported.
    fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
        let passthrough_device = self
            .passthrough_device
            .as_ref()
            .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

        let dup = passthrough_device
            .try_clone()
            .map_err(DeviceManagerError::VfioCreate)?;

        Ok(Arc::new(
            VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
        ))
    }

    /// Create a VFIO PCI device for the host device at `device_cfg.path`,
    /// assign it a name and PCI resources, set up its DMA mappings, and plug
    /// it on the PCI bus.
    fn add_vfio_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't let all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device. That's simply because the map/unmap operations happen at the
        // VFIO container level.
        let vfio_container = if device_cfg.iommu {
            let vfio_container = self.create_vfio_container()?;

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
            ));

            // A vIOMMU-attached device requires the virtual IOMMU to exist so
            // it can own the device's external DMA mapping.
            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }

            vfio_container
        } else if let Some(vfio_container) = &self.vfio_container {
            // Reuse the shared container created by a previous VFIO device.
            Arc::clone(vfio_container)
        } else {
            // First non-vIOMMU VFIO device: create the shared container and
            // remember that guest memory must be mapped into it below.
            let vfio_container = self.create_vfio_container()?;
            needs_dma_mapping = true;
            self.vfio_container = Some(Arc::clone(&vfio_container));

            vfio_container
        };

        let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
            .map_err(DeviceManagerError::VfioCreate)?;

        if needs_dma_mapping {
            // Register DMA mapping in IOMMU.
            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem device itself.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    vfio_container
                        .vfio_dma_map(
                            region.start_addr().raw_value(),
                            region.len(),
                            region.as_ptr() as u64,
                        )
                        .map_err(DeviceManagerError::VfioDmaMap)?;
                }
            }

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
            ));

            // Hand each virtio-mem device a mapping handler so it can manage
            // DMA mappings for its own regions.
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Container,
                        vfio_mapping.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }
        }

        // Legacy interrupt group for the device's IRQ slot, only when a
        // legacy interrupt manager is available.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        // Cloned Arc moved into the memory-slot allocation closure below.
        let memory_manager = self.memory_manager.clone();

        let vfio_pci_device = VfioPciDevice::new(
            vfio_name.clone(),
            &self.address_manager.vm,
            vfio_device,
            vfio_container,
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            device_cfg.iommu,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
        )
        .map_err(DeviceManagerError::VfioPciCreate)?;

        let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

        let new_resources = self.add_pci_device(
            vfio_pci_device.clone(),
            vfio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
3315 )?; 3316 3317 vfio_pci_device 3318 .lock() 3319 .unwrap() 3320 .map_mmio_regions() 3321 .map_err(DeviceManagerError::VfioMapRegion)?; 3322 3323 let mut node = device_node!(vfio_name, vfio_pci_device); 3324 3325 // Update the device tree with correct resource information. 3326 node.resources = new_resources; 3327 node.pci_bdf = Some(pci_device_bdf); 3328 node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device)); 3329 3330 self.device_tree 3331 .lock() 3332 .unwrap() 3333 .insert(vfio_name.clone(), node); 3334 3335 Ok((pci_device_bdf, vfio_name)) 3336 } 3337 3338 fn add_pci_device( 3339 &mut self, 3340 bus_device: Arc<Mutex<dyn BusDevice>>, 3341 pci_device: Arc<Mutex<dyn PciDevice>>, 3342 segment_id: u16, 3343 bdf: PciBdf, 3344 resources: Option<Vec<Resource>>, 3345 ) -> DeviceManagerResult<Vec<Resource>> { 3346 let bars = pci_device 3347 .lock() 3348 .unwrap() 3349 .allocate_bars( 3350 &self.address_manager.allocator, 3351 &mut self.pci_segments[segment_id as usize] 3352 .allocator 3353 .lock() 3354 .unwrap(), 3355 resources, 3356 ) 3357 .map_err(DeviceManagerError::AllocateBars)?; 3358 3359 let mut pci_bus = self.pci_segments[segment_id as usize] 3360 .pci_bus 3361 .lock() 3362 .unwrap(); 3363 3364 pci_bus 3365 .add_device(bdf.device() as u32, pci_device) 3366 .map_err(DeviceManagerError::AddPciDevice)?; 3367 3368 self.bus_devices.push(Arc::clone(&bus_device)); 3369 3370 pci_bus 3371 .register_mapping( 3372 bus_device, 3373 #[cfg(target_arch = "x86_64")] 3374 self.address_manager.io_bus.as_ref(), 3375 self.address_manager.mmio_bus.as_ref(), 3376 bars.clone(), 3377 ) 3378 .map_err(DeviceManagerError::AddPciDevice)?; 3379 3380 let mut new_resources = Vec::new(); 3381 for bar in bars { 3382 new_resources.push(Resource::PciBar { 3383 index: bar.idx(), 3384 base: bar.addr(), 3385 size: bar.size(), 3386 type_: bar.region_type().into(), 3387 prefetchable: bar.prefetchable().into(), 3388 }); 3389 } 3390 3391 Ok(new_resources) 3392 } 3393 3394 fn 
add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut iommu_attached_device_ids = Vec::new();
        // Clone the list out so generated ids can be written back afterwards.
        let mut devices = self.config.lock().unwrap().devices.clone();

        if let Some(device_list_cfg) = &mut devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (device_id, _) = self.add_passthrough_device(device_cfg)?;
                if device_cfg.iommu && self.iommu_device.is_some() {
                    iommu_attached_device_ids.push(device_id);
                }
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().devices = devices;

        Ok(iommu_attached_device_ids)
    }

    // Add one vfio-user device (device emulated by an external process over a
    // UNIX socket). Mirrors add_vfio_device() but uses the vfio-user client
    // for DMA mapping instead of a kernel VFIO container.
    fn add_vfio_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_user_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;

        // INTx support, only when a legacy interrupt manager is present.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let client = Arc::new(Mutex::new(
            vfio_user::Client::new(&device_cfg.socket)
                .map_err(DeviceManagerError::VfioUserCreateClient)?,
        ));

        let memory_manager = self.memory_manager.clone();

        let mut vfio_user_pci_device = VfioUserPciDevice::new(
            vfio_user_name.clone(),
            &self.address_manager.vm,
            client.clone(),
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
        )
        .map_err(DeviceManagerError::VfioUserCreate)?;

        let memory = self.memory_manager.lock().unwrap().guest_memory();
        let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
        // virtio-mem hotplugged regions are mapped per-device through this
        // handler rather than in the loop below.
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Device(pci_device_bdf.into()),
                    vfio_user_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }

        // Map the current guest memory zones for DMA through the client.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_user_pci_device
                    .dma_map(region)
                    .map_err(DeviceManagerError::VfioUserDmaMap)?;
            }
        }

        let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

        let new_resources = self.add_pci_device(
            vfio_user_pci_device.clone(),
            vfio_user_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Note it is required to call 'add_pci_device()' in advance to have the list of
        // mmio regions provisioned correctly
        vfio_user_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioUserMapRegion)?;

        let mut node = device_node!(vfio_user_name, vfio_user_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_user_name.clone(), node);

        Ok((pci_device_bdf, vfio_user_name))
    }

    // Add every vfio-user device from the VM config. Always returns an empty
    // list (vfio-user devices are never vIOMMU-attached here).
    fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut user_devices = self.config.lock().unwrap().user_devices.clone();

        if let Some(device_list_cfg) = &mut user_devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().user_devices = user_devices;

        Ok(vec![])
    }

    // Wrap a virtio device into a virtio-pci transport device, place it on
    // the requested PCI segment, hook up DMA/IOMMU handling, and record both
    // nodes (virtio-pci parent + virtio child) in the device tree.
    // Note: `pci_segment_id` is only a hint; when restoring, the segment is
    // taken from the saved b/d/f (see pci_resources()).
    fn add_virtio_pci_device(
        &mut self,
        virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu_mapping: &Option<Arc<IommuMapping>>,
        virtio_device_id: String,
        pci_segment_id: u16,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> DeviceManagerResult<PciBdf> {
        let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");

        // Add the new virtio-pci node to the device tree.
        let mut node = device_node!(id);
        node.children = vec![virtio_device_id.clone()];

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        // Update the existing virtio node by setting the parent.
        if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
            node.parent = Some(id.clone());
        } else {
            return Err(DeviceManagerError::MissingNode);
        }

        // Allows support for one MSI-X vector per queue. It also adds 1
        // as we need to take into account the dedicated vector to notify
        // about a virtio config change.
        let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;

        // Create the AccessPlatform trait from the implementation IommuMapping.
        // This will provide address translation for any virtio device sitting
        // behind a vIOMMU.
        let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
        {
            Some(Arc::new(AccessPlatformMapping::new(
                pci_device_bdf.into(),
                mapping.clone(),
            )))
        } else {
            None
        };

        let memory = self.memory_manager.lock().unwrap().guest_memory();

        // Map DMA ranges if a DMA handler is available and if the device is
        // not attached to a virtual IOMMU.
        if let Some(dma_handler) = &dma_handler {
            if iommu_mapping.is_some() {
                // vIOMMU case: the guest drives map/unmap through the vIOMMU.
                if let Some(iommu) = &self.iommu_device {
                    iommu
                        .lock()
                        .unwrap()
                        .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
                } else {
                    return Err(DeviceManagerError::MissingVirtualIommu);
                }
            } else {
                // Let every virtio-mem device handle the DMA map/unmap through the
                // DMA handler provided.
                for virtio_mem_device in self.virtio_mem_devices.iter() {
                    virtio_mem_device
                        .lock()
                        .unwrap()
                        .add_dma_mapping_handler(
                            VirtioMemMappingSource::Device(pci_device_bdf.into()),
                            dma_handler.clone(),
                        )
                        .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
                }

                // Do not register virtio-mem regions, as they are handled directly by
                // virtio-mem devices.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        // Identity-map: IOVA == GPA.
                        let gpa = region.start_addr().0;
                        let size = region.len();
                        dma_handler
                            .map(gpa, gpa, size)
                            .map_err(DeviceManagerError::VirtioDmaMap)?;
                    }
                }
            }
        }

        let device_type = virtio_device.lock().unwrap().device_type();
        let virtio_pci_device = Arc::new(Mutex::new(
            VirtioPciDevice::new(
                id.clone(),
                memory,
                virtio_device,
                msix_num,
                access_platform,
                &self.msi_interrupt_manager,
                pci_device_bdf.into(),
                self.activate_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                // All device types *except* virtio block devices should be allocated a 64-bit bar
                // The block devices should be given a 32-bit BAR so that they are easily accessible
                // to firmware without requiring excessive identity mapping.
                // The exception being if not on the default PCI segment.
                pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
                dma_handler,
                self.pending_activations.clone(),
                vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
            )
            .map_err(DeviceManagerError::VirtioDevice)?,
        ));

        let new_resources = self.add_pci_device(
            virtio_pci_device.clone(),
            virtio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Register one ioeventfd per queue notification address so that guest
        // kicks bypass the VM-exit handler path.
        let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
        for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
            let io_addr = IoEventAddress::Mmio(addr);
            self.address_manager
                .vm
                .register_ioevent(event, &io_addr, None)
                .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
        }

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
        self.device_tree.lock().unwrap().insert(id, node);

        Ok(pci_device_bdf)
    }

    // Create the pvpanic PCI device on segment 0 and register it in the
    // device tree (no PCI device handle: it is not hot-removable here).
    fn add_pvpanic_device(
        &mut self,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
        let id = String::from(PVPANIC_DEVICE_NAME);
        let pci_segment_id = 0x0_u16;

        info!("Creating pvpanic device {}", id);

        // Shadows pci_segment_id: on restore the segment comes from the
        // saved b/d/f.
        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
            .map_err(DeviceManagerError::PvPanicCreate)?;

        let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));

        let new_resources = self.add_pci_device(
            pvpanic_device.clone(),
            pvpanic_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let mut node = device_node!(id, pvpanic_device);

        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = None;

        self.device_tree.lock().unwrap().insert(id, node);

        Ok(Some(pvpanic_device))
    }

    // Resolve (segment, b/d/f, saved resources) for a device id: reuse the
    // saved placement when restoring from a snapshot, otherwise allocate the
    // next free b/d/f on the requested segment.
    fn pci_resources(
        &self,
        id: &str,
        pci_segment_id: u16,
    ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        Ok(
            if let Some(node) = self.device_tree.lock().unwrap().get(id) {
                info!("Restoring virtio-pci {} resources", id);
                let pci_device_bdf: PciBdf = node
                    .pci_bdf
                    .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
                let pci_segment_id = pci_device_bdf.segment();

                // Re-reserve the saved device slot on the bus so it cannot
                // be handed out to another device.
                self.pci_segments[pci_segment_id as usize]
                    .pci_bus
                    .lock()
                    .unwrap()
                    .get_device_id(pci_device_bdf.device() as usize)
                    .map_err(DeviceManagerError::GetPciDeviceId)?;

                (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
            } else {
                let pci_device_bdf =
                    self.pci_segments[pci_segment_id as usize].next_device_bdf()?;

                (pci_segment_id, pci_device_bdf, None)
            },
        )
    }

    // Accessor for the PIO bus (x86 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }

    // Accessor for the MMIO bus.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }

    // Accessor for the system-wide address allocator.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }

    pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
        self.interrupt_controller
            .as_ref()
            .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
    }

    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }

    pub fn console(&self) -> &Arc<Console> {
        &self.console
    }

    // Extra kernel command-line fragments collected while creating devices
    // (aarch64 direct-kernel-boot only).
    #[cfg(target_arch = "aarch64")]
    pub fn cmdline_additions(&self) -> &[String] {
        self.cmdline_additions.as_slice()
    }

    // Propagate a newly hotplugged guest memory region to every device that
    // needs to know about it: virtio devices, their DMA handlers, the shared
    // VFIO container, and all vfio-user devices.
    pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
        for handle in self.virtio_devices.iter() {
            handle
                .virtio_device
                .lock()
                .unwrap()
                .add_memory_region(new_region)
                .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;

            // vIOMMU-attached devices are skipped: the guest maps through
            // the vIOMMU instead.
            if let Some(dma_handler) = &handle.dma_handler {
                if !handle.iommu {
                    let gpa = new_region.start_addr().0;
                    let size = new_region.len();
                    dma_handler
                        .map(gpa, gpa, size)
                        .map_err(DeviceManagerError::VirtioDmaMap)?;
                }
            }
        }

        // Take care of updating the memory for VFIO PCI devices.
        if let Some(vfio_container) = &self.vfio_container {
            vfio_container
                .vfio_dma_map(
                    new_region.start_addr().raw_value(),
                    new_region.len(),
                    new_region.as_ptr() as u64,
                )
                .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
        }

        // Take care of updating the memory for vfio-user devices.
        {
            let device_tree = self.device_tree.lock().unwrap();
            for pci_device_node in device_tree.pci_devices() {
                if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
                    .pci_device_handle
                    .as_ref()
                    .ok_or(DeviceManagerError::MissingPciDevice)?
                {
                    vfio_user_pci_device
                        .lock()
                        .unwrap()
                        .dma_map(new_region)
                        .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
                }
            }
        }

        Ok(())
    }

    // Drain and run every queued virtio device activation (queued from the
    // VM-exit path where activation cannot be performed directly).
    pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
        for mut activator in self.pending_activations.lock().unwrap().drain(..) {
            activator
                .activate()
                .map_err(DeviceManagerError::VirtioActivate)?;
        }
        Ok(())
    }

    // Raise an ACPI GED notification towards the guest (device hotplug).
    // Panics if no GED notification device was created.
    pub fn notify_hotplug(
        &self,
        _notification_type: AcpiNotificationFlags,
    ) -> DeviceManagerResult<()> {
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(_notification_type)
            .map_err(DeviceManagerError::HotPlugNotification);
    }

    // Hotplug a VFIO device and flag it as "up" in the PCIU bitmap so the
    // guest picks it up on the next ACPI rescan.
    pub fn add_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&device_cfg.id)?;

        if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;

        // Update the PCIU bitmap
        self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo {
            id: device_name,
            bdf,
        })
    }

    // Hotplug a vfio-user device; same PCIU bookkeeping as add_device().
    pub fn add_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&device_cfg.id)?;

        let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;

        // Update the PCIU bitmap
        self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo {
            id: device_name,
            bdf,
        })
    }

    // Request removal of a device by id: validates the device is removable
    // and flags it "down" in the PCID bitmap; the guest then initiates the
    // eject (see eject_device()).
    pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
        // The node can be directly a PCI node in case the 'id' refers to a
        // VFIO device or a virtio-pci one.
        // In case the 'id' refers to a virtio device, we must find the PCI
        // node by looking at the parent.
        let device_tree = self.device_tree.lock().unwrap();
        let node = device_tree
            .get(&id)
            .ok_or(DeviceManagerError::UnknownDeviceId(id))?;

        // Either the node is itself a PCI node, or its parent is.
        let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
            node
        } else {
            let parent = node
                .parent
                .as_ref()
                .ok_or(DeviceManagerError::MissingNode)?;
            device_tree
                .get(parent)
                .ok_or(DeviceManagerError::MissingNode)?
        };

        let pci_device_bdf: PciBdf = pci_device_node
            .pci_bdf
            .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
        let pci_segment_id = pci_device_bdf.segment();

        let pci_device_handle = pci_device_node
            .pci_device_handle
            .as_ref()
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        // Only a whitelisted set of virtio device types may be hot-removed.
        #[allow(irrefutable_let_patterns)]
        if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
            let device_type = VirtioDeviceType::from(
                virtio_pci_device
                    .lock()
                    .unwrap()
                    .virtio_device()
                    .lock()
                    .unwrap()
                    .device_type(),
            );
            match device_type {
                VirtioDeviceType::Net
                | VirtioDeviceType::Block
                | VirtioDeviceType::Pmem
                | VirtioDeviceType::Fs
                | VirtioDeviceType::Vsock => {}
                _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
            }
        }

        // Update the PCID bitmap
        self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();

        Ok(())
    }

    // Complete a guest-initiated PCI ejection: unwind everything that was set
    // up when the device was added — device slot, device tree nodes, ioevents,
    // DMA mappings, BARs, bus registrations — then drop the device.
    // The teardown order below matters; do not reorder.
    pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
        info!(
            "Ejecting device_id = {} on segment_id={}",
            device_id, pci_segment_id
        );

        // Convert the device ID into the corresponding b/d/f.
        let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);

        // Give the PCI device ID back to the PCI bus.
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .put_device_id(device_id as usize)
            .map_err(DeviceManagerError::PutPciDeviceId)?;

        // Remove the device from the device tree along with its children.
        let mut device_tree = self.device_tree.lock().unwrap();
        let pci_device_node = device_tree
            .remove_node_by_pci_bdf(pci_device_bdf)
            .ok_or(DeviceManagerError::MissingPciDevice)?;

        // For VFIO and vfio-user the PCI device id is the id.
        // For virtio we overwrite it later as we want the id of the
        // underlying device.
        let mut id = pci_device_node.id;
        let pci_device_handle = pci_device_node
            .pci_device_handle
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
            // The virtio-pci device has a single child
            if !pci_device_node.children.is_empty() {
                assert_eq!(pci_device_node.children.len(), 1);
                let child_id = &pci_device_node.children[0];
                id = child_id.clone();
            }
        }
        for child in pci_device_node.children.iter() {
            device_tree.remove(child);
        }

        let mut iommu_attached = false;
        if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
            if iommu_attached_devices.contains(&pci_device_bdf) {
                iommu_attached = true;
            }
        }

        // Per-handle teardown, yielding the trait-object views needed for the
        // generic teardown steps that follow.
        let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
            // No need to remove any virtio-mem mapping here as the container outlives all devices
            PciDeviceHandle::Vfio(vfio_pci_device) => (
                Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                false,
            ),
            PciDeviceHandle::Virtio(virtio_pci_device) => {
                let dev = virtio_pci_device.lock().unwrap();
                let bar_addr = dev.config_bar_addr();
                // Unregister the ioeventfds registered in add_virtio_pci_device().
                for (event, addr) in dev.ioeventfds(bar_addr) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.address_manager
                        .vm
                        .unregister_ioevent(event, &io_addr)
                        .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
                }

                // Undo the identity DMA mappings (non-vIOMMU case only).
                if let Some(dma_handler) = dev.dma_handler() {
                    if !iommu_attached {
                        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                            for region in zone.regions() {
                                let iova = region.start_addr().0;
                                let size = region.len();
                                dma_handler
                                    .unmap(iova, size)
                                    .map_err(DeviceManagerError::VirtioDmaUnmap)?;
                            }
                        }
                    }
                }

                (
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    Some(dev.virtio_device()),
                    dev.dma_handler().is_some() && !iommu_attached,
                )
            }
            PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
                let mut dev = vfio_user_pci_device.lock().unwrap();
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        dev.dma_unmap(region)
                            .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
                    }
                }

                (
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    true,
                )
            }
        };

        // Detach the per-device DMA handler from all virtio-mem devices.
        if remove_dma_handler {
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
                        pci_device_bdf.into(),
                    ))
                    .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
            }
        }

        // Free the allocated BARs
        pci_device
            .lock()
            .unwrap()
            .free_bars(
                &mut self.address_manager.allocator.lock().unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .allocator
                    .lock()
                    .unwrap(),
            )
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the list and
        // buses where it was stored. At the end of this function, after
        // any_device, bus_device and pci_device are released, the actual
        // device will be dropped.
        Ok(())
    }

    // Common tail of all virtio hotplug paths (add_disk/add_net/...): record
    // the handle, resolve the optional vIOMMU mapping, create the virtio-pci
    // transport and flag the slot "up" in the PCIU bitmap.
    fn hotplug_virtio_pci_device(
        &mut self,
        handle: MetaVirtioDevice,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        // Add the virtio device to the device manager list. This is important
        // as the list is used to notify virtio devices about memory updates
        // for instance.
        self.virtio_devices.push(handle.clone());

        let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
            self.iommu_mapping.clone()
        } else {
            None
        };

        let bdf = self.add_virtio_pci_device(
            handle.virtio_device,
            &mapping,
            handle.id.clone(),
            handle.pci_segment,
            handle.dma_handler,
        )?;

        // Update the PCIU bitmap
        self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo { id: handle.id, bdf })
    }

    // True if the given PCI segment is configured (via platform config) to
    // sit behind the vIOMMU; false when unset.
    fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
        self.config
            .lock()
            .as_ref()
            .unwrap()
            .platform
            .as_ref()
            .map(|pc| {
                pc.iommu_segments
                    .as_ref()
                    .map(|v| v.contains(&pci_segment_id))
                    .unwrap_or_default()
            })
            .unwrap_or_default()
    }

    // Hotplug a virtio-block device.
    pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&disk_cfg.id)?;

        if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_block_device(disk_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    // Hotplug a virtio-fs device.
    // NOTE(review): unlike the sibling add_* methods this one performs no
    // iommu/is_iommu_segment() check — confirm whether that is intentional
    // (FsConfig may not carry an iommu flag).
    pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&fs_cfg.id)?;

        let device = self.make_virtio_fs_device(fs_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    // Hotplug a virtio-pmem device.
    pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&pmem_cfg.id)?;

        if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_pmem_device(pmem_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    // Hotplug a virtio-net device.
    pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&net_cfg.id)?;

        if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_net_device(net_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    // Hotplug a vDPA device.
    pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vdpa_cfg.id)?;

        if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_vdpa_device(vdpa_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    // Hotplug a virtio-vsock device.
    pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vsock_cfg.id)?;

        if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_vsock_device(vsock_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    // Collect per-device statistics counters from every virtio device that
    // exposes them, keyed by device id.
    pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
        let mut counters = HashMap::new();

        for handle in &self.virtio_devices {
            let virtio_device = handle.virtio_device.lock().unwrap();
            if let Some(device_counters) = virtio_device.counters() {
                counters.insert(handle.id.clone(), device_counters.clone());
            }
        }

        counters
    }

    // Ask the virtio-balloon device to inflate/deflate to `size` bytes.
    // Errors with MissingVirtioBalloon when no balloon was configured.
    pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
        if let Some(balloon) = &self.balloon {
            return balloon
                .lock()
                .unwrap()
                .resize(size)
                .map_err(DeviceManagerError::VirtioBalloonResize);
        }

        warn!("No balloon setup: Can't resize the balloon");
        Err(DeviceManagerError::MissingVirtioBalloon)
    }

    // Current actual balloon size in bytes; 0 when no balloon is configured.
    pub fn balloon_size(&self) -> u64 {
        if let Some(balloon) = &self.balloon {
            return balloon.lock().unwrap().get_actual();
        }

        0
    }

    pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
        self.device_tree.clone()
    }

    // Signal a power-button press to the guest via an ACPI GED notification.
    // Panics if no GED notification device was created.
    #[cfg(target_arch = "x86_64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    #[cfg(target_arch = "aarch64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // There are two use cases:
        // 1. Users will use direct kernel boot with device tree.
        // 2. Users will use ACPI+UEFI boot.

        // Trigger a GPIO pin 3 event to satisfy use case 1.
        self.gpio_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .trigger_key(3)
            .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
        // Trigger a GED power button event to satisfy use case 2.
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification);
    }

    // (vIOMMU b/d/f, b/d/fs of devices attached to it), if a vIOMMU exists.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }

    // Reject user-supplied device ids that use the reserved "__" prefix or
    // collide with an existing device-tree entry.
    fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
        if let Some(id) = id {
            if id.starts_with("__") {
                return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
            }

            if self.device_tree.lock().unwrap().contains_key(id) {
                return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
            }
        }

        Ok(())
    }

    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
}

// Find the NUMA node containing the given memory zone id, if any.
fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
            return Some(*numa_node_id);
        }
    }

    None
}

// Find the NUMA node owning the given PCI segment; defaults to node 0 when
// the segment is not listed anywhere.
fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if numa_node.pci_segments.contains(&pci_segment_id) {
            return *numa_node_id;
        }
    }

    0
}

// Marker type emitting the ACPI description of the TPM device.
struct TpmDevice {}

impl Aml for TpmDevice {
    // Emit a TPM2 ACPI device (_HID MSFT0101) with a fixed 32-bit MMIO
    // resource at the arch-defined TPM_START/TPM_SIZE window.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        aml::Device::new(
            "TPM2".into(),
            vec![
                &aml::Name::new("_HID".into(), &"MSFT0101"),
                &aml::Name::new("_STA".into(), &(0xF_usize)),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                        true,
                        layout::TPM_START.0 as u32,
                        layout::TPM_SIZE as u32,
                    )]),
                ),
            ],
        )
        .to_aml_bytes(sink)
    }
4378 } 4379 4380 impl Aml for DeviceManager { 4381 fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) { 4382 #[cfg(target_arch = "aarch64")] 4383 use arch::aarch64::DeviceInfoForFdt; 4384 4385 let mut pci_scan_methods = Vec::new(); 4386 for i in 0..self.pci_segments.len() { 4387 pci_scan_methods.push(aml::MethodCall::new( 4388 format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(), 4389 vec![], 4390 )); 4391 } 4392 let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new(); 4393 for method in &pci_scan_methods { 4394 pci_scan_inner.push(method) 4395 } 4396 4397 // PCI hotplug controller 4398 aml::Device::new( 4399 "_SB_.PHPR".into(), 4400 vec![ 4401 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")), 4402 &aml::Name::new("_STA".into(), &0x0bu8), 4403 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"), 4404 &aml::Mutex::new("BLCK".into(), 0), 4405 &aml::Name::new( 4406 "_CRS".into(), 4407 &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory( 4408 aml::AddressSpaceCacheable::NotCacheable, 4409 true, 4410 self.acpi_address.0, 4411 self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1, 4412 None, 4413 )]), 4414 ), 4415 // OpRegion and Fields map MMIO range into individual field values 4416 &aml::OpRegion::new( 4417 "PCST".into(), 4418 aml::OpRegionSpace::SystemMemory, 4419 &(self.acpi_address.0 as usize), 4420 &DEVICE_MANAGER_ACPI_SIZE, 4421 ), 4422 &aml::Field::new( 4423 "PCST".into(), 4424 aml::FieldAccessType::DWord, 4425 aml::FieldLockRule::NoLock, 4426 aml::FieldUpdateRule::WriteAsZeroes, 4427 vec![ 4428 aml::FieldEntry::Named(*b"PCIU", 32), 4429 aml::FieldEntry::Named(*b"PCID", 32), 4430 aml::FieldEntry::Named(*b"B0EJ", 32), 4431 aml::FieldEntry::Named(*b"PSEG", 32), 4432 ], 4433 ), 4434 &aml::Method::new( 4435 "PCEJ".into(), 4436 2, 4437 true, 4438 vec![ 4439 // Take lock defined above 4440 &aml::Acquire::new("BLCK".into(), 0xffff), 4441 // Choose the current segment 4442 &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)), 4443 
// Write PCI bus number (in first argument) to I/O port via field 4444 &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)), 4445 // Release lock 4446 &aml::Release::new("BLCK".into()), 4447 // Return 0 4448 &aml::Return::new(&aml::ZERO), 4449 ], 4450 ), 4451 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner), 4452 ], 4453 ) 4454 .to_aml_bytes(sink); 4455 4456 for segment in &self.pci_segments { 4457 segment.to_aml_bytes(sink); 4458 } 4459 4460 let mut mbrd_memory = Vec::new(); 4461 4462 for segment in &self.pci_segments { 4463 mbrd_memory.push(aml::Memory32Fixed::new( 4464 true, 4465 segment.mmio_config_address as u32, 4466 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32, 4467 )) 4468 } 4469 4470 let mut mbrd_memory_refs = Vec::new(); 4471 for mbrd_memory_ref in &mbrd_memory { 4472 mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml); 4473 } 4474 4475 aml::Device::new( 4476 "_SB_.MBRD".into(), 4477 vec![ 4478 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")), 4479 &aml::Name::new("_UID".into(), &aml::ZERO), 4480 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)), 4481 ], 4482 ) 4483 .to_aml_bytes(sink); 4484 4485 // Serial device 4486 #[cfg(target_arch = "x86_64")] 4487 let serial_irq = 4; 4488 #[cfg(target_arch = "aarch64")] 4489 let serial_irq = 4490 if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off { 4491 self.get_device_info() 4492 .clone() 4493 .get(&(DeviceType::Serial, DeviceType::Serial.to_string())) 4494 .unwrap() 4495 .irq() 4496 } else { 4497 // If serial is turned off, add a fake device with invalid irq. 
4498 31 4499 }; 4500 if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off { 4501 aml::Device::new( 4502 "_SB_.COM1".into(), 4503 vec![ 4504 &aml::Name::new( 4505 "_HID".into(), 4506 #[cfg(target_arch = "x86_64")] 4507 &aml::EISAName::new("PNP0501"), 4508 #[cfg(target_arch = "aarch64")] 4509 &"ARMH0011", 4510 ), 4511 &aml::Name::new("_UID".into(), &aml::ZERO), 4512 &aml::Name::new("_DDN".into(), &"COM1"), 4513 &aml::Name::new( 4514 "_CRS".into(), 4515 &aml::ResourceTemplate::new(vec![ 4516 &aml::Interrupt::new(true, true, false, false, serial_irq), 4517 #[cfg(target_arch = "x86_64")] 4518 &aml::IO::new(0x3f8, 0x3f8, 0, 0x8), 4519 #[cfg(target_arch = "aarch64")] 4520 &aml::Memory32Fixed::new( 4521 true, 4522 arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32, 4523 MMIO_LEN as u32, 4524 ), 4525 ]), 4526 ), 4527 ], 4528 ) 4529 .to_aml_bytes(sink); 4530 } 4531 4532 aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink); 4533 4534 aml::Device::new( 4535 "_SB_.PWRB".into(), 4536 vec![ 4537 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")), 4538 &aml::Name::new("_UID".into(), &aml::ZERO), 4539 ], 4540 ) 4541 .to_aml_bytes(sink); 4542 4543 if self.config.lock().unwrap().tpm.is_some() { 4544 // Add tpm device 4545 TpmDevice {}.to_aml_bytes(sink); 4546 } 4547 4548 self.ged_notification_device 4549 .as_ref() 4550 .unwrap() 4551 .lock() 4552 .unwrap() 4553 .to_aml_bytes(sink) 4554 } 4555 } 4556 4557 impl Pausable for DeviceManager { 4558 fn pause(&mut self) -> result::Result<(), MigratableError> { 4559 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4560 if let Some(migratable) = &device_node.migratable { 4561 migratable.lock().unwrap().pause()?; 4562 } 4563 } 4564 // On AArch64, the pause of device manager needs to trigger 4565 // a "pause" of GIC, which will flush the GIC pending tables 4566 // and ITS tables to guest RAM. 
4567 #[cfg(target_arch = "aarch64")] 4568 { 4569 self.get_interrupt_controller() 4570 .unwrap() 4571 .lock() 4572 .unwrap() 4573 .pause()?; 4574 }; 4575 4576 Ok(()) 4577 } 4578 4579 fn resume(&mut self) -> result::Result<(), MigratableError> { 4580 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4581 if let Some(migratable) = &device_node.migratable { 4582 migratable.lock().unwrap().resume()?; 4583 } 4584 } 4585 4586 Ok(()) 4587 } 4588 } 4589 4590 impl Snapshottable for DeviceManager { 4591 fn id(&self) -> String { 4592 DEVICE_MANAGER_SNAPSHOT_ID.to_string() 4593 } 4594 4595 fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> { 4596 let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?); 4597 4598 // We aggregate all devices snapshots. 4599 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4600 if let Some(migratable) = &device_node.migratable { 4601 let mut migratable = migratable.lock().unwrap(); 4602 snapshot.add_snapshot(migratable.id(), migratable.snapshot()?); 4603 } 4604 } 4605 4606 Ok(snapshot) 4607 } 4608 } 4609 4610 impl Transportable for DeviceManager {} 4611 4612 impl Migratable for DeviceManager { 4613 fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 4614 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4615 if let Some(migratable) = &device_node.migratable { 4616 migratable.lock().unwrap().start_dirty_log()?; 4617 } 4618 } 4619 Ok(()) 4620 } 4621 4622 fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> { 4623 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4624 if let Some(migratable) = &device_node.migratable { 4625 migratable.lock().unwrap().stop_dirty_log()?; 4626 } 4627 } 4628 Ok(()) 4629 } 4630 4631 fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> { 4632 let mut tables = Vec::new(); 4633 for (_, device_node) in 
self.device_tree.lock().unwrap().iter() { 4634 if let Some(migratable) = &device_node.migratable { 4635 tables.push(migratable.lock().unwrap().dirty_log()?); 4636 } 4637 } 4638 Ok(MemoryRangeTable::new_from_tables(tables)) 4639 } 4640 4641 fn start_migration(&mut self) -> std::result::Result<(), MigratableError> { 4642 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4643 if let Some(migratable) = &device_node.migratable { 4644 migratable.lock().unwrap().start_migration()?; 4645 } 4646 } 4647 Ok(()) 4648 } 4649 4650 fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> { 4651 for (_, device_node) in self.device_tree.lock().unwrap().iter() { 4652 if let Some(migratable) = &device_node.migratable { 4653 migratable.lock().unwrap().complete_migration()?; 4654 } 4655 } 4656 Ok(()) 4657 } 4658 } 4659 4660 const PCIU_FIELD_OFFSET: u64 = 0; 4661 const PCID_FIELD_OFFSET: u64 = 4; 4662 const B0EJ_FIELD_OFFSET: u64 = 8; 4663 const PSEG_FIELD_OFFSET: u64 = 12; 4664 const PCIU_FIELD_SIZE: usize = 4; 4665 const PCID_FIELD_SIZE: usize = 4; 4666 const B0EJ_FIELD_SIZE: usize = 4; 4667 const PSEG_FIELD_SIZE: usize = 4; 4668 4669 impl BusDevice for DeviceManager { 4670 fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { 4671 match offset { 4672 PCIU_FIELD_OFFSET => { 4673 assert!(data.len() == PCIU_FIELD_SIZE); 4674 data.copy_from_slice( 4675 &self.pci_segments[self.selected_segment] 4676 .pci_devices_up 4677 .to_le_bytes(), 4678 ); 4679 // Clear the PCIU bitmap 4680 self.pci_segments[self.selected_segment].pci_devices_up = 0; 4681 } 4682 PCID_FIELD_OFFSET => { 4683 assert!(data.len() == PCID_FIELD_SIZE); 4684 data.copy_from_slice( 4685 &self.pci_segments[self.selected_segment] 4686 .pci_devices_down 4687 .to_le_bytes(), 4688 ); 4689 // Clear the PCID bitmap 4690 self.pci_segments[self.selected_segment].pci_devices_down = 0; 4691 } 4692 B0EJ_FIELD_OFFSET => { 4693 assert!(data.len() == B0EJ_FIELD_SIZE); 4694 // Always return an empty 
bitmap since the eject is always 4695 // taken care of right away during a write access. 4696 data.fill(0); 4697 } 4698 PSEG_FIELD_OFFSET => { 4699 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4700 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes()); 4701 } 4702 _ => error!( 4703 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4704 base, offset 4705 ), 4706 } 4707 4708 debug!( 4709 "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}", 4710 base, offset, data 4711 ) 4712 } 4713 4714 fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> { 4715 match offset { 4716 B0EJ_FIELD_OFFSET => { 4717 assert!(data.len() == B0EJ_FIELD_SIZE); 4718 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4719 data_array.copy_from_slice(data); 4720 let mut slot_bitmap = u32::from_le_bytes(data_array); 4721 4722 while slot_bitmap > 0 { 4723 let slot_id = slot_bitmap.trailing_zeros(); 4724 if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) { 4725 error!("Failed ejecting device {}: {:?}", slot_id, e); 4726 } 4727 slot_bitmap &= !(1 << slot_id); 4728 } 4729 } 4730 PSEG_FIELD_OFFSET => { 4731 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4732 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4733 data_array.copy_from_slice(data); 4734 let selected_segment = u32::from_le_bytes(data_array) as usize; 4735 if selected_segment >= self.pci_segments.len() { 4736 error!( 4737 "Segment selection out of range: {} >= {}", 4738 selected_segment, 4739 self.pci_segments.len() 4740 ); 4741 return None; 4742 } 4743 self.selected_segment = selected_segment; 4744 } 4745 _ => error!( 4746 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4747 base, offset 4748 ), 4749 } 4750 4751 debug!( 4752 "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}", 4753 base, offset, data 4754 ); 4755 4756 None 4757 } 4758 } 4759 4760 impl Drop for DeviceManager { 4761 fn drop(&mut self) { 4762 for handle in self.virtio_devices.drain(..) 
{ 4763 handle.virtio_device.lock().unwrap().shutdown(); 4764 } 4765 4766 if let Some(termios) = *self.original_termios_opt.lock().unwrap() { 4767 // SAFETY: FFI call 4768 let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) }; 4769 } 4770 } 4771 } 4772