1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 // 3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE-BSD-3-Clause file. 6 // 7 // Copyright © 2019 Intel Corporation 8 // 9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause 10 // 11 12 use crate::config::{ 13 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig, 14 VdpaConfig, VhostMode, VmConfig, VsockConfig, 15 }; 16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE}; 17 use crate::device_tree::{DeviceNode, DeviceTree}; 18 use crate::interrupt::LegacyUserspaceInterruptManager; 19 use crate::interrupt::MsiInterruptManager; 20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE}; 21 use crate::pci_segment::PciSegment; 22 use crate::seccomp_filters::{get_seccomp_filter, Thread}; 23 use crate::serial_manager::{Error as SerialManagerError, SerialManager}; 24 use crate::sigwinch_listener::start_sigwinch_listener; 25 use crate::GuestRegionMmap; 26 use crate::PciDeviceInfo; 27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID}; 28 use acpi_tables::sdt::GenericAddress; 29 use acpi_tables::{aml, aml::Aml}; 30 use anyhow::anyhow; 31 use arch::layout; 32 #[cfg(target_arch = "x86_64")] 33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START}; 34 use arch::NumaNodes; 35 #[cfg(target_arch = "aarch64")] 36 use arch::{DeviceType, MmioDeviceInfo}; 37 use block_util::{ 38 async_io::DiskFile, block_io_uring_is_supported, detect_image_type, 39 fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync, 40 raw_async::RawFileDisk, raw_sync::RawFileDiskSync, vhdx_sync::VhdxDiskSync, ImageType, 41 }; 42 #[cfg(target_arch = "aarch64")] 43 use devices::gic; 44 #[cfg(target_arch = "x86_64")] 45 use devices::ioapic; 46 #[cfg(target_arch = "aarch64")] 47 
use devices::legacy::Pl011; 48 #[cfg(target_arch = "x86_64")] 49 use devices::legacy::Serial; 50 use devices::{ 51 interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags, 52 }; 53 use hypervisor::{HypervisorType, IoEventAddress}; 54 use libc::{ 55 cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, 56 O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, 57 }; 58 use pci::{ 59 DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping, 60 VfioUserPciDevice, VfioUserPciDeviceError, 61 }; 62 use seccompiler::SeccompAction; 63 use serde::{Deserialize, Serialize}; 64 use std::collections::{BTreeSet, HashMap}; 65 use std::convert::TryInto; 66 use std::fs::{read_link, File, OpenOptions}; 67 use std::io::{self, stdout, Seek, SeekFrom}; 68 use std::mem::zeroed; 69 use std::num::Wrapping; 70 use std::os::unix::fs::OpenOptionsExt; 71 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; 72 use std::path::PathBuf; 73 use std::result; 74 use std::sync::{Arc, Mutex}; 75 use std::time::Instant; 76 use tracer::trace_scoped; 77 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd}; 78 use virtio_devices::transport::VirtioTransport; 79 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator}; 80 use virtio_devices::vhost_user::VhostUserConfig; 81 use virtio_devices::{ 82 AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource, 83 }; 84 use virtio_devices::{Endpoint, IommuMapping}; 85 use vm_allocator::{AddressAllocator, SystemAllocator}; 86 use vm_device::dma_mapping::vfio::VfioDmaMapping; 87 use vm_device::dma_mapping::ExternalDmaMapping; 88 use vm_device::interrupt::{ 89 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig, 90 }; 91 use vm_device::{Bus, BusDevice, Resource}; 92 use vm_memory::guest_memory::FileOffset; 93 use vm_memory::GuestMemoryRegion; 94 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion}; 95 
#[cfg(target_arch = "x86_64")]
use vm_memory::{GuestAddressSpace, GuestMemory};
use vm_migration::{
    protocol::MemoryRangeTable, snapshot_from_id, versioned_state_from_id, Migratable,
    MigratableError, Pausable, Snapshot, SnapshotDataSection, Snapshottable, Transportable,
};
use vm_virtio::AccessPlatform;
use vm_virtio::VirtioDeviceType;
use vmm_sys_util::eventfd::EventFd;

// 4 KiB window per MMIO device slot (aarch64 only).
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";

// Devices that the user may name and for which we generate
// identifiers if the user doesn't give one
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";

/// Errors associated with device manager
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Cannot create tpm device
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier
    InvalidIdentifier(String),

    /// Error activating virtio device
    VirtioActivate(ActivateError),

    /// Failed retrieving device state from snapshot
    RestoreGetState(MigratableError),
}

/// Convenience alias: every fallible DeviceManager operation returns this.
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// Size of the MMIO window through which the guest pokes the DeviceManager
// (hotplug notifications etc.). The DeviceManager is inserted on the MMIO
// bus over a region of exactly this many bytes.
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

// Linux tty ioctl request numbers used below:
// TIOCSPTLCK clears/sets the pty lock on the ptmx fd; TIOCGTPEER opens the
// peer (subordinate) end of the pty and returns its fd.
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
const TIOCGTPEER: libc::c_int = 0x5441;

/// Allocate a new pseudo-terminal pair.
///
/// Returns `(main, sub, path)`: the ptmx (main) end opened non-blocking,
/// the subordinate end as a `File`, and the filesystem path of the
/// subordinate (resolved via /proc/self/fd).
pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
    // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
    // This is done to try and use the devpts filesystem that
    // could be available for use in the process's namespace first.
    // Ideally these are all the same file though but different
    // kernels could have things setup differently.
    // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
    // for further details.

    let custom_flags = libc::O_NONBLOCK;
    let main = match OpenOptions::new()
        .read(true)
        .write(true)
        .custom_flags(custom_flags)
        .open("/dev/pts/ptmx")
    {
        Ok(f) => f,
        // Any failure on the devpts path falls back to the legacy node.
        _ => OpenOptions::new()
            .read(true)
            .write(true)
            .custom_flags(custom_flags)
            .open("/dev/ptmx")?,
    };
    // Unlock the pty (TIOCSPTLCK with 0) so the peer can be opened.
    let mut unlock: libc::c_ulong = 0;
    // SAFETY: FFI call into libc, trivially safe
    unsafe {
        libc::ioctl(
            main.as_raw_fd(),
            TIOCSPTLCK.try_into().unwrap(),
            &mut unlock,
        )
    };

    // SAFETY: FFI call into libc, trivially safe
    let sub_fd = unsafe {
        libc::ioctl(
            main.as_raw_fd(),
            TIOCGTPEER.try_into().unwrap(),
            libc::O_NOCTTY | libc::O_RDWR,
        )
    };
    if sub_fd == -1 {
        return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
    }

    // Resolve the subordinate's real path through its /proc fd entry.
    let proc_path = PathBuf::from(format!("/proc/self/fd/{}", sub_fd));
    let path = read_link(proc_path)?;

    // SAFETY: sub_fd is checked to be valid before being wrapped in File
    Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
}

/// Thin handle to the (optional) virtio-console resizer.
#[derive(Default)]
pub struct Console {
    console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
}

impl Console {
    // Propagate the current terminal size to the guest console, if a
    // resizer was registered.
    pub fn update_console_size(&self) {
        if let Some(resizer) = self.console_resizer.as_ref() {
            resizer.update_console_size()
        }
    }
}

/// Owns the allocators and buses needed to place devices in the guest
/// address space; also implements BAR relocation (see `move_bar`).
pub(crate) struct AddressManager {
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    #[cfg(target_arch = "x86_64")]
    pub(crate) io_bus: Arc<Bus>,
    pub(crate) mmio_bus: Arc<Bus>,
    pub(crate) vm: Arc<dyn hypervisor::Vm>,
    device_tree: Arc<Mutex<DeviceTree>>,
    // One MMIO allocator per PCI segment.
    pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}

impl DeviceRelocation for AddressManager {
    // Move a PCI BAR from `old_base` to `new_base`:
    // 1) re-home the range in the matching allocator and bus,
    // 2) patch the device_tree resource entry,
    // 3) for virtio-pci devices, re-register ioeventfds or re-map the
    //    shared-memory user region at the new address,
    // 4) finally let the device itself record the move.
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                // Update system allocator
                if region_type == PciBarRegionType::Memory32BitRegion {
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_mmio_hole_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            Some(len),
                        )
                        .ok_or_else(|| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                "failed allocating new 32 bits MMIO range",
                            )
                        })?;
                } else {
                    // Find the specific allocator that this BAR was allocated from and use it for new one
                    for allocator in &self.pci_mmio_allocators {
                        let allocator_base = allocator.lock().unwrap().base();
                        let allocator_end = allocator.lock().unwrap().end();

                        if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                            allocator
                                .lock()
                                .unwrap()
                                .free(GuestAddress(old_base), len as GuestUsize);

                            allocator
                                .lock()
                                .unwrap()
                                .allocate(
                                    Some(GuestAddress(new_base)),
                                    len as GuestUsize,
                                    Some(len),
                                )
                                .ok_or_else(|| {
                                    io::Error::new(
                                        io::ErrorKind::Other,
                                        "failed allocating new 64 bits MMIO range",
                                    )
                                })?;

                            break;
                        }
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{:x} for device {}",
                            old_base, id
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {} from device tree", id),
                ));
            }
        }

        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The moved BAR is the config BAR: move the ioeventfds along
                // with it (unregister at old addresses, register at new ones).
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {:?}", e),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {:?}", e),
                            )
                        })?;
                }
            } else {
                // Otherwise the moved BAR may back the device's shared-memory
                // regions: re-create the user memory region at the new base.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {:?}", e),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {:?}", e),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {:?}", e),
                            )
                        })?;
                    }
                }
            }
        }

        pci_dev.move_bar(old_base, new_base)
    }
}

// Serialized state of the DeviceManager: the device tree plus the counter
// used to generate device identifiers.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

/// Main end of a pty plus the path of its subordinate end.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    // File is not Clone; duplicate the underlying fd via try_clone.
    // NOTE(review): the unwrap panics if fd duplication fails — presumably
    // acceptable here, but confirm callers tolerate it.
    fn clone(&self) -> Self {
        PtyPair {
            main: self.main.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

/// The concrete kinds of PCI devices the manager can hold a handle to.
#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}

// A virtio device plus the placement metadata needed to expose it
// (IOMMU attachment, identifier, PCI segment, optional DMA handler).
#[derive(Clone)]
struct MetaVirtioDevice {
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    iommu: bool,
    id:
String,
    pci_segment: u16,
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}

/// Guest addresses of ACPI platform registers, collected while devices are
/// created and later consumed when building the ACPI tables.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}

/// Central owner of all guest devices: creates them, wires them to buses,
/// interrupts and memory, and tracks them for hotplug/snapshot/restore.
pub struct DeviceManager {
    // The underlying hypervisor
    hypervisor_type: HypervisorType,

    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // console PTY
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // serial PTY
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // pty foreground status,
    console_resize_pipe: Option<Arc<File>>,

    // Interrupt controller (IOAPIC on x86_64, GIC on aarch64)
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    exit_evt: EventFd,
    reset_evt: EventFd,

    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    acpi_address: GuestAddress,

    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // Helps identify if the VM is currently being restored
    restoring: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g.
    // ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    snapshot: Option<Snapshot>,
}

impl DeviceManager {
    /// Build a DeviceManager: carve the device area into per-segment MMIO
    /// allocators, create the MSI interrupt manager and PCI segments, and
    /// register the manager itself on the MMIO bus at its ACPI window.
    /// `dynamic` additionally exposes the CpuManager over MMIO for CPU
    /// hotplug.
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
        mmio_bus: Arc<Bus>,
        hypervisor_type: HypervisorType,
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        cpu_manager: Arc<Mutex<CpuManager>>,
        exit_evt: EventFd,
        reset_evt: EventFd,
        seccomp_action: SeccompAction,
        numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        restoring: bool,
        boot_id_list: BTreeSet<String>,
        timestamp: Instant,
        snapshot: Option<Snapshot>,
        dynamic: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        trace_scoped!("DeviceManager::new");

        let device_tree = Arc::new(Mutex::new(DeviceTree::new()));

        // Default to a single PCI segment unless the platform config says
        // otherwise.
        let num_pci_segments =
            if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
                platform_config.num_pci_segments
            } else {
                1
            };

        let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
        let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;

        // Start each PCI segment range on a 4GiB boundary
        // (divide-then-multiply rounds each segment's size down to a
        // multiple of 4 GiB).
        let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
            / ((4 << 30) * num_pci_segments as u64)
            * (4 << 30);

        // One MMIO address allocator per segment, each covering its own
        // contiguous slice of the device area.
        let mut pci_mmio_allocators = vec![];
        for i in 0..num_pci_segments as u64 {
            let mmio_start = start_of_device_area + i * pci_segment_size;
            let allocator = Arc::new(Mutex::new(
                AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
            ));
            pci_mmio_allocators.push(allocator)
        }

        let address_manager = Arc::new(AddressManager {
            allocator: memory_manager.lock().unwrap().allocator(),
            #[cfg(target_arch = "x86_64")]
            io_bus,
            mmio_bus,
            vm: vm.clone(),
            device_tree: Arc::clone(&device_tree),
            pci_mmio_allocators,
        });

        // First we create the MSI interrupt manager, the legacy one is created
        // later, after the IOAPIC device creation.
        // The reason we create the MSI one first is because the IOAPIC needs it,
        // and then the legacy interrupt manager needs an IOAPIC. So we're
        // handling a linear dependency chain:
        // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
        let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
            Arc::new(MsiInterruptManager::new(
                Arc::clone(&address_manager.allocator),
                vm,
            ));

        // Reserve the MMIO window the DeviceManager itself answers on.
        // NOTE(review): this is a platform MMIO allocation but maps to
        // AllocateIoPort on failure; AllocateMmioAddress would describe it
        // better — confirm before changing.
        let acpi_address = address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        let mut pci_irq_slots = [0; 32];
        PciSegment::reserve_legacy_interrupts_for_pci_devices(
            &address_manager,
            &mut pci_irq_slots,
        )?;

        // Segment 0 is the default segment; the rest are created explicitly.
        let mut pci_segments = vec![PciSegment::new_default_segment(
            &address_manager,
            Arc::clone(&address_manager.pci_mmio_allocators[0]),
            &pci_irq_slots,
        )?];

        for i in 1..num_pci_segments as usize {
            pci_segments.push(PciSegment::new(
                i as u16,
                &address_manager,
                Arc::clone(&address_manager.pci_mmio_allocators[i]),
                &pci_irq_slots,
            )?);
        }

        // With dynamic (hotplug) support, the CpuManager also gets an MMIO
        // window so the guest can notify CPU plug/unplug through ACPI.
        if dynamic {
            let acpi_address = address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
                .ok_or(DeviceManagerError::AllocateMmioAddress)?;

            address_manager
                .mmio_bus
                .insert(
                    cpu_manager.clone(),
                    acpi_address.0,
                    CPU_MANAGER_ACPI_SIZE as u64,
                )
                .map_err(DeviceManagerError::BusError)?;

            cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
        }

        let device_manager = DeviceManager {
            hypervisor_type,
            address_manager: Arc::clone(&address_manager),
            console: Arc::new(Console::default()),
            interrupt_controller: None,
            #[cfg(target_arch = "aarch64")]
            cmdline_additions: Vec::new(),
            ged_notification_device: None,
            config,
            memory_manager,
            cpu_manager,
            virtio_devices: Vec::new(),
            bus_devices: Vec::new(),
            device_id_cnt: Wrapping(0),
            msi_interrupt_manager,
            legacy_interrupt_manager: None,
            passthrough_device: None,
            vfio_container: None,
            iommu_device: None,
            iommu_mapping: None,
            iommu_attached_devices: None,
            pci_segments,
            device_tree,
            exit_evt,
            reset_evt,
            #[cfg(target_arch = "aarch64")]
            id_to_dev_info: HashMap::new(),
            seccomp_action,
            numa_nodes,
            balloon: None,
            activate_evt: activate_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            acpi_address,
            selected_segment: 0,
            serial_pty: None,
            serial_manager: None,
            console_pty: None,
            console_resize_pipe: None,
            virtio_mem_devices: Vec::new(),
            #[cfg(target_arch = "aarch64")]
            gpio_device: None,
            force_iommu,
            restoring,
            io_uring_supported: None,
            boot_id_list,
            timestamp,
            pending_activations: Arc::new(Mutex::new(Vec::default())),
            acpi_platform_addresses: AcpiPlatformAddresses::default(),
            snapshot,
        };

        let device_manager = Arc::new(Mutex::new(device_manager));

        // Expose the DeviceManager itself on the MMIO bus at its reserved
        // ACPI window.
        address_manager
            .mmio_bus
            .insert(
                Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
                acpi_address.0,
                DEVICE_MANAGER_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(device_manager)
    }

    // Cloned handle (dup'ed fd) to the serial pty, if one was created.
    pub fn serial_pty(&self) -> Option<PtyPair> {
        self.serial_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    // Cloned handle (dup'ed fd) to the console pty, if one was created.
    pub fn console_pty(&self) -> Option<PtyPair> {
        self.console_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
        self.console_resize_pipe.as_ref().map(Arc::clone)
    }

    /// Create and wire up all guest devices: interrupt controller first,
    /// then the legacy interrupt manager that depends on it, followed by
    /// legacy/ACPI/console devices. (Continues beyond this chunk.)
    pub fn create_devices(
        &mut self,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        console_resize_pipe: Option<File>,
    ) -> DeviceManagerResult<()> {
        trace_scoped!("create_devices");

        let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();

        let interrupt_controller = self.add_interrupt_controller()?;

        self.cpu_manager
            .lock()
            .unwrap()
            .set_interrupt_controller(interrupt_controller.clone());

        // Now we can create the legacy interrupt manager, which needs the freshly
        // formed IOAPIC device.
        let legacy_interrupt_manager: Arc<
            dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
        > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
            &interrupt_controller,
        )));

        {
            // If the MemoryManager has an ACPI window, expose it on the
            // MMIO bus for memory hotplug notifications.
            if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
                self.address_manager
                    .mmio_bus
                    .insert(
                        Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
                        acpi_address.0,
                        MEMORY_MANAGER_ACPI_SIZE as u64,
                    )
                    .map_err(DeviceManagerError::BusError)?;
            }
        }

        #[cfg(target_arch = "x86_64")]
        self.add_legacy_devices(
            self.reset_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )?;

        #[cfg(target_arch = "aarch64")]
        self.add_legacy_devices(&legacy_interrupt_manager)?;

        {
            self.ged_notification_device = self.add_acpi_devices(
                &legacy_interrupt_manager,
                self.reset_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )?;
        }

        self.console = self.add_console_device(
&legacy_interrupt_manager, 1219 &mut virtio_devices, 1220 serial_pty, 1221 console_pty, 1222 console_resize_pipe, 1223 )?; 1224 1225 if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() { 1226 let tpm_dev = self.add_tpm_device(tpm.socket.clone())?; 1227 self.bus_devices 1228 .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>) 1229 } 1230 self.legacy_interrupt_manager = Some(legacy_interrupt_manager); 1231 1232 virtio_devices.append(&mut self.make_virtio_devices()?); 1233 1234 self.add_pci_devices(virtio_devices.clone())?; 1235 1236 self.virtio_devices = virtio_devices; 1237 1238 Ok(()) 1239 } 1240 1241 fn state(&self) -> DeviceManagerState { 1242 DeviceManagerState { 1243 device_tree: self.device_tree.lock().unwrap().clone(), 1244 device_id_cnt: self.device_id_cnt, 1245 } 1246 } 1247 1248 fn set_state(&mut self, state: &DeviceManagerState) { 1249 *self.device_tree.lock().unwrap() = state.device_tree.clone(); 1250 self.device_id_cnt = state.device_id_cnt; 1251 } 1252 1253 fn get_msi_iova_space(&mut self) -> (u64, u64) { 1254 #[cfg(target_arch = "aarch64")] 1255 { 1256 let vcpus = self.config.lock().unwrap().cpus.boot_vcpus; 1257 let vgic_config = gic::Gic::create_default_config(vcpus.into()); 1258 ( 1259 vgic_config.msi_addr, 1260 vgic_config.msi_addr + vgic_config.msi_size - 1, 1261 ) 1262 } 1263 #[cfg(target_arch = "x86_64")] 1264 (0xfee0_0000, 0xfeef_ffff) 1265 } 1266 1267 #[cfg(target_arch = "aarch64")] 1268 /// Gets the information of the devices registered up to some point in time. 
1269 pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> { 1270 &self.id_to_dev_info 1271 } 1272 1273 #[allow(unused_variables)] 1274 fn add_pci_devices( 1275 &mut self, 1276 virtio_devices: Vec<MetaVirtioDevice>, 1277 ) -> DeviceManagerResult<()> { 1278 let iommu_id = String::from(IOMMU_DEVICE_NAME); 1279 1280 let iommu_device = if self.config.lock().unwrap().iommu { 1281 let (device, mapping) = virtio_devices::Iommu::new( 1282 iommu_id.clone(), 1283 self.seccomp_action.clone(), 1284 self.exit_evt 1285 .try_clone() 1286 .map_err(DeviceManagerError::EventFd)?, 1287 self.get_msi_iova_space(), 1288 versioned_state_from_id(self.snapshot.as_ref(), iommu_id.as_str()) 1289 .map_err(DeviceManagerError::RestoreGetState)?, 1290 ) 1291 .map_err(DeviceManagerError::CreateVirtioIommu)?; 1292 let device = Arc::new(Mutex::new(device)); 1293 self.iommu_device = Some(Arc::clone(&device)); 1294 self.iommu_mapping = Some(mapping); 1295 1296 // Fill the device tree with a new node. In case of restore, we 1297 // know there is nothing to do, so we can simply override the 1298 // existing entry. 
1299 self.device_tree 1300 .lock() 1301 .unwrap() 1302 .insert(iommu_id.clone(), device_node!(iommu_id, device)); 1303 1304 Some(device) 1305 } else { 1306 None 1307 }; 1308 1309 let mut iommu_attached_devices = Vec::new(); 1310 { 1311 for handle in virtio_devices { 1312 let mapping: Option<Arc<IommuMapping>> = if handle.iommu { 1313 self.iommu_mapping.clone() 1314 } else { 1315 None 1316 }; 1317 1318 let dev_id = self.add_virtio_pci_device( 1319 handle.virtio_device, 1320 &mapping, 1321 handle.id, 1322 handle.pci_segment, 1323 handle.dma_handler, 1324 )?; 1325 1326 if handle.iommu { 1327 iommu_attached_devices.push(dev_id); 1328 } 1329 } 1330 1331 let mut vfio_iommu_device_ids = self.add_vfio_devices()?; 1332 iommu_attached_devices.append(&mut vfio_iommu_device_ids); 1333 1334 let mut vfio_user_iommu_device_ids = self.add_user_devices()?; 1335 iommu_attached_devices.append(&mut vfio_user_iommu_device_ids); 1336 1337 // Add all devices from forced iommu segments 1338 if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() { 1339 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() { 1340 for segment in iommu_segments { 1341 for device in 0..32 { 1342 let bdf = PciBdf::new(*segment, 0, device, 0); 1343 if !iommu_attached_devices.contains(&bdf) { 1344 iommu_attached_devices.push(bdf); 1345 } 1346 } 1347 } 1348 } 1349 } 1350 1351 if let Some(iommu_device) = iommu_device { 1352 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?; 1353 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices)); 1354 } 1355 } 1356 1357 for segment in &self.pci_segments { 1358 #[cfg(target_arch = "x86_64")] 1359 if let Some(pci_config_io) = segment.pci_config_io.as_ref() { 1360 self.bus_devices 1361 .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>); 1362 } 1363 1364 self.bus_devices 1365 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>); 1366 } 1367 1368 Ok(()) 1369 } 1370 1371 
    // aarch64: creates the vGIC-backed interrupt controller.
    #[cfg(target_arch = "aarch64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
            gic::Gic::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
                self.address_manager.vm.clone(),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        // Unlike x86_64, the "interrupt_controller" here for AArch64 is only
        // a `Gic` object that implements the `InterruptController` to provide
        // interrupt delivery service. This is not the real GIC device so that
        // we do not need to insert it to the device tree.

        Ok(interrupt_controller)
    }

    #[cfg(target_arch = "aarch64")]
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }

    // x86_64: creates the userspace IOAPIC, registers it on the MMIO bus and
    // records it in the device tree (unlike the aarch64 variant above).
    #[cfg(target_arch = "x86_64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let id = String::from(IOAPIC_DEVICE_NAME);

        // Create IOAPIC
        let interrupt_controller = Arc::new(Mutex::new(
            ioapic::Ioapic::new(
                id.clone(),
                APIC_START,
                Arc::clone(&self.msi_interrupt_manager),
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        self.address_manager
            .mmio_bus
            .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
            .map_err(DeviceManagerError::BusError)?;

        self.bus_devices
            .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    // Creates the ACPI support devices: shutdown device, GED (general event
    // device, returned so callers can raise notifications) and PM timer.
    fn add_acpi_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        reset_evt: EventFd,
        exit_evt: EventFd,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
        let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
            exit_evt, reset_evt,
        )));

        self.bus_devices
            .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            let shutdown_pio_address: u16 = 0x600;

            // NOTE(review): 0x8 ports are allocated here but only 0x4 are
            // inserted on the io_bus below — confirm the size mismatch is
            // intentional (e.g. reserving the neighboring ports).
            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            // All three ACPI registers are backed by the same port.
            self.acpi_platform_addresses.sleep_control_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.sleep_status_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
            self.acpi_platform_addresses.reset_reg_address =
                Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
        }

        let ged_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();
        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: ged_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;
        let ged_address = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_platform_mmio_addresses(
                None,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
                None,
            )
            .ok_or(DeviceManagerError::AllocateMmioAddress)?;
        let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
            interrupt_group,
            ged_irq,
            ged_address,
        )));
        self.address_manager
            .mmio_bus
            .insert(
                ged_device.clone(),
                ged_address.0,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;
        self.bus_devices
            .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);

        let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));

        self.bus_devices
            .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            let pm_timer_pio_address: u16 = 0x608;

            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
                .map_err(DeviceManagerError::BusError)?;

            self.acpi_platform_addresses.pm_timer_address =
                Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
        }

        Ok(Some(ged_device))
    }

    // x86_64: creates the fixed-port legacy devices — i8042 (reset), CMOS,
    // firmware debug port (0x402) and the 0x80 debug port.
    #[cfg(target_arch = "x86_64")]
    fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
        // Add a shutdown device (i8042)
        let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
            reset_evt.try_clone().unwrap(),
        )));

        self.bus_devices
            .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .io_bus
            .insert(i8042, 0x61, 0x4)
            .map_err(DeviceManagerError::BusError)?;
        {
            // Add a CMOS emulated device
            let mem_size = self
                .memory_manager
                .lock()
                .unwrap()
                .guest_memory()
                .memory()
                .last_addr()
                .0
                + 1;
            // Split guest RAM into below-4G / above-4G quantities for CMOS.
            let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
            let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);

            let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
                mem_below_4g,
                mem_above_4g,
                reset_evt,
            )));

            self.bus_devices
                .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(cmos, 0x70, 0x2)
                .map_err(DeviceManagerError::BusError)?;

            let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));

            self.bus_devices
                .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(fwdebug, 0x402, 0x1)
                .map_err(DeviceManagerError::BusError)?;
        }

        // 0x80 debug port
        let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
        self.bus_devices
            .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
        self.address_manager
            .io_bus
            .insert(debug_port, 0x80, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        Ok(())
    }

    // aarch64: creates the MMIO legacy devices (RTC and GPIO), allocating an
    // IRQ for each and recording them in id_to_dev_info for FDT generation.
    #[cfg(target_arch = "aarch64")]
    fn add_legacy_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
    ) -> DeviceManagerResult<()> {
        // Add an RTC device
        let rtc_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: rtc_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));

        self.bus_devices
            .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(rtc_device, addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Rtc, "rtc".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: rtc_irq,
            },
        );

        // Add a GPIO device
        let id = String::from(GPIO_DEVICE_NAME);
        let gpio_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: gpio_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
            id.clone(),
            interrupt_group,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(gpio_device.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.gpio_device = Some(gpio_device.clone());

        self.id_to_dev_info.insert(
            (DeviceType::Gpio, "gpio".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: gpio_irq,
            },
        );

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, gpio_device));

        Ok(())
    }

    // x86_64: creates a 16550-style serial device on the canonical COM1
    // resources (ports 0x3f8-0x3ff, IRQ 4).
    #[cfg(target_arch = "x86_64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
        // Serial is tied to IRQ #4
        let serial_irq = 4;

        let id = String::from(SERIAL_DEVICE_NAME);

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(Serial::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(serial.clone(), 0x3f8, 0x8)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    // aarch64: creates a PL011 UART on a dynamically allocated IRQ at the
    // fixed legacy-serial MMIO window, and advertises it via earlycon.
    #[cfg(target_arch = "aarch64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
        let id = String::from(SERIAL_DEVICE_NAME);

        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            self.timestamp,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: serial_irq,
            },
        );

        self.cmdline_additions
            .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    // Applies `f` to the terminal attributes of `fd` via tcgetattr/tcsetattr.
    // No-op (Ok) when `fd` is not a TTY.
    fn modify_mode<F: FnOnce(&mut termios)>(
        &self,
        fd: RawFd,
        f: F,
    ) -> vmm_sys_util::errno::Result<()> {
        // SAFETY: safe because we check the return value of isatty.
        if unsafe { isatty(fd) } != 1 {
            return Ok(());
        }

        // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr
        // and we check the return result.
        let mut termios: termios = unsafe { zeroed() };
        // SAFETY: see above
        let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }
        f(&mut termios);
        // SAFETY: Safe because the syscall will only read the extent of termios and we check
        // the return result.
        let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }

        Ok(())
    }

    // Puts the given file's terminal into raw mode (cfmakeraw); no-op for
    // non-TTY files per modify_mode.
    fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> {
        // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
        self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
    }

    // Spawns the seccomp-confined SIGWINCH listener for the given PTY pair;
    // failures are logged and ignored (resize support is best-effort).
    fn listen_for_sigwinch_on_tty(&mut self, pty_main: File, pty_sub: File) -> std::io::Result<()> {
        let seccomp_filter = get_seccomp_filter(
            &self.seccomp_action,
            Thread::PtyForeground,
            self.hypervisor_type,
        )
        .unwrap();

        match start_sigwinch_listener(seccomp_filter, pty_main, pty_sub) {
            Ok(pipe) => {
                self.console_resize_pipe = Some(Arc::new(pipe));
            }
            Err(e) => {
                warn!("Ignoring error from setting up SIGWINCH listener: {}", e)
            }
        }

        Ok(())
    }

    // Creates the virtio-console device with an endpoint matching the
    // configured output mode (file / pty / tty / null), appends it to
    // `virtio_devices`, and returns a resizer only for the TTY mode.
    fn add_virtio_console_device(
        &mut self,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_pty: Option<PtyPair>,
        resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
        let console_config = self.config.lock().unwrap().console.clone();
        let endpoint = match console_config.mode {
            ConsoleOutputMode::File => {
                let file = File::create(console_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
                Endpoint::File(file)
            }
            ConsoleOutputMode::Pty => {
                if let Some(pty) = console_pty {
                    // Reuse a pre-existing PTY pair (e.g. handed back across a
                    // restore) instead of creating a fresh one.
                    self.config.lock().unwrap().console.file = Some(pty.path.clone());
                    let file = pty.main.try_clone().unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(pty)));
                    self.console_resize_pipe = resize_pipe.map(Arc::new);
                    Endpoint::PtyPair(file.try_clone().unwrap(), file)
                } else {
                    let (main, mut sub, path) =
                        create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?;
                    self.set_raw_mode(&mut sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().console.file = Some(path.clone());
                    let file = main.try_clone().unwrap();
                    assert!(resize_pipe.is_none());
                    self.listen_for_sigwinch_on_tty(main.try_clone().unwrap(), sub)
                        .unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
                    Endpoint::PtyPair(file.try_clone().unwrap(), file)
                }
            }
            ConsoleOutputMode::Tty => {
                // Duplicating the file descriptors like this is needed as otherwise
                // they will be closed on a reboot and the numbers reused

                // SAFETY: FFI call to dup. Trivially safe.
                let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
                if stdout == -1 {
                    return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
                }
                // SAFETY: stdout is valid and owned solely by us.
                let stdout = unsafe { File::from_raw_fd(stdout) };

                // If an interactive TTY then we can accept input
                // SAFETY: FFI call. Trivially safe.
                if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
                    // SAFETY: FFI call to dup. Trivially safe.
                    let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
                    if stdin == -1 {
                        return vmm_sys_util::errno::errno_result()
                            .map_err(DeviceManagerError::DupFd);
                    }
                    // SAFETY: stdin is valid and owned solely by us.
                    let stdin = unsafe { File::from_raw_fd(stdin) };

                    Endpoint::FilePair(stdout, stdin)
                } else {
                    Endpoint::File(stdout)
                }
            }
            ConsoleOutputMode::Null => Endpoint::Null,
            ConsoleOutputMode::Off => return Ok(None),
        };
        let id = String::from(CONSOLE_DEVICE_NAME);

        let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
            id.clone(),
            endpoint,
            self.console_resize_pipe
                .as_ref()
                .map(|p| p.try_clone().unwrap()),
            self.force_iommu | console_config.iommu,
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateVirtioConsole)?;
        let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
        virtio_devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_console_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: console_config.iommu,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_console_device));

        // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
        Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
            Some(console_resizer)
        } else {
            None
        })
    }

    // Sets up both guest consoles: the legacy serial device (with its
    // SerialManager input thread when in Pty/Tty mode) and the virtio-console.
    fn add_console_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
        console_resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
            ConsoleOutputMode::File => Some(Box::new(
                File::create(serial_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::SerialOutputFileOpen)?,
            )),
            ConsoleOutputMode::Pty => {
                // In Pty mode output goes through the SerialManager rather
                // than a writer, so None is returned after the PTY is set up.
                if let Some(pty) = serial_pty {
                    self.config.lock().unwrap().serial.file = Some(pty.path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(pty)));
                } else {
                    let (main, mut sub, path) =
                        create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
                    self.set_raw_mode(&mut sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().serial.file = Some(path.clone());
                    self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
                }
                None
            }
            ConsoleOutputMode::Tty => Some(Box::new(stdout())),
            ConsoleOutputMode::Off | ConsoleOutputMode::Null => None,
        };
        if serial_config.mode != ConsoleOutputMode::Off {
            let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
            self.serial_manager = match serial_config.mode {
                ConsoleOutputMode::Pty | ConsoleOutputMode::Tty => {
                    let serial_manager =
                        SerialManager::new(serial, self.serial_pty.clone(), serial_config.mode)
                            .map_err(DeviceManagerError::CreateSerialManager)?;
                    if let Some(mut serial_manager) = serial_manager {
                        serial_manager
                            .start_thread(
                                self.exit_evt
                                    .try_clone()
                                    .map_err(DeviceManagerError::EventFd)?,
                            )
                            .map_err(DeviceManagerError::SpawnSerialManager)?;
                        Some(Arc::new(serial_manager))
                    } else {
                        None
                    }
                }
                _ => None,
            };
        }

        let console_resizer =
            self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;

        Ok(Arc::new(Console { console_resizer }))
    }

    // Creates the TPM device backed by the given socket path and maps it at
    // the fixed TPM MMIO window.
    fn add_tpm_device(
        &mut self,
        tpm_path: PathBuf,
    ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
        // Create TPM Device
        let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
            DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
        })?;
        let tpm = Arc::new(Mutex::new(tpm));

        // Add TPM Device to mmio
        self.address_manager
            .mmio_bus
            .insert(
                tpm.clone(),
                arch::layout::TPM_START.0,
                arch::layout::TPM_SIZE,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(tpm)
    }

    // Builds the full list of configured virtio devices, in a fixed order.
    fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices: Vec<MetaVirtioDevice> = Vec::new();

        // Create "standard" virtio devices (net/block/rng)
        devices.append(&mut self.make_virtio_block_devices()?);
        devices.append(&mut self.make_virtio_net_devices()?);
        devices.append(&mut self.make_virtio_rng_devices()?);

        // Add virtio-fs if required
        devices.append(&mut self.make_virtio_fs_devices()?);

        // Add virtio-pmem if required
        devices.append(&mut self.make_virtio_pmem_devices()?);

        // Add virtio-vsock if required
        devices.append(&mut self.make_virtio_vsock_devices()?);

        devices.append(&mut self.make_virtio_mem_devices()?);

        // Add virtio-balloon if required
        devices.append(&mut self.make_virtio_balloon_devices()?);

        // Add virtio-watchdog device
        devices.append(&mut self.make_virtio_watchdog_devices()?);

        // Add vDPA devices if required
        devices.append(&mut self.make_vdpa_devices()?);

        Ok(devices)
    }

    // Cache whether io_uring is supported to avoid probing for every block device
    fn io_uring_is_supported(&mut self) -> bool {
        if let Some(supported) = self.io_uring_supported {
            return supported;
        }

        let supported = block_io_uring_is_supported();
        self.io_uring_supported = Some(supported);
        supported
    }

    // Creates a single virtio-block device from its config: either a
    // vhost-user backend, or a local disk file whose image type (raw / fixed
    // VHD / QCOW2 / VHDX) selects the async or sync backend implementation.
    // Assigns and records a device id in the config when none was given.
    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let vhost_user_block = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(
                    id.clone(),
                    vu_cfg,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    snapshot
                        .map(|s| s.to_versioned_state(&id))
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if !disk_cfg.disable_io_uring && self.io_uring_is_supported() {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");
                        Box::new(
                            FixedVhdDiskAsync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                        ) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if !disk_cfg.disable_io_uring && self.io_uring_is_supported() {
                        info!("Using asynchronous RAW disk file (io_uring)");
                        Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            let virtio_block = Arc::new(Mutex::new(
                virtio_devices::Block::new(
                    id.clone(),
                    image,
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                    disk_cfg.readonly,
                    self.force_iommu | disk_cfg.iommu,
                    disk_cfg.num_queues,
                    disk_cfg.queue_size,
                    self.seccomp_action.clone(),
                    disk_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    snapshot
                        .map(|s| s.to_versioned_state(&id))
                        .transpose()
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBlock)?,
            ));

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Create one virtio-block (or vhost-user-block) device per entry in the
    /// VM config's disk list, writing any generated device ids back into the
    /// config so they persist across snapshot/restore.
    fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Clone the disk list out of the config so the config lock is not
        // held while each device is created (device creation takes other
        // locks and may assign ids into the cloned list).
        let mut block_devices = self.config.lock().unwrap().disks.clone();
        if let Some(disk_list_cfg) = &mut block_devices {
            for disk_cfg in disk_list_cfg.iter_mut() {
                devices.push(self.make_virtio_block_device(disk_cfg)?);
            }
        }
        // Store the (possibly id-updated) disk list back into the config.
        self.config.lock().unwrap().disks = block_devices;

        Ok(devices)
    }

    /// Create a single virtio-net device from `net_cfg`, either backed by a
    /// vhost-user socket or by an in-process implementation (tap interface,
    /// pre-opened fds, or ip/mask configuration).
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Reuse the configured id, or generate one and record it in the
        // config for later reference (e.g. removal, restore).
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        // Snapshot (if any) for this device id, used when restoring state.
        let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());

        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            // NOTE(review): vhost_user implies vhost_socket was set —
            // presumably validated at config parse time; confirm before
            // relying on this unwrap.
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    net_cfg.mtu,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    snapshot
                        .map(|s| s.to_versioned_state(&id))
                        .transpose()
.map_err(DeviceManagerError::RestoreGetState)?, 2322 ) { 2323 Ok(vun_device) => vun_device, 2324 Err(e) => { 2325 return Err(DeviceManagerError::CreateVhostUserNet(e)); 2326 } 2327 }, 2328 )); 2329 2330 ( 2331 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2332 vhost_user_net as Arc<Mutex<dyn Migratable>>, 2333 ) 2334 } else { 2335 let state = snapshot 2336 .map(|s| s.to_versioned_state(&id)) 2337 .transpose() 2338 .map_err(DeviceManagerError::RestoreGetState)?; 2339 2340 let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap { 2341 Arc::new(Mutex::new( 2342 virtio_devices::Net::new( 2343 id.clone(), 2344 Some(tap_if_name), 2345 None, 2346 None, 2347 Some(net_cfg.mac), 2348 &mut net_cfg.host_mac, 2349 net_cfg.mtu, 2350 self.force_iommu | net_cfg.iommu, 2351 net_cfg.num_queues, 2352 net_cfg.queue_size, 2353 self.seccomp_action.clone(), 2354 net_cfg.rate_limiter_config, 2355 self.exit_evt 2356 .try_clone() 2357 .map_err(DeviceManagerError::EventFd)?, 2358 state, 2359 ) 2360 .map_err(DeviceManagerError::CreateVirtioNet)?, 2361 )) 2362 } else if let Some(fds) = &net_cfg.fds { 2363 Arc::new(Mutex::new( 2364 virtio_devices::Net::from_tap_fds( 2365 id.clone(), 2366 fds, 2367 Some(net_cfg.mac), 2368 net_cfg.mtu, 2369 self.force_iommu | net_cfg.iommu, 2370 net_cfg.queue_size, 2371 self.seccomp_action.clone(), 2372 net_cfg.rate_limiter_config, 2373 self.exit_evt 2374 .try_clone() 2375 .map_err(DeviceManagerError::EventFd)?, 2376 state, 2377 ) 2378 .map_err(DeviceManagerError::CreateVirtioNet)?, 2379 )) 2380 } else { 2381 Arc::new(Mutex::new( 2382 virtio_devices::Net::new( 2383 id.clone(), 2384 None, 2385 Some(net_cfg.ip), 2386 Some(net_cfg.mask), 2387 Some(net_cfg.mac), 2388 &mut net_cfg.host_mac, 2389 net_cfg.mtu, 2390 self.force_iommu | net_cfg.iommu, 2391 net_cfg.num_queues, 2392 net_cfg.queue_size, 2393 self.seccomp_action.clone(), 2394 net_cfg.rate_limiter_config, 2395 self.exit_evt 2396 .try_clone() 2397 
                        .map_err(DeviceManagerError::EventFd)?,
                    state,
                )
                .map_err(DeviceManagerError::CreateVirtioNet)?,
            ))
        };

        (
            Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            virtio_net as Arc<Mutex<dyn Migratable>>,
        )
    };

    // Fill the device tree with a new node. In case of restore, we
    // know there is nothing to do, so we can simply override the
    // existing entry.
    self.device_tree
        .lock()
        .unwrap()
        .insert(id.clone(), device_node!(id, migratable_device));

    Ok(MetaVirtioDevice {
        virtio_device,
        iommu: net_cfg.iommu,
        id,
        pci_segment: net_cfg.pci_segment,
        dma_handler: None,
    })
}

/// Add virtio-net and vhost-user-net devices, one per entry in the VM
/// config's net list; generated ids are written back into the config.
fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
    let mut devices = Vec::new();
    // Clone the net list so the config lock is not held across device
    // creation; ids assigned during creation land in the clone.
    let mut net_devices = self.config.lock().unwrap().net.clone();
    if let Some(net_list_cfg) = &mut net_devices {
        for net_cfg in net_list_cfg.iter_mut() {
            devices.push(self.make_virtio_net_device(net_cfg)?);
        }
    }
    // Persist the (possibly id-updated) list back into the config.
    self.config.lock().unwrap().net = net_devices;

    Ok(devices)
}

/// Create the virtio-rng device if the configured entropy source path is
/// valid UTF-8; returns an empty list otherwise.
fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
    let mut devices = Vec::new();

    // Add virtio-rng if required
    let rng_config = self.config.lock().unwrap().rng.clone();
    if let Some(rng_path) = rng_config.src.to_str() {
        info!("Creating virtio-rng device: {:?}", rng_config);
        let id = String::from(RNG_DEVICE_NAME);

        let virtio_rng_device = Arc::new(Mutex::new(
            virtio_devices::Rng::new(
                id.clone(),
                rng_path,
                self.force_iommu | rng_config.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
.map_err(DeviceManagerError::RestoreGetState)?, 2461 ) 2462 .map_err(DeviceManagerError::CreateVirtioRng)?, 2463 )); 2464 devices.push(MetaVirtioDevice { 2465 virtio_device: Arc::clone(&virtio_rng_device) 2466 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2467 iommu: rng_config.iommu, 2468 id: id.clone(), 2469 pci_segment: 0, 2470 dma_handler: None, 2471 }); 2472 2473 // Fill the device tree with a new node. In case of restore, we 2474 // know there is nothing to do, so we can simply override the 2475 // existing entry. 2476 self.device_tree 2477 .lock() 2478 .unwrap() 2479 .insert(id.clone(), device_node!(id, virtio_rng_device)); 2480 } 2481 2482 Ok(devices) 2483 } 2484 2485 fn make_virtio_fs_device( 2486 &mut self, 2487 fs_cfg: &mut FsConfig, 2488 ) -> DeviceManagerResult<MetaVirtioDevice> { 2489 let id = if let Some(id) = &fs_cfg.id { 2490 id.clone() 2491 } else { 2492 let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?; 2493 fs_cfg.id = Some(id.clone()); 2494 id 2495 }; 2496 2497 info!("Creating virtio-fs device: {:?}", fs_cfg); 2498 2499 let mut node = device_node!(id); 2500 2501 if let Some(fs_socket) = fs_cfg.socket.to_str() { 2502 let virtio_fs_device = Arc::new(Mutex::new( 2503 virtio_devices::vhost_user::Fs::new( 2504 id.clone(), 2505 fs_socket, 2506 &fs_cfg.tag, 2507 fs_cfg.num_queues, 2508 fs_cfg.queue_size, 2509 None, 2510 self.seccomp_action.clone(), 2511 self.exit_evt 2512 .try_clone() 2513 .map_err(DeviceManagerError::EventFd)?, 2514 self.force_iommu, 2515 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 2516 .map_err(DeviceManagerError::RestoreGetState)?, 2517 ) 2518 .map_err(DeviceManagerError::CreateVirtioFs)?, 2519 )); 2520 2521 // Update the device tree with the migratable device. 
            node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_fs_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                // virtio-fs is never placed behind the vIOMMU here.
                iommu: false,
                id,
                pci_segment: fs_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            // The vhost-user-fs socket path was not valid UTF-8.
            Err(DeviceManagerError::NoVirtioFsSock)
        }
    }

    /// Create one vhost-user-fs device per entry in the VM config's fs list;
    /// generated ids are written back into the config.
    fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Clone the fs list so the config lock is not held across device
        // creation; ids assigned during creation land in the clone.
        let mut fs_devices = self.config.lock().unwrap().fs.clone();
        if let Some(fs_list_cfg) = &mut fs_devices {
            for fs_cfg in fs_list_cfg.iter_mut() {
                devices.push(self.make_virtio_fs_device(fs_cfg)?);
            }
        }
        // Persist the (possibly id-updated) list back into the config.
        self.config.lock().unwrap().fs = fs_devices;

        Ok(devices)
    }

    /// Create a single virtio-pmem device from `pmem_cfg`, mapping its
    /// backing file into guest address space. On restore, the previously
    /// allocated MMIO range is recovered from the device tree.
    fn make_virtio_pmem_device(
        &mut self,
        pmem_cfg: &mut PmemConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Reuse the configured id, or generate one and record it in the
        // config.
        let id = if let Some(id) = &pmem_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
            pmem_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-pmem device: {:?}", pmem_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
2570 let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) { 2571 info!("Restoring virtio-pmem {} resources", id); 2572 2573 let mut region_range: Option<(u64, u64)> = None; 2574 for resource in node.resources.iter() { 2575 match resource { 2576 Resource::MmioAddressRange { base, size } => { 2577 if region_range.is_some() { 2578 return Err(DeviceManagerError::ResourceAlreadyExists); 2579 } 2580 2581 region_range = Some((*base, *size)); 2582 } 2583 _ => { 2584 error!("Unexpected resource {:?} for {}", resource, id); 2585 } 2586 } 2587 } 2588 2589 if region_range.is_none() { 2590 return Err(DeviceManagerError::MissingVirtioPmemResources); 2591 } 2592 2593 region_range 2594 } else { 2595 None 2596 }; 2597 2598 let (custom_flags, set_len) = if pmem_cfg.file.is_dir() { 2599 if pmem_cfg.size.is_none() { 2600 return Err(DeviceManagerError::PmemWithDirectorySizeMissing); 2601 } 2602 (O_TMPFILE, true) 2603 } else { 2604 (0, false) 2605 }; 2606 2607 let mut file = OpenOptions::new() 2608 .read(true) 2609 .write(!pmem_cfg.discard_writes) 2610 .custom_flags(custom_flags) 2611 .open(&pmem_cfg.file) 2612 .map_err(DeviceManagerError::PmemFileOpen)?; 2613 2614 let size = if let Some(size) = pmem_cfg.size { 2615 if set_len { 2616 file.set_len(size) 2617 .map_err(DeviceManagerError::PmemFileSetLen)?; 2618 } 2619 size 2620 } else { 2621 file.seek(SeekFrom::End(0)) 2622 .map_err(DeviceManagerError::PmemFileSetLen)? 2623 }; 2624 2625 if size % 0x20_0000 != 0 { 2626 return Err(DeviceManagerError::PmemSizeNotAligned); 2627 } 2628 2629 let (region_base, region_size) = if let Some((base, size)) = region_range { 2630 // The memory needs to be 2MiB aligned in order to support 2631 // hugepages. 
2632 self.pci_segments[pmem_cfg.pci_segment as usize] 2633 .allocator 2634 .lock() 2635 .unwrap() 2636 .allocate( 2637 Some(GuestAddress(base)), 2638 size as GuestUsize, 2639 Some(0x0020_0000), 2640 ) 2641 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2642 2643 (base, size) 2644 } else { 2645 // The memory needs to be 2MiB aligned in order to support 2646 // hugepages. 2647 let base = self.pci_segments[pmem_cfg.pci_segment as usize] 2648 .allocator 2649 .lock() 2650 .unwrap() 2651 .allocate(None, size as GuestUsize, Some(0x0020_0000)) 2652 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2653 2654 (base.raw_value(), size) 2655 }; 2656 2657 let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?; 2658 let mmap_region = MmapRegion::build( 2659 Some(FileOffset::new(cloned_file, 0)), 2660 region_size as usize, 2661 PROT_READ | PROT_WRITE, 2662 MAP_NORESERVE 2663 | if pmem_cfg.discard_writes { 2664 MAP_PRIVATE 2665 } else { 2666 MAP_SHARED 2667 }, 2668 ) 2669 .map_err(DeviceManagerError::NewMmapRegion)?; 2670 let host_addr: u64 = mmap_region.as_ptr() as u64; 2671 2672 let mem_slot = self 2673 .memory_manager 2674 .lock() 2675 .unwrap() 2676 .create_userspace_mapping(region_base, region_size, host_addr, false, false, false) 2677 .map_err(DeviceManagerError::MemoryManager)?; 2678 2679 let mapping = virtio_devices::UserspaceMapping { 2680 host_addr, 2681 mem_slot, 2682 addr: GuestAddress(region_base), 2683 len: region_size, 2684 mergeable: false, 2685 }; 2686 2687 let virtio_pmem_device = Arc::new(Mutex::new( 2688 virtio_devices::Pmem::new( 2689 id.clone(), 2690 file, 2691 GuestAddress(region_base), 2692 mapping, 2693 mmap_region, 2694 self.force_iommu | pmem_cfg.iommu, 2695 self.seccomp_action.clone(), 2696 self.exit_evt 2697 .try_clone() 2698 .map_err(DeviceManagerError::EventFd)?, 2699 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 2700 .map_err(DeviceManagerError::RestoreGetState)?, 2701 ) 2702 
            .map_err(DeviceManagerError::CreateVirtioPmem)?,
        ));

        // Update the device tree with correct resource information and with
        // the migratable device.
        node.resources.push(Resource::MmioAddressRange {
            base: region_base,
            size: region_size,
        });
        node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_pmem_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: pmem_cfg.iommu,
            id,
            pci_segment: pmem_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Create one virtio-pmem device per entry in the VM config's pmem list;
    /// generated ids are written back into the config.
    fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add virtio-pmem if required. The list is cloned so the config lock
        // is not held across device creation.
        let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
        if let Some(pmem_list_cfg) = &mut pmem_devices {
            for pmem_cfg in pmem_list_cfg.iter_mut() {
                devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
            }
        }
        // Persist the (possibly id-updated) list back into the config.
        self.config.lock().unwrap().pmem = pmem_devices;

        Ok(devices)
    }

    /// Create a single virtio-vsock device from `vsock_cfg`, backed by a
    /// Unix-socket backend bound to the configured guest CID.
    fn make_virtio_vsock_device(
        &mut self,
        vsock_cfg: &mut VsockConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Reuse the configured id, or generate one and record it in the
        // config.
        let id = if let Some(id) = &vsock_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
            vsock_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-vsock device: {:?}", vsock_cfg);

        // The socket path must be valid UTF-8 for the backend.
        let socket_path = vsock_cfg
            .socket
            .to_str()
            .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
        let backend =
            virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                .map_err(DeviceManagerError::CreateVsockBackend)?;

        let vsock_device = Arc::new(Mutex::new(
            virtio_devices::Vsock::new(
                id.clone(),
                vsock_cfg.cid,
                vsock_cfg.socket.clone(),
                backend,
                self.force_iommu | vsock_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioVsock)?,
        ));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vsock_device));

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&vsock_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vsock_cfg.iommu,
            id,
            pci_segment: vsock_cfg.pci_segment,
            dma_handler: None,
        })
    }

    /// Create the virtio-vsock device, if one is present in the VM config.
    /// The config holds at most a single vsock entry.
    fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Clone the entry out so the config lock is not held across device
        // creation; any id assigned during creation lands in the clone.
        let mut vsock = self.config.lock().unwrap().vsock.clone();
        if let Some(ref mut vsock_cfg) = &mut vsock {
            devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
        }
        // Persist the (possibly id-updated) entry back into the config.
        self.config.lock().unwrap().vsock = vsock;

        Ok(devices)
    }

    /// Create one virtio-mem device for every memory zone that carries a
    /// virtio-mem zone, wiring each device back into its zone so later
    /// resize requests can reach it.
    fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mm = self.memory_manager.clone();
        let mut mm = mm.lock().unwrap();
        for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
            if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
                info!("Creating virtio-mem device: id = {}", memory_zone_id);

                // NUMA node backing this zone, if the zone id maps to one.
                let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
                    .map(|i| i as u16);

                let virtio_mem_device = Arc::new(Mutex::new(
                    virtio_devices::Mem::new(
                        memory_zone_id.clone(),
virtio_mem_zone.region(), 2823 self.seccomp_action.clone(), 2824 node_id, 2825 virtio_mem_zone.hotplugged_size(), 2826 virtio_mem_zone.hugepages(), 2827 self.exit_evt 2828 .try_clone() 2829 .map_err(DeviceManagerError::EventFd)?, 2830 virtio_mem_zone.blocks_state().clone(), 2831 versioned_state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str()) 2832 .map_err(DeviceManagerError::RestoreGetState)?, 2833 ) 2834 .map_err(DeviceManagerError::CreateVirtioMem)?, 2835 )); 2836 2837 // Update the virtio-mem zone so that it has a handle onto the 2838 // virtio-mem device, which will be used for triggering a resize 2839 // if needed. 2840 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device)); 2841 2842 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device)); 2843 2844 devices.push(MetaVirtioDevice { 2845 virtio_device: Arc::clone(&virtio_mem_device) 2846 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2847 iommu: false, 2848 id: memory_zone_id.clone(), 2849 pci_segment: 0, 2850 dma_handler: None, 2851 }); 2852 2853 // Fill the device tree with a new node. In case of restore, we 2854 // know there is nothing to do, so we can simply override the 2855 // existing entry. 
                self.device_tree.lock().unwrap().insert(
                    memory_zone_id.clone(),
                    device_node!(memory_zone_id, virtio_mem_device),
                );
            }
        }

        Ok(devices)
    }

    /// Create the virtio-balloon device if the VM config requests one, and
    /// keep a handle on it (`self.balloon`) for later resize operations.
    fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
            let id = String::from(BALLOON_DEVICE_NAME);
            info!("Creating virtio-balloon device: id = {}", id);

            let virtio_balloon_device = Arc::new(Mutex::new(
                virtio_devices::Balloon::new(
                    id.clone(),
                    balloon_config.size,
                    balloon_config.deflate_on_oom,
                    balloon_config.free_page_reporting,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBalloon)?,
            ));

            // Keep a direct reference for balloon resizing.
            self.balloon = Some(virtio_balloon_device.clone());

            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_balloon_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_balloon_device));
        }

        Ok(devices)
    }

    /// Create the virtio-watchdog device when enabled in the VM config.
    /// The watchdog is handed the VM reset event so it can reboot the guest.
    fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if !self.config.lock().unwrap().watchdog {
            return Ok(devices);
        }

        let id = String::from(WATCHDOG_DEVICE_NAME);
        info!("Creating virtio-watchdog device: id = {}", id);

        let virtio_watchdog_device = Arc::new(Mutex::new(
            virtio_devices::Watchdog::new(
                id.clone(),
                self.reset_evt.try_clone().unwrap(),
self.seccomp_action.clone(), 2924 self.exit_evt 2925 .try_clone() 2926 .map_err(DeviceManagerError::EventFd)?, 2927 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 2928 .map_err(DeviceManagerError::RestoreGetState)?, 2929 ) 2930 .map_err(DeviceManagerError::CreateVirtioWatchdog)?, 2931 )); 2932 devices.push(MetaVirtioDevice { 2933 virtio_device: Arc::clone(&virtio_watchdog_device) 2934 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>, 2935 iommu: false, 2936 id: id.clone(), 2937 pci_segment: 0, 2938 dma_handler: None, 2939 }); 2940 2941 self.device_tree 2942 .lock() 2943 .unwrap() 2944 .insert(id.clone(), device_node!(id, virtio_watchdog_device)); 2945 2946 Ok(devices) 2947 } 2948 2949 fn make_vdpa_device( 2950 &mut self, 2951 vdpa_cfg: &mut VdpaConfig, 2952 ) -> DeviceManagerResult<MetaVirtioDevice> { 2953 let id = if let Some(id) = &vdpa_cfg.id { 2954 id.clone() 2955 } else { 2956 let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?; 2957 vdpa_cfg.id = Some(id.clone()); 2958 id 2959 }; 2960 2961 info!("Creating vDPA device: {:?}", vdpa_cfg); 2962 2963 let device_path = vdpa_cfg 2964 .path 2965 .to_str() 2966 .ok_or(DeviceManagerError::CreateVdpaConvertPath)?; 2967 2968 let vdpa_device = Arc::new(Mutex::new( 2969 virtio_devices::Vdpa::new( 2970 id.clone(), 2971 device_path, 2972 self.memory_manager.lock().unwrap().guest_memory(), 2973 vdpa_cfg.num_queues as u16, 2974 versioned_state_from_id(self.snapshot.as_ref(), id.as_str()) 2975 .map_err(DeviceManagerError::RestoreGetState)?, 2976 ) 2977 .map_err(DeviceManagerError::CreateVdpa)?, 2978 )); 2979 2980 // Create the DMA handler that is required by the vDPA device 2981 let vdpa_mapping = Arc::new(VdpaDmaMapping::new( 2982 Arc::clone(&vdpa_device), 2983 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 2984 )); 2985 2986 self.device_tree 2987 .lock() 2988 .unwrap() 2989 .insert(id.clone(), device_node!(id, vdpa_device)); 2990 2991 Ok(MetaVirtioDevice { 2992 virtio_device: vdpa_device as 
                Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vdpa_cfg.iommu,
            id,
            pci_segment: vdpa_cfg.pci_segment,
            // vDPA needs its dedicated DMA mapping handler.
            dma_handler: Some(vdpa_mapping),
        })
    }

    /// Create one vDPA device per entry in the VM config's vdpa list;
    /// generated ids are written back into the config.
    fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();
        // Add vdpa if required. The list is cloned so the config lock is not
        // held across device creation.
        let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
        if let Some(vdpa_list_cfg) = &mut vdpa_devices {
            for vdpa_cfg in vdpa_list_cfg.iter_mut() {
                devices.push(self.make_vdpa_device(vdpa_cfg)?);
            }
        }
        // Persist the (possibly id-updated) list back into the config.
        self.config.lock().unwrap().vdpa = vdpa_devices;

        Ok(devices)
    }

    /// Generate the next unused device name "<prefix><counter>".
    ///
    /// The wrapping counter is shared across all prefixes; a candidate is
    /// rejected if it already appears in the boot-time id list or in the
    /// device tree. Errors with `NoAvailableDeviceName` only after the
    /// counter has wrapped a full cycle without finding a free name.
    fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
        let start_id = self.device_id_cnt;
        loop {
            // Generate the temporary name.
            let name = format!("{}{}", prefix, self.device_id_cnt);
            // Increment the counter.
            self.device_id_cnt += Wrapping(1);
            // Check if the name is already in use.
            if !self.boot_id_list.contains(&name)
                && !self.device_tree.lock().unwrap().contains_key(&name)
            {
                return Ok(name);
            }

            if self.device_id_cnt == start_id {
                // We went through a full loop and there's nothing else we can
                // do.
                break;
            }
        }
        Err(DeviceManagerError::NoAvailableDeviceName)
    }

    /// Attach a VFIO passthrough device described by `device_cfg`, lazily
    /// creating the hypervisor passthrough device on first use. Returns the
    /// assigned PCI BDF and the device name.
    fn add_passthrough_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // If the passthrough device has not been created yet, it is created
        // here and stored in the DeviceManager structure for future needs.
        if self.passthrough_device.is_none() {
            self.passthrough_device = Some(
                self.address_manager
                    .vm
                    .create_passthrough_device()
                    .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
            );
        }

        self.add_vfio_device(device_cfg)
    }

    /// Build a new VFIO container from a duplicate of the hypervisor
    /// passthrough device fd. Fails with `NoDevicePassthroughSupport` if the
    /// passthrough device has not been created yet.
    fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
        let passthrough_device = self
            .passthrough_device
            .as_ref()
            .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

        // Duplicate the fd so the container owns its own handle.
        let dup = passthrough_device
            .try_clone()
            .map_err(DeviceManagerError::VfioCreate)?;

        Ok(Arc::new(
            VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
        ))
    }

    /// Create a VFIO PCI device from `device_cfg`, place it on its PCI
    /// segment, set up DMA mappings (either via the vIOMMU or the shared
    /// container), and register it in the device tree. Returns the PCI BDF
    /// and device name.
    fn add_vfio_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // Reuse the configured id, or generate one and record it in the
        // config.
        let vfio_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't let all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device. That's simply because the map/unmap operations happen at the
        // VFIO container level.
3095 let vfio_container = if device_cfg.iommu { 3096 let vfio_container = self.create_vfio_container()?; 3097 3098 let vfio_mapping = Arc::new(VfioDmaMapping::new( 3099 Arc::clone(&vfio_container), 3100 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 3101 )); 3102 3103 if let Some(iommu) = &self.iommu_device { 3104 iommu 3105 .lock() 3106 .unwrap() 3107 .add_external_mapping(pci_device_bdf.into(), vfio_mapping); 3108 } else { 3109 return Err(DeviceManagerError::MissingVirtualIommu); 3110 } 3111 3112 vfio_container 3113 } else if let Some(vfio_container) = &self.vfio_container { 3114 Arc::clone(vfio_container) 3115 } else { 3116 let vfio_container = self.create_vfio_container()?; 3117 needs_dma_mapping = true; 3118 self.vfio_container = Some(Arc::clone(&vfio_container)); 3119 3120 vfio_container 3121 }; 3122 3123 let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container)) 3124 .map_err(DeviceManagerError::VfioCreate)?; 3125 3126 if needs_dma_mapping { 3127 // Register DMA mapping in IOMMU. 3128 // Do not register virtio-mem regions, as they are handled directly by 3129 // virtio-mem device itself. 
            // NOTE(review): this continues a VFIO passthrough setup method whose
            // signature is above this excerpt; `vfio_container`, `vfio_device`,
            // `device_cfg`, `vfio_name`, `pci_segment_id`, `pci_device_bdf` and
            // `resources` are bound earlier in that method.
            //
            // Establish DMA mappings for every guest memory region so the
            // passthrough device can access guest RAM through the container.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    vfio_container
                        .vfio_dma_map(
                            region.start_addr().raw_value(),
                            region.len(),
                            region.as_ptr() as u64,
                        )
                        .map_err(DeviceManagerError::VfioDmaMap)?;
                }
            }

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
            ));

            // Let each virtio-mem device keep the container mappings in sync
            // when it resizes guest memory.
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Container,
                        vfio_mapping.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }
        }

        // Allocate a legacy (INTx) interrupt group only when a legacy
        // interrupt manager is available; the IRQ comes from the segment's
        // per-slot IRQ table.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let memory_manager = self.memory_manager.clone();

        let vfio_pci_device = VfioPciDevice::new(
            vfio_name.clone(),
            &self.address_manager.vm,
            vfio_device,
            vfio_container,
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            device_cfg.iommu,
            pci_device_bdf,
            self.restoring,
            // Closure the device uses to allocate guest memory slots on demand.
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
        )
        .map_err(DeviceManagerError::VfioPciCreate)?;

        let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

        let new_resources = self.add_pci_device(
            vfio_pci_device.clone(),
            vfio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // When restoring a VM, the restore codepath will take care of mapping
        // the MMIO regions based on the information from the snapshot.
        if !self.restoring {
            vfio_pci_device
                .lock()
                .unwrap()
                .map_mmio_regions()
                .map_err(DeviceManagerError::VfioMapRegion)?;
        }

        let mut node = device_node!(vfio_name, vfio_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_name.clone(), node);

        Ok((pci_device_bdf, vfio_name))
    }

    /// Allocates BARs for `pci_device`, registers it on the PCI bus of
    /// `segment_id` at slot `bdf`, and maps its BAR regions onto the I/O
    /// (x86_64 only) and MMIO buses.
    ///
    /// `resources` optionally carries previously allocated resources (restore
    /// path) so BAR placement is preserved across snapshot/restore. Returns
    /// the list of `Resource::PciBar` entries actually allocated.
    fn add_pci_device(
        &mut self,
        bus_device: Arc<Mutex<dyn BusDevice>>,
        pci_device: Arc<Mutex<dyn PciDevice>>,
        segment_id: u16,
        bdf: PciBdf,
        resources: Option<Vec<Resource>>,
    ) -> DeviceManagerResult<Vec<Resource>> {
        let bars = pci_device
            .lock()
            .unwrap()
            .allocate_bars(
                &self.address_manager.allocator,
                &mut self.pci_segments[segment_id as usize]
                    .allocator
                    .lock()
                    .unwrap(),
                resources,
            )
            .map_err(DeviceManagerError::AllocateBars)?;

        let mut pci_bus = self.pci_segments[segment_id as usize]
            .pci_bus
            .lock()
            .unwrap();

        pci_bus
            .add_device(bdf.device() as u32, pci_device)
            .map_err(DeviceManagerError::AddPciDevice)?;

        self.bus_devices.push(Arc::clone(&bus_device));

        pci_bus
            .register_mapping(
                bus_device,
                #[cfg(target_arch = "x86_64")]
                self.address_manager.io_bus.as_ref(),
                self.address_manager.mmio_bus.as_ref(),
                bars.clone(),
            )
            .map_err(DeviceManagerError::AddPciDevice)?;

        // Convert the allocated BARs into serializable `Resource` entries for
        // the device tree (consumed again on the restore path).
        let mut new_resources = Vec::new();
        for bar in bars {
            new_resources.push(Resource::PciBar {
                index: bar.idx(),
                base: bar.addr(),
                size: bar.size(),
                type_: bar.region_type().into(),
                prefetchable: bar.prefetchable().into(),
            });
        }

        Ok(new_resources)
    }

    /// Adds every VFIO device listed in the VM configuration and returns the
    /// BDFs of those that are attached to the virtual IOMMU.
    fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut iommu_attached_device_ids = Vec::new();
        let mut devices = self.config.lock().unwrap().devices.clone();

        if let Some(device_list_cfg) = &mut devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (device_id, _) = self.add_passthrough_device(device_cfg)?;
                if device_cfg.iommu && self.iommu_device.is_some() {
                    iommu_attached_device_ids.push(device_id);
                }
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().devices = devices;

        Ok(iommu_attached_device_ids)
    }

    /// Creates a vfio-user (out-of-process) PCI device from `device_cfg`,
    /// wires up interrupts and DMA mappings, and registers it on the PCI bus.
    /// Returns the device's BDF and its identifier.
    fn add_vfio_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // Use the user-provided id or generate one, persisting it back into
        // the config so it is stable across snapshots.
        let vfio_user_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;

        // Legacy (INTx) interrupt group, as in the VFIO path above.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        // Connect to the external device process over its socket.
        let client = Arc::new(Mutex::new(
            vfio_user::Client::new(&device_cfg.socket)
                .map_err(DeviceManagerError::VfioUserCreateClient)?,
        ));

        let memory_manager = self.memory_manager.clone();

        let mut vfio_user_pci_device = VfioUserPciDevice::new(
            vfio_user_name.clone(),
            &self.address_manager.vm,
            client.clone(),
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            pci_device_bdf,
            self.restoring,
            // Closure the device uses to allocate guest memory slots on demand.
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
        )
        .map_err(DeviceManagerError::VfioUserCreate)?;

        // Per-device DMA mapping handler so virtio-mem resizes are forwarded
        // to the vfio-user backend.
        let memory = self.memory_manager.lock().unwrap().guest_memory();
        let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Device(pci_device_bdf.into()),
                    vfio_user_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }

        // Map every existing guest memory region for the device.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_user_pci_device
                    .dma_map(region)
                    .map_err(DeviceManagerError::VfioUserDmaMap)?;
            }
        }

        let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

        let new_resources = self.add_pci_device(
            vfio_user_pci_device.clone(),
            vfio_user_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // When restoring a VM, the restore codepath will take care of mapping
        // the MMIO regions based on the information from the snapshot.
        if !self.restoring {
            // Note it is required to call 'add_pci_device()' in advance to have the list of
            // mmio regions provisioned correctly
            vfio_user_pci_device
                .lock()
                .unwrap()
                .map_mmio_regions()
                .map_err(DeviceManagerError::VfioUserMapRegion)?;
        }

        let mut node = device_node!(vfio_user_name, vfio_user_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_user_name.clone(), node);

        Ok((pci_device_bdf, vfio_user_name))
    }

    /// Adds every vfio-user device from the VM configuration.
    ///
    /// Always returns an empty list (no vfio-user device is reported as
    /// IOMMU-attached here, unlike `add_vfio_devices`).
    fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
        let mut user_devices = self.config.lock().unwrap().user_devices.clone();

        if let Some(device_list_cfg) = &mut user_devices {
            for device_cfg in device_list_cfg.iter_mut() {
                let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
            }
        }

        // Update the list of devices
        self.config.lock().unwrap().user_devices = user_devices;

        Ok(vec![])
    }

    /// Wraps `virtio_device` in a virtio-pci transport, allocates its PCI
    /// resources on `pci_segment_id`, registers its ioeventfds, and records
    /// the node in the device tree. Returns the new device's BDF.
    fn add_virtio_pci_device(
        &mut self,
        virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu_mapping: &Option<Arc<IommuMapping>>,
        virtio_device_id: String,
        pci_segment_id: u16,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> DeviceManagerResult<PciBdf> {
        let id = format!("{}-{}", VIRTIO_PCI_DEVICE_NAME_PREFIX, virtio_device_id);

        // Add the new virtio-pci node to the device tree.
        let mut node = device_node!(id);
        node.children = vec![virtio_device_id.clone()];

        // NOTE(review): on the restore path `pci_resources` returns the
        // segment id taken from the saved BDF, which may differ from the one
        // passed in — the shadowing below is deliberate.
        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        // Update the existing virtio node by setting the parent.
        if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
            node.parent = Some(id.clone());
        } else {
            return Err(DeviceManagerError::MissingNode);
        }

        // Allows support for one MSI-X vector per queue. It also adds 1
        // as we need to take into account the dedicated vector to notify
        // about a virtio config change.
        let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;

        // Create the AccessPlatform trait from the implementation IommuMapping.
        // This will provide address translation for any virtio device sitting
        // behind a vIOMMU.
        let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
        {
            Some(Arc::new(AccessPlatformMapping::new(
                pci_device_bdf.into(),
                mapping.clone(),
            )))
        } else {
            None
        };

        let memory = self.memory_manager.lock().unwrap().guest_memory();

        // Map DMA ranges if a DMA handler is available and if the device is
        // not attached to a virtual IOMMU.
        if let Some(dma_handler) = &dma_handler {
            if iommu_mapping.is_some() {
                // IOMMU-attached: let the virtual IOMMU own the external
                // mapping instead of identity-mapping guest RAM.
                if let Some(iommu) = &self.iommu_device {
                    iommu
                        .lock()
                        .unwrap()
                        .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
                } else {
                    return Err(DeviceManagerError::MissingVirtualIommu);
                }
            } else {
                // Let every virtio-mem device handle the DMA map/unmap through the
                // DMA handler provided.
                for virtio_mem_device in self.virtio_mem_devices.iter() {
                    virtio_mem_device
                        .lock()
                        .unwrap()
                        .add_dma_mapping_handler(
                            VirtioMemMappingSource::Device(pci_device_bdf.into()),
                            dma_handler.clone(),
                        )
                        .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
                }

                // Do not register virtio-mem regions, as they are handled directly by
                // virtio-mem devices.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        let gpa = region.start_addr().0;
                        let size = region.len();
                        // Identity mapping: IOVA == GPA.
                        dma_handler
                            .map(gpa, gpa, size)
                            .map_err(DeviceManagerError::VirtioDmaMap)?;
                    }
                }
            }
        }

        let device_type = virtio_device.lock().unwrap().device_type();
        let virtio_pci_device = Arc::new(Mutex::new(
            VirtioPciDevice::new(
                id.clone(),
                memory,
                virtio_device,
                msix_num,
                access_platform,
                &self.msi_interrupt_manager,
                pci_device_bdf.into(),
                self.activate_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                // All device types *except* virtio block devices should be allocated a 64-bit bar
                // The block devices should be given a 32-bit BAR so that they are easily accessible
                // to firmware without requiring excessive identity mapping.
                // The exception being if not on the default PCI segment.
                pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
                dma_handler,
                self.pending_activations.clone(),
                vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
            )
            .map_err(DeviceManagerError::VirtioDevice)?,
        ));

        let new_resources = self.add_pci_device(
            virtio_pci_device.clone(),
            virtio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Register one ioeventfd per queue notification address exposed by
        // the transport.
        let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
        for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
            let io_addr = IoEventAddress::Mmio(addr);
            self.address_manager
                .vm
                .register_ioevent(event, &io_addr, None)
                .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
        }

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
        self.device_tree.lock().unwrap().insert(id, node);

        Ok(pci_device_bdf)
    }

    /// Resolves the PCI placement for device `id`.
    ///
    /// If `id` already exists in the device tree the device is being restored,
    /// so its saved BDF, segment and resources are reused; otherwise the next
    /// free BDF on `pci_segment_id` is allocated and no resources are
    /// returned.
    fn pci_resources(
        &self,
        id: &str,
        pci_segment_id: u16,
    ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        Ok(
            if let Some(node) = self.device_tree.lock().unwrap().get(id) {
                info!("Restoring virtio-pci {} resources", id);
                let pci_device_bdf: PciBdf = node
                    .pci_bdf
                    .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
                let pci_segment_id = pci_device_bdf.segment();

                // Re-claim the saved slot on the bus so it cannot be handed
                // out to another device.
                self.pci_segments[pci_segment_id as usize]
                    .pci_bus
                    .lock()
                    .unwrap()
                    .get_device_id(pci_device_bdf.device() as usize)
                    .map_err(DeviceManagerError::GetPciDeviceId)?;

                (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
            } else {
                let pci_device_bdf =
                    self.pci_segments[pci_segment_id as usize].next_device_bdf()?;

                (pci_segment_id, pci_device_bdf, None)
            },
        )
    }

    /// Returns the port I/O bus (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }

    /// Returns the MMIO bus.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }

    /// Returns the system-wide address allocator.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }

    /// Returns the interrupt controller, if one has been created.
    pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
        self.interrupt_controller
            .as_ref()
            .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
    }

    /// Returns the list of PCI segments managed by this DeviceManager.
    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }

    /// Returns the console abstraction.
    pub fn console(&self) -> &Arc<Console> {
        &self.console
    }

    /// Extra kernel command-line entries collected while creating devices
    /// (aarch64 only).
    #[cfg(target_arch = "aarch64")]
    pub fn cmdline_additions(&self) -> &[String] {
        self.cmdline_additions.as_slice()
    }

    /// Propagates a newly added guest memory region to everything that needs
    /// to know about it: virtio devices, their DMA handlers, the VFIO
    /// container, and vfio-user devices.
    pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
        for handle in self.virtio_devices.iter() {
            handle
                .virtio_device
                .lock()
                .unwrap()
                .add_memory_region(new_region)
                .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;

            // Identity-map (IOVA == GPA) the new region through the device's
            // DMA handler unless the device sits behind the virtual IOMMU.
            if let Some(dma_handler) = &handle.dma_handler {
                if !handle.iommu {
                    let gpa = new_region.start_addr().0;
                    let size = new_region.len();
                    dma_handler
                        .map(gpa, gpa, size)
                        .map_err(DeviceManagerError::VirtioDmaMap)?;
                }
            }
        }

        // Take care of updating the memory for VFIO PCI devices.
        if let Some(vfio_container) = &self.vfio_container {
            vfio_container
                .vfio_dma_map(
                    new_region.start_addr().raw_value(),
                    new_region.len(),
                    new_region.as_ptr() as u64,
                )
                .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
        }

        // Take care of updating the memory for vfio-user devices.
        {
            let device_tree = self.device_tree.lock().unwrap();
            for pci_device_node in device_tree.pci_devices() {
                if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
                    .pci_device_handle
                    .as_ref()
                    .ok_or(DeviceManagerError::MissingPciDevice)?
                {
                    vfio_user_pci_device
                        .lock()
                        .unwrap()
                        .dma_map(new_region)
                        .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
                }
            }
        }

        Ok(())
    }

    /// Activates every virtio device that queued a pending activation,
    /// draining the pending list (each activator is consumed).
    pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
        for mut activator in self.pending_activations.lock().unwrap().drain(..)
        {
            activator
                .activate()
                .map_err(DeviceManagerError::VirtioActivate)?;
        }
        Ok(())
    }

    /// Forwards a hotplug notification to the ACPI GED device.
    ///
    /// Panics if no GED notification device was created.
    pub fn notify_hotplug(
        &self,
        _notification_type: AcpiNotificationFlags,
    ) -> DeviceManagerResult<()> {
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(_notification_type)
            .map_err(DeviceManagerError::HotPlugNotification);
    }

    /// Hotplugs a VFIO passthrough device and reports it to the guest via the
    /// segment's PCIU (devices-up) bitmap.
    pub fn add_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&device_cfg.id)?;

        if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;

        // Update the PCIU bitmap
        self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo {
            id: device_name,
            bdf,
        })
    }

    /// Hotplugs a vfio-user device, mirroring `add_device`.
    pub fn add_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&device_cfg.id)?;

        let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;

        // Update the PCIU bitmap
        self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo {
            id: device_name,
            bdf,
        })
    }

    /// Requests removal of the device identified by `id` by setting the PCID
    /// (devices-down) bit for its slot; the actual teardown happens in
    /// `eject_device` once the guest ejects the slot.
    pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
        // The node can be directly a PCI node in case the 'id' refers to a
        // VFIO device or a virtio-pci one.
        // In case the 'id' refers to a virtio device, we must find the PCI
        // node by looking at the parent.
        let device_tree = self.device_tree.lock().unwrap();
        let node = device_tree
            .get(&id)
            .ok_or(DeviceManagerError::UnknownDeviceId(id))?;

        let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
            node
        } else {
            let parent = node
                .parent
                .as_ref()
                .ok_or(DeviceManagerError::MissingNode)?;
            device_tree
                .get(parent)
                .ok_or(DeviceManagerError::MissingNode)?
        };

        let pci_device_bdf: PciBdf = pci_device_node
            .pci_bdf
            .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
        let pci_segment_id = pci_device_bdf.segment();

        let pci_device_handle = pci_device_node
            .pci_device_handle
            .as_ref()
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        // Only a subset of virtio device types support removal; reject the
        // request for any other type.
        #[allow(irrefutable_let_patterns)]
        if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
            let device_type = VirtioDeviceType::from(
                virtio_pci_device
                    .lock()
                    .unwrap()
                    .virtio_device()
                    .lock()
                    .unwrap()
                    .device_type(),
            );
            match device_type {
                VirtioDeviceType::Net
                | VirtioDeviceType::Block
                | VirtioDeviceType::Pmem
                | VirtioDeviceType::Fs
                | VirtioDeviceType::Vsock => {}
                _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
            }
        }

        // Update the PCID bitmap
        self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();

        Ok(())
    }

    /// Tears down the device in slot `device_id` on `pci_segment_id` after
    /// the guest has ejected it: unmaps DMA, frees BARs, and removes the
    /// device from every bus and from the device tree.
    pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
        info!(
            "Ejecting device_id = {} on segment_id={}",
            device_id, pci_segment_id
        );

        // Convert the device ID into the corresponding b/d/f.
        let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);

        // Give the PCI device ID back to the PCI bus.
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .put_device_id(device_id as usize)
            .map_err(DeviceManagerError::PutPciDeviceId)?;

        // Remove the device from the device tree along with its children.
        let mut device_tree = self.device_tree.lock().unwrap();
        let pci_device_node = device_tree
            .remove_node_by_pci_bdf(pci_device_bdf)
            .ok_or(DeviceManagerError::MissingPciDevice)?;

        // For VFIO and vfio-user the PCI device id is the id.
        // For virtio we overwrite it later as we want the id of the
        // underlying device.
        let mut id = pci_device_node.id;
        let pci_device_handle = pci_device_node
            .pci_device_handle
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
            // The virtio-pci device has a single child
            if !pci_device_node.children.is_empty() {
                assert_eq!(pci_device_node.children.len(), 1);
                let child_id = &pci_device_node.children[0];
                id = child_id.clone();
            }
        }
        for child in pci_device_node.children.iter() {
            device_tree.remove(child);
        }

        let mut iommu_attached = false;
        if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
            if iommu_attached_devices.contains(&pci_device_bdf) {
                iommu_attached = true;
            }
        }

        // Per-handle cleanup, producing the trait objects needed by the
        // generic teardown steps below.
        let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
            // No need to remove any virtio-mem mapping here as the container outlives all devices
            PciDeviceHandle::Vfio(vfio_pci_device) => (
                Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                false,
            ),
            PciDeviceHandle::Virtio(virtio_pci_device) => {
                let dev = virtio_pci_device.lock().unwrap();
                let bar_addr = dev.config_bar_addr();
                // Unregister the per-queue ioeventfds registered at creation.
                for (event, addr) in dev.ioeventfds(bar_addr) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.address_manager
                        .vm
                        .unregister_ioevent(event, &io_addr)
                        .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
                }

                // Undo the identity DMA mappings created at plug time
                // (skipped for IOMMU-attached devices, which never had them).
                if let Some(dma_handler) = dev.dma_handler() {
                    if !iommu_attached {
                        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                            for region in zone.regions() {
                                let iova = region.start_addr().0;
                                let size = region.len();
                                dma_handler
                                    .unmap(iova, size)
                                    .map_err(DeviceManagerError::VirtioDmaUnmap)?;
                            }
                        }
                    }
                }

                (
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    Some(dev.virtio_device()),
                    dev.dma_handler().is_some() && !iommu_attached,
                )
            }
            PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
                let mut dev = vfio_user_pci_device.lock().unwrap();
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        dev.dma_unmap(region)
                            .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
                    }
                }

                (
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    true,
                )
            }
        };

        // Detach the device's DMA handler from every virtio-mem device so
        // future memory resizes no longer reference it.
        if remove_dma_handler {
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
                        pci_device_bdf.into(),
                    ))
                    .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
            }
        }

        // Free the allocated BARs
        pci_device
            .lock()
            .unwrap()
            .free_bars(
                &mut self.address_manager.allocator.lock().unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .allocator
                    .lock()
                    .unwrap(),
            )
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the list and
        // buses where it was stored. At the end of this function, after
        // any_device, bus_device and pci_device are released, the actual
        // device will be dropped.
        Ok(())
    }

    /// Registers `handle` with the device manager and exposes it to the guest
    /// as a hotplugged virtio-pci device (sets the PCIU bit for its slot).
    fn hotplug_virtio_pci_device(
        &mut self,
        handle: MetaVirtioDevice,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        // Add the virtio device to the device manager list. This is important
        // as the list is used to notify virtio devices about memory updates
        // for instance.
        self.virtio_devices.push(handle.clone());

        // Only IOMMU-attached devices get the translation mapping.
        let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
            self.iommu_mapping.clone()
        } else {
            None
        };

        let bdf = self.add_virtio_pci_device(
            handle.virtio_device,
            &mapping,
            handle.id.clone(),
            handle.pci_segment,
            handle.dma_handler,
        )?;

        // Update the PCIU bitmap
        self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();

        Ok(PciDeviceInfo { id: handle.id, bdf })
    }

    /// Whether `pci_segment_id` is listed in the platform config as an
    /// IOMMU-enabled segment (false when the platform config or the list is
    /// absent).
    fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
        self.config
            .lock()
            .as_ref()
            .unwrap()
            .platform
            .as_ref()
            .map(|pc| {
                pc.iommu_segments
                    .as_ref()
                    .map(|v| v.contains(&pci_segment_id))
                    .unwrap_or_default()
            })
            .unwrap_or_default()
    }

    /// Hotplugs a virtio-block device described by `disk_cfg`.
    pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&disk_cfg.id)?;

        if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_block_device(disk_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-fs device described by `fs_cfg`.
    pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&fs_cfg.id)?;

        let device = self.make_virtio_fs_device(fs_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-pmem device described by `pmem_cfg`.
    pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&pmem_cfg.id)?;
        if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_pmem_device(pmem_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-net device described by `net_cfg`.
    pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&net_cfg.id)?;

        if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_net_device(net_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a vDPA device described by `vdpa_cfg`.
    pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vdpa_cfg.id)?;

        if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_vdpa_device(vdpa_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Hotplugs a virtio-vsock device described by `vsock_cfg`.
    pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
        self.validate_identifier(&vsock_cfg.id)?;

        if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
            return Err(DeviceManagerError::InvalidIommuHotplug);
        }

        let device = self.make_virtio_vsock_device(vsock_cfg)?;
        self.hotplug_virtio_pci_device(device)
    }

    /// Collects per-device statistics counters, keyed by device id. Devices
    /// that expose no counters are skipped.
    pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
        let mut counters = HashMap::new();

        for handle in &self.virtio_devices {
            let virtio_device = handle.virtio_device.lock().unwrap();
            if let Some(device_counters) = virtio_device.counters() {
                counters.insert(handle.id.clone(), device_counters.clone());
            }
        }

        counters
    }

    /// Resizes the virtio-balloon device to `size` bytes; errors if no
    /// balloon was configured.
    pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
        if let Some(balloon) =
            &self.balloon {
            return balloon
                .lock()
                .unwrap()
                .resize(size)
                .map_err(DeviceManagerError::VirtioBalloonResize);
        }

        warn!("No balloon setup: Can't resize the balloon");
        Err(DeviceManagerError::MissingVirtioBalloon)
    }

    /// Returns the balloon's current actual size in bytes, or 0 when no
    /// balloon device exists.
    pub fn balloon_size(&self) -> u64 {
        if let Some(balloon) = &self.balloon {
            return balloon.lock().unwrap().get_actual();
        }

        0
    }

    /// Returns a shared handle to the device tree.
    pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
        self.device_tree.clone()
    }

    /// Restores every migratable device from `snapshot` (children before
    /// parents, via reversed breadth-first traversal), then clears the
    /// manager's `restoring` flag.
    ///
    /// Fails with `MigratableError::Restore` if any device is missing from
    /// the snapshot.
    pub fn restore_devices(
        &mut self,
        snapshot: Snapshot,
    ) -> std::result::Result<(), MigratableError> {
        // Finally, restore all devices associated with the DeviceManager.
        // It's important to restore devices in the right order, that's why
        // the device tree is the right way to ensure we restore a child before
        // its parent node.
        for node in self
            .device_tree
            .lock()
            .unwrap()
            .breadth_first_traversal()
            .rev()
        {
            // Restore the node
            if let Some(migratable) = &node.migratable {
                info!("Restoring {} from DeviceManager", node.id);
                if let Some(snapshot) = snapshot.snapshots.get(&node.id) {
                    // Pause before restoring so the device state cannot change
                    // underneath the restore.
                    migratable.lock().unwrap().pause()?;
                    migratable.lock().unwrap().restore(*snapshot.clone())?;
                } else {
                    return Err(MigratableError::Restore(anyhow!(
                        "Missing device {}",
                        node.id
                    )));
                }
            }
        }

        // The devices have been fully restored, we can now update the
        // restoring state of the DeviceManager.
        self.restoring = false;

        Ok(())
    }

    /// Signals a power-button press to the guest via the ACPI GED device
    /// (x86_64).
    ///
    /// Panics if no GED notification device was created.
    #[cfg(target_arch = "x86_64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    /// Signals a power-button press to the guest (aarch64), covering both
    /// device-tree and ACPI boot flows.
    #[cfg(target_arch = "aarch64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // There are two use cases:
        // 1. Users will use direct kernel boot with device tree.
        // 2. Users will use ACPI+UEFI boot.

        // Trigger a GPIO pin 3 event to satisify use case 1.
        self.gpio_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .trigger_key(3)
            .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
        // Trigger a GED power button event to satisify use case 2.
        return self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification);
    }

    /// Returns the virtual IOMMU's BDF and the BDFs attached to it, if a
    /// virtual IOMMU exists.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }

    /// Rejects device ids that use the reserved double-underscore prefix or
    /// that already exist in the device tree. A `None` id is always accepted.
    fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
        if let Some(id) = id {
            if id.starts_with("__") {
                return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
            }

            if self.device_tree.lock().unwrap().contains_key(id) {
                return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
            }
        }

        Ok(())
    }

    /// Returns the platform addresses recorded for ACPI table generation.
    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
}

/// Finds the NUMA node whose configuration lists `memory_zone_id` among its
/// memory zones; returns `None` when no node references the zone.
fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
    for (numa_node_id, numa_node) in numa_nodes.iter() {
        if
        numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
            return Some(*numa_node_id);
        }
    }

    None
}

// ACPI description of the TPM device; emitted into the DSDT when a TPM is
// configured.
struct TpmDevice {}

impl Aml for TpmDevice {
    fn to_aml_bytes(&self) -> Vec<u8> {
        aml::Device::new(
            "TPM2".into(),
            vec![
                &aml::Name::new("_HID".into(), &"MSFT0101"),
                &aml::Name::new("_STA".into(), &(0xF_usize)),
                &aml::Name::new(
                    "_CRS".into(),
                    // Fixed MMIO window taken from the arch memory layout.
                    &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                        true,
                        layout::TPM_START.0 as u32,
                        layout::TPM_SIZE as u32,
                    )]),
                ),
            ],
        )
        .to_aml_bytes()
    }
}

impl Aml for DeviceManager {
    // Emits the DeviceManager's ACPI objects (hotplug controller, PCI
    // segments, motherboard resources, serial, power button, optional TPM,
    // GED) into `bytes`.
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // One PCNT (rescan) method call per PCI segment, invoked from PSCN.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PCI{:X}.PCNT", i).as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCachable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    self.acpi_address.0 as usize,
                    DEVICE_MANAGER_ACPI_SIZE,
                ),
                // Four 32-bit fields laid out back to back in the PCST
                // region: device-up bitmap, device-down bitmap, eject
                // register and segment selector.
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                // PCEJ(slot, segment): eject the device in `slot` on `segment`.
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Select the segment (second argument) via PSEG
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Store (1 << Arg0) into the B0EJ field; the write
                        // handler ejects the device at that slot bit.
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                // PSCN: rescan every PCI segment (calls each PCNT method).
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .append_aml_bytes(bytes);

        // Emit the per-segment PCI host bridge devices.
        for segment in &self.pci_segments {
            segment.append_aml_bytes(bytes);
        }

        // Motherboard resource device reserving the per-segment PCI MMIO
        // config windows so the guest does not allocate over them.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .append_aml_bytes(bytes);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode !=
            ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        // Only describe COM1 to the guest when the serial console is enabled.
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        // PNP0501: 16550-compatible UART; ARMH0011: PL011.
                        #[cfg(target_arch = "x86_64")]
                        &aml::EisaName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            // x86 exposes the legacy COM1 I/O ports; aarch64
                            // exposes the serial device's MMIO window.
                            #[cfg(target_arch = "x86_64")]
                            &aml::Io::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .append_aml_bytes(bytes);
        }

        // _S5_: advertise the S5 (poweroff) sleep state.
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).append_aml_bytes(bytes);

        // PNP0C0C: ACPI power button device.
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .append_aml_bytes(bytes);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            let tpm_acpi = TpmDevice {};
            let tpm_dsdt_data = tpm_acpi.to_aml_bytes();
            bytes.extend_from_slice(tpm_dsdt_data.as_slice());
        }

        // Finally append the GED notification device's own AML.
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .append_aml_bytes(bytes);
    }
}

impl Pausable for DeviceManager {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        // Pause every migratable device registered in the device tree.
        for (_, device_node) in
        self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        // Resume every migratable device registered in the device tree.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    // Stable id under which the DeviceManager snapshot is stored.
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::new(DEVICE_MANAGER_SNAPSHOT_ID);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let device_snapshot = migratable.lock().unwrap().snapshot()?;
                snapshot.add_snapshot(device_snapshot);
            }
        }

        // Then we store the DeviceManager state.
        snapshot.add_data_section(SnapshotDataSection::new_from_state(
            DEVICE_MANAGER_SNAPSHOT_ID,
            &self.state(),
        )?);

        Ok(snapshot)
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        // Let's first restore the DeviceManager.

        self.set_state(&snapshot.to_state(DEVICE_MANAGER_SNAPSHOT_ID)?);

        // Now that DeviceManager is updated with the right states, it's time
        // to create the devices based on the configuration.
        self.create_devices(None, None, None)
            .map_err(|e| MigratableError::Restore(anyhow!("Could not create devices {:?}", e)))?;

        Ok(())
    }
}

impl Transportable for DeviceManager {}

// Every Migratable operation is simply fanned out to each migratable
// device registered in the device tree.
impl Migratable for DeviceManager {
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        // Merge the per-device dirty ranges into a single table.
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_migration()?;
            }
        }
        Ok(())
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}

// Byte offsets and sizes of the 32-bit PCI hotplug register fields
// (PCIU/PCID/B0EJ/PSEG) within the hotplug controller's MMIO window,
// matching the field layout declared in the DSDT above.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const
    PSEG_FIELD_OFFSET: u64 = 12;
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;

// MMIO backend for the PCI hotplug controller (_SB_.PHPR) described in
// the DSDT: guest reads and writes to the PCST fields land here.
impl BusDevice for DeviceManager {
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                // Bitmap of slots with newly plugged devices on the
                // currently selected segment; read-and-clear semantics.
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_up
                        .to_le_bytes(),
                );
                // Clear the PCIU bitmap
                self.pci_segments[self.selected_segment].pci_devices_up = 0;
            }
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                // Bitmap of slots with unplugged devices on the currently
                // selected segment; read-and-clear semantics.
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_down
                        .to_le_bytes(),
                );
                // Clear the PCID bitmap
                self.pci_segments[self.selected_segment].pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty bitmap since the eject is always
                // taken care of right away during a write access.
4596 data.fill(0); 4597 } 4598 PSEG_FIELD_OFFSET => { 4599 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4600 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes()); 4601 } 4602 _ => error!( 4603 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4604 base, offset 4605 ), 4606 } 4607 4608 debug!( 4609 "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}", 4610 base, offset, data 4611 ) 4612 } 4613 4614 fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> { 4615 match offset { 4616 B0EJ_FIELD_OFFSET => { 4617 assert!(data.len() == B0EJ_FIELD_SIZE); 4618 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4619 data_array.copy_from_slice(data); 4620 let mut slot_bitmap = u32::from_le_bytes(data_array); 4621 4622 while slot_bitmap > 0 { 4623 let slot_id = slot_bitmap.trailing_zeros(); 4624 if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) { 4625 error!("Failed ejecting device {}: {:?}", slot_id, e); 4626 } 4627 slot_bitmap &= !(1 << slot_id); 4628 } 4629 } 4630 PSEG_FIELD_OFFSET => { 4631 assert_eq!(data.len(), PSEG_FIELD_SIZE); 4632 let mut data_array: [u8; 4] = [0, 0, 0, 0]; 4633 data_array.copy_from_slice(data); 4634 let selected_segment = u32::from_le_bytes(data_array) as usize; 4635 if selected_segment >= self.pci_segments.len() { 4636 error!( 4637 "Segment selection out of range: {} >= {}", 4638 selected_segment, 4639 self.pci_segments.len() 4640 ); 4641 return None; 4642 } 4643 self.selected_segment = selected_segment; 4644 } 4645 _ => error!( 4646 "Accessing unknown location at base 0x{:x}, offset 0x{:x}", 4647 base, offset 4648 ), 4649 } 4650 4651 debug!( 4652 "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}", 4653 base, offset, data 4654 ); 4655 4656 None 4657 } 4658 } 4659 4660 impl Drop for DeviceManager { 4661 fn drop(&mut self) { 4662 for handle in self.virtio_devices.drain(..) { 4663 handle.virtio_device.lock().unwrap().shutdown(); 4664 } 4665 } 4666 } 4667