// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
//

use crate::config::{
    ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
    VhostMode, VmConfig, VsockConfig,
};
use crate::device_tree::{DeviceNode, DeviceTree};
#[cfg(feature = "kvm")]
use crate::interrupt::kvm::KvmMsiInterruptManager as MsiInterruptManager;
#[cfg(feature = "mshv")]
use crate::interrupt::mshv::MshvMsiInterruptManager as MsiInterruptManager;
use crate::interrupt::LegacyUserspaceInterruptManager;
#[cfg(feature = "acpi")]
use crate::memory_manager::MEMORY_MANAGER_ACPI_SIZE;
use crate::memory_manager::{Error as MemoryManagerError, MemoryManager};
use crate::seccomp_filters::{get_seccomp_filter, Thread};
use crate::serial_buffer::SerialBuffer;
use crate::sigwinch_listener::start_sigwinch_listener;
use crate::GuestRegionMmap;
use crate::PciDeviceInfo;
use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
#[cfg(feature = "acpi")]
use acpi_tables::{aml, aml::Aml};
use anyhow::anyhow;
#[cfg(target_arch = "aarch64")]
use arch::aarch64::gic::gicv3_its::kvm::KvmGicV3Its;
#[cfg(feature = "acpi")]
use arch::layout;
#[cfg(target_arch = "x86_64")]
use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
#[cfg(any(target_arch = "aarch64", feature = "acpi"))]
use arch::NumaNodes;
#[cfg(target_arch = "aarch64")]
use arch::{DeviceType, MmioDeviceInfo};
use block_util::{
    async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
    fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync,
    raw_async::RawFileDisk, raw_sync::RawFileDiskSync, vhdx_sync::VhdxDiskSync, ImageType,
};
#[cfg(target_arch = "aarch64")]
use devices::gic;
#[cfg(target_arch = "x86_64")]
use devices::ioapic;
#[cfg(target_arch = "aarch64")]
use devices::legacy::Pl011;
#[cfg(target_arch = "x86_64")]
use devices::legacy::Serial;
use devices::{
    interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
};
#[cfg(feature = "kvm")]
use hypervisor::kvm_ioctls::*;
use hypervisor::DeviceFd;
#[cfg(feature = "mshv")]
use hypervisor::IoEventAddress;
use libc::{
    isatty, tcgetattr, tcsetattr, termios, ECHO, ICANON, ISIG, MAP_NORESERVE, MAP_PRIVATE,
    MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW,
};
use pci::VfioPciDevice;
use pci::{
    DeviceRelocation, PciBarRegionType, PciBus, PciConfigIo, PciConfigMmio, PciDevice, PciRoot,
    VfioUserPciDevice, VfioUserPciDeviceError,
};
use seccompiler::SeccompAction;
use std::collections::HashMap;
use std::convert::TryInto;
use std::fs::{read_link, File, OpenOptions};
use std::io::{self, stdout, Seek, SeekFrom};
use std::mem::zeroed;
use std::num::Wrapping;
use std::os::unix::fs::OpenOptionsExt;
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::path::PathBuf;
use std::result;
use std::sync::{Arc, Barrier, Mutex};
#[cfg(feature = "acpi")]
use uuid::Uuid;
use vfio_ioctls::{VfioContainer, VfioDevice};
use virtio_devices::transport::VirtioPciDevice;
use virtio_devices::transport::VirtioTransport;
use virtio_devices::vhost_user::VhostUserConfig;
use virtio_devices::{DmaRemapping, Endpoint, IommuMapping};
use virtio_devices::{VirtioSharedMemory, VirtioSharedMemoryList};
use vm_allocator::SystemAllocator;
use vm_device::dma_mapping::vfio::VfioDmaMapping;
use vm_device::interrupt::{
    InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
};
use vm_device::{Bus, BusDevice, Resource};
use vm_memory::guest_memory::FileOffset;
use vm_memory::GuestMemoryRegion;
use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
#[cfg(all(target_arch = "x86_64", feature = "cmos"))]
use vm_memory::{GuestAddressSpace, GuestMemory};
use vm_migration::{
    protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
    SnapshotDataSection, Snapshottable, Transportable,
};
use vm_virtio::{VirtioDeviceType, VirtioIommuRemapping};
use vmm_sys_util::eventfd::EventFd;

#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";

const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";

#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "_ioapic";

const SERIAL_DEVICE_NAME_PREFIX: &str = "_serial";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME_PREFIX: &str = "_gpio";

const CONSOLE_DEVICE_NAME: &str = "_console";
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const MEM_DEVICE_NAME_PREFIX: &str = "_mem";
const BALLOON_DEVICE_NAME: &str = "_balloon";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const RNG_DEVICE_NAME: &str = "_rng";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "_watchdog";

const IOMMU_DEVICE_NAME: &str = "_iommu";

const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";

/// Errors associated with device manager
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Failed converting Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed parsing disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(vmm_sys_util::errno::Error),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed creating interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed creating a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed cloning a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed removing a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed removing a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed removing a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// Incorrect device ID as it is already used by another device.
    DeviceIdAlreadyInUse,

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed updating guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot generate a ResizeSender from the Resize object.
    CreateResizeSender(virtio_devices::mem::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed updating guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not a multiple of 2 MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pci could not be found.
    MissingVirtioPciResources,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::vhdx::VhdxError),

    /// Failed adding DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed removing DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),
}
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

type VirtioDeviceArc = Arc<Mutex<dyn virtio_devices::VirtioDevice>>;

#[cfg(feature = "acpi")]
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

const TIOCSPTLCK: libc::c_int = 0x4004_5431;
const TIOCGTPEER: libc::c_int = 0x5441;

pub fn create_pty(non_blocking: bool) -> io::Result<(File, File, PathBuf)> {
    // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
    // This is done to try and use the devpts filesystem that
    // could be available for use in the process's namespace first.
    // Ideally these would all be the same file, but different
    // kernels could have things set up differently.
    // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
    // for further details.
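    //
    // Below, TIOCSPTLCK with a zero argument unlocks the pty pair, and
    // TIOCGTPEER opens the peer (sub) end directly from the main fd,
    // avoiding a racy path-based open of the matching /dev/pts node.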
469 470 let custom_flags = libc::O_NOCTTY | if non_blocking { libc::O_NONBLOCK } else { 0 }; 471 let main = match OpenOptions::new() 472 .read(true) 473 .write(true) 474 .custom_flags(custom_flags) 475 .open("/dev/pts/ptmx") 476 { 477 Ok(f) => f, 478 _ => OpenOptions::new() 479 .read(true) 480 .write(true) 481 .custom_flags(custom_flags) 482 .open("/dev/ptmx")?, 483 }; 484 let mut unlock: libc::c_ulong = 0; 485 unsafe { 486 libc::ioctl( 487 main.as_raw_fd(), 488 TIOCSPTLCK.try_into().unwrap(), 489 &mut unlock, 490 ) 491 }; 492 493 let sub_fd = unsafe { 494 libc::ioctl( 495 main.as_raw_fd(), 496 TIOCGTPEER.try_into().unwrap(), 497 libc::O_NOCTTY | libc::O_RDWR, 498 ) 499 }; 500 if sub_fd == -1 { 501 return vmm_sys_util::errno::errno_result().map_err(|e| e.into()); 502 } 503 504 let proc_path = PathBuf::from(format!("/proc/self/fd/{}", sub_fd)); 505 let path = read_link(proc_path)?; 506 507 Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path)) 508 } 509 510 #[derive(Default)] 511 pub struct Console { 512 #[cfg(target_arch = "x86_64")] 513 // Serial port on 0x3f8 514 serial: Option<Arc<Mutex<Serial>>>, 515 #[cfg(target_arch = "aarch64")] 516 serial: Option<Arc<Mutex<Pl011>>>, 517 console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>, 518 } 519 520 impl Console { 521 pub fn queue_input_bytes_serial(&self, out: &[u8]) -> vmm_sys_util::errno::Result<()> { 522 if self.serial.is_some() { 523 self.serial 524 .as_ref() 525 .unwrap() 526 .lock() 527 .unwrap() 528 .queue_input_bytes(out)?; 529 } 530 Ok(()) 531 } 532 533 pub fn update_console_size(&self) { 534 if let Some(resizer) = self.console_resizer.as_ref() { 535 resizer.update_console_size() 536 } 537 } 538 } 539 540 struct AddressManager { 541 allocator: Arc<Mutex<SystemAllocator>>, 542 #[cfg(target_arch = "x86_64")] 543 io_bus: Arc<Bus>, 544 mmio_bus: Arc<Bus>, 545 vm: Arc<dyn hypervisor::Vm>, 546 device_tree: Arc<Mutex<DeviceTree>>, 547 } 548 549 impl DeviceRelocation for AddressManager { 550 fn move_bar( 551 &self, 552 old_base: u64, 553 new_base: u64, 554 len: u64, 555 pci_dev: &mut dyn PciDevice, 556 region_type: PciBarRegionType, 557 ) -> std::result::Result<(), std::io::Error> { 558 match region_type { 559 PciBarRegionType::IoRegion => { 560 #[cfg(target_arch = "x86_64")] 561 { 562 // Update system allocator 563 self.allocator 564 .lock() 565 .unwrap() 566 .free_io_addresses(GuestAddress(old_base), len as GuestUsize); 567 568 self.allocator 569 .lock() 570 .unwrap() 571 .allocate_io_addresses( 572 Some(GuestAddress(new_base)), 573 len as GuestUsize, 574 None, 575 ) 576 .ok_or_else(|| { 577 io::Error::new(io::ErrorKind::Other, "failed allocating new IO range") 578 })?; 579 580 // Update PIO bus 581 self.io_bus 582 .update_range(old_base, len, new_base, len) 583 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 584 } 585 #[cfg(target_arch = "aarch64")] 586 error!("I/O region is not supported"); 587 } 588 PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => { 589 // Update system allocator 590 if region_type == PciBarRegionType::Memory32BitRegion { 591 self.allocator 592 .lock() 593 .unwrap() 594 .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize); 595 596 self.allocator 597 .lock() 598 .unwrap() 599 .allocate_mmio_hole_addresses( 600 Some(GuestAddress(new_base)), 601 len as GuestUsize, 602 None, 603 ) 604 .ok_or_else(|| { 605 io::Error::new( 606 io::ErrorKind::Other, 607 "failed allocating new 32 bits MMIO range", 608 ) 609 })?; 610 } else { 611 self.allocator 612 .lock() 613 .unwrap() 
614 .free_mmio_addresses(GuestAddress(old_base), len as GuestUsize); 615 616 self.allocator 617 .lock() 618 .unwrap() 619 .allocate_mmio_addresses( 620 Some(GuestAddress(new_base)), 621 len as GuestUsize, 622 None, 623 ) 624 .ok_or_else(|| { 625 io::Error::new( 626 io::ErrorKind::Other, 627 "failed allocating new 64 bits MMIO range", 628 ) 629 })?; 630 } 631 632 // Update MMIO bus 633 self.mmio_bus 634 .update_range(old_base, len, new_base, len) 635 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; 636 } 637 } 638 639 let any_dev = pci_dev.as_any(); 640 if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() { 641 // Update the device_tree resources associated with the device 642 if let Some(node) = self 643 .device_tree 644 .lock() 645 .unwrap() 646 .get_mut(&virtio_pci_dev.id()) 647 { 648 let mut resource_updated = false; 649 for resource in node.resources.iter_mut() { 650 if let Resource::MmioAddressRange { base, .. } = resource { 651 if *base == old_base { 652 *base = new_base; 653 resource_updated = true; 654 break; 655 } 656 } 657 } 658 659 if !resource_updated { 660 return Err(io::Error::new( 661 io::ErrorKind::Other, 662 format!( 663 "Couldn't find a resource with base 0x{:x} for device {}", 664 old_base, 665 virtio_pci_dev.id() 666 ), 667 )); 668 } 669 } else { 670 return Err(io::Error::new( 671 io::ErrorKind::Other, 672 format!( 673 "Couldn't find device {} from device tree", 674 virtio_pci_dev.id() 675 ), 676 )); 677 } 678 679 let bar_addr = virtio_pci_dev.config_bar_addr(); 680 if bar_addr == new_base { 681 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) { 682 let io_addr = IoEventAddress::Mmio(addr); 683 self.vm.unregister_ioevent(event, &io_addr).map_err(|e| { 684 io::Error::new( 685 io::ErrorKind::Other, 686 format!("failed to unregister ioevent: {:?}", e), 687 ) 688 })?; 689 } 690 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) { 691 let io_addr = IoEventAddress::Mmio(addr); 692 self.vm 693 .register_ioevent(event, &io_addr, None) 694 .map_err(|e| { 695 io::Error::new( 696 io::ErrorKind::Other, 697 format!("failed to register ioevent: {:?}", e), 698 ) 699 })?; 700 } 701 } else { 702 let virtio_dev = virtio_pci_dev.virtio_device(); 703 let mut virtio_dev = virtio_dev.lock().unwrap(); 704 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() { 705 if shm_regions.addr.raw_value() == old_base { 706 let mem_region = self.vm.make_user_memory_region( 707 shm_regions.mem_slot, 708 old_base, 709 shm_regions.len, 710 shm_regions.host_addr, 711 false, 712 false, 713 ); 714 715 self.vm.remove_user_memory_region(mem_region).map_err(|e| { 716 io::Error::new( 717 io::ErrorKind::Other, 718 format!("failed to remove user memory region: {:?}", e), 719 ) 720 })?; 721 722 // Create new mapping by inserting new region to KVM. 723 let mem_region = self.vm.make_user_memory_region( 724 shm_regions.mem_slot, 725 new_base, 726 shm_regions.len, 727 shm_regions.host_addr, 728 false, 729 false, 730 ); 731 732 self.vm.create_user_memory_region(mem_region).map_err(|e| { 733 io::Error::new( 734 io::ErrorKind::Other, 735 format!("failed to create user memory regions: {:?}", e), 736 ) 737 })?; 738 739 // Update shared memory regions to reflect the new mapping. 
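                        // (The old user memory region was removed and a new one
                        // created at the new base above; the device's own view of
                        // the shared memory is brought in line below.)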
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {:?}", e),
                            )
                        })?;
                    }
                }
            }
        }

        pci_dev.move_bar(old_base, new_base)
    }
}

#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub sub: File,
    pub path: PathBuf,
}

impl PtyPair {
    fn clone(&self) -> Self {
        PtyPair {
            main: self.main.try_clone().unwrap(),
            sub: self.sub.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}

pub struct DeviceManager {
    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // console PTY
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // serial PTY
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // Pipe used to signal console resize (SIGWINCH) events to the console device
    console_resize_pipe: Option<Arc<File>>,

    // Interrupt controller
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. for virtio-mmio)
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    #[cfg(feature = "acpi")]
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<(VirtioDeviceArc, bool, String)>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    // Keep a reference to the PCI bus
    pci_bus: Option<Arc<Mutex<PciBus>>>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<Arc<dyn hypervisor::Device>>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(u32, Vec<u32>)>,

    // Bitmap of PCI devices to hotplug.
    pci_devices_up: u32,

    // Bitmap of PCI devices to hotunplug.
    pci_devices_down: u32,

    // List of allocated IRQs for each PCI slot.
    pci_irq_slots: [u8; 32],

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    exit_evt: EventFd,
    reset_evt: EventFd,

    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    #[cfg(any(target_arch = "aarch64", feature = "acpi"))]
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    #[cfg(feature = "acpi")]
    acpi_address: GuestAddress,

    // List of virtio-mem devices
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // Helps identify if the VM is currently being restored
    restoring: bool,
}

impl DeviceManager {
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        exit_evt: &EventFd,
        reset_evt: &EventFd,
        seccomp_action: SeccompAction,
        #[cfg(any(target_arch = "aarch64", feature = "acpi"))] numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
        restoring: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        let device_tree = Arc::new(Mutex::new(DeviceTree::new()));

        let address_manager = Arc::new(AddressManager {
            allocator: memory_manager.lock().unwrap().allocator(),
            #[cfg(target_arch = "x86_64")]
            io_bus: Arc::new(Bus::new()),
            mmio_bus: Arc::new(Bus::new()),
            vm: vm.clone(),
            device_tree: Arc::clone(&device_tree),
        });

        // First we create the MSI interrupt manager; the legacy one is created
        // later, after the IOAPIC device creation.
        // The reason we create the MSI one first is because the IOAPIC needs it,
        // and then the legacy interrupt manager needs an IOAPIC. So we're
        // handling a linear dependency chain:
        // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
943 let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> = 944 Arc::new(MsiInterruptManager::new( 945 Arc::clone(&address_manager.allocator), 946 vm, 947 )); 948 949 #[cfg(feature = "acpi")] 950 let acpi_address = address_manager 951 .allocator 952 .lock() 953 .unwrap() 954 .allocate_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None) 955 .ok_or(DeviceManagerError::AllocateIoPort)?; 956 let device_manager = DeviceManager { 957 address_manager: Arc::clone(&address_manager), 958 console: Arc::new(Console::default()), 959 interrupt_controller: None, 960 cmdline_additions: Vec::new(), 961 #[cfg(feature = "acpi")] 962 ged_notification_device: None, 963 config, 964 memory_manager, 965 virtio_devices: Vec::new(), 966 bus_devices: Vec::new(), 967 device_id_cnt: Wrapping(0), 968 pci_bus: None, 969 msi_interrupt_manager, 970 legacy_interrupt_manager: None, 971 passthrough_device: None, 972 vfio_container: None, 973 iommu_device: None, 974 iommu_attached_devices: None, 975 pci_devices_up: 0, 976 pci_devices_down: 0, 977 pci_irq_slots: [0; 32], 978 device_tree, 979 exit_evt: exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?, 980 reset_evt: reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?, 981 #[cfg(target_arch = "aarch64")] 982 id_to_dev_info: HashMap::new(), 983 seccomp_action, 984 #[cfg(any(target_arch = "aarch64", feature = "acpi"))] 985 numa_nodes, 986 balloon: None, 987 activate_evt: activate_evt 988 .try_clone() 989 .map_err(DeviceManagerError::EventFd)?, 990 #[cfg(feature = "acpi")] 991 acpi_address, 992 serial_pty: None, 993 console_pty: None, 994 console_resize_pipe: None, 995 virtio_mem_devices: Vec::new(), 996 #[cfg(target_arch = "aarch64")] 997 gpio_device: None, 998 force_iommu, 999 restoring, 1000 }; 1001 1002 let device_manager = Arc::new(Mutex::new(device_manager)); 1003 1004 #[cfg(feature = "acpi")] 1005 address_manager 1006 .mmio_bus 1007 .insert( 1008 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>, 1009 acpi_address.0, 1010 DEVICE_MANAGER_ACPI_SIZE as u64, 1011 ) 1012 .map_err(DeviceManagerError::BusError)?; 1013 1014 Ok(device_manager) 1015 } 1016 1017 pub fn serial_pty(&self) -> Option<PtyPair> { 1018 self.serial_pty 1019 .as_ref() 1020 .map(|pty| pty.lock().unwrap().clone()) 1021 } 1022 1023 pub fn console_pty(&self) -> Option<PtyPair> { 1024 self.console_pty 1025 .as_ref() 1026 .map(|pty| pty.lock().unwrap().clone()) 1027 } 1028 1029 pub fn console_resize_pipe(&self) -> Option<Arc<File>> { 1030 self.console_resize_pipe.as_ref().map(Arc::clone) 1031 } 1032 1033 pub fn create_devices( 1034 &mut self, 1035 serial_pty: Option<PtyPair>, 1036 console_pty: Option<PtyPair>, 1037 console_resize_pipe: Option<File>, 1038 ) -> DeviceManagerResult<()> { 1039 let mut virtio_devices: Vec<(VirtioDeviceArc, bool, String)> = Vec::new(); 1040 1041 let interrupt_controller = self.add_interrupt_controller()?; 1042 1043 // Now we can create the legacy interrupt manager, which needs the freshly 1044 // formed IOAPIC device. 
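        // (On AArch64 the equivalent role is played by the GIC returned by
        // add_interrupt_controller() above.)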
1045 let legacy_interrupt_manager: Arc< 1046 dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>, 1047 > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone( 1048 &interrupt_controller, 1049 ))); 1050 1051 #[cfg(feature = "acpi")] 1052 { 1053 let memory_manager_acpi_address = self.memory_manager.lock().unwrap().acpi_address; 1054 self.address_manager 1055 .mmio_bus 1056 .insert( 1057 Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>, 1058 memory_manager_acpi_address.0, 1059 MEMORY_MANAGER_ACPI_SIZE as u64, 1060 ) 1061 .map_err(DeviceManagerError::BusError)?; 1062 } 1063 1064 #[cfg(target_arch = "x86_64")] 1065 self.add_legacy_devices( 1066 self.reset_evt 1067 .try_clone() 1068 .map_err(DeviceManagerError::EventFd)?, 1069 )?; 1070 1071 #[cfg(target_arch = "aarch64")] 1072 self.add_legacy_devices(&legacy_interrupt_manager)?; 1073 1074 #[cfg(feature = "acpi")] 1075 { 1076 self.ged_notification_device = self.add_acpi_devices( 1077 &legacy_interrupt_manager, 1078 self.reset_evt 1079 .try_clone() 1080 .map_err(DeviceManagerError::EventFd)?, 1081 self.exit_evt 1082 .try_clone() 1083 .map_err(DeviceManagerError::EventFd)?, 1084 )?; 1085 } 1086 1087 self.console = self.add_console_device( 1088 &legacy_interrupt_manager, 1089 &mut virtio_devices, 1090 serial_pty, 1091 console_pty, 1092 console_resize_pipe, 1093 )?; 1094 1095 // Reserve some IRQs for PCI devices in case they need to support INTx. 1096 self.reserve_legacy_interrupts_for_pci_devices()?; 1097 1098 self.legacy_interrupt_manager = Some(legacy_interrupt_manager); 1099 1100 virtio_devices.append(&mut self.make_virtio_devices()?); 1101 1102 self.add_pci_devices(virtio_devices.clone())?; 1103 1104 self.virtio_devices = virtio_devices; 1105 1106 Ok(()) 1107 } 1108 1109 fn reserve_legacy_interrupts_for_pci_devices(&mut self) -> DeviceManagerResult<()> { 1110 // Reserve 8 IRQs which will be shared across all PCI devices. 1111 let num_irqs = 8; 1112 let mut irqs: Vec<u8> = Vec::new(); 1113 for _ in 0..num_irqs { 1114 irqs.push( 1115 self.address_manager 1116 .allocator 1117 .lock() 1118 .unwrap() 1119 .allocate_irq() 1120 .ok_or(DeviceManagerError::AllocateIrq)? as u8, 1121 ); 1122 } 1123 1124 // There are 32 devices on the PCI bus, let's assign them an IRQ. 1125 for i in 0..32 { 1126 self.pci_irq_slots[i] = irqs[(i % num_irqs) as usize]; 1127 } 1128 1129 Ok(()) 1130 } 1131 1132 fn state(&self) -> DeviceManagerState { 1133 DeviceManagerState { 1134 device_tree: self.device_tree.lock().unwrap().clone(), 1135 device_id_cnt: self.device_id_cnt, 1136 } 1137 } 1138 1139 fn set_state(&mut self, state: &DeviceManagerState) { 1140 self.device_tree = Arc::new(Mutex::new(state.device_tree.clone())); 1141 self.device_id_cnt = state.device_id_cnt; 1142 } 1143 1144 #[cfg(target_arch = "aarch64")] 1145 /// Gets the information of the devices registered up to some point in time. 
1146 pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> { 1147 &self.id_to_dev_info 1148 } 1149 1150 #[allow(unused_variables)] 1151 fn add_pci_devices( 1152 &mut self, 1153 virtio_devices: Vec<(VirtioDeviceArc, bool, String)>, 1154 ) -> DeviceManagerResult<()> { 1155 let pci_root = PciRoot::new(None); 1156 let mut pci_bus = PciBus::new( 1157 pci_root, 1158 Arc::clone(&self.address_manager) as Arc<dyn DeviceRelocation>, 1159 ); 1160 1161 let iommu_id = String::from(IOMMU_DEVICE_NAME); 1162 1163 let (iommu_device, iommu_mapping) = if self.config.lock().unwrap().iommu { 1164 let (device, mapping) = virtio_devices::Iommu::new( 1165 iommu_id.clone(), 1166 self.seccomp_action.clone(), 1167 self.exit_evt 1168 .try_clone() 1169 .map_err(DeviceManagerError::EventFd)?, 1170 ) 1171 .map_err(DeviceManagerError::CreateVirtioIommu)?; 1172 let device = Arc::new(Mutex::new(device)); 1173 self.iommu_device = Some(Arc::clone(&device)); 1174 1175 // Fill the device tree with a new node. In case of restore, we 1176 // know there is nothing to do, so we can simply override the 1177 // existing entry. 1178 self.device_tree 1179 .lock() 1180 .unwrap() 1181 .insert(iommu_id.clone(), device_node!(iommu_id, device)); 1182 1183 (Some(device), Some(mapping)) 1184 } else { 1185 (None, None) 1186 }; 1187 1188 let mut iommu_attached_devices = Vec::new(); 1189 1190 for (device, iommu_attached, id) in virtio_devices { 1191 let mapping: &Option<Arc<IommuMapping>> = if iommu_attached { 1192 &iommu_mapping 1193 } else { 1194 &None 1195 }; 1196 1197 let dev_id = self.add_virtio_pci_device(device, &mut pci_bus, mapping, id)?; 1198 1199 if iommu_attached { 1200 iommu_attached_devices.push(dev_id); 1201 } 1202 } 1203 1204 let mut vfio_iommu_device_ids = self.add_vfio_devices(&mut pci_bus)?; 1205 iommu_attached_devices.append(&mut vfio_iommu_device_ids); 1206 1207 let mut vfio_user_iommu_device_ids = self.add_user_devices(&mut pci_bus)?; 1208 iommu_attached_devices.append(&mut vfio_user_iommu_device_ids); 1209 1210 if let Some(iommu_device) = iommu_device { 1211 let dev_id = self.add_virtio_pci_device(iommu_device, &mut pci_bus, &None, iommu_id)?; 1212 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices)); 1213 } 1214 1215 let pci_bus = Arc::new(Mutex::new(pci_bus)); 1216 let pci_config_io = Arc::new(Mutex::new(PciConfigIo::new(Arc::clone(&pci_bus)))); 1217 self.bus_devices 1218 .push(Arc::clone(&pci_config_io) as Arc<Mutex<dyn BusDevice>>); 1219 #[cfg(target_arch = "x86_64")] 1220 self.address_manager 1221 .io_bus 1222 .insert(pci_config_io, 0xcf8, 0x8) 1223 .map_err(DeviceManagerError::BusError)?; 1224 let pci_config_mmio = Arc::new(Mutex::new(PciConfigMmio::new(Arc::clone(&pci_bus)))); 1225 self.bus_devices 1226 .push(Arc::clone(&pci_config_mmio) as Arc<Mutex<dyn BusDevice>>); 1227 self.address_manager 1228 .mmio_bus 1229 .insert( 1230 pci_config_mmio, 1231 arch::layout::PCI_MMCONFIG_START.0, 1232 arch::layout::PCI_MMCONFIG_SIZE, 1233 ) 1234 .map_err(DeviceManagerError::BusError)?; 1235 1236 self.pci_bus = Some(pci_bus); 1237 1238 Ok(()) 1239 } 1240 1241 #[cfg(target_arch = "aarch64")] 1242 fn add_interrupt_controller( 1243 &mut self, 1244 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1245 let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new( 1246 gic::Gic::new( 1247 self.config.lock().unwrap().cpus.boot_vcpus, 1248 Arc::clone(&self.msi_interrupt_manager), 1249 ) 1250 .map_err(DeviceManagerError::CreateInterruptController)?, 1251 )); 1252 1253 
self.interrupt_controller = Some(interrupt_controller.clone()); 1254 1255 // Unlike x86_64, the "interrupt_controller" here for AArch64 is only 1256 // a `Gic` object that implements the `InterruptController` to provide 1257 // interrupt delivery service. This is not the real GIC device so that 1258 // we do not need to insert it to the device tree. 1259 1260 Ok(interrupt_controller) 1261 } 1262 1263 #[cfg(target_arch = "aarch64")] 1264 pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> { 1265 self.interrupt_controller.as_ref() 1266 } 1267 1268 #[cfg(target_arch = "x86_64")] 1269 fn add_interrupt_controller( 1270 &mut self, 1271 ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> { 1272 let id = String::from(IOAPIC_DEVICE_NAME); 1273 1274 // Create IOAPIC 1275 let interrupt_controller = Arc::new(Mutex::new( 1276 ioapic::Ioapic::new( 1277 id.clone(), 1278 APIC_START, 1279 Arc::clone(&self.msi_interrupt_manager), 1280 ) 1281 .map_err(DeviceManagerError::CreateInterruptController)?, 1282 )); 1283 1284 self.interrupt_controller = Some(interrupt_controller.clone()); 1285 1286 self.address_manager 1287 .mmio_bus 1288 .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE) 1289 .map_err(DeviceManagerError::BusError)?; 1290 1291 self.bus_devices 1292 .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>); 1293 1294 // Fill the device tree with a new node. In case of restore, we 1295 // know there is nothing to do, so we can simply override the 1296 // existing entry. 1297 self.device_tree 1298 .lock() 1299 .unwrap() 1300 .insert(id.clone(), device_node!(id, interrupt_controller)); 1301 1302 Ok(interrupt_controller) 1303 } 1304 1305 #[cfg(feature = "acpi")] 1306 fn add_acpi_devices( 1307 &mut self, 1308 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1309 reset_evt: EventFd, 1310 exit_evt: EventFd, 1311 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> { 1312 let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new( 1313 exit_evt, reset_evt, 1314 ))); 1315 1316 self.bus_devices 1317 .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>); 1318 1319 #[cfg(target_arch = "x86_64")] 1320 { 1321 self.address_manager 1322 .allocator 1323 .lock() 1324 .unwrap() 1325 .allocate_io_addresses(Some(GuestAddress(0x3c0)), 0x8, None) 1326 .ok_or(DeviceManagerError::AllocateIoPort)?; 1327 1328 self.address_manager 1329 .io_bus 1330 .insert(shutdown_device, 0x3c0, 0x4) 1331 .map_err(DeviceManagerError::BusError)?; 1332 } 1333 1334 let ged_irq = self 1335 .address_manager 1336 .allocator 1337 .lock() 1338 .unwrap() 1339 .allocate_irq() 1340 .unwrap(); 1341 let interrupt_group = interrupt_manager 1342 .create_group(LegacyIrqGroupConfig { 1343 irq: ged_irq as InterruptIndex, 1344 }) 1345 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1346 let ged_address = self 1347 .address_manager 1348 .allocator 1349 .lock() 1350 .unwrap() 1351 .allocate_mmio_addresses(None, devices::acpi::GED_DEVICE_ACPI_SIZE as u64, None) 1352 .ok_or(DeviceManagerError::AllocateMmioAddress)?; 1353 let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new( 1354 interrupt_group, 1355 ged_irq, 1356 ged_address, 1357 ))); 1358 self.address_manager 1359 .mmio_bus 1360 .insert( 1361 ged_device.clone(), 1362 ged_address.0, 1363 devices::acpi::GED_DEVICE_ACPI_SIZE as u64, 1364 ) 1365 .map_err(DeviceManagerError::BusError)?; 1366 self.bus_devices 1367 .push(Arc::clone(&ged_device) as Arc<Mutex<dyn 
BusDevice>>); 1368 1369 let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new())); 1370 1371 self.bus_devices 1372 .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>); 1373 1374 #[cfg(target_arch = "x86_64")] 1375 { 1376 self.address_manager 1377 .allocator 1378 .lock() 1379 .unwrap() 1380 .allocate_io_addresses(Some(GuestAddress(0xb008)), 0x4, None) 1381 .ok_or(DeviceManagerError::AllocateIoPort)?; 1382 1383 self.address_manager 1384 .io_bus 1385 .insert(pm_timer_device, 0xb008, 0x4) 1386 .map_err(DeviceManagerError::BusError)?; 1387 } 1388 1389 Ok(Some(ged_device)) 1390 } 1391 1392 #[cfg(target_arch = "x86_64")] 1393 fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> { 1394 // Add a shutdown device (i8042) 1395 let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(reset_evt))); 1396 1397 self.bus_devices 1398 .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>); 1399 1400 self.address_manager 1401 .io_bus 1402 .insert(i8042, 0x61, 0x4) 1403 .map_err(DeviceManagerError::BusError)?; 1404 #[cfg(feature = "cmos")] 1405 { 1406 // Add a CMOS emulated device 1407 let mem_size = self 1408 .memory_manager 1409 .lock() 1410 .unwrap() 1411 .guest_memory() 1412 .memory() 1413 .last_addr() 1414 .0 1415 + 1; 1416 let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size); 1417 let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0); 1418 1419 let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new( 1420 mem_below_4g, 1421 mem_above_4g, 1422 ))); 1423 1424 self.bus_devices 1425 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>); 1426 1427 self.address_manager 1428 .io_bus 1429 .insert(cmos, 0x70, 0x2) 1430 .map_err(DeviceManagerError::BusError)?; 1431 } 1432 #[cfg(feature = "fwdebug")] 1433 { 1434 let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new())); 1435 1436 self.bus_devices 1437 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>); 1438 1439 self.address_manager 1440 .io_bus 1441 .insert(fwdebug, 0x402, 0x1) 1442 .map_err(DeviceManagerError::BusError)?; 1443 } 1444 1445 Ok(()) 1446 } 1447 1448 #[cfg(target_arch = "aarch64")] 1449 fn add_legacy_devices( 1450 &mut self, 1451 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1452 ) -> DeviceManagerResult<()> { 1453 // Add a RTC device 1454 let rtc_irq = self 1455 .address_manager 1456 .allocator 1457 .lock() 1458 .unwrap() 1459 .allocate_irq() 1460 .unwrap(); 1461 1462 let interrupt_group = interrupt_manager 1463 .create_group(LegacyIrqGroupConfig { 1464 irq: rtc_irq as InterruptIndex, 1465 }) 1466 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1467 1468 let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group))); 1469 1470 self.bus_devices 1471 .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>); 1472 1473 let addr = GuestAddress(arch::layout::LEGACY_RTC_MAPPED_IO_START); 1474 1475 self.address_manager 1476 .mmio_bus 1477 .insert(rtc_device, addr.0, MMIO_LEN) 1478 .map_err(DeviceManagerError::BusError)?; 1479 1480 self.id_to_dev_info.insert( 1481 (DeviceType::Rtc, "rtc".to_string()), 1482 MmioDeviceInfo { 1483 addr: addr.0, 1484 irq: rtc_irq, 1485 }, 1486 ); 1487 1488 // Add a GPIO device 1489 let id = String::from(GPIO_DEVICE_NAME_PREFIX); 1490 let gpio_irq = self 1491 .address_manager 1492 .allocator 1493 .lock() 1494 .unwrap() 1495 .allocate_irq() 1496 .unwrap(); 1497 1498 let interrupt_group = interrupt_manager 1499 
.create_group(LegacyIrqGroupConfig { 1500 irq: gpio_irq as InterruptIndex, 1501 }) 1502 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1503 1504 let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new( 1505 id.clone(), 1506 interrupt_group, 1507 ))); 1508 1509 self.bus_devices 1510 .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>); 1511 1512 let addr = GuestAddress(arch::layout::LEGACY_GPIO_MAPPED_IO_START); 1513 1514 self.address_manager 1515 .mmio_bus 1516 .insert(gpio_device.clone(), addr.0, MMIO_LEN) 1517 .map_err(DeviceManagerError::BusError)?; 1518 1519 self.gpio_device = Some(gpio_device.clone()); 1520 1521 self.id_to_dev_info.insert( 1522 (DeviceType::Gpio, "gpio".to_string()), 1523 MmioDeviceInfo { 1524 addr: addr.0, 1525 irq: gpio_irq, 1526 }, 1527 ); 1528 1529 self.device_tree 1530 .lock() 1531 .unwrap() 1532 .insert(id.clone(), device_node!(id, gpio_device)); 1533 1534 Ok(()) 1535 } 1536 1537 #[cfg(target_arch = "x86_64")] 1538 fn add_serial_device( 1539 &mut self, 1540 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1541 serial_writer: Option<Box<dyn io::Write + Send>>, 1542 ) -> DeviceManagerResult<Arc<Mutex<Serial>>> { 1543 // Serial is tied to IRQ #4 1544 let serial_irq = 4; 1545 1546 let id = String::from(SERIAL_DEVICE_NAME_PREFIX); 1547 1548 let interrupt_group = interrupt_manager 1549 .create_group(LegacyIrqGroupConfig { 1550 irq: serial_irq as InterruptIndex, 1551 }) 1552 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1553 1554 let serial = Arc::new(Mutex::new(Serial::new( 1555 id.clone(), 1556 interrupt_group, 1557 serial_writer, 1558 ))); 1559 1560 self.bus_devices 1561 .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>); 1562 1563 self.address_manager 1564 .allocator 1565 .lock() 1566 .unwrap() 1567 .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None) 1568 .ok_or(DeviceManagerError::AllocateIoPort)?; 1569 1570 self.address_manager 1571 .io_bus 1572 .insert(serial.clone(), 0x3f8, 0x8) 1573 .map_err(DeviceManagerError::BusError)?; 1574 1575 // Fill the device tree with a new node. In case of restore, we 1576 // know there is nothing to do, so we can simply override the 1577 // existing entry. 
1578 self.device_tree 1579 .lock() 1580 .unwrap() 1581 .insert(id.clone(), device_node!(id, serial)); 1582 1583 Ok(serial) 1584 } 1585 1586 #[cfg(target_arch = "aarch64")] 1587 fn add_serial_device( 1588 &mut self, 1589 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1590 serial_writer: Option<Box<dyn io::Write + Send>>, 1591 ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> { 1592 let id = String::from(SERIAL_DEVICE_NAME_PREFIX); 1593 1594 let serial_irq = self 1595 .address_manager 1596 .allocator 1597 .lock() 1598 .unwrap() 1599 .allocate_irq() 1600 .unwrap(); 1601 1602 let interrupt_group = interrupt_manager 1603 .create_group(LegacyIrqGroupConfig { 1604 irq: serial_irq as InterruptIndex, 1605 }) 1606 .map_err(DeviceManagerError::CreateInterruptGroup)?; 1607 1608 let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new( 1609 id.clone(), 1610 interrupt_group, 1611 serial_writer, 1612 ))); 1613 1614 self.bus_devices 1615 .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>); 1616 1617 let addr = GuestAddress(arch::layout::LEGACY_SERIAL_MAPPED_IO_START); 1618 1619 self.address_manager 1620 .mmio_bus 1621 .insert(serial.clone(), addr.0, MMIO_LEN) 1622 .map_err(DeviceManagerError::BusError)?; 1623 1624 self.id_to_dev_info.insert( 1625 (DeviceType::Serial, DeviceType::Serial.to_string()), 1626 MmioDeviceInfo { 1627 addr: addr.0, 1628 irq: serial_irq, 1629 }, 1630 ); 1631 1632 self.cmdline_additions 1633 .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0)); 1634 1635 // Fill the device tree with a new node. In case of restore, we 1636 // know there is nothing to do, so we can simply override the 1637 // existing entry. 1638 self.device_tree 1639 .lock() 1640 .unwrap() 1641 .insert(id.clone(), device_node!(id, serial)); 1642 1643 Ok(serial) 1644 } 1645 1646 fn modify_mode<F: FnOnce(&mut termios)>( 1647 &self, 1648 fd: RawFd, 1649 f: F, 1650 ) -> vmm_sys_util::errno::Result<()> { 1651 // Safe because we check the return value of isatty. 1652 if unsafe { isatty(fd) } != 1 { 1653 return Ok(()); 1654 } 1655 1656 // The following pair are safe because termios gets totally overwritten by tcgetattr and we 1657 // check the return result. 1658 let mut termios: termios = unsafe { zeroed() }; 1659 let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) }; 1660 if ret < 0 { 1661 return vmm_sys_util::errno::errno_result(); 1662 } 1663 f(&mut termios); 1664 // Safe because the syscall will only read the extent of termios and we check the return result. 
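        // TCSANOW applies the new attributes immediately rather than waiting
        // for pending output to drain.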
1665 let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) }; 1666 if ret < 0 { 1667 return vmm_sys_util::errno::errno_result(); 1668 } 1669 1670 Ok(()) 1671 } 1672 1673 fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> { 1674 self.modify_mode(f.as_raw_fd(), |t| t.c_lflag &= !(ICANON | ECHO | ISIG)) 1675 } 1676 1677 fn listen_for_sigwinch_on_tty(&mut self, pty: &File) -> std::io::Result<()> { 1678 let seccomp_filter = 1679 get_seccomp_filter(&self.seccomp_action, Thread::PtyForeground).unwrap(); 1680 1681 let pipe = start_sigwinch_listener(seccomp_filter, pty)?; 1682 1683 self.console_resize_pipe = Some(Arc::new(pipe)); 1684 1685 Ok(()) 1686 } 1687 1688 fn add_virtio_console_device( 1689 &mut self, 1690 virtio_devices: &mut Vec<(VirtioDeviceArc, bool, String)>, 1691 console_pty: Option<PtyPair>, 1692 resize_pipe: Option<File>, 1693 ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> { 1694 let console_config = self.config.lock().unwrap().console.clone(); 1695 let endpoint = match console_config.mode { 1696 ConsoleOutputMode::File => { 1697 let file = File::create(console_config.file.as_ref().unwrap()) 1698 .map_err(DeviceManagerError::ConsoleOutputFileOpen)?; 1699 Endpoint::File(file) 1700 } 1701 ConsoleOutputMode::Pty => { 1702 if let Some(pty) = console_pty { 1703 self.config.lock().unwrap().console.file = Some(pty.path.clone()); 1704 let file = pty.main.try_clone().unwrap(); 1705 self.console_pty = Some(Arc::new(Mutex::new(pty))); 1706 self.console_resize_pipe = Some(Arc::new(resize_pipe.unwrap())); 1707 Endpoint::FilePair(file.try_clone().unwrap(), file) 1708 } else { 1709 let (main, mut sub, path) = 1710 create_pty(false).map_err(DeviceManagerError::ConsolePtyOpen)?; 1711 self.set_raw_mode(&mut sub) 1712 .map_err(DeviceManagerError::SetPtyRaw)?; 1713 self.config.lock().unwrap().console.file = Some(path.clone()); 1714 let file = main.try_clone().unwrap(); 1715 assert!(resize_pipe.is_none()); 1716 self.listen_for_sigwinch_on_tty(&sub).unwrap(); 1717 self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path }))); 1718 Endpoint::FilePair(file.try_clone().unwrap(), file) 1719 } 1720 } 1721 ConsoleOutputMode::Tty => { 1722 // If an interactive TTY then we can accept input 1723 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } { 1724 Endpoint::FilePair( 1725 // Duplicating the file descriptors like this is needed as otherwise 1726 // they will be closed on a reboot and the numbers reused 1727 unsafe { File::from_raw_fd(libc::dup(libc::STDOUT_FILENO)) }, 1728 unsafe { File::from_raw_fd(libc::dup(libc::STDIN_FILENO)) }, 1729 ) 1730 } else { 1731 Endpoint::File(unsafe { File::from_raw_fd(libc::dup(libc::STDOUT_FILENO)) }) 1732 } 1733 } 1734 ConsoleOutputMode::Null => Endpoint::Null, 1735 ConsoleOutputMode::Off => return Ok(None), 1736 }; 1737 let id = String::from(CONSOLE_DEVICE_NAME); 1738 1739 let (virtio_console_device, console_resizer) = virtio_devices::Console::new( 1740 id.clone(), 1741 endpoint, 1742 self.console_resize_pipe 1743 .as_ref() 1744 .map(|p| p.try_clone().unwrap()), 1745 self.force_iommu | console_config.iommu, 1746 self.seccomp_action.clone(), 1747 self.exit_evt 1748 .try_clone() 1749 .map_err(DeviceManagerError::EventFd)?, 1750 ) 1751 .map_err(DeviceManagerError::CreateVirtioConsole)?; 1752 let virtio_console_device = Arc::new(Mutex::new(virtio_console_device)); 1753 virtio_devices.push(( 1754 Arc::clone(&virtio_console_device) as VirtioDeviceArc, 1755 console_config.iommu, 1756 id.clone(), 1757 )); 1758 1759 
// Fill the device tree with a new node. In case of restore, we 1760 // know there is nothing to do, so we can simply override the 1761 // existing entry. 1762 self.device_tree 1763 .lock() 1764 .unwrap() 1765 .insert(id.clone(), device_node!(id, virtio_console_device)); 1766 1767 // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY 1768 Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) { 1769 Some(console_resizer) 1770 } else { 1771 None 1772 }) 1773 } 1774 1775 fn add_console_device( 1776 &mut self, 1777 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, 1778 virtio_devices: &mut Vec<(VirtioDeviceArc, bool, String)>, 1779 serial_pty: Option<PtyPair>, 1780 console_pty: Option<PtyPair>, 1781 console_resize_pipe: Option<File>, 1782 ) -> DeviceManagerResult<Arc<Console>> { 1783 let serial_config = self.config.lock().unwrap().serial.clone(); 1784 let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode { 1785 ConsoleOutputMode::File => Some(Box::new( 1786 File::create(serial_config.file.as_ref().unwrap()) 1787 .map_err(DeviceManagerError::SerialOutputFileOpen)?, 1788 )), 1789 ConsoleOutputMode::Pty => { 1790 if let Some(pty) = serial_pty { 1791 self.config.lock().unwrap().serial.file = Some(pty.path.clone()); 1792 let writer = pty.main.try_clone().unwrap(); 1793 let buffer = SerialBuffer::new(Box::new(writer)); 1794 self.serial_pty = Some(Arc::new(Mutex::new(pty))); 1795 Some(Box::new(buffer)) 1796 } else { 1797 let (main, mut sub, path) = 1798 create_pty(true).map_err(DeviceManagerError::SerialPtyOpen)?; 1799 self.set_raw_mode(&mut sub) 1800 .map_err(DeviceManagerError::SetPtyRaw)?; 1801 self.config.lock().unwrap().serial.file = Some(path.clone()); 1802 let writer = main.try_clone().unwrap(); 1803 let buffer = SerialBuffer::new(Box::new(writer)); 1804 self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path }))); 1805 Some(Box::new(buffer)) 1806 } 1807 } 1808 ConsoleOutputMode::Tty => Some(Box::new(stdout())), 1809 ConsoleOutputMode::Off | ConsoleOutputMode::Null => None, 1810 }; 1811 let serial = if serial_config.mode != ConsoleOutputMode::Off { 1812 Some(self.add_serial_device(interrupt_manager, serial_writer)?) 
1813 } else { 1814 None 1815 }; 1816 1817 let console_resizer = 1818 self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?; 1819 1820 Ok(Arc::new(Console { 1821 serial, 1822 console_resizer, 1823 })) 1824 } 1825 1826 fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 1827 let mut devices: Vec<(VirtioDeviceArc, bool, String)> = Vec::new(); 1828 1829 // Create "standard" virtio devices (net/block/rng) 1830 devices.append(&mut self.make_virtio_block_devices()?); 1831 devices.append(&mut self.make_virtio_net_devices()?); 1832 devices.append(&mut self.make_virtio_rng_devices()?); 1833 1834 // Add virtio-fs if required 1835 devices.append(&mut self.make_virtio_fs_devices()?); 1836 1837 // Add virtio-pmem if required 1838 devices.append(&mut self.make_virtio_pmem_devices()?); 1839 1840 // Add virtio-vsock if required 1841 devices.append(&mut self.make_virtio_vsock_devices()?); 1842 1843 devices.append(&mut self.make_virtio_mem_devices()?); 1844 1845 // Add virtio-balloon if required 1846 devices.append(&mut self.make_virtio_balloon_devices()?); 1847 1848 // Add virtio-watchdog device 1849 devices.append(&mut self.make_virtio_watchdog_devices()?); 1850 1851 Ok(devices) 1852 } 1853 1854 fn make_virtio_block_device( 1855 &mut self, 1856 disk_cfg: &mut DiskConfig, 1857 ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> { 1858 let id = if let Some(id) = &disk_cfg.id { 1859 id.clone() 1860 } else { 1861 let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?; 1862 disk_cfg.id = Some(id.clone()); 1863 id 1864 }; 1865 1866 info!("Creating virtio-block device: {:?}", disk_cfg); 1867 1868 if disk_cfg.vhost_user { 1869 let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone(); 1870 let vu_cfg = VhostUserConfig { 1871 socket, 1872 num_queues: disk_cfg.num_queues, 1873 queue_size: disk_cfg.queue_size, 1874 }; 1875 let vhost_user_block_device = Arc::new(Mutex::new( 1876 match virtio_devices::vhost_user::Blk::new( 1877 id.clone(), 1878 vu_cfg, 1879 self.restoring, 1880 self.seccomp_action.clone(), 1881 self.exit_evt 1882 .try_clone() 1883 .map_err(DeviceManagerError::EventFd)?, 1884 ) { 1885 Ok(vub_device) => vub_device, 1886 Err(e) => { 1887 return Err(DeviceManagerError::CreateVhostUserBlk(e)); 1888 } 1889 }, 1890 )); 1891 1892 // Fill the device tree with a new node. In case of restore, we 1893 // know there is nothing to do, so we can simply override the 1894 // existing entry. 1895 self.device_tree 1896 .lock() 1897 .unwrap() 1898 .insert(id.clone(), device_node!(id, vhost_user_block_device)); 1899 1900 Ok(( 1901 Arc::clone(&vhost_user_block_device) as VirtioDeviceArc, 1902 false, 1903 id, 1904 )) 1905 } else { 1906 let mut options = OpenOptions::new(); 1907 options.read(true); 1908 options.write(!disk_cfg.readonly); 1909 if disk_cfg.direct { 1910 options.custom_flags(libc::O_DIRECT); 1911 } 1912 // Open block device path 1913 let mut file: File = options 1914 .open( 1915 disk_cfg 1916 .path 1917 .as_ref() 1918 .ok_or(DeviceManagerError::NoDiskPath)? 1919 .clone(), 1920 ) 1921 .map_err(DeviceManagerError::Disk)?; 1922 let image_type = 1923 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?; 1924 1925 let image = match image_type { 1926 ImageType::FixedVhd => { 1927 // Use asynchronous backend relying on io_uring if the 1928 // syscalls are supported. 
1929 if block_io_uring_is_supported() && !disk_cfg.disable_io_uring { 1930 info!("Using asynchronous fixed VHD disk file (io_uring)"); 1931 Box::new( 1932 FixedVhdDiskAsync::new(file) 1933 .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?, 1934 ) as Box<dyn DiskFile> 1935 } else { 1936 info!("Using synchronous fixed VHD disk file"); 1937 Box::new( 1938 FixedVhdDiskSync::new(file) 1939 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?, 1940 ) as Box<dyn DiskFile> 1941 } 1942 } 1943 ImageType::Raw => { 1944 // Use asynchronous backend relying on io_uring if the 1945 // syscalls are supported. 1946 if block_io_uring_is_supported() && !disk_cfg.disable_io_uring { 1947 info!("Using asynchronous RAW disk file (io_uring)"); 1948 Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile> 1949 } else { 1950 info!("Using synchronous RAW disk file"); 1951 Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile> 1952 } 1953 } 1954 ImageType::Qcow2 => { 1955 info!("Using synchronous QCOW disk file"); 1956 Box::new( 1957 QcowDiskSync::new(file, disk_cfg.direct) 1958 .map_err(DeviceManagerError::CreateQcowDiskSync)?, 1959 ) as Box<dyn DiskFile> 1960 } 1961 ImageType::Vhdx => { 1962 info!("Using synchronous VHDX disk file"); 1963 Box::new( 1964 VhdxDiskSync::new(file) 1965 .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?, 1966 ) as Box<dyn DiskFile> 1967 } 1968 }; 1969 1970 let dev = Arc::new(Mutex::new( 1971 virtio_devices::Block::new( 1972 id.clone(), 1973 image, 1974 disk_cfg 1975 .path 1976 .as_ref() 1977 .ok_or(DeviceManagerError::NoDiskPath)? 1978 .clone(), 1979 disk_cfg.readonly, 1980 self.force_iommu | disk_cfg.iommu, 1981 disk_cfg.num_queues, 1982 disk_cfg.queue_size, 1983 self.seccomp_action.clone(), 1984 disk_cfg.rate_limiter_config, 1985 self.exit_evt 1986 .try_clone() 1987 .map_err(DeviceManagerError::EventFd)?, 1988 ) 1989 .map_err(DeviceManagerError::CreateVirtioBlock)?, 1990 )); 1991 1992 let virtio_device = Arc::clone(&dev) as VirtioDeviceArc; 1993 let migratable_device = dev as Arc<Mutex<dyn Migratable>>; 1994 1995 // Fill the device tree with a new node. In case of restore, we 1996 // know there is nothing to do, so we can simply override the 1997 // existing entry. 
1998 self.device_tree 1999 .lock() 2000 .unwrap() 2001 .insert(id.clone(), device_node!(id, migratable_device)); 2002 2003 Ok((virtio_device, disk_cfg.iommu, id)) 2004 } 2005 } 2006 2007 fn make_virtio_block_devices( 2008 &mut self, 2009 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 2010 let mut devices = Vec::new(); 2011 2012 let mut block_devices = self.config.lock().unwrap().disks.clone(); 2013 if let Some(disk_list_cfg) = &mut block_devices { 2014 for disk_cfg in disk_list_cfg.iter_mut() { 2015 devices.push(self.make_virtio_block_device(disk_cfg)?); 2016 } 2017 } 2018 self.config.lock().unwrap().disks = block_devices; 2019 2020 Ok(devices) 2021 } 2022 2023 fn make_virtio_net_device( 2024 &mut self, 2025 net_cfg: &mut NetConfig, 2026 ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> { 2027 let id = if let Some(id) = &net_cfg.id { 2028 id.clone() 2029 } else { 2030 let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?; 2031 net_cfg.id = Some(id.clone()); 2032 id 2033 }; 2034 info!("Creating virtio-net device: {:?}", net_cfg); 2035 2036 if net_cfg.vhost_user { 2037 let socket = net_cfg.vhost_socket.as_ref().unwrap().clone(); 2038 let vu_cfg = VhostUserConfig { 2039 socket, 2040 num_queues: net_cfg.num_queues, 2041 queue_size: net_cfg.queue_size, 2042 }; 2043 let server = match net_cfg.vhost_mode { 2044 VhostMode::Client => false, 2045 VhostMode::Server => true, 2046 }; 2047 let vhost_user_net_device = Arc::new(Mutex::new( 2048 match virtio_devices::vhost_user::Net::new( 2049 id.clone(), 2050 net_cfg.mac, 2051 vu_cfg, 2052 server, 2053 self.seccomp_action.clone(), 2054 self.restoring, 2055 self.exit_evt 2056 .try_clone() 2057 .map_err(DeviceManagerError::EventFd)?, 2058 ) { 2059 Ok(vun_device) => vun_device, 2060 Err(e) => { 2061 return Err(DeviceManagerError::CreateVhostUserNet(e)); 2062 } 2063 }, 2064 )); 2065 2066 // Fill the device tree with a new node. In case of restore, we 2067 // know there is nothing to do, so we can simply override the 2068 // existing entry. 
2069 self.device_tree 2070 .lock() 2071 .unwrap() 2072 .insert(id.clone(), device_node!(id, vhost_user_net_device)); 2073 2074 Ok(( 2075 Arc::clone(&vhost_user_net_device) as VirtioDeviceArc, 2076 net_cfg.iommu, 2077 id, 2078 )) 2079 } else { 2080 let virtio_net_device = if let Some(ref tap_if_name) = net_cfg.tap { 2081 Arc::new(Mutex::new( 2082 virtio_devices::Net::new( 2083 id.clone(), 2084 Some(tap_if_name), 2085 None, 2086 None, 2087 Some(net_cfg.mac), 2088 &mut net_cfg.host_mac, 2089 self.force_iommu | net_cfg.iommu, 2090 net_cfg.num_queues, 2091 net_cfg.queue_size, 2092 self.seccomp_action.clone(), 2093 net_cfg.rate_limiter_config, 2094 self.exit_evt 2095 .try_clone() 2096 .map_err(DeviceManagerError::EventFd)?, 2097 ) 2098 .map_err(DeviceManagerError::CreateVirtioNet)?, 2099 )) 2100 } else if let Some(fds) = &net_cfg.fds { 2101 Arc::new(Mutex::new( 2102 virtio_devices::Net::from_tap_fds( 2103 id.clone(), 2104 fds, 2105 Some(net_cfg.mac), 2106 self.force_iommu | net_cfg.iommu, 2107 net_cfg.queue_size, 2108 self.seccomp_action.clone(), 2109 net_cfg.rate_limiter_config, 2110 self.exit_evt 2111 .try_clone() 2112 .map_err(DeviceManagerError::EventFd)?, 2113 ) 2114 .map_err(DeviceManagerError::CreateVirtioNet)?, 2115 )) 2116 } else { 2117 Arc::new(Mutex::new( 2118 virtio_devices::Net::new( 2119 id.clone(), 2120 None, 2121 Some(net_cfg.ip), 2122 Some(net_cfg.mask), 2123 Some(net_cfg.mac), 2124 &mut net_cfg.host_mac, 2125 self.force_iommu | net_cfg.iommu, 2126 net_cfg.num_queues, 2127 net_cfg.queue_size, 2128 self.seccomp_action.clone(), 2129 net_cfg.rate_limiter_config, 2130 self.exit_evt 2131 .try_clone() 2132 .map_err(DeviceManagerError::EventFd)?, 2133 ) 2134 .map_err(DeviceManagerError::CreateVirtioNet)?, 2135 )) 2136 }; 2137 2138 // Fill the device tree with a new node. In case of restore, we 2139 // know there is nothing to do, so we can simply override the 2140 // existing entry. 
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_net_device));

            Ok((
                Arc::clone(&virtio_net_device) as VirtioDeviceArc,
                net_cfg.iommu,
                id,
            ))
        }
    }

    /// Add virtio-net and vhost-user-net devices
    fn make_virtio_net_devices(
        &mut self,
    ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
        let mut devices = Vec::new();
        let mut net_devices = self.config.lock().unwrap().net.clone();
        if let Some(net_list_cfg) = &mut net_devices {
            for net_cfg in net_list_cfg.iter_mut() {
                devices.push(self.make_virtio_net_device(net_cfg)?);
            }
        }
        self.config.lock().unwrap().net = net_devices;

        Ok(devices)
    }

    fn make_virtio_rng_devices(
        &mut self,
    ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        if let Some(rng_path) = rng_config.src.to_str() {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push((
                Arc::clone(&virtio_rng_device) as VirtioDeviceArc,
                rng_config.iommu,
                id.clone(),
            ));

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_rng_device));
        }

        Ok(devices)
    }

    fn make_virtio_fs_device(
        &mut self,
        fs_cfg: &mut FsConfig,
    ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> {
        let id = if let Some(id) = &fs_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
            fs_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-fs device: {:?}", fs_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        let cache_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
            info!("Restoring virtio-fs {} resources", id);

            let mut cache_range: Option<(u64, u64)> = None;
            for resource in node.resources.iter() {
                match resource {
                    Resource::MmioAddressRange { base, size } => {
                        if cache_range.is_some() {
                            return Err(DeviceManagerError::ResourceAlreadyExists);
                        }

                        cache_range = Some((*base, *size));
                    }
                    _ => {
                        error!("Unexpected resource {:?} for {}", resource, id);
                    }
                }
            }

            cache_range
        } else {
            None
        };

        if let Some(fs_socket) = fs_cfg.socket.to_str() {
            let cache = if fs_cfg.dax {
                let (cache_base, cache_size) = if let Some((base, size)) = cache_range {
                    // The memory needs to be 2MiB aligned in order to support
                    // hugepages.
2258 self.address_manager 2259 .allocator 2260 .lock() 2261 .unwrap() 2262 .allocate_mmio_addresses( 2263 Some(GuestAddress(base)), 2264 size as GuestUsize, 2265 Some(0x0020_0000), 2266 ) 2267 .ok_or(DeviceManagerError::FsRangeAllocation)?; 2268 2269 (base, size) 2270 } else { 2271 let size = fs_cfg.cache_size; 2272 // The memory needs to be 2MiB aligned in order to support 2273 // hugepages. 2274 let base = self 2275 .address_manager 2276 .allocator 2277 .lock() 2278 .unwrap() 2279 .allocate_mmio_addresses(None, size as GuestUsize, Some(0x0020_0000)) 2280 .ok_or(DeviceManagerError::FsRangeAllocation)?; 2281 2282 (base.raw_value(), size) 2283 }; 2284 2285 // Update the node with correct resource information. 2286 node.resources.push(Resource::MmioAddressRange { 2287 base: cache_base, 2288 size: cache_size, 2289 }); 2290 2291 let mmap_region = MmapRegion::build( 2292 None, 2293 cache_size as usize, 2294 libc::PROT_NONE, 2295 libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, 2296 ) 2297 .map_err(DeviceManagerError::NewMmapRegion)?; 2298 let host_addr: u64 = mmap_region.as_ptr() as u64; 2299 2300 let mem_slot = self 2301 .memory_manager 2302 .lock() 2303 .unwrap() 2304 .create_userspace_mapping( 2305 cache_base, cache_size, host_addr, false, false, false, 2306 ) 2307 .map_err(DeviceManagerError::MemoryManager)?; 2308 2309 let region_list = vec![VirtioSharedMemory { 2310 offset: 0, 2311 len: cache_size, 2312 }]; 2313 2314 Some(( 2315 VirtioSharedMemoryList { 2316 host_addr, 2317 mem_slot, 2318 addr: GuestAddress(cache_base), 2319 len: cache_size as GuestUsize, 2320 region_list, 2321 }, 2322 mmap_region, 2323 )) 2324 } else { 2325 None 2326 }; 2327 2328 let virtio_fs_device = Arc::new(Mutex::new( 2329 virtio_devices::vhost_user::Fs::new( 2330 id.clone(), 2331 fs_socket, 2332 &fs_cfg.tag, 2333 fs_cfg.num_queues, 2334 fs_cfg.queue_size, 2335 cache, 2336 self.seccomp_action.clone(), 2337 self.restoring, 2338 self.exit_evt 2339 .try_clone() 2340 .map_err(DeviceManagerError::EventFd)?, 2341 ) 2342 .map_err(DeviceManagerError::CreateVirtioFs)?, 2343 )); 2344 2345 // Update the device tree with the migratable device. 2346 node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>); 2347 self.device_tree.lock().unwrap().insert(id.clone(), node); 2348 2349 Ok((Arc::clone(&virtio_fs_device) as VirtioDeviceArc, false, id)) 2350 } else { 2351 Err(DeviceManagerError::NoVirtioFsSock) 2352 } 2353 } 2354 2355 fn make_virtio_fs_devices( 2356 &mut self, 2357 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 2358 let mut devices = Vec::new(); 2359 2360 let mut fs_devices = self.config.lock().unwrap().fs.clone(); 2361 if let Some(fs_list_cfg) = &mut fs_devices { 2362 for fs_cfg in fs_list_cfg.iter_mut() { 2363 devices.push(self.make_virtio_fs_device(fs_cfg)?); 2364 } 2365 } 2366 self.config.lock().unwrap().fs = fs_devices; 2367 2368 Ok(devices) 2369 } 2370 2371 fn make_virtio_pmem_device( 2372 &mut self, 2373 pmem_cfg: &mut PmemConfig, 2374 ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> { 2375 let id = if let Some(id) = &pmem_cfg.id { 2376 id.clone() 2377 } else { 2378 let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?; 2379 pmem_cfg.id = Some(id.clone()); 2380 id 2381 }; 2382 2383 info!("Creating virtio-pmem device: {:?}", pmem_cfg); 2384 2385 let mut node = device_node!(id); 2386 2387 // Look for the id in the device tree. If it can be found, that means 2388 // the device is being restored, otherwise it's created from scratch. 
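        // On the restore path, the MMIO range recorded in the snapshot is
        // reserved again at the very same guest address further down, so the
        // region the guest already knows about does not move.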
2389 let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) { 2390 info!("Restoring virtio-pmem {} resources", id); 2391 2392 let mut region_range: Option<(u64, u64)> = None; 2393 for resource in node.resources.iter() { 2394 match resource { 2395 Resource::MmioAddressRange { base, size } => { 2396 if region_range.is_some() { 2397 return Err(DeviceManagerError::ResourceAlreadyExists); 2398 } 2399 2400 region_range = Some((*base, *size)); 2401 } 2402 _ => { 2403 error!("Unexpected resource {:?} for {}", resource, id); 2404 } 2405 } 2406 } 2407 2408 if region_range.is_none() { 2409 return Err(DeviceManagerError::MissingVirtioPmemResources); 2410 } 2411 2412 region_range 2413 } else { 2414 None 2415 }; 2416 2417 let (custom_flags, set_len) = if pmem_cfg.file.is_dir() { 2418 if pmem_cfg.size.is_none() { 2419 return Err(DeviceManagerError::PmemWithDirectorySizeMissing); 2420 } 2421 (O_TMPFILE, true) 2422 } else { 2423 (0, false) 2424 }; 2425 2426 let mut file = OpenOptions::new() 2427 .read(true) 2428 .write(!pmem_cfg.discard_writes) 2429 .custom_flags(custom_flags) 2430 .open(&pmem_cfg.file) 2431 .map_err(DeviceManagerError::PmemFileOpen)?; 2432 2433 let size = if let Some(size) = pmem_cfg.size { 2434 if set_len { 2435 file.set_len(size) 2436 .map_err(DeviceManagerError::PmemFileSetLen)?; 2437 } 2438 size 2439 } else { 2440 file.seek(SeekFrom::End(0)) 2441 .map_err(DeviceManagerError::PmemFileSetLen)? 2442 }; 2443 2444 if size % 0x20_0000 != 0 { 2445 return Err(DeviceManagerError::PmemSizeNotAligned); 2446 } 2447 2448 let (region_base, region_size) = if let Some((base, size)) = region_range { 2449 // The memory needs to be 2MiB aligned in order to support 2450 // hugepages. 2451 self.address_manager 2452 .allocator 2453 .lock() 2454 .unwrap() 2455 .allocate_mmio_addresses( 2456 Some(GuestAddress(base)), 2457 size as GuestUsize, 2458 Some(0x0020_0000), 2459 ) 2460 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2461 2462 (base, size) 2463 } else { 2464 // The memory needs to be 2MiB aligned in order to support 2465 // hugepages. 
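            // (The Some(0x0020_0000) alignment passed to the allocator below
            // is that 2 MiB requirement expressed in bytes.)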
2466 let base = self 2467 .address_manager 2468 .allocator 2469 .lock() 2470 .unwrap() 2471 .allocate_mmio_addresses(None, size as GuestUsize, Some(0x0020_0000)) 2472 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2473 2474 (base.raw_value(), size) 2475 }; 2476 2477 let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?; 2478 let mmap_region = MmapRegion::build( 2479 Some(FileOffset::new(cloned_file, 0)), 2480 region_size as usize, 2481 PROT_READ | PROT_WRITE, 2482 MAP_NORESERVE 2483 | if pmem_cfg.discard_writes { 2484 MAP_PRIVATE 2485 } else { 2486 MAP_SHARED 2487 }, 2488 ) 2489 .map_err(DeviceManagerError::NewMmapRegion)?; 2490 let host_addr: u64 = mmap_region.as_ptr() as u64; 2491 2492 let mem_slot = self 2493 .memory_manager 2494 .lock() 2495 .unwrap() 2496 .create_userspace_mapping( 2497 region_base, 2498 region_size, 2499 host_addr, 2500 pmem_cfg.mergeable, 2501 false, 2502 false, 2503 ) 2504 .map_err(DeviceManagerError::MemoryManager)?; 2505 2506 let mapping = virtio_devices::UserspaceMapping { 2507 host_addr, 2508 mem_slot, 2509 addr: GuestAddress(region_base), 2510 len: region_size, 2511 mergeable: pmem_cfg.mergeable, 2512 }; 2513 2514 let virtio_pmem_device = Arc::new(Mutex::new( 2515 virtio_devices::Pmem::new( 2516 id.clone(), 2517 file, 2518 GuestAddress(region_base), 2519 mapping, 2520 mmap_region, 2521 self.force_iommu | pmem_cfg.iommu, 2522 self.seccomp_action.clone(), 2523 self.exit_evt 2524 .try_clone() 2525 .map_err(DeviceManagerError::EventFd)?, 2526 ) 2527 .map_err(DeviceManagerError::CreateVirtioPmem)?, 2528 )); 2529 2530 // Update the device tree with correct resource information and with 2531 // the migratable device. 2532 node.resources.push(Resource::MmioAddressRange { 2533 base: region_base, 2534 size: region_size, 2535 }); 2536 node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>); 2537 self.device_tree.lock().unwrap().insert(id.clone(), node); 2538 2539 Ok(( 2540 Arc::clone(&virtio_pmem_device) as VirtioDeviceArc, 2541 pmem_cfg.iommu, 2542 id, 2543 )) 2544 } 2545 2546 fn make_virtio_pmem_devices( 2547 &mut self, 2548 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 2549 let mut devices = Vec::new(); 2550 // Add virtio-pmem if required 2551 let mut pmem_devices = self.config.lock().unwrap().pmem.clone(); 2552 if let Some(pmem_list_cfg) = &mut pmem_devices { 2553 for pmem_cfg in pmem_list_cfg.iter_mut() { 2554 devices.push(self.make_virtio_pmem_device(pmem_cfg)?); 2555 } 2556 } 2557 self.config.lock().unwrap().pmem = pmem_devices; 2558 2559 Ok(devices) 2560 } 2561 2562 fn make_virtio_vsock_device( 2563 &mut self, 2564 vsock_cfg: &mut VsockConfig, 2565 ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> { 2566 let id = if let Some(id) = &vsock_cfg.id { 2567 id.clone() 2568 } else { 2569 let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?; 2570 vsock_cfg.id = Some(id.clone()); 2571 id 2572 }; 2573 2574 info!("Creating virtio-vsock device: {:?}", vsock_cfg); 2575 2576 let socket_path = vsock_cfg 2577 .socket 2578 .to_str() 2579 .ok_or(DeviceManagerError::CreateVsockConvertPath)?; 2580 let backend = 2581 virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string()) 2582 .map_err(DeviceManagerError::CreateVsockBackend)?; 2583 2584 let vsock_device = Arc::new(Mutex::new( 2585 virtio_devices::Vsock::new( 2586 id.clone(), 2587 vsock_cfg.cid, 2588 vsock_cfg.socket.clone(), 2589 backend, 2590 self.force_iommu | vsock_cfg.iommu, 2591 self.seccomp_action.clone(), 2592 
self.exit_evt 2593 .try_clone() 2594 .map_err(DeviceManagerError::EventFd)?, 2595 ) 2596 .map_err(DeviceManagerError::CreateVirtioVsock)?, 2597 )); 2598 2599 // Fill the device tree with a new node. In case of restore, we 2600 // know there is nothing to do, so we can simply override the 2601 // existing entry. 2602 self.device_tree 2603 .lock() 2604 .unwrap() 2605 .insert(id.clone(), device_node!(id, vsock_device)); 2606 2607 Ok(( 2608 Arc::clone(&vsock_device) as VirtioDeviceArc, 2609 vsock_cfg.iommu, 2610 id, 2611 )) 2612 } 2613 2614 fn make_virtio_vsock_devices( 2615 &mut self, 2616 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 2617 let mut devices = Vec::new(); 2618 2619 let mut vsock = self.config.lock().unwrap().vsock.clone(); 2620 if let Some(ref mut vsock_cfg) = &mut vsock { 2621 devices.push(self.make_virtio_vsock_device(vsock_cfg)?); 2622 } 2623 self.config.lock().unwrap().vsock = vsock; 2624 2625 Ok(devices) 2626 } 2627 2628 fn make_virtio_mem_devices( 2629 &mut self, 2630 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 2631 let mut devices = Vec::new(); 2632 2633 let mm = self.memory_manager.clone(); 2634 let mm = mm.lock().unwrap(); 2635 for (_memory_zone_id, memory_zone) in mm.memory_zones().iter() { 2636 if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() { 2637 let id = self.next_device_name(MEM_DEVICE_NAME_PREFIX)?; 2638 info!("Creating virtio-mem device: id = {}", id); 2639 2640 #[cfg(all(target_arch = "x86_64", not(feature = "acpi")))] 2641 let node_id: Option<u16> = None; 2642 #[cfg(any(target_arch = "aarch64", feature = "acpi"))] 2643 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, _memory_zone_id) 2644 .map(|i| i as u16); 2645 2646 let virtio_mem_device = Arc::new(Mutex::new( 2647 virtio_devices::Mem::new( 2648 id.clone(), 2649 virtio_mem_zone.region(), 2650 virtio_mem_zone 2651 .resize_handler() 2652 .new_resize_sender() 2653 .map_err(DeviceManagerError::CreateResizeSender)?, 2654 self.seccomp_action.clone(), 2655 node_id, 2656 virtio_mem_zone.hotplugged_size(), 2657 virtio_mem_zone.hugepages(), 2658 self.exit_evt 2659 .try_clone() 2660 .map_err(DeviceManagerError::EventFd)?, 2661 ) 2662 .map_err(DeviceManagerError::CreateVirtioMem)?, 2663 )); 2664 2665 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device)); 2666 2667 devices.push(( 2668 Arc::clone(&virtio_mem_device) as VirtioDeviceArc, 2669 false, 2670 id.clone(), 2671 )); 2672 2673 // Fill the device tree with a new node. In case of restore, we 2674 // know there is nothing to do, so we can simply override the 2675 // existing entry. 
2676 self.device_tree 2677 .lock() 2678 .unwrap() 2679 .insert(id.clone(), device_node!(id, virtio_mem_device)); 2680 } 2681 } 2682 2683 Ok(devices) 2684 } 2685 2686 fn make_virtio_balloon_devices( 2687 &mut self, 2688 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 2689 let mut devices = Vec::new(); 2690 2691 if let Some(balloon_config) = &self.config.lock().unwrap().balloon { 2692 let id = String::from(BALLOON_DEVICE_NAME); 2693 info!("Creating virtio-balloon device: id = {}", id); 2694 2695 let virtio_balloon_device = Arc::new(Mutex::new( 2696 virtio_devices::Balloon::new( 2697 id.clone(), 2698 balloon_config.size, 2699 balloon_config.deflate_on_oom, 2700 self.seccomp_action.clone(), 2701 self.exit_evt 2702 .try_clone() 2703 .map_err(DeviceManagerError::EventFd)?, 2704 ) 2705 .map_err(DeviceManagerError::CreateVirtioBalloon)?, 2706 )); 2707 2708 self.balloon = Some(virtio_balloon_device.clone()); 2709 2710 devices.push(( 2711 Arc::clone(&virtio_balloon_device) as VirtioDeviceArc, 2712 false, 2713 id.clone(), 2714 )); 2715 2716 self.device_tree 2717 .lock() 2718 .unwrap() 2719 .insert(id.clone(), device_node!(id, virtio_balloon_device)); 2720 } 2721 2722 Ok(devices) 2723 } 2724 2725 fn make_virtio_watchdog_devices( 2726 &mut self, 2727 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 2728 let mut devices = Vec::new(); 2729 2730 if !self.config.lock().unwrap().watchdog { 2731 return Ok(devices); 2732 } 2733 2734 let id = String::from(WATCHDOG_DEVICE_NAME); 2735 info!("Creating virtio-watchdog device: id = {}", id); 2736 2737 let virtio_watchdog_device = Arc::new(Mutex::new( 2738 virtio_devices::Watchdog::new( 2739 id.clone(), 2740 self.reset_evt.try_clone().unwrap(), 2741 self.seccomp_action.clone(), 2742 self.exit_evt 2743 .try_clone() 2744 .map_err(DeviceManagerError::EventFd)?, 2745 ) 2746 .map_err(DeviceManagerError::CreateVirtioWatchdog)?, 2747 )); 2748 devices.push(( 2749 Arc::clone(&virtio_watchdog_device) as VirtioDeviceArc, 2750 false, 2751 id.clone(), 2752 )); 2753 2754 self.device_tree 2755 .lock() 2756 .unwrap() 2757 .insert(id.clone(), device_node!(id, virtio_watchdog_device)); 2758 2759 Ok(devices) 2760 } 2761 2762 fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> { 2763 let start_id = self.device_id_cnt; 2764 loop { 2765 // Generate the temporary name. 2766 let name = format!("{}{}", prefix, self.device_id_cnt); 2767 // Increment the counter. 2768 self.device_id_cnt += Wrapping(1); 2769 // Check if the name is already in use. 2770 if !self.device_tree.lock().unwrap().contains_key(&name) { 2771 return Ok(name); 2772 } 2773 2774 if self.device_id_cnt == start_id { 2775 // We went through a full loop and there's nothing else we can 2776 // do. 2777 break; 2778 } 2779 } 2780 Err(DeviceManagerError::NoAvailableDeviceName) 2781 } 2782 2783 fn add_passthrough_device( 2784 &mut self, 2785 pci: &mut PciBus, 2786 device_cfg: &mut DeviceConfig, 2787 ) -> DeviceManagerResult<(u32, String)> { 2788 // If the passthrough device has not been created yet, it is created 2789 // here and stored in the DeviceManager structure for future needs. 
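        // The "passthrough device" is the hypervisor-level device file
        // descriptor (a DeviceFd on both KVM and MSHV); it is created only
        // once and then reused by every VFIO container built afterwards.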
        if self.passthrough_device.is_none() {
            self.passthrough_device = Some(
                self.address_manager
                    .vm
                    .create_passthrough_device()
                    .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
            );
        }

        self.add_vfio_device(pci, device_cfg)
    }

    fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
        let passthrough_device = self
            .passthrough_device
            .as_ref()
            .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

        // Safe because we know the RawFd is valid.
        //
        // This dup() is mandatory to be able to give full ownership of the
        // file descriptor to the DeviceFd::from_raw_fd() function later in
        // the code.
        //
        // This is particularly needed so that VfioContainer will still have
        // a valid file descriptor even if DeviceManager, and therefore the
        // passthrough_device, are dropped. In that case, the original file
        // descriptor would be closed, but Linux would still keep the
        // duplicated one open through DeviceFd, preventing the unexpected
        // behavior where the VfioContainer would try to use a closed file
        // descriptor.
        let dup_device_fd = unsafe { libc::dup(passthrough_device.as_raw_fd()) };

        // SAFETY: the raw fd conversion here is safe because:
        // 1. When running on KVM or MSHV, passthrough_device wraps around DeviceFd.
        // 2. The conversion here extracts the raw fd and then turns the raw fd into a DeviceFd
        //    of the same (correct) type.
        Ok(Arc::new(
            VfioContainer::new(Arc::new(unsafe { DeviceFd::from_raw_fd(dup_device_fd) }))
                .map_err(DeviceManagerError::VfioCreate)?,
        ))
    }

    fn add_vfio_device(
        &mut self,
        pci: &mut PciBus,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(u32, String)> {
        // We need to shift the device id since the first 3 bits
        // are dedicated to the PCI function, and we know we don't
        // do multifunction. Also, because we only support one PCI
        // bus (bus 0), we don't need to add anything to the
        // global device ID.
        let pci_device_bdf = pci
            .next_device_id()
            .map_err(DeviceManagerError::NextPciDeviceId)?
            << 3;

        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't put all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device, simply because the map/unmap operations happen at the
        // VFIO container level.
2857 let vfio_container = if device_cfg.iommu { 2858 let vfio_container = self.create_vfio_container()?; 2859 2860 let vfio_mapping = Arc::new(VfioDmaMapping::new( 2861 Arc::clone(&vfio_container), 2862 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 2863 )); 2864 2865 if let Some(iommu) = &self.iommu_device { 2866 iommu 2867 .lock() 2868 .unwrap() 2869 .add_external_mapping(pci_device_bdf, vfio_mapping); 2870 } else { 2871 return Err(DeviceManagerError::MissingVirtualIommu); 2872 } 2873 2874 vfio_container 2875 } else if let Some(vfio_container) = &self.vfio_container { 2876 Arc::clone(vfio_container) 2877 } else { 2878 let vfio_container = self.create_vfio_container()?; 2879 needs_dma_mapping = true; 2880 self.vfio_container = Some(Arc::clone(&vfio_container)); 2881 2882 vfio_container 2883 }; 2884 2885 let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container)) 2886 .map_err(DeviceManagerError::VfioCreate)?; 2887 2888 if needs_dma_mapping { 2889 // Register DMA mapping in IOMMU. 2890 // Do not register virtio-mem regions, as they are handled directly by 2891 // virtio-mem device itself. 2892 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 2893 for region in zone.regions() { 2894 vfio_container 2895 .vfio_dma_map( 2896 region.start_addr().raw_value(), 2897 region.len() as u64, 2898 region.as_ptr() as u64, 2899 ) 2900 .map_err(DeviceManagerError::VfioDmaMap)?; 2901 } 2902 } 2903 2904 let vfio_mapping = Arc::new(VfioDmaMapping::new( 2905 Arc::clone(&vfio_container), 2906 Arc::new(self.memory_manager.lock().unwrap().guest_memory()), 2907 )); 2908 2909 for virtio_mem_device in self.virtio_mem_devices.iter() { 2910 virtio_mem_device 2911 .lock() 2912 .unwrap() 2913 .add_dma_mapping_handler(vfio_mapping.clone()) 2914 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 2915 } 2916 } 2917 2918 let legacy_interrupt_group = if let Some(legacy_interrupt_manager) = 2919 &self.legacy_interrupt_manager 2920 { 2921 Some( 2922 legacy_interrupt_manager 2923 .create_group(LegacyIrqGroupConfig { 2924 irq: self.pci_irq_slots[(pci_device_bdf >> 3) as usize] as InterruptIndex, 2925 }) 2926 .map_err(DeviceManagerError::CreateInterruptGroup)?, 2927 ) 2928 } else { 2929 None 2930 }; 2931 2932 let mut vfio_pci_device = VfioPciDevice::new( 2933 &self.address_manager.vm, 2934 vfio_device, 2935 vfio_container, 2936 &self.msi_interrupt_manager, 2937 legacy_interrupt_group, 2938 device_cfg.iommu, 2939 ) 2940 .map_err(DeviceManagerError::VfioPciCreate)?; 2941 2942 let vfio_name = if let Some(id) = &device_cfg.id { 2943 if self.device_tree.lock().unwrap().contains_key(id) { 2944 return Err(DeviceManagerError::DeviceIdAlreadyInUse); 2945 } 2946 2947 id.clone() 2948 } else { 2949 let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?; 2950 device_cfg.id = Some(id.clone()); 2951 id 2952 }; 2953 2954 vfio_pci_device 2955 .map_mmio_regions(&self.address_manager.vm, || { 2956 self.memory_manager.lock().unwrap().allocate_memory_slot() 2957 }) 2958 .map_err(DeviceManagerError::VfioMapRegion)?; 2959 2960 let mut node = device_node!(vfio_name); 2961 2962 for region in vfio_pci_device.mmio_regions() { 2963 node.resources.push(Resource::MmioAddressRange { 2964 base: region.start.0, 2965 size: region.length as u64, 2966 }); 2967 } 2968 2969 let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device)); 2970 2971 self.add_pci_device( 2972 pci, 2973 vfio_pci_device.clone(), 2974 vfio_pci_device.clone(), 2975 pci_device_bdf, 2976 )?; 2977 2978 node.pci_bdf = 
Some(pci_device_bdf); 2979 node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device)); 2980 2981 self.device_tree 2982 .lock() 2983 .unwrap() 2984 .insert(vfio_name.clone(), node); 2985 2986 Ok((pci_device_bdf, vfio_name)) 2987 } 2988 2989 fn add_pci_device( 2990 &mut self, 2991 pci_bus: &mut PciBus, 2992 bus_device: Arc<Mutex<dyn BusDevice>>, 2993 pci_device: Arc<Mutex<dyn PciDevice>>, 2994 bdf: u32, 2995 ) -> DeviceManagerResult<Vec<(GuestAddress, GuestUsize, PciBarRegionType)>> { 2996 let bars = pci_device 2997 .lock() 2998 .unwrap() 2999 .allocate_bars(&mut self.address_manager.allocator.lock().unwrap()) 3000 .map_err(DeviceManagerError::AllocateBars)?; 3001 3002 pci_bus 3003 .add_device(bdf, pci_device) 3004 .map_err(DeviceManagerError::AddPciDevice)?; 3005 3006 self.bus_devices.push(Arc::clone(&bus_device)); 3007 3008 pci_bus 3009 .register_mapping( 3010 bus_device, 3011 #[cfg(target_arch = "x86_64")] 3012 self.address_manager.io_bus.as_ref(), 3013 self.address_manager.mmio_bus.as_ref(), 3014 bars.clone(), 3015 ) 3016 .map_err(DeviceManagerError::AddPciDevice)?; 3017 3018 Ok(bars) 3019 } 3020 3021 fn add_vfio_devices(&mut self, pci: &mut PciBus) -> DeviceManagerResult<Vec<u32>> { 3022 let mut iommu_attached_device_ids = Vec::new(); 3023 let mut devices = self.config.lock().unwrap().devices.clone(); 3024 3025 if let Some(device_list_cfg) = &mut devices { 3026 for device_cfg in device_list_cfg.iter_mut() { 3027 let (device_id, _) = self.add_passthrough_device(pci, device_cfg)?; 3028 if device_cfg.iommu && self.iommu_device.is_some() { 3029 iommu_attached_device_ids.push(device_id); 3030 } 3031 } 3032 } 3033 3034 // Update the list of devices 3035 self.config.lock().unwrap().devices = devices; 3036 3037 Ok(iommu_attached_device_ids) 3038 } 3039 3040 fn add_vfio_user_device( 3041 &mut self, 3042 pci: &mut PciBus, 3043 device_cfg: &mut UserDeviceConfig, 3044 ) -> DeviceManagerResult<(u32, String)> { 3045 let pci_device_bdf = pci 3046 .next_device_id() 3047 .map_err(DeviceManagerError::NextPciDeviceId)? 
3048 << 3; 3049 3050 let legacy_interrupt_group = if let Some(legacy_interrupt_manager) = 3051 &self.legacy_interrupt_manager 3052 { 3053 Some( 3054 legacy_interrupt_manager 3055 .create_group(LegacyIrqGroupConfig { 3056 irq: self.pci_irq_slots[(pci_device_bdf >> 3) as usize] as InterruptIndex, 3057 }) 3058 .map_err(DeviceManagerError::CreateInterruptGroup)?, 3059 ) 3060 } else { 3061 None 3062 }; 3063 3064 let mut vfio_user_pci_device = VfioUserPciDevice::new( 3065 &self.address_manager.vm, 3066 &device_cfg.socket, 3067 &self.msi_interrupt_manager, 3068 legacy_interrupt_group, 3069 ) 3070 .map_err(DeviceManagerError::VfioUserCreate)?; 3071 3072 vfio_user_pci_device 3073 .map_mmio_regions(&self.address_manager.vm, || { 3074 self.memory_manager.lock().unwrap().allocate_memory_slot() 3075 }) 3076 .map_err(DeviceManagerError::VfioUserMapRegion)?; 3077 3078 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3079 for region in zone.regions() { 3080 vfio_user_pci_device 3081 .dma_map(region) 3082 .map_err(DeviceManagerError::VfioUserDmaMap)?; 3083 } 3084 } 3085 3086 let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device)); 3087 3088 let vfio_user_name = if let Some(id) = &device_cfg.id { 3089 if self.device_tree.lock().unwrap().contains_key(id) { 3090 return Err(DeviceManagerError::DeviceIdAlreadyInUse); 3091 } 3092 3093 id.clone() 3094 } else { 3095 let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?; 3096 device_cfg.id = Some(id.clone()); 3097 id 3098 }; 3099 3100 self.add_pci_device( 3101 pci, 3102 vfio_user_pci_device.clone(), 3103 vfio_user_pci_device.clone(), 3104 pci_device_bdf, 3105 )?; 3106 3107 let mut node = device_node!(vfio_user_name); 3108 3109 node.pci_bdf = Some(pci_device_bdf); 3110 node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device)); 3111 3112 self.device_tree 3113 .lock() 3114 .unwrap() 3115 .insert(vfio_user_name.clone(), node); 3116 3117 Ok((pci_device_bdf, vfio_user_name)) 3118 } 3119 3120 fn add_user_devices(&mut self, pci: &mut PciBus) -> DeviceManagerResult<Vec<u32>> { 3121 let mut user_devices = self.config.lock().unwrap().user_devices.clone(); 3122 3123 if let Some(device_list_cfg) = &mut user_devices { 3124 for device_cfg in device_list_cfg.iter_mut() { 3125 let (_device_id, _id) = self.add_vfio_user_device(pci, device_cfg)?; 3126 } 3127 } 3128 3129 // Update the list of devices 3130 self.config.lock().unwrap().user_devices = user_devices; 3131 3132 Ok(vec![]) 3133 } 3134 3135 fn add_virtio_pci_device( 3136 &mut self, 3137 virtio_device: VirtioDeviceArc, 3138 pci: &mut PciBus, 3139 iommu_mapping: &Option<Arc<IommuMapping>>, 3140 virtio_device_id: String, 3141 ) -> DeviceManagerResult<u32> { 3142 let id = format!("{}-{}", VIRTIO_PCI_DEVICE_NAME_PREFIX, virtio_device_id); 3143 3144 // Add the new virtio-pci node to the device tree. 3145 let mut node = device_node!(id); 3146 node.children = vec![virtio_device_id.clone()]; 3147 3148 // Look for the id in the device tree. If it can be found, that means 3149 // the device is being restored, otherwise it's created from scratch. 
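        // When restoring, both the PCI b/d/f and the configuration BAR address
        // are taken from the saved node so the guest finds the device exactly
        // where it used to be.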
3150 let (pci_device_bdf, config_bar_addr) = 3151 if let Some(node) = self.device_tree.lock().unwrap().get(&id) { 3152 info!("Restoring virtio-pci {} resources", id); 3153 let pci_device_bdf = node 3154 .pci_bdf 3155 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 3156 3157 pci.get_device_id((pci_device_bdf >> 3) as usize) 3158 .map_err(DeviceManagerError::GetPciDeviceId)?; 3159 3160 if node.resources.is_empty() { 3161 return Err(DeviceManagerError::MissingVirtioPciResources); 3162 } 3163 3164 // We know the configuration BAR address is stored on the first 3165 // resource in the list. 3166 let config_bar_addr = match node.resources[0] { 3167 Resource::MmioAddressRange { base, .. } => Some(base), 3168 _ => { 3169 error!("Unexpected resource {:?} for {}", node.resources[0], id); 3170 return Err(DeviceManagerError::MissingVirtioPciResources); 3171 } 3172 }; 3173 3174 (pci_device_bdf, config_bar_addr) 3175 } else { 3176 // We need to shift the device id since the 3 first bits are dedicated 3177 // to the PCI function, and we know we don't do multifunction. 3178 // Also, because we only support one PCI bus, the bus 0, we don't need 3179 // to add anything to the global device ID. 3180 let pci_device_bdf = pci 3181 .next_device_id() 3182 .map_err(DeviceManagerError::NextPciDeviceId)? 3183 << 3; 3184 3185 (pci_device_bdf, None) 3186 }; 3187 3188 // Update the existing virtio node by setting the parent. 3189 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) { 3190 node.parent = Some(id.clone()); 3191 } else { 3192 return Err(DeviceManagerError::MissingNode); 3193 } 3194 3195 // Allows support for one MSI-X vector per queue. It also adds 1 3196 // as we need to take into account the dedicated vector to notify 3197 // about a virtio config change. 3198 let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16; 3199 3200 // Create the callback from the implementation of the DmaRemapping 3201 // trait. The point with the callback is to simplify the code as we 3202 // know about the device ID from this point. 3203 let iommu_mapping_cb: Option<Arc<VirtioIommuRemapping>> = 3204 if let Some(mapping) = iommu_mapping { 3205 let mapping_clone = mapping.clone(); 3206 Some(Arc::new(Box::new(move |addr: u64| { 3207 mapping_clone.translate(pci_device_bdf, addr).map_err(|e| { 3208 std::io::Error::new( 3209 std::io::ErrorKind::Other, 3210 format!( 3211 "failed to translate addr 0x{:x} for device 00:{:02x}.0 {}", 3212 addr, pci_device_bdf, e 3213 ), 3214 ) 3215 }) 3216 }) as VirtioIommuRemapping)) 3217 } else { 3218 None 3219 }; 3220 3221 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3222 let mut virtio_pci_device = VirtioPciDevice::new( 3223 id.clone(), 3224 memory, 3225 virtio_device, 3226 msix_num, 3227 iommu_mapping_cb, 3228 &self.msi_interrupt_manager, 3229 pci_device_bdf, 3230 self.activate_evt 3231 .try_clone() 3232 .map_err(DeviceManagerError::EventFd)?, 3233 ) 3234 .map_err(DeviceManagerError::VirtioDevice)?; 3235 3236 // This is important as this will set the BAR address if it exists, 3237 // which is mandatory on the restore path. 
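        // (Without this, the BAR would be allocated at a fresh address, which
        // would not match the address the restored guest has already
        // programmed.)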
3238 if let Some(addr) = config_bar_addr { 3239 virtio_pci_device.set_config_bar_addr(addr); 3240 } 3241 3242 let virtio_pci_device = Arc::new(Mutex::new(virtio_pci_device)); 3243 let bars = self.add_pci_device( 3244 pci, 3245 virtio_pci_device.clone(), 3246 virtio_pci_device.clone(), 3247 pci_device_bdf, 3248 )?; 3249 3250 let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr(); 3251 for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) { 3252 let io_addr = IoEventAddress::Mmio(addr); 3253 self.address_manager 3254 .vm 3255 .register_ioevent(event, &io_addr, None) 3256 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?; 3257 } 3258 3259 // Update the device tree with correct resource information. 3260 for pci_bar in bars.iter() { 3261 node.resources.push(Resource::MmioAddressRange { 3262 base: pci_bar.0.raw_value(), 3263 size: pci_bar.1 as u64, 3264 }); 3265 } 3266 node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>); 3267 node.pci_bdf = Some(pci_device_bdf); 3268 node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device)); 3269 self.device_tree.lock().unwrap().insert(id, node); 3270 3271 Ok(pci_device_bdf) 3272 } 3273 3274 #[cfg(target_arch = "x86_64")] 3275 pub fn io_bus(&self) -> &Arc<Bus> { 3276 &self.address_manager.io_bus 3277 } 3278 3279 pub fn mmio_bus(&self) -> &Arc<Bus> { 3280 &self.address_manager.mmio_bus 3281 } 3282 3283 pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> { 3284 &self.address_manager.allocator 3285 } 3286 3287 pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> { 3288 self.interrupt_controller 3289 .as_ref() 3290 .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>) 3291 } 3292 3293 pub fn console(&self) -> &Arc<Console> { 3294 &self.console 3295 } 3296 3297 pub fn cmdline_additions(&self) -> &[String] { 3298 self.cmdline_additions.as_slice() 3299 } 3300 3301 pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> { 3302 for (virtio_device, _, _) in self.virtio_devices.iter() { 3303 virtio_device 3304 .lock() 3305 .unwrap() 3306 .add_memory_region(new_region) 3307 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?; 3308 } 3309 3310 // Take care of updating the memory for VFIO PCI devices. 3311 if let Some(vfio_container) = &self.vfio_container { 3312 vfio_container 3313 .vfio_dma_map( 3314 new_region.start_addr().raw_value(), 3315 new_region.len() as u64, 3316 new_region.as_ptr() as u64, 3317 ) 3318 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?; 3319 } 3320 3321 #[allow(clippy::single_match)] 3322 // Take care of updating the memory for vfio-user devices. 3323 { 3324 let device_tree = self.device_tree.lock().unwrap(); 3325 for pci_device_node in device_tree.pci_devices() { 3326 match pci_device_node 3327 .pci_device_handle 3328 .as_ref() 3329 .ok_or(DeviceManagerError::MissingPciDevice)? 
3330 { 3331 PciDeviceHandle::VfioUser(vfio_user_pci_device) => { 3332 vfio_user_pci_device 3333 .lock() 3334 .unwrap() 3335 .dma_map(new_region) 3336 .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?; 3337 } 3338 _ => {} 3339 } 3340 } 3341 } 3342 3343 Ok(()) 3344 } 3345 3346 pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> { 3347 // Find virtio pci devices and activate any pending ones 3348 let device_tree = self.device_tree.lock().unwrap(); 3349 for pci_device_node in device_tree.pci_devices() { 3350 #[allow(irrefutable_let_patterns)] 3351 if let PciDeviceHandle::Virtio(virtio_pci_device) = &pci_device_node 3352 .pci_device_handle 3353 .as_ref() 3354 .ok_or(DeviceManagerError::MissingPciDevice)? 3355 { 3356 virtio_pci_device.lock().unwrap().maybe_activate(); 3357 } 3358 } 3359 Ok(()) 3360 } 3361 3362 pub fn notify_hotplug( 3363 &self, 3364 _notification_type: AcpiNotificationFlags, 3365 ) -> DeviceManagerResult<()> { 3366 #[cfg(feature = "acpi")] 3367 return self 3368 .ged_notification_device 3369 .as_ref() 3370 .unwrap() 3371 .lock() 3372 .unwrap() 3373 .notify(_notification_type) 3374 .map_err(DeviceManagerError::HotPlugNotification); 3375 #[cfg(not(feature = "acpi"))] 3376 return Ok(()); 3377 } 3378 3379 pub fn add_device( 3380 &mut self, 3381 device_cfg: &mut DeviceConfig, 3382 ) -> DeviceManagerResult<PciDeviceInfo> { 3383 let pci = if let Some(pci_bus) = &self.pci_bus { 3384 Arc::clone(pci_bus) 3385 } else { 3386 return Err(DeviceManagerError::NoPciBus); 3387 }; 3388 3389 let (device_id, device_name) = 3390 self.add_passthrough_device(&mut pci.lock().unwrap(), device_cfg)?; 3391 3392 // Update the PCIU bitmap 3393 self.pci_devices_up |= 1 << (device_id >> 3); 3394 3395 Ok(PciDeviceInfo { 3396 id: device_name, 3397 bdf: device_id, 3398 }) 3399 } 3400 3401 pub fn add_user_device( 3402 &mut self, 3403 device_cfg: &mut UserDeviceConfig, 3404 ) -> DeviceManagerResult<PciDeviceInfo> { 3405 let pci = if let Some(pci_bus) = &self.pci_bus { 3406 Arc::clone(pci_bus) 3407 } else { 3408 return Err(DeviceManagerError::NoPciBus); 3409 }; 3410 3411 let (device_id, device_name) = 3412 self.add_vfio_user_device(&mut pci.lock().unwrap(), device_cfg)?; 3413 3414 // Update the PCIU bitmap 3415 self.pci_devices_up |= 1 << (device_id >> 3); 3416 3417 Ok(PciDeviceInfo { 3418 id: device_name, 3419 bdf: device_id, 3420 }) 3421 } 3422 3423 pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> { 3424 // The node can be directly a PCI node in case the 'id' refers to a 3425 // VFIO device or a virtio-pci one. 3426 // In case the 'id' refers to a virtio device, we must find the PCI 3427 // node by looking at the parent. 3428 let device_tree = self.device_tree.lock().unwrap(); 3429 let node = device_tree 3430 .get(&id) 3431 .ok_or(DeviceManagerError::UnknownDeviceId(id))?; 3432 3433 let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() { 3434 node 3435 } else { 3436 let parent = node 3437 .parent 3438 .as_ref() 3439 .ok_or(DeviceManagerError::MissingNode)?; 3440 device_tree 3441 .get(parent) 3442 .ok_or(DeviceManagerError::MissingNode)? 
3443 }; 3444 3445 let pci_device_bdf = pci_device_node 3446 .pci_bdf 3447 .ok_or(DeviceManagerError::MissingPciDevice)?; 3448 let pci_device_handle = pci_device_node 3449 .pci_device_handle 3450 .as_ref() 3451 .ok_or(DeviceManagerError::MissingPciDevice)?; 3452 #[allow(irrefutable_let_patterns)] 3453 if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle { 3454 let device_type = VirtioDeviceType::from( 3455 virtio_pci_device 3456 .lock() 3457 .unwrap() 3458 .virtio_device() 3459 .lock() 3460 .unwrap() 3461 .device_type(), 3462 ); 3463 match device_type { 3464 VirtioDeviceType::Net 3465 | VirtioDeviceType::Block 3466 | VirtioDeviceType::Pmem 3467 | VirtioDeviceType::Fs 3468 | VirtioDeviceType::Vsock => {} 3469 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)), 3470 } 3471 } 3472 3473 // Update the PCID bitmap 3474 self.pci_devices_down |= 1 << (pci_device_bdf >> 3); 3475 3476 Ok(()) 3477 } 3478 3479 pub fn eject_device(&mut self, device_id: u8) -> DeviceManagerResult<()> { 3480 // Retrieve the PCI bus. 3481 let pci = if let Some(pci_bus) = &self.pci_bus { 3482 Arc::clone(pci_bus) 3483 } else { 3484 return Err(DeviceManagerError::NoPciBus); 3485 }; 3486 3487 // Convert the device ID into the corresponding b/d/f. 3488 let pci_device_bdf = (device_id as u32) << 3; 3489 3490 // Give the PCI device ID back to the PCI bus. 3491 pci.lock() 3492 .unwrap() 3493 .put_device_id(device_id as usize) 3494 .map_err(DeviceManagerError::PutPciDeviceId)?; 3495 3496 // Remove the device from the device tree along with its children. 3497 let mut device_tree = self.device_tree.lock().unwrap(); 3498 let pci_device_node = device_tree 3499 .remove_node_by_pci_bdf(pci_device_bdf) 3500 .ok_or(DeviceManagerError::MissingPciDevice)?; 3501 for child in pci_device_node.children.iter() { 3502 device_tree.remove(child); 3503 } 3504 3505 let pci_device_handle = pci_device_node 3506 .pci_device_handle 3507 .ok_or(DeviceManagerError::MissingPciDevice)?; 3508 let (pci_device, bus_device, virtio_device) = match pci_device_handle { 3509 PciDeviceHandle::Vfio(vfio_pci_device) => ( 3510 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>, 3511 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>, 3512 None as Option<VirtioDeviceArc>, 3513 ), 3514 PciDeviceHandle::Virtio(virtio_pci_device) => { 3515 let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr(); 3516 for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) { 3517 let io_addr = IoEventAddress::Mmio(addr); 3518 self.address_manager 3519 .vm 3520 .unregister_ioevent(event, &io_addr) 3521 .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; 3522 } 3523 3524 ( 3525 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>, 3526 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>, 3527 Some(virtio_pci_device.lock().unwrap().virtio_device()), 3528 ) 3529 } 3530 PciDeviceHandle::VfioUser(vfio_user_pci_device) => { 3531 let mut dev = vfio_user_pci_device.lock().unwrap(); 3532 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3533 for region in zone.regions() { 3534 dev.dma_unmap(region) 3535 .map_err(DeviceManagerError::VfioUserDmaUnmap)?; 3536 } 3537 } 3538 3539 ( 3540 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>, 3541 Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>, 3542 None as Option<VirtioDeviceArc>, 3543 ) 3544 } 3545 }; 3546 3547 // Free the allocated BARs 3548 pci_device 3549 .lock() 3550 .unwrap() 3551 .free_bars(&mut 
                self.address_manager.allocator.lock().unwrap())
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        pci.lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shut down and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|(d, _, _)| !Arc::ptr_eq(d, &virtio_device));
        }

        // At this point, the device has been removed from all the lists and
        // buses where it was stored. At the end of this function, once
        // pci_device, bus_device and virtio_device are released, the actual
        // device will be dropped.
        Ok(())
    }

    fn hotplug_virtio_pci_device(
        &mut self,
        device: VirtioDeviceArc,
        iommu_attached: bool,
        id: String,
    ) -> DeviceManagerResult<PciDeviceInfo> {
        if iommu_attached {
            warn!("Placing device behind vIOMMU is not available for hotplugged devices");
        }

        let pci = if let Some(pci_bus) = &self.pci_bus {
            Arc::clone(pci_bus)
        } else {
            return Err(DeviceManagerError::NoPciBus);
        };

        // Add the virtio device to the device manager list. This is important
        // as the list is used to notify virtio devices about memory updates
        // for instance.
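        // Hotplugged devices go through the same add_virtio_pci_device() path
        // as coldplugged ones; the extra step is setting the matching bit in
        // the PCIU bitmap below so the ACPI PCNT method can notify the guest.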
3624 self.virtio_devices 3625 .push((device.clone(), iommu_attached, id.clone())); 3626 3627 let device_id = 3628 self.add_virtio_pci_device(device, &mut pci.lock().unwrap(), &None, id.clone())?; 3629 3630 // Update the PCIU bitmap 3631 self.pci_devices_up |= 1 << (device_id >> 3); 3632 3633 Ok(PciDeviceInfo { id, bdf: device_id }) 3634 } 3635 3636 pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> { 3637 let (device, iommu_attached, id) = self.make_virtio_block_device(disk_cfg)?; 3638 self.hotplug_virtio_pci_device(device, iommu_attached, id) 3639 } 3640 3641 pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> { 3642 let (device, iommu_attached, id) = self.make_virtio_fs_device(fs_cfg)?; 3643 self.hotplug_virtio_pci_device(device, iommu_attached, id) 3644 } 3645 3646 pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> { 3647 let (device, iommu_attached, id) = self.make_virtio_pmem_device(pmem_cfg)?; 3648 self.hotplug_virtio_pci_device(device, iommu_attached, id) 3649 } 3650 3651 pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> { 3652 let (device, iommu_attached, id) = self.make_virtio_net_device(net_cfg)?; 3653 self.hotplug_virtio_pci_device(device, iommu_attached, id) 3654 } 3655 3656 pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> { 3657 let (device, iommu_attached, id) = self.make_virtio_vsock_device(vsock_cfg)?; 3658 self.hotplug_virtio_pci_device(device, iommu_attached, id) 3659 } 3660 3661 pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> { 3662 let mut counters = HashMap::new(); 3663 3664 for (virtio_device, _, id) in &self.virtio_devices { 3665 let virtio_device = virtio_device.lock().unwrap(); 3666 if let Some(device_counters) = virtio_device.counters() { 3667 counters.insert(id.clone(), device_counters.clone()); 3668 } 3669 } 3670 3671 counters 3672 } 3673 3674 pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> { 3675 if let Some(balloon) = &self.balloon { 3676 return balloon 3677 .lock() 3678 .unwrap() 3679 .resize(size) 3680 .map_err(DeviceManagerError::VirtioBalloonResize); 3681 } 3682 3683 warn!("No balloon setup: Can't resize the balloon"); 3684 Err(DeviceManagerError::MissingVirtioBalloon) 3685 } 3686 3687 pub fn balloon_size(&self) -> u64 { 3688 if let Some(balloon) = &self.balloon { 3689 return balloon.lock().unwrap().get_actual(); 3690 } 3691 3692 0 3693 } 3694 3695 pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> { 3696 self.device_tree.clone() 3697 } 3698 3699 pub fn restore_devices( 3700 &mut self, 3701 snapshot: Snapshot, 3702 ) -> std::result::Result<(), MigratableError> { 3703 // Finally, restore all devices associated with the DeviceManager. 3704 // It's important to restore devices in the right order, that's why 3705 // the device tree is the right way to ensure we restore a child before 3706 // its parent node. 
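        // breadth_first_traversal() yields parents before children, so the
        // traversal is reversed below to restore the leaves (children) first.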
        for node in self
            .device_tree
            .lock()
            .unwrap()
            .breadth_first_traversal()
            .rev()
        {
            // Restore the node
            if let Some(migratable) = &node.migratable {
                info!("Restoring {} from DeviceManager", node.id);
                if let Some(snapshot) = snapshot.snapshots.get(&node.id) {
                    migratable.lock().unwrap().pause()?;
                    migratable.lock().unwrap().restore(*snapshot.clone())?;
                } else {
                    return Err(MigratableError::Restore(anyhow!(
                        "Missing device {}",
                        node.id
                    )));
                }
            }
        }

        // The devices have been fully restored, so we can now update the
        // restoring state of the DeviceManager.
        self.restoring = false;

        Ok(())
    }

    #[cfg(feature = "acpi")]
    #[cfg(target_arch = "x86_64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }

    #[cfg(target_arch = "aarch64")]
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        // There are three use cases:
        // 1. Cloud Hypervisor is built without the acpi feature.
        // 2. Cloud Hypervisor is built with the acpi feature, but users
        //    use direct kernel boot with device tree.
        // 3. Cloud Hypervisor is built with the acpi feature, and users
        //    use ACPI+UEFI boot.
        #[cfg(not(feature = "acpi"))]
        // The `return` here triggers a GPIO pin 3 event, which generates
        // a power button event for use case 1.
        return self
            .gpio_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .trigger_key(3)
            .map_err(DeviceManagerError::AArch64PowerButtonNotification);
        #[cfg(feature = "acpi")]
        {
            // Trigger a GPIO pin 3 event to satisfy use case 2.
            self.gpio_device
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .trigger_key(3)
                .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
            // Trigger a GED power button event to satisfy use case 3.
3778 return self 3779 .ged_notification_device 3780 .as_ref() 3781 .unwrap() 3782 .lock() 3783 .unwrap() 3784 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 3785 .map_err(DeviceManagerError::PowerButtonNotification); 3786 } 3787 } 3788 3789 pub fn iommu_attached_devices(&self) -> &Option<(u32, Vec<u32>)> { 3790 &self.iommu_attached_devices 3791 } 3792 } 3793 3794 #[cfg(any(target_arch = "aarch64", feature = "acpi"))] 3795 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> { 3796 for (numa_node_id, numa_node) in numa_nodes.iter() { 3797 if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) { 3798 return Some(*numa_node_id); 3799 } 3800 } 3801 3802 None 3803 } 3804 3805 #[cfg(feature = "acpi")] 3806 struct PciDevSlot { 3807 device_id: u8, 3808 } 3809 3810 #[cfg(feature = "acpi")] 3811 impl Aml for PciDevSlot { 3812 fn to_aml_bytes(&self) -> Vec<u8> { 3813 let sun = self.device_id; 3814 let adr: u32 = (self.device_id as u32) << 16; 3815 aml::Device::new( 3816 format!("S{:03}", self.device_id).as_str().into(), 3817 vec![ 3818 &aml::Name::new("_SUN".into(), &sun), 3819 &aml::Name::new("_ADR".into(), &adr), 3820 &aml::Method::new( 3821 "_EJ0".into(), 3822 1, 3823 true, 3824 vec![&aml::MethodCall::new( 3825 "\\_SB_.PHPR.PCEJ".into(), 3826 vec![&aml::Path::new("_SUN")], 3827 )], 3828 ), 3829 ], 3830 ) 3831 .to_aml_bytes() 3832 } 3833 } 3834 3835 #[cfg(feature = "acpi")] 3836 struct PciDevSlotNotify { 3837 device_id: u8, 3838 } 3839 3840 #[cfg(feature = "acpi")] 3841 impl Aml for PciDevSlotNotify { 3842 fn to_aml_bytes(&self) -> Vec<u8> { 3843 let device_id_mask: u32 = 1 << self.device_id; 3844 let object = aml::Path::new(&format!("S{:03}", self.device_id)); 3845 let mut bytes = aml::And::new(&aml::Local(0), &aml::Arg(0), &device_id_mask).to_aml_bytes(); 3846 bytes.extend_from_slice( 3847 &aml::If::new( 3848 &aml::Equal::new(&aml::Local(0), &device_id_mask), 3849 vec![&aml::Notify::new(&object, &aml::Arg(1))], 3850 ) 3851 .to_aml_bytes(), 3852 ); 3853 bytes 3854 } 3855 } 3856 3857 #[cfg(feature = "acpi")] 3858 struct PciDevSlotMethods {} 3859 3860 #[cfg(feature = "acpi")] 3861 impl Aml for PciDevSlotMethods { 3862 fn to_aml_bytes(&self) -> Vec<u8> { 3863 let mut device_notifies = Vec::new(); 3864 for device_id in 0..32 { 3865 device_notifies.push(PciDevSlotNotify { device_id }); 3866 } 3867 3868 let mut device_notifies_refs: Vec<&dyn aml::Aml> = Vec::new(); 3869 for device_notify in device_notifies.iter() { 3870 device_notifies_refs.push(device_notify); 3871 } 3872 3873 let mut bytes = 3874 aml::Method::new("DVNT".into(), 2, true, device_notifies_refs).to_aml_bytes(); 3875 3876 bytes.extend_from_slice( 3877 &aml::Method::new( 3878 "PCNT".into(), 3879 0, 3880 true, 3881 vec![ 3882 &aml::MethodCall::new( 3883 "DVNT".into(), 3884 vec![&aml::Path::new("\\_SB_.PHPR.PCIU"), &aml::ONE], 3885 ), 3886 &aml::MethodCall::new( 3887 "DVNT".into(), 3888 vec![&aml::Path::new("\\_SB_.PHPR.PCID"), &3usize], 3889 ), 3890 ], 3891 ) 3892 .to_aml_bytes(), 3893 ); 3894 bytes 3895 } 3896 } 3897 3898 #[cfg(feature = "acpi")] 3899 struct PciDsmMethod {} 3900 3901 #[cfg(feature = "acpi")] 3902 impl Aml for PciDsmMethod { 3903 fn to_aml_bytes(&self) -> Vec<u8> { 3904 // Refer to ACPI spec v6.3 Ch 9.1.1 and PCI Firmware spec v3.3 Ch 4.6.1 3905 // _DSM (Device Specific Method), the following is the implementation in ASL. 
#[cfg(feature = "acpi")]
struct PciDsmMethod {}

#[cfg(feature = "acpi")]
impl Aml for PciDsmMethod {
    fn to_aml_bytes(&self) -> Vec<u8> {
        // Refer to ACPI spec v6.3 Ch 9.1.1 and PCI Firmware spec v3.3 Ch 4.6.1.
        // _DSM (Device Specific Method): the following is the implementation in ASL.
        /*
        Method (_DSM, 4, NotSerialized)  // _DSM: Device-Specific Method
        {
            If ((Arg0 == ToUUID ("e5c937d0-3553-4d7a-9117-ea4d19c3434d") /* Device Labeling Interface */))
            {
                If ((Arg2 == Zero))
                {
                    Return (Buffer (One) { 0x21 })
                }
                If ((Arg2 == 0x05))
                {
                    Return (Zero)
                }
            }

            Return (Buffer (One) { 0x00 })
        }
        */
        /*
         * As per ACPI v6.3 Ch 19.6.142, the UUID is required to be in mixed endian:
         * among the fields of a UUID,
         * {d1 (8 digits)} - {d2 (4 digits)} - {d3 (4 digits)} - {d4 (16 digits)},
         * d1-d3 are stored little endian while d4 keeps its big-endian byte order.
         * See https://en.wikipedia.org/wiki/Universally_unique_identifier#Encoding .
         */
        let uuid = Uuid::parse_str("E5C937D0-3553-4D7A-9117-EA4D19C3434D").unwrap();
        let (uuid_d1, uuid_d2, uuid_d3, uuid_d4) = uuid.as_fields();
        let mut uuid_buf = vec![];
        uuid_buf.extend(&uuid_d1.to_le_bytes());
        uuid_buf.extend(&uuid_d2.to_le_bytes());
        uuid_buf.extend(&uuid_d3.to_le_bytes());
        uuid_buf.extend(uuid_d4);
        aml::Method::new(
            "_DSM".into(),
            4,
            false,
            vec![
                &aml::If::new(
                    &aml::Equal::new(&aml::Arg(0), &aml::Buffer::new(uuid_buf)),
                    vec![
                        &aml::If::new(
                            &aml::Equal::new(&aml::Arg(2), &aml::ZERO),
                            vec![&aml::Return::new(&aml::Buffer::new(vec![0x21]))],
                        ),
                        &aml::If::new(
                            &aml::Equal::new(&aml::Arg(2), &0x05u8),
                            vec![&aml::Return::new(&aml::ZERO)],
                        ),
                    ],
                ),
                &aml::Return::new(&aml::Buffer::new(vec![0])),
            ],
        )
        .to_aml_bytes()
    }
}

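// A minimal sketch (not part of the device manager logic above) spelling out the
// mixed-endian layout described in `PciDsmMethod::to_aml_bytes`: d1-d3 are serialized
// little endian while the final eight bytes keep their textual order.
#[cfg(all(test, feature = "acpi"))]
mod pci_dsm_uuid_tests {
    use super::*;

    #[test]
    fn dsm_uuid_is_encoded_mixed_endian() {
        let uuid = Uuid::parse_str("E5C937D0-3553-4D7A-9117-EA4D19C3434D").unwrap();
        let (d1, d2, d3, d4) = uuid.as_fields();
        let mut buf: Vec<u8> = Vec::new();
        buf.extend(&d1.to_le_bytes());
        buf.extend(&d2.to_le_bytes());
        buf.extend(&d3.to_le_bytes());
        buf.extend(d4);
        assert_eq!(
            buf,
            vec![
                0xd0, 0x37, 0xc9, 0xe5, // d1, byte-swapped
                0x53, 0x35, // d2, byte-swapped
                0x7a, 0x4d, // d3, byte-swapped
                0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d, // d4, as written
            ]
        );
    }
}
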
#[cfg(feature = "acpi")]
impl Aml for DeviceManager {
    fn to_aml_bytes(&self) -> Vec<u8> {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        let mut bytes = Vec::new();
        // PCI hotplug controller
        bytes.extend_from_slice(
            &aml::Device::new(
                "_SB_.PHPR".into(),
                vec![
                    &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
                    &aml::Name::new("_STA".into(), &0x0bu8),
                    &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                    &aml::Mutex::new("BLCK".into(), 0),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                            aml::AddressSpaceCachable::NotCacheable,
                            true,
                            self.acpi_address.0 as u64,
                            self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        )]),
                    ),
                    // OpRegion and Fields map MMIO range into individual field values
                    &aml::OpRegion::new(
                        "PCST".into(),
                        aml::OpRegionSpace::SystemMemory,
                        self.acpi_address.0 as usize,
                        DEVICE_MANAGER_ACPI_SIZE,
                    ),
                    &aml::Field::new(
                        "PCST".into(),
                        aml::FieldAccessType::DWord,
                        aml::FieldUpdateRule::WriteAsZeroes,
                        vec![
                            aml::FieldEntry::Named(*b"PCIU", 32),
                            aml::FieldEntry::Named(*b"PCID", 32),
                            aml::FieldEntry::Named(*b"B0EJ", 32),
                        ],
                    ),
                    &aml::Method::new(
                        "PCEJ".into(),
                        1,
                        true,
                        vec![
                            // Take lock defined above
                            &aml::Acquire::new("BLCK".into(), 0xffff),
                            // Write a one-bit mask for the slot number passed in the
                            // first argument to the B0EJ field, requesting the eject.
                            &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                            // Release lock
                            &aml::Release::new("BLCK".into()),
                            // Return 0
                            &aml::Return::new(&aml::ZERO),
                        ],
                    ),
                ],
            )
            .to_aml_bytes(),
        );

        let start_of_device_area = self.memory_manager.lock().unwrap().start_of_device_area().0;
        let end_of_device_area = self.memory_manager.lock().unwrap().end_of_device_area().0;

        let mut pci_dsdt_inner_data: Vec<&dyn aml::Aml> = Vec::new();
        let hid = aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A08"));
        pci_dsdt_inner_data.push(&hid);
        let cid = aml::Name::new("_CID".into(), &aml::EisaName::new("PNP0A03"));
        pci_dsdt_inner_data.push(&cid);
        let adr = aml::Name::new("_ADR".into(), &aml::ZERO);
        pci_dsdt_inner_data.push(&adr);
        let seg = aml::Name::new("_SEG".into(), &aml::ZERO);
        pci_dsdt_inner_data.push(&seg);
        let uid = aml::Name::new("_UID".into(), &aml::ZERO);
        pci_dsdt_inner_data.push(&uid);
        let supp = aml::Name::new("SUPP".into(), &aml::ZERO);
        pci_dsdt_inner_data.push(&supp);

        // Since Cloud Hypervisor supports only one PCI bus, it can be tied
        // to the NUMA node 0. It's up to the user to organize the NUMA nodes
        // so that the PCI bus relates to the expected vCPUs and guest RAM.
        let proximity_domain = 0u32;
        let pxm_return = aml::Return::new(&proximity_domain);
        let pxm = aml::Method::new("_PXM".into(), 0, false, vec![&pxm_return]);
        pci_dsdt_inner_data.push(&pxm);

        let pci_dsm = PciDsmMethod {};
        pci_dsdt_inner_data.push(&pci_dsm);

        let crs = aml::Name::new(
            "_CRS".into(),
            &aml::ResourceTemplate::new(vec![
                &aml::AddressSpace::new_bus_number(0x0u16, 0xffu16),
                #[cfg(target_arch = "x86_64")]
                &aml::Io::new(0xcf8, 0xcf8, 1, 0x8),
                #[cfg(target_arch = "aarch64")]
                &aml::Memory32Fixed::new(
                    true,
                    layout::PCI_MMCONFIG_START.0 as u32,
                    layout::PCI_MMCONFIG_SIZE as u32,
                ),
                &aml::AddressSpace::new_memory(
                    aml::AddressSpaceCachable::NotCacheable,
                    true,
                    layout::MEM_32BIT_DEVICES_START.0 as u32,
                    (layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE - 1) as u32,
                ),
                &aml::AddressSpace::new_memory(
                    aml::AddressSpaceCachable::NotCacheable,
                    true,
                    start_of_device_area,
                    end_of_device_area,
                ),
                #[cfg(target_arch = "x86_64")]
                &aml::AddressSpace::new_io(0u16, 0x0cf7u16),
                #[cfg(target_arch = "x86_64")]
                &aml::AddressSpace::new_io(0x0d00u16, 0xffffu16),
            ]),
        );
        pci_dsdt_inner_data.push(&crs);

        let mut pci_devices = Vec::new();
        for device_id in 0..32 {
            let pci_device = PciDevSlot { device_id };
            pci_devices.push(pci_device);
        }
        for pci_device in pci_devices.iter() {
            pci_dsdt_inner_data.push(pci_device);
        }

        let pci_device_methods = PciDevSlotMethods {};
        pci_dsdt_inner_data.push(&pci_device_methods);

        // Build PCI routing table, listing IRQs assigned to PCI devices.
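        // Each _PRT entry below is a package of (Address, Pin, Source, Source Index);
        // with Source hard-wired to zero, the index is the GSI. The address packs the
        // device number into bits 16-20 and 0xFFFF ("any function") into the low word,
        // so, as a purely illustrative example, slot 2 routed to GSI 6 would become
        // Package () { 0x0002FFFF, 0x00, 0x00, 0x06 }.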
        let prt_package_list: Vec<(u32, u32)> = self
            .pci_irq_slots
            .iter()
            .enumerate()
            .map(|(i, irq)| (((((i as u32) & 0x1fu32) << 16) | 0xffffu32), *irq as u32))
            .collect();
        let prt_package_list: Vec<aml::Package> = prt_package_list
            .iter()
            .map(|(bdf, irq)| aml::Package::new(vec![bdf, &0u8, &0u8, irq]))
            .collect();
        let prt_package_list: Vec<&dyn Aml> = prt_package_list
            .iter()
            .map(|item| item as &dyn Aml)
            .collect();
        let prt = aml::Name::new("_PRT".into(), &aml::Package::new(prt_package_list));
        pci_dsdt_inner_data.push(&prt);

        let pci_dsdt_data =
            aml::Device::new("_SB_.PCI0".into(), pci_dsdt_inner_data).to_aml_bytes();

        let mbrd_dsdt_data = aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                        true,
                        layout::PCI_MMCONFIG_START.0 as u32,
                        layout::PCI_MMCONFIG_SIZE as u32,
                    )]),
                ),
            ],
        )
        .to_aml_bytes();

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If the serial device is turned off, use a placeholder (invalid) IRQ;
                // the COM1 node is not emitted in that case anyway.
                31
            };
        let com1_dsdt_data = aml::Device::new(
            "_SB_.COM1".into(),
            vec![
                &aml::Name::new(
                    "_HID".into(),
                    #[cfg(target_arch = "x86_64")]
                    &aml::EisaName::new("PNP0501"),
                    #[cfg(target_arch = "aarch64")]
                    &"ARMH0011",
                ),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![
                        &aml::Interrupt::new(true, true, false, false, serial_irq),
                        #[cfg(target_arch = "x86_64")]
                        &aml::Io::new(0x3f8, 0x3f8, 0, 0x8),
                        #[cfg(target_arch = "aarch64")]
                        &aml::Memory32Fixed::new(
                            true,
                            arch::layout::LEGACY_SERIAL_MAPPED_IO_START as u32,
                            MMIO_LEN as u32,
                        ),
                    ]),
                ),
            ],
        )
        .to_aml_bytes();

        let s5_sleep_data =
            aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes();

        let power_button_dsdt_data = aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes();

        let ged_data = self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes();

        bytes.extend_from_slice(pci_dsdt_data.as_slice());
        bytes.extend_from_slice(mbrd_dsdt_data.as_slice());
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            bytes.extend_from_slice(com1_dsdt_data.as_slice());
        }
        bytes.extend_from_slice(s5_sleep_data.as_slice());
        bytes.extend_from_slice(power_button_dsdt_data.as_slice());
        bytes.extend_from_slice(ged_data.as_slice());
        bytes
    }
}

impl Pausable for DeviceManager {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            let gic_device = Arc::clone(
                self.get_interrupt_controller()
                    .unwrap()
                    .lock()
                    .unwrap()
                    .get_gic_device()
                    .unwrap(),
            );
            if let Some(gicv3_its) = gic_device
                .lock()
                .unwrap()
                .as_any_concrete_mut()
                .downcast_mut::<KvmGicV3Its>()
            {
                gicv3_its.pause()?;
            } else {
                return Err(MigratableError::Pause(anyhow!(
                    "GicDevice downcast to KvmGicV3Its failed when pausing device manager!"
                )));
            };
        };

        Ok(())
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::new(DEVICE_MANAGER_SNAPSHOT_ID);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let device_snapshot = migratable.lock().unwrap().snapshot()?;
                snapshot.add_snapshot(device_snapshot);
            }
        }

        // Then we store the DeviceManager state.
        snapshot.add_data_section(SnapshotDataSection::new_from_state(
            DEVICE_MANAGER_SNAPSHOT_ID,
            &self.state(),
        )?);

        Ok(snapshot)
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        // Let's first restore the DeviceManager.

        self.set_state(&snapshot.to_state(DEVICE_MANAGER_SNAPSHOT_ID)?);

        // Now that DeviceManager is updated with the right states, it's time
        // to create the devices based on the configuration.
        self.create_devices(None, None, None)
            .map_err(|e| MigratableError::Restore(anyhow!("Could not create devices {:?}", e)))?;

        Ok(())
    }
}

impl Transportable for DeviceManager {}

impl Migratable for DeviceManager {
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}

const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;

const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;

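// A small sanity-check sketch (not required by the code above): the three hotplug
// registers are laid out as consecutive 32-bit fields, matching the PCIU/PCID/B0EJ
// field declarations emitted in the PHPR AML device.
#[cfg(test)]
mod pci_hotplug_register_layout_tests {
    use super::*;

    #[test]
    fn hotplug_registers_are_consecutive_dwords() {
        assert_eq!(PCIU_FIELD_OFFSET, 0);
        assert_eq!(PCID_FIELD_OFFSET, PCIU_FIELD_OFFSET + PCIU_FIELD_SIZE as u64);
        assert_eq!(B0EJ_FIELD_OFFSET, PCID_FIELD_OFFSET + PCID_FIELD_SIZE as u64);
    }
}
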
impl BusDevice for DeviceManager {
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                data.copy_from_slice(&self.pci_devices_up.to_le_bytes());
                // Clear the PCIU bitmap
                self.pci_devices_up = 0;
            }
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                data.copy_from_slice(&self.pci_devices_down.to_le_bytes());
                // Clear the PCID bitmap
                self.pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty bitmap since the eject is always
                // taken care of right away during a write access.
                data.copy_from_slice(&[0, 0, 0, 0]);
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        )
    }

    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
        match offset {
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let device_bitmap = u32::from_le_bytes(data_array);

                for device_id in 0..32 {
                    let mask = 1u32 << device_id;
                    if (device_bitmap & mask) == mask {
                        if let Err(e) = self.eject_device(device_id as u8) {
                            error!("Failed ejecting device {}: {:?}", device_id, e);
                        }
                    }
                }
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        );

        None
    }
}

impl Drop for DeviceManager {
    fn drop(&mut self) {
        for (device, _, _) in self.virtio_devices.drain(..) {
            device.lock().unwrap().shutdown();
        }
    }
}
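
// A minimal, self-contained sketch of the B0EJ decoding performed in
// `BusDevice::write` above: the guest writes a little-endian bitmap and every set
// bit selects a device slot to eject. Values here are purely illustrative.
#[cfg(test)]
mod b0ej_bitmap_tests {
    #[test]
    fn b0ej_bitmap_selects_device_slots() {
        // Bits 0 and 2 set, i.e. slots 0 and 2 requested for ejection.
        let data = [0x05u8, 0x00, 0x00, 0x00];
        let device_bitmap = u32::from_le_bytes(data);
        let slots: Vec<u32> = (0..32u32)
            .filter(|slot| device_bitmap & (1u32 << slot) != 0)
            .collect();
        assert_eq!(slots, vec![0, 2]);
    }
}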