// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
//

use crate::config::{
    ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, VhostMode,
    VmConfig, VsockConfig,
};
use crate::device_tree::{DeviceNode, DeviceTree};
#[cfg(feature = "kvm")]
use crate::interrupt::kvm::KvmMsiInterruptManager as MsiInterruptManager;
#[cfg(feature = "mshv")]
use crate::interrupt::mshv::MshvMsiInterruptManager as MsiInterruptManager;
use crate::interrupt::LegacyUserspaceInterruptManager;
#[cfg(feature = "acpi")]
use crate::memory_manager::MEMORY_MANAGER_ACPI_SIZE;
use crate::memory_manager::{Error as MemoryManagerError, MemoryManager};
#[cfg(feature = "acpi")]
use crate::vm::NumaNodes;
use crate::GuestRegionMmap;
use crate::PciDeviceInfo;
use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
#[cfg(feature = "acpi")]
use acpi_tables::{aml, aml::Aml};
use anyhow::anyhow;
#[cfg(feature = "acpi")]
use arch::layout;
#[cfg(target_arch = "x86_64")]
use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
#[cfg(target_arch = "aarch64")]
use arch::{DeviceType, MmioDeviceInfo};
use block_util::{
    async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
    fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync,
    raw_async::RawFileDisk, raw_sync::RawFileDiskSync, ImageType,
};
#[cfg(target_arch = "aarch64")]
use devices::gic;
#[cfg(target_arch = "x86_64")]
use devices::ioapic;
#[cfg(target_arch = "aarch64")]
use devices::legacy::Pl011;
#[cfg(target_arch = "x86_64")]
use devices::legacy::Serial;
use devices::{
    interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
};
#[cfg(feature = "kvm")]
use hypervisor::kvm_ioctls::*;
#[cfg(feature = "mshv")]
use hypervisor::IoEventAddress;
use libc::{
    isatty, tcgetattr, tcsetattr, termios, ECHO, ICANON, ISIG, MAP_NORESERVE, MAP_PRIVATE,
    MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, TIOCGWINSZ,
};
#[cfg(feature = "kvm")]
use pci::VfioPciDevice;
use pci::{
    DeviceRelocation, PciBarRegionType, PciBus, PciConfigIo, PciConfigMmio, PciDevice, PciRoot,
};
use seccomp::SeccompAction;
use std::collections::HashMap;
use std::convert::TryInto;
use std::fs::{read_link, File, OpenOptions};
use std::io::{self, sink, stdout, Seek, SeekFrom};
use std::mem::zeroed;
use std::num::Wrapping;
use std::os::unix::fs::OpenOptionsExt;
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::path::PathBuf;
use std::result;
use std::sync::{Arc, Barrier, Mutex};
#[cfg(feature = "acpi")]
use uuid::Uuid;
#[cfg(feature = "kvm")]
use vfio_ioctls::{VfioContainer, VfioDevice};
use virtio_devices::transport::VirtioPciDevice;
use virtio_devices::transport::VirtioTransport;
use virtio_devices::vhost_user::VhostUserConfig;
use virtio_devices::{DmaRemapping, IommuMapping};
use virtio_devices::{VirtioSharedMemory, VirtioSharedMemoryList};
use vm_allocator::SystemAllocator;
#[cfg(feature = "kvm")]
use vm_device::dma_mapping::vfio::VfioDmaMapping;
use vm_device::interrupt::{
    InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
};
use vm_device::{Bus, BusDevice, Resource};
use vm_memory::guest_memory::FileOffset;
#[cfg(feature = "kvm")]
use vm_memory::GuestMemoryRegion;
use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
#[cfg(all(target_arch = "x86_64", feature = "cmos"))]
use vm_memory::{GuestAddressSpace, GuestMemory};
use vm_migration::{
    protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
    SnapshotDataSection, Snapshottable, Transportable,
};
use vm_virtio::{VirtioDeviceType, VirtioIommuRemapping};
use vmm_sys_util::eventfd::EventFd;

#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

#[cfg(feature = "kvm")]
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";

#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "_ioapic";

const SERIAL_DEVICE_NAME_PREFIX: &str = "_serial";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME_PREFIX: &str = "_gpio";

const CONSOLE_DEVICE_NAME: &str = "_console";
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const MEM_DEVICE_NAME_PREFIX: &str = "_mem";
const BALLOON_DEVICE_NAME: &str = "_balloon";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const RNG_DEVICE_NAME: &str = "_rng";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "_watchdog";

const IOMMU_DEVICE_NAME: &str = "_iommu";

const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";

/// Errors associated with device manager
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Failed converting Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed parsing disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(vmm_sys_util::errno::Error),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(pci::VfioPciError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed creating interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed creating a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed cloning a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed removing a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed removing a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed removing a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// Incorrect device ID as it is already used by another device.
    DeviceIdAlreadyInUse,

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed updating guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot generate a ResizeSender from the Resize object.
    CreateResizeSender(virtio_devices::mem::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed updating guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(pci::VfioPciError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not a multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pci could not be found.
    MissingVirtioPciResources,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed adding DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed removing DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
}
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

type VirtioDeviceArc = Arc<Mutex<dyn virtio_devices::VirtioDevice>>;

#[cfg(feature = "acpi")]
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

pub fn get_win_size() -> (u16, u16) {
    #[repr(C)]
    #[derive(Default)]
    struct WindowSize {
        rows: u16,
        cols: u16,
        xpixel: u16,
        ypixel: u16,
    }
    let ws: WindowSize = WindowSize::default();

    unsafe {
        libc::ioctl(0, TIOCGWINSZ, &ws);
    }

    (ws.cols, ws.rows)
}

const TIOCSPTLCK: libc::c_int = 0x4004_5431;
const TIOCGTPEER: libc::c_int = 0x5441;

pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
    // Try to use /dev/pts/ptmx first, then fall back to /dev/ptmx.
    // This is done to try and use the devpts filesystem that
    // could be available for use in the process's namespace first.
    // Ideally these are all the same file, but different kernels
    // could have things set up differently.
    // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
    // for further details.
    let main = match OpenOptions::new()
        .read(true)
        .write(true)
        .custom_flags(libc::O_NOCTTY)
        .open("/dev/pts/ptmx")
    {
        Ok(f) => f,
        _ => OpenOptions::new()
            .read(true)
            .write(true)
            .custom_flags(libc::O_NOCTTY)
            .open("/dev/ptmx")?,
    };
    let mut unlock: libc::c_ulong = 0;
    unsafe {
        libc::ioctl(
            main.as_raw_fd(),
            TIOCSPTLCK.try_into().unwrap(),
            &mut unlock,
        )
    };

    let sub_fd = unsafe {
        libc::ioctl(
            main.as_raw_fd(),
            TIOCGTPEER.try_into().unwrap(),
            libc::O_NOCTTY | libc::O_RDWR,
        )
    };
    if sub_fd == -1 {
        return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
    }

    let proc_path = PathBuf::from(format!("/proc/self/fd/{}", sub_fd));
    let path = read_link(proc_path)?;

    Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
}
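// Illustrative usage sketch (comment only, not part of the build): callers keep
// the `main` end as a writer, switch the `sub` end to raw mode and report `path`
// back to the user, roughly:
//
//     let (main, mut sub, path) = create_pty()?;
//     // DeviceManager::set_raw_mode(&mut sub) is applied further below; `main`
//     // then becomes the serial/console writer and `path` is exposed through
//     // the VM configuration so an external terminal can attach to it.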
enum ConsoleInput {
    Serial,
    VirtioConsole,
}
#[derive(Default)]
pub struct Console {
    #[cfg(target_arch = "x86_64")]
    // Serial port on 0x3f8
    serial: Option<Arc<Mutex<Serial>>>,
    #[cfg(target_arch = "aarch64")]
    serial: Option<Arc<Mutex<Pl011>>>,
    virtio_console_input: Option<Arc<virtio_devices::ConsoleInput>>,
    input: Option<ConsoleInput>,
}

impl Console {
    pub fn queue_input_bytes(&self, out: &[u8]) -> vmm_sys_util::errno::Result<()> {
        match self.input {
            Some(ConsoleInput::Serial) => {
                self.queue_input_bytes_serial(out)?;
            }

            Some(ConsoleInput::VirtioConsole) => {
                self.queue_input_bytes_console(out);
            }
            None => {}
        }

        Ok(())
    }

    pub fn queue_input_bytes_serial(&self, out: &[u8]) -> vmm_sys_util::errno::Result<()> {
        if self.serial.is_some() {
            self.serial
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .queue_input_bytes(out)?;
        }
        Ok(())
    }

    pub fn queue_input_bytes_console(&self, out: &[u8]) {
        if self.virtio_console_input.is_some() {
            self.virtio_console_input
                .as_ref()
                .unwrap()
                .queue_input_bytes(out);
        }
    }

    pub fn update_console_size(&self, cols: u16, rows: u16) {
        if self.virtio_console_input.is_some() {
            self.virtio_console_input
                .as_ref()
                .unwrap()
                .update_console_size(cols, rows)
        }
    }

    pub fn input_enabled(&self) -> bool {
        self.input.is_some()
    }
}

struct AddressManager {
    allocator: Arc<Mutex<SystemAllocator>>,
    #[cfg(target_arch = "x86_64")]
    io_bus: Arc<Bus>,
    mmio_bus: Arc<Bus>,
    vm: Arc<dyn hypervisor::Vm>,
    device_tree: Arc<Mutex<DeviceTree>>,
}

impl DeviceRelocation for AddressManager {
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                // Update system allocator
                if region_type == PciBarRegionType::Memory32BitRegion {
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_mmio_hole_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                "failed allocating new 32 bits MMIO range",
                            )
                        })?;
                } else {
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_mmio_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_mmio_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                "failed allocating new 64 bits MMIO range",
                            )
                        })?;
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            // Update the device_tree resources associated with the device
            if let Some(node) = self
                .device_tree
                .lock()
                .unwrap()
                .get_mut(&virtio_pci_dev.id())
            {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    if let Resource::MmioAddressRange { base, .. } = resource {
                        if *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{:x} for device {}",
                            old_base,
                            virtio_pci_dev.id()
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "Couldn't find device {} from device tree",
                        virtio_pci_dev.id()
                    ),
                ));
            }

            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {:?}", e),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {:?}", e),
                            )
                        })?;
                }
            } else {
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {:?}", e),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {:?}", e),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {:?}", e),
                            )
                        })?;
                    }
                }
            }
        }

        pci_dev.move_bar(old_base, new_base)
    }
}
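// Note on `move_bar` above (summary comment, added for readability): relocating
// a BAR boils down to (1) freeing the old range from the SystemAllocator and
// allocating the new one, (2) updating the PIO or MMIO bus mapping, (3) patching
// the matching MmioAddressRange resource in the device tree, and (4) either
// re-registering the ioeventfds at the new address or, for shared-memory BARs,
// recreating the user memory region, before the device itself is told to move.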
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}

#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub sub: File,
    pub path: PathBuf,
}

impl PtyPair {
    fn clone(&self) -> Self {
        PtyPair {
            main: self.main.try_clone().unwrap(),
            sub: self.sub.try_clone().unwrap(),
            path: self.path.clone(),
        }
    }
}

#[derive(Clone)]
pub enum PciDeviceHandle {
    #[cfg(feature = "kvm")]
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
}

pub struct DeviceManager {
    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // console PTY
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // serial PTY
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // Interrupt controller
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (i.e. for virtio-mmio)
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    #[cfg(feature = "acpi")]
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<(VirtioDeviceArc, bool, String)>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    // Keep a reference to the PCI bus
    pci_bus: Option<Arc<Mutex<PciBus>>>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<Arc<dyn hypervisor::Device>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(u32, Vec<u32>)>,

    // Bitmap of PCI devices to hotplug.
    pci_devices_up: u32,

    // Bitmap of PCI devices to hotunplug.
    pci_devices_down: u32,

    // List of allocated IRQs for each PCI slot.
    pci_irq_slots: [u8; 32],

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    #[cfg(feature = "acpi")]
    exit_evt: EventFd,

    reset_evt: EventFd,

    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    #[cfg(feature = "acpi")]
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    #[cfg(feature = "acpi")]
    acpi_address: GuestAddress,

    // List of virtio-mem devices
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,
}
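// Construction sketch (illustrative comment only; `vm`, `config`,
// `memory_manager`, the EventFds and `seccomp_action` are assumed to be created
// by the caller, typically the VM setup path):
//
//     let device_manager = DeviceManager::new(
//         vm.clone(),
//         config.clone(),
//         memory_manager.clone(),
//         &exit_evt,
//         &reset_evt,
//         seccomp_action.clone(),
//         #[cfg(feature = "acpi")]
//         numa_nodes,
//         &activate_evt,
//         false, // force_iommu
//     )?;
//     device_manager
//         .lock()
//         .unwrap()
//         .create_devices(None, None)?;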
impl DeviceManager {
    #[allow(clippy::too_many_arguments)]
    pub fn new(
        vm: Arc<dyn hypervisor::Vm>,
        config: Arc<Mutex<VmConfig>>,
        memory_manager: Arc<Mutex<MemoryManager>>,
        _exit_evt: &EventFd,
        reset_evt: &EventFd,
        seccomp_action: SeccompAction,
        #[cfg(feature = "acpi")] numa_nodes: NumaNodes,
        activate_evt: &EventFd,
        force_iommu: bool,
    ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
        let device_tree = Arc::new(Mutex::new(DeviceTree::new()));

        let address_manager = Arc::new(AddressManager {
            allocator: memory_manager.lock().unwrap().allocator(),
            #[cfg(target_arch = "x86_64")]
            io_bus: Arc::new(Bus::new()),
            mmio_bus: Arc::new(Bus::new()),
            vm: vm.clone(),
            device_tree: Arc::clone(&device_tree),
        });

        // First we create the MSI interrupt manager, the legacy one is created
        // later, after the IOAPIC device creation.
        // The reason we create the MSI one first is because the IOAPIC needs it,
        // and then the legacy interrupt manager needs an IOAPIC. So we're
        // handling a linear dependency chain:
        // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
        let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
            Arc::new(MsiInterruptManager::new(
                Arc::clone(&address_manager.allocator),
                vm,
            ));

        #[cfg(feature = "acpi")]
        let acpi_address = address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;
        let device_manager = DeviceManager {
            address_manager: Arc::clone(&address_manager),
            console: Arc::new(Console::default()),
            interrupt_controller: None,
            cmdline_additions: Vec::new(),
            #[cfg(feature = "acpi")]
            ged_notification_device: None,
            config,
            memory_manager,
            virtio_devices: Vec::new(),
            bus_devices: Vec::new(),
            device_id_cnt: Wrapping(0),
            pci_bus: None,
            msi_interrupt_manager,
            legacy_interrupt_manager: None,
            passthrough_device: None,
            iommu_device: None,
            iommu_attached_devices: None,
            pci_devices_up: 0,
            pci_devices_down: 0,
            pci_irq_slots: [0; 32],
            device_tree,
            #[cfg(feature = "acpi")]
            exit_evt: _exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
            reset_evt: reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
            #[cfg(target_arch = "aarch64")]
            id_to_dev_info: HashMap::new(),
            seccomp_action,
            #[cfg(feature = "acpi")]
            numa_nodes,
            balloon: None,
            activate_evt: activate_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            #[cfg(feature = "acpi")]
            acpi_address,
            serial_pty: None,
            console_pty: None,
            virtio_mem_devices: Vec::new(),
            #[cfg(target_arch = "aarch64")]
            gpio_device: None,
            force_iommu,
        };

        let device_manager = Arc::new(Mutex::new(device_manager));

        #[cfg(feature = "acpi")]
        address_manager
            .mmio_bus
            .insert(
                Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
                acpi_address.0,
                DEVICE_MANAGER_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;

        Ok(device_manager)
    }

    pub fn serial_pty(&self) -> Option<PtyPair> {
        self.serial_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    pub fn console_pty(&self) -> Option<PtyPair> {
        self.console_pty
            .as_ref()
            .map(|pty| pty.lock().unwrap().clone())
    }

    pub fn create_devices(
        &mut self,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
    ) -> DeviceManagerResult<()> {
        let mut virtio_devices: Vec<(VirtioDeviceArc, bool, String)> = Vec::new();

        let interrupt_controller = self.add_interrupt_controller()?;

        // Now we can create the legacy interrupt manager, which needs the freshly
        // formed IOAPIC device.
        let legacy_interrupt_manager: Arc<
            dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
        > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
            &interrupt_controller,
        )));

        #[cfg(feature = "acpi")]
        {
            let memory_manager_acpi_address = self.memory_manager.lock().unwrap().acpi_address;
            self.address_manager
                .mmio_bus
                .insert(
                    Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
                    memory_manager_acpi_address.0,
                    MEMORY_MANAGER_ACPI_SIZE as u64,
                )
                .map_err(DeviceManagerError::BusError)?;
        }

        #[cfg(target_arch = "x86_64")]
        self.add_legacy_devices(
            self.reset_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )?;

        #[cfg(target_arch = "aarch64")]
        self.add_legacy_devices(&legacy_interrupt_manager)?;

        #[cfg(feature = "acpi")]
        {
            self.ged_notification_device = self.add_acpi_devices(
                &legacy_interrupt_manager,
                self.reset_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
            )?;
        }

        self.console = self.add_console_device(
            &legacy_interrupt_manager,
            &mut virtio_devices,
            serial_pty,
            console_pty,
        )?;

        // Reserve some IRQs for PCI devices in case they need to support INTx.
        self.reserve_legacy_interrupts_for_pci_devices()?;

        self.legacy_interrupt_manager = Some(legacy_interrupt_manager);

        virtio_devices.append(&mut self.make_virtio_devices()?);

        self.add_pci_devices(virtio_devices.clone())?;

        self.virtio_devices = virtio_devices;

        Ok(())
    }

    fn reserve_legacy_interrupts_for_pci_devices(&mut self) -> DeviceManagerResult<()> {
        // Reserve 8 IRQs which will be shared across all PCI devices.
        let num_irqs = 8;
        let mut irqs: Vec<u8> = Vec::new();
        for _ in 0..num_irqs {
            irqs.push(
                self.address_manager
                    .allocator
                    .lock()
                    .unwrap()
                    .allocate_irq()
                    .ok_or(DeviceManagerError::AllocateIrq)? as u8,
            );
        }

        // There are 32 devices on the PCI bus, let's assign them an IRQ.
        for i in 0..32 {
            self.pci_irq_slots[i] = irqs[(i % num_irqs) as usize];
        }

        Ok(())
    }
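    // Worked example for the IRQ assignment above (illustrative values): if the
    // allocator hands out IRQs 5..=12, the 32 PCI slots are filled round-robin,
    // so slot 0 gets IRQ 5, slot 7 gets IRQ 12, slot 8 wraps back to IRQ 5, and
    // in general slot i gets irqs[i % 8].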

    fn state(&self) -> DeviceManagerState {
        DeviceManagerState {
            device_tree: self.device_tree.lock().unwrap().clone(),
            device_id_cnt: self.device_id_cnt,
        }
    }

    fn set_state(&mut self, state: &DeviceManagerState) {
        self.device_tree = Arc::new(Mutex::new(state.device_tree.clone()));
        self.device_id_cnt = state.device_id_cnt;
    }

    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }

    #[allow(unused_variables)]
    fn add_pci_devices(
        &mut self,
        virtio_devices: Vec<(VirtioDeviceArc, bool, String)>,
    ) -> DeviceManagerResult<()> {
        let pci_root = PciRoot::new(None);
        let mut pci_bus = PciBus::new(
            pci_root,
            Arc::clone(&self.address_manager) as Arc<dyn DeviceRelocation>,
        );

        let iommu_id = String::from(IOMMU_DEVICE_NAME);

        let (iommu_device, iommu_mapping) = if self.config.lock().unwrap().iommu {
            let (device, mapping) =
                virtio_devices::Iommu::new(iommu_id.clone(), self.seccomp_action.clone())
                    .map_err(DeviceManagerError::CreateVirtioIommu)?;
            let device = Arc::new(Mutex::new(device));
            self.iommu_device = Some(Arc::clone(&device));

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(iommu_id.clone(), device_node!(iommu_id, device));

            (Some(device), Some(mapping))
        } else {
            (None, None)
        };

        let mut iommu_attached_devices = Vec::new();

        for (device, iommu_attached, id) in virtio_devices {
            let mapping: &Option<Arc<IommuMapping>> = if iommu_attached {
                &iommu_mapping
            } else {
                &None
            };

            let dev_id = self.add_virtio_pci_device(device, &mut pci_bus, mapping, id)?;

            if iommu_attached {
                iommu_attached_devices.push(dev_id);
            }
        }

        let mut vfio_iommu_device_ids = self.add_vfio_devices(&mut pci_bus)?;

        iommu_attached_devices.append(&mut vfio_iommu_device_ids);

        if let Some(iommu_device) = iommu_device {
            let dev_id = self.add_virtio_pci_device(iommu_device, &mut pci_bus, &None, iommu_id)?;
            self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
        }

        let pci_bus = Arc::new(Mutex::new(pci_bus));
        let pci_config_io = Arc::new(Mutex::new(PciConfigIo::new(Arc::clone(&pci_bus))));
        self.bus_devices
            .push(Arc::clone(&pci_config_io) as Arc<Mutex<dyn BusDevice>>);
        #[cfg(target_arch = "x86_64")]
        self.address_manager
            .io_bus
            .insert(pci_config_io, 0xcf8, 0x8)
            .map_err(DeviceManagerError::BusError)?;
        let pci_config_mmio = Arc::new(Mutex::new(PciConfigMmio::new(Arc::clone(&pci_bus))));
        self.bus_devices
            .push(Arc::clone(&pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
        self.address_manager
            .mmio_bus
            .insert(
                pci_config_mmio,
                arch::layout::PCI_MMCONFIG_START.0,
                arch::layout::PCI_MMCONFIG_SIZE,
            )
            .map_err(DeviceManagerError::BusError)?;

        self.pci_bus = Some(pci_bus);

        Ok(())
    }

    #[cfg(target_arch = "aarch64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
            gic::Gic::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        // Unlike x86_64, the "interrupt_controller" here for AArch64 is only
        // a `Gic` object that implements the `InterruptController` trait to
        // provide interrupt delivery service. It is not the real GIC device,
        // so we do not need to insert it into the device tree.

        Ok(interrupt_controller)
    }

    #[cfg(target_arch = "aarch64")]
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }

    #[cfg(target_arch = "x86_64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let id = String::from(IOAPIC_DEVICE_NAME);

        // Create IOAPIC
        let interrupt_controller = Arc::new(Mutex::new(
            ioapic::Ioapic::new(
                id.clone(),
                APIC_START,
                Arc::clone(&self.msi_interrupt_manager),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        self.interrupt_controller = Some(interrupt_controller.clone());

        self.address_manager
            .mmio_bus
            .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
            .map_err(DeviceManagerError::BusError)?;

        self.bus_devices
            .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }

    #[cfg(feature = "acpi")]
    fn add_acpi_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        reset_evt: EventFd,
        exit_evt: EventFd,
    ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
        let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
            exit_evt, reset_evt,
        )));

        self.bus_devices
            .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(0x3c0)), 0x8, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(shutdown_device, 0x3c0, 0x4)
                .map_err(DeviceManagerError::BusError)?;
        }

        let ged_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();
        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: ged_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;
        let ged_address = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_mmio_addresses(None, devices::acpi::GED_DEVICE_ACPI_SIZE as u64, None)
            .ok_or(DeviceManagerError::AllocateMmioAddress)?;
        let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
            interrupt_group,
            ged_irq,
            ged_address,
        )));
        self.address_manager
            .mmio_bus
            .insert(
                ged_device.clone(),
                ged_address.0,
                devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
            )
            .map_err(DeviceManagerError::BusError)?;
        self.bus_devices
            .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);

        let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));

        self.bus_devices
            .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);

        #[cfg(target_arch = "x86_64")]
        {
            self.address_manager
                .allocator
                .lock()
                .unwrap()
                .allocate_io_addresses(Some(GuestAddress(0xb008)), 0x4, None)
                .ok_or(DeviceManagerError::AllocateIoPort)?;

            self.address_manager
                .io_bus
                .insert(pm_timer_device, 0xb008, 0x4)
                .map_err(DeviceManagerError::BusError)?;
        }

        Ok(Some(ged_device))
    }

    #[cfg(target_arch = "x86_64")]
    fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
        // Add a shutdown device (i8042)
        let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(reset_evt)));

        self.bus_devices
            .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .io_bus
            .insert(i8042, 0x61, 0x4)
            .map_err(DeviceManagerError::BusError)?;
        #[cfg(feature = "cmos")]
        {
            // Add a CMOS emulated device
            let mem_size = self
                .memory_manager
                .lock()
                .unwrap()
                .guest_memory()
                .memory()
                .last_addr()
                .0
                + 1;
            let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
            let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);

            let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
                mem_below_4g,
                mem_above_4g,
            )));

            self.bus_devices
                .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(cmos, 0x70, 0x2)
                .map_err(DeviceManagerError::BusError)?;
        }
        #[cfg(feature = "fwdebug")]
        {
            let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));

            self.bus_devices
                .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);

            self.address_manager
                .io_bus
                .insert(fwdebug, 0x402, 0x1)
                .map_err(DeviceManagerError::BusError)?;
        }

        Ok(())
    }

    #[cfg(target_arch = "aarch64")]
    fn add_legacy_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
    ) -> DeviceManagerResult<()> {
        // Add an RTC device
        let rtc_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: rtc_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));

        self.bus_devices
            .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = GuestAddress(arch::layout::LEGACY_RTC_MAPPED_IO_START);

        self.address_manager
            .mmio_bus
            .insert(rtc_device, addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Rtc, "rtc".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                irq: rtc_irq,
            },
        );

        // Add a GPIO device
        let id = String::from(GPIO_DEVICE_NAME_PREFIX);
        let gpio_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: gpio_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;
        let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
            id.clone(),
            interrupt_group,
        )));

        self.bus_devices
            .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);

        let addr = GuestAddress(arch::layout::LEGACY_GPIO_MAPPED_IO_START);

        self.address_manager
            .mmio_bus
            .insert(gpio_device.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.gpio_device = Some(gpio_device.clone());

        self.id_to_dev_info.insert(
            (DeviceType::Gpio, "gpio".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                irq: gpio_irq,
            },
        );

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, gpio_device));

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
        // Serial is tied to IRQ #4
        let serial_irq = 4;

        let id = String::from(SERIAL_DEVICE_NAME_PREFIX);

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(Serial::new(
            id.clone(),
            interrupt_group,
            serial_writer,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(serial.clone(), 0x3f8, 0x8)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    #[cfg(target_arch = "aarch64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
        let id = String::from(SERIAL_DEVICE_NAME_PREFIX);

        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
            id.clone(),
            interrupt_group,
            serial_writer,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);

        let addr = GuestAddress(arch::layout::LEGACY_SERIAL_MAPPED_IO_START);

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                irq: serial_irq,
            },
        );

        self.cmdline_additions
            .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }

    fn modify_mode<F: FnOnce(&mut termios)>(
        &self,
        fd: RawFd,
        f: F,
    ) -> vmm_sys_util::errno::Result<()> {
        // Safe because we check the return value of isatty.
        if unsafe { isatty(fd) } != 1 {
            return Ok(());
        }

        // The following pair are safe because termios gets totally overwritten by tcgetattr and we
        // check the return result.
        let mut termios: termios = unsafe { zeroed() };
        let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }
        f(&mut termios);
        // Safe because the syscall will only read the extent of termios and we check the return result.
        let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
        if ret < 0 {
            return vmm_sys_util::errno::errno_result();
        }

        Ok(())
    }

    fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> {
        self.modify_mode(f.as_raw_fd(), |t| t.c_lflag &= !(ICANON | ECHO | ISIG))
    }
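    // Note (added comment): "raw mode" here only clears ICANON, ECHO and ISIG in
    // c_lflag on the PTY sub end, so guest input is delivered byte by byte,
    // without local echo and without turning Ctrl-C into a signal; every other
    // termios setting is left untouched.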

    fn add_console_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        virtio_devices: &mut Vec<(VirtioDeviceArc, bool, String)>,
        serial_pty: Option<PtyPair>,
        console_pty: Option<PtyPair>,
    ) -> DeviceManagerResult<Arc<Console>> {
        let serial_config = self.config.lock().unwrap().serial.clone();
        let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
            ConsoleOutputMode::File => Some(Box::new(
                File::create(serial_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::SerialOutputFileOpen)?,
            )),
            ConsoleOutputMode::Pty => {
                if let Some(pty) = serial_pty {
                    self.config.lock().unwrap().serial.file = Some(pty.path.clone());
                    let writer = pty.main.try_clone().unwrap();
                    self.serial_pty = Some(Arc::new(Mutex::new(pty)));
                    Some(Box::new(writer))
                } else {
                    let (main, mut sub, path) =
                        create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
                    self.set_raw_mode(&mut sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().serial.file = Some(path.clone());
                    let writer = main.try_clone().unwrap();
                    self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
                    Some(Box::new(writer))
                }
            }
            ConsoleOutputMode::Tty => Some(Box::new(stdout())),
            ConsoleOutputMode::Off | ConsoleOutputMode::Null => None,
        };
        let serial = if serial_config.mode != ConsoleOutputMode::Off {
            Some(self.add_serial_device(interrupt_manager, serial_writer)?)
        } else {
            None
        };

        // Create the virtio-console device
        let console_config = self.config.lock().unwrap().console.clone();
        let console_writer: Option<Box<dyn io::Write + Send + Sync>> = match console_config.mode {
            ConsoleOutputMode::File => Some(Box::new(
                File::create(console_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::ConsoleOutputFileOpen)?,
            )),
            ConsoleOutputMode::Pty => {
                if let Some(pty) = console_pty {
                    self.config.lock().unwrap().console.file = Some(pty.path.clone());
                    let writer = pty.main.try_clone().unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(pty)));
                    Some(Box::new(writer))
                } else {
                    let (main, mut sub, path) =
                        create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?;
                    self.set_raw_mode(&mut sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().console.file = Some(path.clone());
                    let writer = main.try_clone().unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
                    Some(Box::new(writer))
                }
            }
            ConsoleOutputMode::Tty => Some(Box::new(stdout())),
            ConsoleOutputMode::Null => Some(Box::new(sink())),
            ConsoleOutputMode::Off => None,
        };
        let (col, row) = get_win_size();
        let virtio_console_input = if let Some(writer) = console_writer {
            let id = String::from(CONSOLE_DEVICE_NAME);

            let (virtio_console_device, virtio_console_input) = virtio_devices::Console::new(
                id.clone(),
                writer,
                col,
                row,
                self.force_iommu | console_config.iommu,
                self.seccomp_action.clone(),
            )
            .map_err(DeviceManagerError::CreateVirtioConsole)?;
            let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
            virtio_devices.push((
                Arc::clone(&virtio_console_device) as VirtioDeviceArc,
                console_config.iommu,
                id.clone(),
            ));

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_console_device));

            Some(virtio_console_input)
        } else {
            None
        };

        let input = if serial_config.mode.input_enabled() {
            Some(ConsoleInput::Serial)
        } else if console_config.mode.input_enabled() {
            Some(ConsoleInput::VirtioConsole)
        } else {
            None
        };

        Ok(Arc::new(Console {
            serial,
            virtio_console_input,
            input,
        }))
    }

    fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
        let mut devices: Vec<(VirtioDeviceArc, bool, String)> = Vec::new();

        // Create "standard" virtio devices (net/block/rng)
        devices.append(&mut self.make_virtio_block_devices()?);
        devices.append(&mut self.make_virtio_net_devices()?);
        devices.append(&mut self.make_virtio_rng_devices()?);

        // Add virtio-fs if required
        devices.append(&mut self.make_virtio_fs_devices()?);

        // Add virtio-pmem if required
        devices.append(&mut self.make_virtio_pmem_devices()?);

        // Add virtio-vsock if required
        devices.append(&mut self.make_virtio_vsock_devices()?);

        devices.append(&mut self.make_virtio_mem_devices()?);

        // Add virtio-balloon if required
        devices.append(&mut self.make_virtio_balloon_devices()?);

        // Add virtio-watchdog device
        devices.append(&mut self.make_virtio_watchdog_devices()?);

        Ok(devices)
    }

    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
    ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> {
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        if disk_cfg.vhost_user {
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let vhost_user_block_device = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(id.clone(), vu_cfg) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, vhost_user_block_device));

            Ok((
                Arc::clone(&vhost_user_block_device) as VirtioDeviceArc,
                false,
                id,
            ))
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if block_io_uring_is_supported() && !disk_cfg.disable_io_uring {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");
                        Box::new(
                            FixedVhdDiskAsync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                        ) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if block_io_uring_is_supported() && !disk_cfg.disable_io_uring {
                        info!("Using asynchronous RAW disk file (io_uring)");
                        Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW disk file");
                    Box::new(QcowDiskSync::new(file, disk_cfg.direct)) as Box<dyn DiskFile>
                }
            };
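            // In short (summary comment): fixed VHD and raw images use the
            // io_uring based asynchronous backends when the host supports the
            // syscalls and io_uring is not disabled for this disk, otherwise
            // they fall back to the synchronous backends; QCOW2 images always
            // use the synchronous backend.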
1942 self.device_tree 1943 .lock() 1944 .unwrap() 1945 .insert(id.clone(), device_node!(id, migratable_device)); 1946 1947 Ok((virtio_device, disk_cfg.iommu, id)) 1948 } 1949 } 1950 1951 fn make_virtio_block_devices( 1952 &mut self, 1953 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 1954 let mut devices = Vec::new(); 1955 1956 let mut block_devices = self.config.lock().unwrap().disks.clone(); 1957 if let Some(disk_list_cfg) = &mut block_devices { 1958 for disk_cfg in disk_list_cfg.iter_mut() { 1959 devices.push(self.make_virtio_block_device(disk_cfg)?); 1960 } 1961 } 1962 self.config.lock().unwrap().disks = block_devices; 1963 1964 Ok(devices) 1965 } 1966 1967 fn make_virtio_net_device( 1968 &mut self, 1969 net_cfg: &mut NetConfig, 1970 ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> { 1971 let id = if let Some(id) = &net_cfg.id { 1972 id.clone() 1973 } else { 1974 let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?; 1975 net_cfg.id = Some(id.clone()); 1976 id 1977 }; 1978 info!("Creating virtio-net device: {:?}", net_cfg); 1979 1980 if net_cfg.vhost_user { 1981 let socket = net_cfg.vhost_socket.as_ref().unwrap().clone(); 1982 let vu_cfg = VhostUserConfig { 1983 socket, 1984 num_queues: net_cfg.num_queues, 1985 queue_size: net_cfg.queue_size, 1986 }; 1987 let server = match net_cfg.vhost_mode { 1988 VhostMode::Client => false, 1989 VhostMode::Server => true, 1990 }; 1991 let vhost_user_net_device = Arc::new(Mutex::new( 1992 match virtio_devices::vhost_user::Net::new( 1993 id.clone(), 1994 net_cfg.mac, 1995 vu_cfg, 1996 server, 1997 self.seccomp_action.clone(), 1998 ) { 1999 Ok(vun_device) => vun_device, 2000 Err(e) => { 2001 return Err(DeviceManagerError::CreateVhostUserNet(e)); 2002 } 2003 }, 2004 )); 2005 2006 // Fill the device tree with a new node. In case of restore, we 2007 // know there is nothing to do, so we can simply override the 2008 // existing entry. 2009 self.device_tree 2010 .lock() 2011 .unwrap() 2012 .insert(id.clone(), device_node!(id, vhost_user_net_device)); 2013 2014 Ok(( 2015 Arc::clone(&vhost_user_net_device) as VirtioDeviceArc, 2016 net_cfg.iommu, 2017 id, 2018 )) 2019 } else { 2020 let virtio_net_device = if let Some(ref tap_if_name) = net_cfg.tap { 2021 Arc::new(Mutex::new( 2022 virtio_devices::Net::new( 2023 id.clone(), 2024 Some(tap_if_name), 2025 None, 2026 None, 2027 Some(net_cfg.mac), 2028 &mut net_cfg.host_mac, 2029 self.force_iommu | net_cfg.iommu, 2030 net_cfg.num_queues, 2031 net_cfg.queue_size, 2032 self.seccomp_action.clone(), 2033 net_cfg.rate_limiter_config, 2034 ) 2035 .map_err(DeviceManagerError::CreateVirtioNet)?, 2036 )) 2037 } else if let Some(fds) = &net_cfg.fds { 2038 Arc::new(Mutex::new( 2039 virtio_devices::Net::from_tap_fds( 2040 id.clone(), 2041 fds, 2042 Some(net_cfg.mac), 2043 self.force_iommu | net_cfg.iommu, 2044 net_cfg.queue_size, 2045 self.seccomp_action.clone(), 2046 net_cfg.rate_limiter_config, 2047 ) 2048 .map_err(DeviceManagerError::CreateVirtioNet)?, 2049 )) 2050 } else { 2051 Arc::new(Mutex::new( 2052 virtio_devices::Net::new( 2053 id.clone(), 2054 None, 2055 Some(net_cfg.ip), 2056 Some(net_cfg.mask), 2057 Some(net_cfg.mac), 2058 &mut net_cfg.host_mac, 2059 self.force_iommu | net_cfg.iommu, 2060 net_cfg.num_queues, 2061 net_cfg.queue_size, 2062 self.seccomp_action.clone(), 2063 net_cfg.rate_limiter_config, 2064 ) 2065 .map_err(DeviceManagerError::CreateVirtioNet)?, 2066 )) 2067 }; 2068 2069 // Fill the device tree with a new node. 
In case of restore, we 2070 // know there is nothing to do, so we can simply override the 2071 // existing entry. 2072 self.device_tree 2073 .lock() 2074 .unwrap() 2075 .insert(id.clone(), device_node!(id, virtio_net_device)); 2076 2077 Ok(( 2078 Arc::clone(&virtio_net_device) as VirtioDeviceArc, 2079 net_cfg.iommu, 2080 id, 2081 )) 2082 } 2083 } 2084 2085 /// Add virto-net and vhost-user-net devices 2086 fn make_virtio_net_devices( 2087 &mut self, 2088 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 2089 let mut devices = Vec::new(); 2090 let mut net_devices = self.config.lock().unwrap().net.clone(); 2091 if let Some(net_list_cfg) = &mut net_devices { 2092 for net_cfg in net_list_cfg.iter_mut() { 2093 devices.push(self.make_virtio_net_device(net_cfg)?); 2094 } 2095 } 2096 self.config.lock().unwrap().net = net_devices; 2097 2098 Ok(devices) 2099 } 2100 2101 fn make_virtio_rng_devices( 2102 &mut self, 2103 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 2104 let mut devices = Vec::new(); 2105 2106 // Add virtio-rng if required 2107 let rng_config = self.config.lock().unwrap().rng.clone(); 2108 if let Some(rng_path) = rng_config.src.to_str() { 2109 info!("Creating virtio-rng device: {:?}", rng_config); 2110 let id = String::from(RNG_DEVICE_NAME); 2111 2112 let virtio_rng_device = Arc::new(Mutex::new( 2113 virtio_devices::Rng::new( 2114 id.clone(), 2115 rng_path, 2116 self.force_iommu | rng_config.iommu, 2117 self.seccomp_action.clone(), 2118 ) 2119 .map_err(DeviceManagerError::CreateVirtioRng)?, 2120 )); 2121 devices.push(( 2122 Arc::clone(&virtio_rng_device) as VirtioDeviceArc, 2123 rng_config.iommu, 2124 id.clone(), 2125 )); 2126 2127 // Fill the device tree with a new node. In case of restore, we 2128 // know there is nothing to do, so we can simply override the 2129 // existing entry. 2130 self.device_tree 2131 .lock() 2132 .unwrap() 2133 .insert(id.clone(), device_node!(id, virtio_rng_device)); 2134 } 2135 2136 Ok(devices) 2137 } 2138 2139 fn make_virtio_fs_device( 2140 &mut self, 2141 fs_cfg: &mut FsConfig, 2142 ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> { 2143 let id = if let Some(id) = &fs_cfg.id { 2144 id.clone() 2145 } else { 2146 let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?; 2147 fs_cfg.id = Some(id.clone()); 2148 id 2149 }; 2150 2151 info!("Creating virtio-fs device: {:?}", fs_cfg); 2152 2153 let mut node = device_node!(id); 2154 2155 // Look for the id in the device tree. If it can be found, that means 2156 // the device is being restored, otherwise it's created from scratch. 2157 let cache_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) { 2158 debug!("Restoring virtio-fs {} resources", id); 2159 2160 let mut cache_range: Option<(u64, u64)> = None; 2161 for resource in node.resources.iter() { 2162 match resource { 2163 Resource::MmioAddressRange { base, size } => { 2164 if cache_range.is_some() { 2165 return Err(DeviceManagerError::ResourceAlreadyExists); 2166 } 2167 2168 cache_range = Some((*base, *size)); 2169 } 2170 _ => { 2171 error!("Unexpected resource {:?} for {}", resource, id); 2172 } 2173 } 2174 } 2175 2176 cache_range 2177 } else { 2178 None 2179 }; 2180 2181 if let Some(fs_socket) = fs_cfg.socket.to_str() { 2182 let cache = if fs_cfg.dax { 2183 let (cache_base, cache_size) = if let Some((base, size)) = cache_range { 2184 // The memory needs to be 2MiB aligned in order to support 2185 // hugepages. 
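// Restore path: re-reserve the exact MMIO window recorded in the
// device tree so the DAX cache reappears at the same guest address.
// 0x0020_0000 is the 2 MiB alignment mentioned above.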
2186 self.address_manager 2187 .allocator 2188 .lock() 2189 .unwrap() 2190 .allocate_mmio_addresses( 2191 Some(GuestAddress(base)), 2192 size as GuestUsize, 2193 Some(0x0020_0000), 2194 ) 2195 .ok_or(DeviceManagerError::FsRangeAllocation)?; 2196 2197 (base, size) 2198 } else { 2199 let size = fs_cfg.cache_size; 2200 // The memory needs to be 2MiB aligned in order to support 2201 // hugepages. 2202 let base = self 2203 .address_manager 2204 .allocator 2205 .lock() 2206 .unwrap() 2207 .allocate_mmio_addresses(None, size as GuestUsize, Some(0x0020_0000)) 2208 .ok_or(DeviceManagerError::FsRangeAllocation)?; 2209 2210 (base.raw_value(), size) 2211 }; 2212 2213 // Update the node with correct resource information. 2214 node.resources.push(Resource::MmioAddressRange { 2215 base: cache_base, 2216 size: cache_size, 2217 }); 2218 2219 let mmap_region = MmapRegion::build( 2220 None, 2221 cache_size as usize, 2222 libc::PROT_NONE, 2223 libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, 2224 ) 2225 .map_err(DeviceManagerError::NewMmapRegion)?; 2226 let host_addr: u64 = mmap_region.as_ptr() as u64; 2227 2228 let mem_slot = self 2229 .memory_manager 2230 .lock() 2231 .unwrap() 2232 .create_userspace_mapping( 2233 cache_base, cache_size, host_addr, false, false, false, 2234 ) 2235 .map_err(DeviceManagerError::MemoryManager)?; 2236 2237 let region_list = vec![VirtioSharedMemory { 2238 offset: 0, 2239 len: cache_size, 2240 }]; 2241 2242 Some(( 2243 VirtioSharedMemoryList { 2244 host_addr, 2245 mem_slot, 2246 addr: GuestAddress(cache_base), 2247 len: cache_size as GuestUsize, 2248 region_list, 2249 }, 2250 mmap_region, 2251 )) 2252 } else { 2253 None 2254 }; 2255 2256 let virtio_fs_device = Arc::new(Mutex::new( 2257 virtio_devices::vhost_user::Fs::new( 2258 id.clone(), 2259 fs_socket, 2260 &fs_cfg.tag, 2261 fs_cfg.num_queues, 2262 fs_cfg.queue_size, 2263 cache, 2264 self.seccomp_action.clone(), 2265 ) 2266 .map_err(DeviceManagerError::CreateVirtioFs)?, 2267 )); 2268 2269 // Update the device tree with the migratable device. 2270 node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>); 2271 self.device_tree.lock().unwrap().insert(id.clone(), node); 2272 2273 Ok((Arc::clone(&virtio_fs_device) as VirtioDeviceArc, false, id)) 2274 } else { 2275 Err(DeviceManagerError::NoVirtioFsSock) 2276 } 2277 } 2278 2279 fn make_virtio_fs_devices( 2280 &mut self, 2281 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 2282 let mut devices = Vec::new(); 2283 2284 let mut fs_devices = self.config.lock().unwrap().fs.clone(); 2285 if let Some(fs_list_cfg) = &mut fs_devices { 2286 for fs_cfg in fs_list_cfg.iter_mut() { 2287 devices.push(self.make_virtio_fs_device(fs_cfg)?); 2288 } 2289 } 2290 self.config.lock().unwrap().fs = fs_devices; 2291 2292 Ok(devices) 2293 } 2294 2295 fn make_virtio_pmem_device( 2296 &mut self, 2297 pmem_cfg: &mut PmemConfig, 2298 ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> { 2299 let id = if let Some(id) = &pmem_cfg.id { 2300 id.clone() 2301 } else { 2302 let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?; 2303 pmem_cfg.id = Some(id.clone()); 2304 id 2305 }; 2306 2307 info!("Creating virtio-pmem device: {:?}", pmem_cfg); 2308 2309 let mut node = device_node!(id); 2310 2311 // Look for the id in the device tree. If it can be found, that means 2312 // the device is being restored, otherwise it's created from scratch. 
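// As for virtio-fs, a restored virtio-pmem device must reuse the MMIO
// range saved in its device tree node so the region stays at the same
// guest address across snapshot/restore.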
2313 let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) { 2314 debug!("Restoring virtio-pmem {} resources", id); 2315 2316 let mut region_range: Option<(u64, u64)> = None; 2317 for resource in node.resources.iter() { 2318 match resource { 2319 Resource::MmioAddressRange { base, size } => { 2320 if region_range.is_some() { 2321 return Err(DeviceManagerError::ResourceAlreadyExists); 2322 } 2323 2324 region_range = Some((*base, *size)); 2325 } 2326 _ => { 2327 error!("Unexpected resource {:?} for {}", resource, id); 2328 } 2329 } 2330 } 2331 2332 if region_range.is_none() { 2333 return Err(DeviceManagerError::MissingVirtioPmemResources); 2334 } 2335 2336 region_range 2337 } else { 2338 None 2339 }; 2340 2341 let (custom_flags, set_len) = if pmem_cfg.file.is_dir() { 2342 if pmem_cfg.size.is_none() { 2343 return Err(DeviceManagerError::PmemWithDirectorySizeMissing); 2344 } 2345 (O_TMPFILE, true) 2346 } else { 2347 (0, false) 2348 }; 2349 2350 let mut file = OpenOptions::new() 2351 .read(true) 2352 .write(!pmem_cfg.discard_writes) 2353 .custom_flags(custom_flags) 2354 .open(&pmem_cfg.file) 2355 .map_err(DeviceManagerError::PmemFileOpen)?; 2356 2357 let size = if let Some(size) = pmem_cfg.size { 2358 if set_len { 2359 file.set_len(size) 2360 .map_err(DeviceManagerError::PmemFileSetLen)?; 2361 } 2362 size 2363 } else { 2364 file.seek(SeekFrom::End(0)) 2365 .map_err(DeviceManagerError::PmemFileSetLen)? 2366 }; 2367 2368 if size % 0x20_0000 != 0 { 2369 return Err(DeviceManagerError::PmemSizeNotAligned); 2370 } 2371 2372 let (region_base, region_size) = if let Some((base, size)) = region_range { 2373 // The memory needs to be 2MiB aligned in order to support 2374 // hugepages. 2375 self.address_manager 2376 .allocator 2377 .lock() 2378 .unwrap() 2379 .allocate_mmio_addresses( 2380 Some(GuestAddress(base)), 2381 size as GuestUsize, 2382 Some(0x0020_0000), 2383 ) 2384 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2385 2386 (base, size) 2387 } else { 2388 // The memory needs to be 2MiB aligned in order to support 2389 // hugepages. 
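// Fresh allocation path: let the allocator pick any suitable base,
// still constrained to the 2 MiB (0x0020_0000) alignment.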
2390 let base = self 2391 .address_manager 2392 .allocator 2393 .lock() 2394 .unwrap() 2395 .allocate_mmio_addresses(None, size as GuestUsize, Some(0x0020_0000)) 2396 .ok_or(DeviceManagerError::PmemRangeAllocation)?; 2397 2398 (base.raw_value(), size) 2399 }; 2400 2401 let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?; 2402 let mmap_region = MmapRegion::build( 2403 Some(FileOffset::new(cloned_file, 0)), 2404 region_size as usize, 2405 PROT_READ | PROT_WRITE, 2406 MAP_NORESERVE 2407 | if pmem_cfg.discard_writes { 2408 MAP_PRIVATE 2409 } else { 2410 MAP_SHARED 2411 }, 2412 ) 2413 .map_err(DeviceManagerError::NewMmapRegion)?; 2414 let host_addr: u64 = mmap_region.as_ptr() as u64; 2415 2416 let mem_slot = self 2417 .memory_manager 2418 .lock() 2419 .unwrap() 2420 .create_userspace_mapping( 2421 region_base, 2422 region_size, 2423 host_addr, 2424 pmem_cfg.mergeable, 2425 false, 2426 false, 2427 ) 2428 .map_err(DeviceManagerError::MemoryManager)?; 2429 2430 let mapping = virtio_devices::UserspaceMapping { 2431 host_addr, 2432 mem_slot, 2433 addr: GuestAddress(region_base), 2434 len: region_size, 2435 mergeable: pmem_cfg.mergeable, 2436 }; 2437 2438 let virtio_pmem_device = Arc::new(Mutex::new( 2439 virtio_devices::Pmem::new( 2440 id.clone(), 2441 file, 2442 GuestAddress(region_base), 2443 mapping, 2444 mmap_region, 2445 self.force_iommu | pmem_cfg.iommu, 2446 self.seccomp_action.clone(), 2447 ) 2448 .map_err(DeviceManagerError::CreateVirtioPmem)?, 2449 )); 2450 2451 // Update the device tree with correct resource information and with 2452 // the migratable device. 2453 node.resources.push(Resource::MmioAddressRange { 2454 base: region_base, 2455 size: region_size, 2456 }); 2457 node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>); 2458 self.device_tree.lock().unwrap().insert(id.clone(), node); 2459 2460 Ok(( 2461 Arc::clone(&virtio_pmem_device) as VirtioDeviceArc, 2462 pmem_cfg.iommu, 2463 id, 2464 )) 2465 } 2466 2467 fn make_virtio_pmem_devices( 2468 &mut self, 2469 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 2470 let mut devices = Vec::new(); 2471 // Add virtio-pmem if required 2472 let mut pmem_devices = self.config.lock().unwrap().pmem.clone(); 2473 if let Some(pmem_list_cfg) = &mut pmem_devices { 2474 for pmem_cfg in pmem_list_cfg.iter_mut() { 2475 devices.push(self.make_virtio_pmem_device(pmem_cfg)?); 2476 } 2477 } 2478 self.config.lock().unwrap().pmem = pmem_devices; 2479 2480 Ok(devices) 2481 } 2482 2483 fn make_virtio_vsock_device( 2484 &mut self, 2485 vsock_cfg: &mut VsockConfig, 2486 ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> { 2487 let id = if let Some(id) = &vsock_cfg.id { 2488 id.clone() 2489 } else { 2490 let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?; 2491 vsock_cfg.id = Some(id.clone()); 2492 id 2493 }; 2494 2495 info!("Creating virtio-vsock device: {:?}", vsock_cfg); 2496 2497 let socket_path = vsock_cfg 2498 .socket 2499 .to_str() 2500 .ok_or(DeviceManagerError::CreateVsockConvertPath)?; 2501 let backend = 2502 virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string()) 2503 .map_err(DeviceManagerError::CreateVsockBackend)?; 2504 2505 let vsock_device = Arc::new(Mutex::new( 2506 virtio_devices::Vsock::new( 2507 id.clone(), 2508 vsock_cfg.cid, 2509 vsock_cfg.socket.clone(), 2510 backend, 2511 self.force_iommu | vsock_cfg.iommu, 2512 self.seccomp_action.clone(), 2513 ) 2514 .map_err(DeviceManagerError::CreateVirtioVsock)?, 2515 )); 2516 2517 // Fill 
the device tree with a new node. In case of restore, we 2518 // know there is nothing to do, so we can simply override the 2519 // existing entry. 2520 self.device_tree 2521 .lock() 2522 .unwrap() 2523 .insert(id.clone(), device_node!(id, vsock_device)); 2524 2525 Ok(( 2526 Arc::clone(&vsock_device) as VirtioDeviceArc, 2527 vsock_cfg.iommu, 2528 id, 2529 )) 2530 } 2531 2532 fn make_virtio_vsock_devices( 2533 &mut self, 2534 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 2535 let mut devices = Vec::new(); 2536 2537 let mut vsock = self.config.lock().unwrap().vsock.clone(); 2538 if let Some(ref mut vsock_cfg) = &mut vsock { 2539 devices.push(self.make_virtio_vsock_device(vsock_cfg)?); 2540 } 2541 self.config.lock().unwrap().vsock = vsock; 2542 2543 Ok(devices) 2544 } 2545 2546 fn make_virtio_mem_devices( 2547 &mut self, 2548 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 2549 let mut devices = Vec::new(); 2550 2551 let mm = self.memory_manager.clone(); 2552 let mm = mm.lock().unwrap(); 2553 for (_memory_zone_id, memory_zone) in mm.memory_zones().iter() { 2554 if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() { 2555 let id = self.next_device_name(MEM_DEVICE_NAME_PREFIX)?; 2556 info!("Creating virtio-mem device: id = {}", id); 2557 #[cfg(not(feature = "acpi"))] 2558 let node_id: Option<u16> = None; 2559 #[cfg(feature = "acpi")] 2560 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, _memory_zone_id) 2561 .map(|i| i as u16); 2562 2563 let virtio_mem_device = Arc::new(Mutex::new( 2564 virtio_devices::Mem::new( 2565 id.clone(), 2566 virtio_mem_zone.region(), 2567 virtio_mem_zone 2568 .resize_handler() 2569 .new_resize_sender() 2570 .map_err(DeviceManagerError::CreateResizeSender)?, 2571 self.seccomp_action.clone(), 2572 node_id, 2573 virtio_mem_zone.hotplugged_size(), 2574 virtio_mem_zone.hugepages(), 2575 ) 2576 .map_err(DeviceManagerError::CreateVirtioMem)?, 2577 )); 2578 2579 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device)); 2580 2581 devices.push(( 2582 Arc::clone(&virtio_mem_device) as VirtioDeviceArc, 2583 false, 2584 id.clone(), 2585 )); 2586 2587 // Fill the device tree with a new node. In case of restore, we 2588 // know there is nothing to do, so we can simply override the 2589 // existing entry. 
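// Note: the device was also pushed onto self.virtio_mem_devices above
// so that VFIO passthrough devices can later register their DMA
// mapping handlers with it (see add_vfio_device()).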
2590 self.device_tree 2591 .lock() 2592 .unwrap() 2593 .insert(id.clone(), device_node!(id, virtio_mem_device)); 2594 } 2595 } 2596 2597 Ok(devices) 2598 } 2599 2600 fn make_virtio_balloon_devices( 2601 &mut self, 2602 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 2603 let mut devices = Vec::new(); 2604 2605 if let Some(balloon_config) = &self.config.lock().unwrap().balloon { 2606 let id = String::from(BALLOON_DEVICE_NAME); 2607 info!("Creating virtio-balloon device: id = {}", id); 2608 2609 let virtio_balloon_device = Arc::new(Mutex::new( 2610 virtio_devices::Balloon::new( 2611 id.clone(), 2612 balloon_config.size, 2613 balloon_config.deflate_on_oom, 2614 self.seccomp_action.clone(), 2615 ) 2616 .map_err(DeviceManagerError::CreateVirtioBalloon)?, 2617 )); 2618 2619 self.balloon = Some(virtio_balloon_device.clone()); 2620 2621 devices.push(( 2622 Arc::clone(&virtio_balloon_device) as VirtioDeviceArc, 2623 false, 2624 id.clone(), 2625 )); 2626 2627 self.device_tree 2628 .lock() 2629 .unwrap() 2630 .insert(id.clone(), device_node!(id, virtio_balloon_device)); 2631 } 2632 2633 Ok(devices) 2634 } 2635 2636 fn make_virtio_watchdog_devices( 2637 &mut self, 2638 ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> { 2639 let mut devices = Vec::new(); 2640 2641 if !self.config.lock().unwrap().watchdog { 2642 return Ok(devices); 2643 } 2644 2645 let id = String::from(WATCHDOG_DEVICE_NAME); 2646 info!("Creating virtio-watchdog device: id = {}", id); 2647 2648 let virtio_watchdog_device = Arc::new(Mutex::new( 2649 virtio_devices::Watchdog::new( 2650 id.clone(), 2651 self.reset_evt.try_clone().unwrap(), 2652 self.seccomp_action.clone(), 2653 ) 2654 .map_err(DeviceManagerError::CreateVirtioWatchdog)?, 2655 )); 2656 devices.push(( 2657 Arc::clone(&virtio_watchdog_device) as VirtioDeviceArc, 2658 false, 2659 id.clone(), 2660 )); 2661 2662 self.device_tree 2663 .lock() 2664 .unwrap() 2665 .insert(id.clone(), device_node!(id, virtio_watchdog_device)); 2666 2667 Ok(devices) 2668 } 2669 2670 fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> { 2671 let start_id = self.device_id_cnt; 2672 loop { 2673 // Generate the temporary name. 2674 let name = format!("{}{}", prefix, self.device_id_cnt); 2675 // Increment the counter. 2676 self.device_id_cnt += Wrapping(1); 2677 // Check if the name is already in use. 2678 if !self.device_tree.lock().unwrap().contains_key(&name) { 2679 return Ok(name); 2680 } 2681 2682 if self.device_id_cnt == start_id { 2683 // We went through a full loop and there's nothing else we can 2684 // do. 2685 break; 2686 } 2687 } 2688 Err(DeviceManagerError::NoAvailableDeviceName) 2689 } 2690 2691 #[cfg_attr(not(feature = "kvm"), allow(unused_variables))] 2692 fn add_passthrough_device( 2693 &mut self, 2694 pci: &mut PciBus, 2695 device_cfg: &mut DeviceConfig, 2696 ) -> DeviceManagerResult<(u32, String)> { 2697 // If the passthrough device has not been created yet, it is created 2698 // here and stored in the DeviceManager structure for future needs. 
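// The passthrough device is created lazily, on the first request, and
// then reused for every passthrough device added afterwards. On KVM it
// wraps the DeviceFd that backs the VfioContainer.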
2699 if self.passthrough_device.is_none() { 2700 self.passthrough_device = Some( 2701 self.address_manager 2702 .vm 2703 .create_passthrough_device() 2704 .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?, 2705 ); 2706 } 2707 2708 #[cfg(feature = "kvm")] 2709 return self.add_vfio_device(pci, device_cfg); 2710 2711 #[cfg(not(feature = "kvm"))] 2712 Err(DeviceManagerError::NoDevicePassthroughSupport) 2713 } 2714 2715 #[cfg(feature = "kvm")] 2716 fn add_vfio_device( 2717 &mut self, 2718 pci: &mut PciBus, 2719 device_cfg: &mut DeviceConfig, 2720 ) -> DeviceManagerResult<(u32, String)> { 2721 let passthrough_device = self 2722 .passthrough_device 2723 .as_ref() 2724 .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?; 2725 2726 // We need to shift the device id since the first 3 bits 2727 // are dedicated to the PCI function, and we know we don't 2728 // do multifunction. Also, because we only support one PCI 2729 // bus (bus 0), we don't need to add anything to the 2730 // global device ID. 2731 let pci_device_bdf = pci 2732 .next_device_id() 2733 .map_err(DeviceManagerError::NextPciDeviceId)? 2734 << 3; 2735 2736 let memory = self.memory_manager.lock().unwrap().guest_memory(); 2737 2738 // Safe because we know the RawFd is valid. 2739 // 2740 // This dup() is mandatory so that full ownership of the 2741 // file descriptor can be given to DeviceFd::from_raw_fd() later in 2742 // the code. 2743 // 2744 // This is particularly needed so that VfioContainer still has 2745 // a valid file descriptor even if DeviceManager, and therefore the 2746 // passthrough_device, is dropped. In that case the original file descriptor 2747 // would be closed, but Linux would keep the duplicated file 2748 // descriptor owned by DeviceFd open, preventing the situation 2749 // where the VfioContainer would try to use a closed file descriptor. 2750 let dup_device_fd = unsafe { libc::dup(passthrough_device.as_raw_fd()) }; 2751 2752 // SAFETY: the raw fd conversion here is safe because: 2753 // 1. This function is only called on KVM, see the feature guard above. 2754 // 2. When running on KVM, passthrough_device wraps around DeviceFd. 2755 // 3. The conversion here extracts the raw fd and then turns the raw fd into a DeviceFd 2756 // of the same (correct) type.
2757 let vfio_container = Arc::new( 2758 VfioContainer::new(Arc::new(unsafe { DeviceFd::from_raw_fd(dup_device_fd) })) 2759 .map_err(DeviceManagerError::VfioCreate)?, 2760 ); 2761 2762 let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container)) 2763 .map_err(DeviceManagerError::VfioCreate)?; 2764 2765 let vfio_mapping = Arc::new(VfioDmaMapping::new( 2766 Arc::clone(&vfio_container), 2767 Arc::new(memory), 2768 )); 2769 if device_cfg.iommu { 2770 if let Some(iommu) = &self.iommu_device { 2771 iommu 2772 .lock() 2773 .unwrap() 2774 .add_external_mapping(pci_device_bdf, vfio_mapping); 2775 } 2776 } else { 2777 for virtio_mem_device in self.virtio_mem_devices.iter() { 2778 virtio_mem_device 2779 .lock() 2780 .unwrap() 2781 .add_dma_mapping_handler(pci_device_bdf, vfio_mapping.clone()) 2782 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?; 2783 } 2784 } 2785 2786 let legacy_interrupt_group = if let Some(legacy_interrupt_manager) = 2787 &self.legacy_interrupt_manager 2788 { 2789 Some( 2790 legacy_interrupt_manager 2791 .create_group(LegacyIrqGroupConfig { 2792 irq: self.pci_irq_slots[(pci_device_bdf >> 3) as usize] as InterruptIndex, 2793 }) 2794 .map_err(DeviceManagerError::CreateInterruptGroup)?, 2795 ) 2796 } else { 2797 None 2798 }; 2799 2800 let mut vfio_pci_device = VfioPciDevice::new( 2801 &self.address_manager.vm, 2802 vfio_device, 2803 vfio_container, 2804 &self.msi_interrupt_manager, 2805 legacy_interrupt_group, 2806 device_cfg.iommu, 2807 ) 2808 .map_err(DeviceManagerError::VfioPciCreate)?; 2809 2810 let vfio_name = if let Some(id) = &device_cfg.id { 2811 if self.device_tree.lock().unwrap().contains_key(id) { 2812 return Err(DeviceManagerError::DeviceIdAlreadyInUse); 2813 } 2814 2815 id.clone() 2816 } else { 2817 let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?; 2818 device_cfg.id = Some(id.clone()); 2819 id 2820 }; 2821 2822 vfio_pci_device 2823 .map_mmio_regions(&self.address_manager.vm, || { 2824 self.memory_manager.lock().unwrap().allocate_memory_slot() 2825 }) 2826 .map_err(DeviceManagerError::VfioMapRegion)?; 2827 2828 let mut node = device_node!(vfio_name); 2829 2830 for region in vfio_pci_device.mmio_regions() { 2831 node.resources.push(Resource::MmioAddressRange { 2832 base: region.start.0, 2833 size: region.length as u64, 2834 }); 2835 } 2836 2837 // Register DMA mapping in IOMMU. 2838 // Do not register virtio-mem regions, as they are handled directly by 2839 // virtio-mem device itself. 
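// Every RAM region of every memory zone is mapped (GPA -> HVA) into
// the VFIO container so the passed-through device can DMA directly
// into guest memory.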
2840 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 2841 for region in zone.regions() { 2842 vfio_pci_device 2843 .dma_map( 2844 region.start_addr().raw_value(), 2845 region.len() as u64, 2846 region.as_ptr() as u64, 2847 ) 2848 .map_err(DeviceManagerError::VfioDmaMap)?; 2849 } 2850 } 2851 2852 let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device)); 2853 2854 self.add_pci_device( 2855 pci, 2856 vfio_pci_device.clone(), 2857 vfio_pci_device.clone(), 2858 pci_device_bdf, 2859 )?; 2860 2861 node.pci_bdf = Some(pci_device_bdf); 2862 node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device)); 2863 2864 self.device_tree 2865 .lock() 2866 .unwrap() 2867 .insert(vfio_name.clone(), node); 2868 2869 Ok((pci_device_bdf, vfio_name)) 2870 } 2871 2872 fn add_pci_device( 2873 &mut self, 2874 pci_bus: &mut PciBus, 2875 bus_device: Arc<Mutex<dyn BusDevice>>, 2876 pci_device: Arc<Mutex<dyn PciDevice>>, 2877 bdf: u32, 2878 ) -> DeviceManagerResult<Vec<(GuestAddress, GuestUsize, PciBarRegionType)>> { 2879 let bars = pci_device 2880 .lock() 2881 .unwrap() 2882 .allocate_bars(&mut self.address_manager.allocator.lock().unwrap()) 2883 .map_err(DeviceManagerError::AllocateBars)?; 2884 2885 pci_bus 2886 .add_device(bdf, pci_device) 2887 .map_err(DeviceManagerError::AddPciDevice)?; 2888 2889 self.bus_devices.push(Arc::clone(&bus_device)); 2890 2891 pci_bus 2892 .register_mapping( 2893 bus_device, 2894 #[cfg(target_arch = "x86_64")] 2895 self.address_manager.io_bus.as_ref(), 2896 self.address_manager.mmio_bus.as_ref(), 2897 bars.clone(), 2898 ) 2899 .map_err(DeviceManagerError::AddPciDevice)?; 2900 2901 Ok(bars) 2902 } 2903 2904 fn add_vfio_devices(&mut self, pci: &mut PciBus) -> DeviceManagerResult<Vec<u32>> { 2905 let mut iommu_attached_device_ids = Vec::new(); 2906 let mut devices = self.config.lock().unwrap().devices.clone(); 2907 2908 if let Some(device_list_cfg) = &mut devices { 2909 for device_cfg in device_list_cfg.iter_mut() { 2910 let (device_id, _) = self.add_passthrough_device(pci, device_cfg)?; 2911 if device_cfg.iommu && self.iommu_device.is_some() { 2912 iommu_attached_device_ids.push(device_id); 2913 } 2914 } 2915 } 2916 2917 // Update the list of devices 2918 self.config.lock().unwrap().devices = devices; 2919 2920 Ok(iommu_attached_device_ids) 2921 } 2922 2923 fn add_virtio_pci_device( 2924 &mut self, 2925 virtio_device: VirtioDeviceArc, 2926 pci: &mut PciBus, 2927 iommu_mapping: &Option<Arc<IommuMapping>>, 2928 virtio_device_id: String, 2929 ) -> DeviceManagerResult<u32> { 2930 let id = format!("{}-{}", VIRTIO_PCI_DEVICE_NAME_PREFIX, virtio_device_id); 2931 2932 // Add the new virtio-pci node to the device tree. 2933 let mut node = device_node!(id); 2934 node.children = vec![virtio_device_id.clone()]; 2935 2936 // Look for the id in the device tree. If it can be found, that means 2937 // the device is being restored, otherwise it's created from scratch. 2938 let (pci_device_bdf, config_bar_addr) = 2939 if let Some(node) = self.device_tree.lock().unwrap().get(&id) { 2940 debug!("Restoring virtio-pci {} resources", id); 2941 let pci_device_bdf = node 2942 .pci_bdf 2943 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?; 2944 2945 pci.get_device_id((pci_device_bdf >> 3) as usize) 2946 .map_err(DeviceManagerError::GetPciDeviceId)?; 2947 2948 if node.resources.is_empty() { 2949 return Err(DeviceManagerError::MissingVirtioPciResources); 2950 } 2951 2952 // We know the configuration BAR address is stored on the first 2953 // resource in the list. 
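// Only the BAR base address is needed here; the BAR sizes are derived
// again when the BARs are allocated through add_pci_device() below.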
2954 let config_bar_addr = match node.resources[0] { 2955 Resource::MmioAddressRange { base, .. } => Some(base), 2956 _ => { 2957 error!("Unexpected resource {:?} for {}", node.resources[0], id); 2958 return Err(DeviceManagerError::MissingVirtioPciResources); 2959 } 2960 }; 2961 2962 (pci_device_bdf, config_bar_addr) 2963 } else { 2964 // We need to shift the device id since the 3 first bits are dedicated 2965 // to the PCI function, and we know we don't do multifunction. 2966 // Also, because we only support one PCI bus, the bus 0, we don't need 2967 // to add anything to the global device ID. 2968 let pci_device_bdf = pci 2969 .next_device_id() 2970 .map_err(DeviceManagerError::NextPciDeviceId)? 2971 << 3; 2972 2973 (pci_device_bdf, None) 2974 }; 2975 2976 // Update the existing virtio node by setting the parent. 2977 if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) { 2978 node.parent = Some(id.clone()); 2979 } else { 2980 return Err(DeviceManagerError::MissingNode); 2981 } 2982 2983 // Allows support for one MSI-X vector per queue. It also adds 1 2984 // as we need to take into account the dedicated vector to notify 2985 // about a virtio config change. 2986 let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16; 2987 2988 // Create the callback from the implementation of the DmaRemapping 2989 // trait. The point with the callback is to simplify the code as we 2990 // know about the device ID from this point. 2991 let iommu_mapping_cb: Option<Arc<VirtioIommuRemapping>> = 2992 if let Some(mapping) = iommu_mapping { 2993 let mapping_clone = mapping.clone(); 2994 Some(Arc::new(Box::new(move |addr: u64| { 2995 mapping_clone.translate(pci_device_bdf, addr).map_err(|e| { 2996 std::io::Error::new( 2997 std::io::ErrorKind::Other, 2998 format!( 2999 "failed to translate addr 0x{:x} for device 00:{:02x}.0 {}", 3000 addr, pci_device_bdf, e 3001 ), 3002 ) 3003 }) 3004 }) as VirtioIommuRemapping)) 3005 } else { 3006 None 3007 }; 3008 3009 let memory = self.memory_manager.lock().unwrap().guest_memory(); 3010 let mut virtio_pci_device = VirtioPciDevice::new( 3011 id.clone(), 3012 memory, 3013 virtio_device, 3014 msix_num, 3015 iommu_mapping_cb, 3016 &self.msi_interrupt_manager, 3017 pci_device_bdf, 3018 self.activate_evt 3019 .try_clone() 3020 .map_err(DeviceManagerError::EventFd)?, 3021 ) 3022 .map_err(DeviceManagerError::VirtioDevice)?; 3023 3024 // This is important as this will set the BAR address if it exists, 3025 // which is mandatory on the restore path. 3026 if let Some(addr) = config_bar_addr { 3027 virtio_pci_device.set_config_bar_addr(addr); 3028 } 3029 3030 let virtio_pci_device = Arc::new(Mutex::new(virtio_pci_device)); 3031 let bars = self.add_pci_device( 3032 pci, 3033 virtio_pci_device.clone(), 3034 virtio_pci_device.clone(), 3035 pci_device_bdf, 3036 )?; 3037 3038 let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr(); 3039 for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) { 3040 let io_addr = IoEventAddress::Mmio(addr); 3041 self.address_manager 3042 .vm 3043 .register_ioevent(event, &io_addr, None) 3044 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?; 3045 } 3046 3047 // Update the device tree with correct resource information. 
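// Each allocated BAR is recorded as an MMIO address range so that a
// later restore can bring the device back at the same guest addresses.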
3048 for pci_bar in bars.iter() { 3049 node.resources.push(Resource::MmioAddressRange { 3050 base: pci_bar.0.raw_value(), 3051 size: pci_bar.1 as u64, 3052 }); 3053 } 3054 node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>); 3055 node.pci_bdf = Some(pci_device_bdf); 3056 node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device)); 3057 self.device_tree.lock().unwrap().insert(id, node); 3058 3059 Ok(pci_device_bdf) 3060 } 3061 3062 #[cfg(target_arch = "x86_64")] 3063 pub fn io_bus(&self) -> &Arc<Bus> { 3064 &self.address_manager.io_bus 3065 } 3066 3067 pub fn mmio_bus(&self) -> &Arc<Bus> { 3068 &self.address_manager.mmio_bus 3069 } 3070 3071 pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> { 3072 &self.address_manager.allocator 3073 } 3074 3075 pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> { 3076 self.interrupt_controller 3077 .as_ref() 3078 .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>) 3079 } 3080 3081 pub fn console(&self) -> &Arc<Console> { 3082 &self.console 3083 } 3084 3085 pub fn cmdline_additions(&self) -> &[String] { 3086 self.cmdline_additions.as_slice() 3087 } 3088 3089 pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> { 3090 for (virtio_device, _, _) in self.virtio_devices.iter() { 3091 virtio_device 3092 .lock() 3093 .unwrap() 3094 .add_memory_region(new_region) 3095 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?; 3096 } 3097 3098 // Take care of updating the memory for VFIO PCI devices. 3099 #[cfg(feature = "kvm")] 3100 { 3101 let device_tree = self.device_tree.lock().unwrap(); 3102 for pci_device_node in device_tree.pci_devices() { 3103 if let PciDeviceHandle::Vfio(vfio_pci_device) = pci_device_node 3104 .pci_device_handle 3105 .as_ref() 3106 .ok_or(DeviceManagerError::MissingPciDevice)? 3107 { 3108 vfio_pci_device 3109 .lock() 3110 .unwrap() 3111 .dma_map( 3112 new_region.start_addr().raw_value(), 3113 new_region.len() as u64, 3114 new_region.as_ptr() as u64, 3115 ) 3116 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?; 3117 } 3118 } 3119 } 3120 3121 Ok(()) 3122 } 3123 3124 pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> { 3125 // Find virtio pci devices and activate any pending ones 3126 let device_tree = self.device_tree.lock().unwrap(); 3127 for pci_device_node in device_tree.pci_devices() { 3128 #[allow(irrefutable_let_patterns)] 3129 if let PciDeviceHandle::Virtio(virtio_pci_device) = &pci_device_node 3130 .pci_device_handle 3131 .as_ref() 3132 .ok_or(DeviceManagerError::MissingPciDevice)? 
3133 { 3134 virtio_pci_device.lock().unwrap().maybe_activate(); 3135 } 3136 } 3137 Ok(()) 3138 } 3139 3140 pub fn notify_hotplug( 3141 &self, 3142 _notification_type: AcpiNotificationFlags, 3143 ) -> DeviceManagerResult<()> { 3144 #[cfg(feature = "acpi")] 3145 return self 3146 .ged_notification_device 3147 .as_ref() 3148 .unwrap() 3149 .lock() 3150 .unwrap() 3151 .notify(_notification_type) 3152 .map_err(DeviceManagerError::HotPlugNotification); 3153 #[cfg(not(feature = "acpi"))] 3154 return Ok(()); 3155 } 3156 3157 pub fn add_device( 3158 &mut self, 3159 device_cfg: &mut DeviceConfig, 3160 ) -> DeviceManagerResult<PciDeviceInfo> { 3161 let pci = if let Some(pci_bus) = &self.pci_bus { 3162 Arc::clone(pci_bus) 3163 } else { 3164 return Err(DeviceManagerError::NoPciBus); 3165 }; 3166 3167 let (device_id, device_name) = 3168 self.add_passthrough_device(&mut pci.lock().unwrap(), device_cfg)?; 3169 3170 // Update the PCIU bitmap 3171 self.pci_devices_up |= 1 << (device_id >> 3); 3172 3173 Ok(PciDeviceInfo { 3174 id: device_name, 3175 bdf: device_id, 3176 }) 3177 } 3178 3179 pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> { 3180 // The node can be directly a PCI node in case the 'id' refers to a 3181 // VFIO device or a virtio-pci one. 3182 // In case the 'id' refers to a virtio device, we must find the PCI 3183 // node by looking at the parent. 3184 let device_tree = self.device_tree.lock().unwrap(); 3185 let node = device_tree 3186 .get(&id) 3187 .ok_or(DeviceManagerError::UnknownDeviceId(id))?; 3188 3189 let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() { 3190 node 3191 } else { 3192 let parent = node 3193 .parent 3194 .as_ref() 3195 .ok_or(DeviceManagerError::MissingNode)?; 3196 device_tree 3197 .get(parent) 3198 .ok_or(DeviceManagerError::MissingNode)? 3199 }; 3200 3201 let pci_device_bdf = pci_device_node 3202 .pci_bdf 3203 .ok_or(DeviceManagerError::MissingPciDevice)?; 3204 let pci_device_handle = pci_device_node 3205 .pci_device_handle 3206 .as_ref() 3207 .ok_or(DeviceManagerError::MissingPciDevice)?; 3208 #[allow(irrefutable_let_patterns)] 3209 if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle { 3210 let device_type = VirtioDeviceType::from( 3211 virtio_pci_device 3212 .lock() 3213 .unwrap() 3214 .virtio_device() 3215 .lock() 3216 .unwrap() 3217 .device_type(), 3218 ); 3219 match device_type { 3220 VirtioDeviceType::Net 3221 | VirtioDeviceType::Block 3222 | VirtioDeviceType::Pmem 3223 | VirtioDeviceType::Fs 3224 | VirtioDeviceType::Vsock => {} 3225 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)), 3226 } 3227 } 3228 3229 // Update the PCID bitmap 3230 self.pci_devices_down |= 1 << (pci_device_bdf >> 3); 3231 3232 Ok(()) 3233 } 3234 3235 pub fn eject_device(&mut self, device_id: u8) -> DeviceManagerResult<()> { 3236 // Retrieve the PCI bus. 3237 let pci = if let Some(pci_bus) = &self.pci_bus { 3238 Arc::clone(pci_bus) 3239 } else { 3240 return Err(DeviceManagerError::NoPciBus); 3241 }; 3242 3243 // Convert the device ID into the corresponding b/d/f. 3244 let pci_device_bdf = (device_id as u32) << 3; 3245 3246 // Give the PCI device ID back to the PCI bus. 3247 pci.lock() 3248 .unwrap() 3249 .put_device_id(device_id as usize) 3250 .map_err(DeviceManagerError::PutPciDeviceId)?; 3251 3252 // Remove the device from the device tree along with its children. 
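// For a virtio-pci device the children list holds the node of the
// underlying virtio device, which was linked to this PCI node through
// its `parent` field when the device was added.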
3253 let mut device_tree = self.device_tree.lock().unwrap(); 3254 let pci_device_node = device_tree 3255 .remove_node_by_pci_bdf(pci_device_bdf) 3256 .ok_or(DeviceManagerError::MissingPciDevice)?; 3257 for child in pci_device_node.children.iter() { 3258 device_tree.remove(child); 3259 } 3260 3261 let pci_device_handle = pci_device_node 3262 .pci_device_handle 3263 .ok_or(DeviceManagerError::MissingPciDevice)?; 3264 let (pci_device, bus_device, virtio_device) = match pci_device_handle { 3265 #[cfg(feature = "kvm")] 3266 PciDeviceHandle::Vfio(vfio_pci_device) => { 3267 { 3268 // Unregister DMA mapping in IOMMU. 3269 // Do not unregister the virtio-mem region, as it is 3270 // directly handled by the virtio-mem device. 3271 let dev = vfio_pci_device.lock().unwrap(); 3272 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() { 3273 for region in zone.regions() { 3274 dev.dma_unmap(region.start_addr().raw_value(), region.len() as u64) 3275 .map_err(DeviceManagerError::VfioDmaUnmap)?; 3276 } 3277 } 3278 3279 // Unregister the VFIO mapping handler from all virtio-mem 3280 // devices. 3281 if !dev.iommu_attached() { 3282 for virtio_mem_device in self.virtio_mem_devices.iter() { 3283 virtio_mem_device 3284 .lock() 3285 .unwrap() 3286 .remove_dma_mapping_handler(pci_device_bdf) 3287 .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?; 3288 } 3289 } 3290 } 3291 3292 ( 3293 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>, 3294 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>, 3295 None as Option<VirtioDeviceArc>, 3296 ) 3297 } 3298 PciDeviceHandle::Virtio(virtio_pci_device) => { 3299 let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr(); 3300 for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) { 3301 let io_addr = IoEventAddress::Mmio(addr); 3302 self.address_manager 3303 .vm 3304 .unregister_ioevent(event, &io_addr) 3305 .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?; 3306 } 3307 3308 ( 3309 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>, 3310 Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>, 3311 Some(virtio_pci_device.lock().unwrap().virtio_device()), 3312 ) 3313 } 3314 }; 3315 3316 // Free the allocated BARs 3317 pci_device 3318 .lock() 3319 .unwrap() 3320 .free_bars(&mut self.address_manager.allocator.lock().unwrap()) 3321 .map_err(DeviceManagerError::FreePciBars)?; 3322 3323 // Remove the device from the PCI bus 3324 pci.lock() 3325 .unwrap() 3326 .remove_by_device(&pci_device) 3327 .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?; 3328 3329 #[cfg(target_arch = "x86_64")] 3330 // Remove the device from the IO bus 3331 self.io_bus() 3332 .remove_by_device(&bus_device) 3333 .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?; 3334 3335 // Remove the device from the MMIO bus 3336 self.mmio_bus() 3337 .remove_by_device(&bus_device) 3338 .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?; 3339 3340 // Remove the device from the list of BusDevice held by the 3341 // DeviceManager. 
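// Arc::ptr_eq makes sure only the exact instance registered for this
// device is removed from the list.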
3342 self.bus_devices 3343 .retain(|dev| !Arc::ptr_eq(dev, &bus_device)); 3344 3345 // Shutdown and remove the underlying virtio-device if present 3346 if let Some(virtio_device) = virtio_device { 3347 for mapping in virtio_device.lock().unwrap().userspace_mappings() { 3348 self.memory_manager 3349 .lock() 3350 .unwrap() 3351 .remove_userspace_mapping( 3352 mapping.addr.raw_value(), 3353 mapping.len, 3354 mapping.host_addr, 3355 mapping.mergeable, 3356 mapping.mem_slot, 3357 ) 3358 .map_err(DeviceManagerError::MemoryManager)?; 3359 } 3360 3361 virtio_device.lock().unwrap().shutdown(); 3362 3363 self.virtio_devices 3364 .retain(|(d, _, _)| !Arc::ptr_eq(d, &virtio_device)); 3365 } 3366 3367 // At this point, the device has been removed from all the list and 3368 // buses where it was stored. At the end of this function, after 3369 // any_device, bus_device and pci_device are released, the actual 3370 // device will be dropped. 3371 Ok(()) 3372 } 3373 3374 fn hotplug_virtio_pci_device( 3375 &mut self, 3376 device: VirtioDeviceArc, 3377 iommu_attached: bool, 3378 id: String, 3379 ) -> DeviceManagerResult<PciDeviceInfo> { 3380 if iommu_attached { 3381 warn!("Placing device behind vIOMMU is not available for hotplugged devices"); 3382 } 3383 3384 let pci = if let Some(pci_bus) = &self.pci_bus { 3385 Arc::clone(pci_bus) 3386 } else { 3387 return Err(DeviceManagerError::NoPciBus); 3388 }; 3389 3390 // Add the virtio device to the device manager list. This is important 3391 // as the list is used to notify virtio devices about memory updates 3392 // for instance. 3393 self.virtio_devices 3394 .push((device.clone(), iommu_attached, id.clone())); 3395 3396 let device_id = 3397 self.add_virtio_pci_device(device, &mut pci.lock().unwrap(), &None, id.clone())?; 3398 3399 // Update the PCIU bitmap 3400 self.pci_devices_up |= 1 << (device_id >> 3); 3401 3402 Ok(PciDeviceInfo { id, bdf: device_id }) 3403 } 3404 3405 pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> { 3406 let (device, iommu_attached, id) = self.make_virtio_block_device(disk_cfg)?; 3407 self.hotplug_virtio_pci_device(device, iommu_attached, id) 3408 } 3409 3410 pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> { 3411 let (device, iommu_attached, id) = self.make_virtio_fs_device(fs_cfg)?; 3412 self.hotplug_virtio_pci_device(device, iommu_attached, id) 3413 } 3414 3415 pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> { 3416 let (device, iommu_attached, id) = self.make_virtio_pmem_device(pmem_cfg)?; 3417 self.hotplug_virtio_pci_device(device, iommu_attached, id) 3418 } 3419 3420 pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> { 3421 let (device, iommu_attached, id) = self.make_virtio_net_device(net_cfg)?; 3422 self.hotplug_virtio_pci_device(device, iommu_attached, id) 3423 } 3424 3425 pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> { 3426 let (device, iommu_attached, id) = self.make_virtio_vsock_device(vsock_cfg)?; 3427 self.hotplug_virtio_pci_device(device, iommu_attached, id) 3428 } 3429 3430 pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> { 3431 let mut counters = HashMap::new(); 3432 3433 for (virtio_device, _, id) in &self.virtio_devices { 3434 let virtio_device = virtio_device.lock().unwrap(); 3435 if let Some(device_counters) = virtio_device.counters() { 3436 counters.insert(id.clone(), 
device_counters.clone()); 3437 } 3438 } 3439 3440 counters 3441 } 3442 3443 pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> { 3444 if let Some(balloon) = &self.balloon { 3445 return balloon 3446 .lock() 3447 .unwrap() 3448 .resize(size) 3449 .map_err(DeviceManagerError::VirtioBalloonResize); 3450 } 3451 3452 warn!("No balloon setup: Can't resize the balloon"); 3453 Err(DeviceManagerError::MissingVirtioBalloon) 3454 } 3455 3456 pub fn balloon_size(&self) -> u64 { 3457 if let Some(balloon) = &self.balloon { 3458 return balloon.lock().unwrap().get_actual(); 3459 } 3460 3461 0 3462 } 3463 3464 pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> { 3465 self.device_tree.clone() 3466 } 3467 3468 pub fn restore_devices( 3469 &mut self, 3470 snapshot: Snapshot, 3471 ) -> std::result::Result<(), MigratableError> { 3472 // Finally, restore all devices associated with the DeviceManager. 3473 // It's important to restore devices in the right order, that's why 3474 // the device tree is the right way to ensure we restore a child before 3475 // its parent node. 3476 for node in self 3477 .device_tree 3478 .lock() 3479 .unwrap() 3480 .breadth_first_traversal() 3481 .rev() 3482 { 3483 // Restore the node 3484 if let Some(migratable) = &node.migratable { 3485 debug!("Restoring {} from DeviceManager", node.id); 3486 if let Some(snapshot) = snapshot.snapshots.get(&node.id) { 3487 migratable.lock().unwrap().pause()?; 3488 migratable.lock().unwrap().restore(*snapshot.clone())?; 3489 } else { 3490 return Err(MigratableError::Restore(anyhow!( 3491 "Missing device {}", 3492 node.id 3493 ))); 3494 } 3495 } 3496 } 3497 3498 Ok(()) 3499 } 3500 3501 #[cfg(feature = "acpi")] 3502 #[cfg(target_arch = "x86_64")] 3503 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 3504 self.ged_notification_device 3505 .as_ref() 3506 .unwrap() 3507 .lock() 3508 .unwrap() 3509 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED) 3510 .map_err(DeviceManagerError::PowerButtonNotification) 3511 } 3512 3513 #[cfg(target_arch = "aarch64")] 3514 pub fn notify_power_button(&self) -> DeviceManagerResult<()> { 3515 self.gpio_device 3516 .as_ref() 3517 .unwrap() 3518 .lock() 3519 .unwrap() 3520 .trigger_key(3) 3521 .map_err(DeviceManagerError::AArch64PowerButtonNotification) 3522 } 3523 3524 pub fn iommu_attached_devices(&self) -> &Option<(u32, Vec<u32>)> { 3525 &self.iommu_attached_devices 3526 } 3527 } 3528 3529 #[cfg(feature = "acpi")] 3530 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> { 3531 for (numa_node_id, numa_node) in numa_nodes.iter() { 3532 if numa_node 3533 .memory_zones() 3534 .contains(&memory_zone_id.to_owned()) 3535 { 3536 return Some(*numa_node_id); 3537 } 3538 } 3539 3540 None 3541 } 3542 3543 #[cfg(feature = "acpi")] 3544 struct PciDevSlot { 3545 device_id: u8, 3546 } 3547 3548 #[cfg(feature = "acpi")] 3549 impl Aml for PciDevSlot { 3550 fn to_aml_bytes(&self) -> Vec<u8> { 3551 let sun = self.device_id; 3552 let adr: u32 = (self.device_id as u32) << 16; 3553 aml::Device::new( 3554 format!("S{:03}", self.device_id).as_str().into(), 3555 vec![ 3556 &aml::Name::new("_SUN".into(), &sun), 3557 &aml::Name::new("_ADR".into(), &adr), 3558 &aml::Method::new( 3559 "_EJ0".into(), 3560 1, 3561 true, 3562 vec![&aml::MethodCall::new( 3563 "\\_SB_.PHPR.PCEJ".into(), 3564 vec![&aml::Path::new("_SUN")], 3565 )], 3566 ), 3567 ], 3568 ) 3569 .to_aml_bytes() 3570 } 3571 } 3572 3573 #[cfg(feature = "acpi")] 3574 struct PciDevSlotNotify { 3575 device_id: u8, 3576 
} 3577 3578 #[cfg(feature = "acpi")] 3579 impl Aml for PciDevSlotNotify { 3580 fn to_aml_bytes(&self) -> Vec<u8> { 3581 let device_id_mask: u32 = 1 << self.device_id; 3582 let object = aml::Path::new(&format!("S{:03}", self.device_id)); 3583 let mut bytes = aml::And::new(&aml::Local(0), &aml::Arg(0), &device_id_mask).to_aml_bytes(); 3584 bytes.extend_from_slice( 3585 &aml::If::new( 3586 &aml::Equal::new(&aml::Local(0), &device_id_mask), 3587 vec![&aml::Notify::new(&object, &aml::Arg(1))], 3588 ) 3589 .to_aml_bytes(), 3590 ); 3591 bytes 3592 } 3593 } 3594 3595 #[cfg(feature = "acpi")] 3596 struct PciDevSlotMethods {} 3597 3598 #[cfg(feature = "acpi")] 3599 impl Aml for PciDevSlotMethods { 3600 fn to_aml_bytes(&self) -> Vec<u8> { 3601 let mut device_notifies = Vec::new(); 3602 for device_id in 0..32 { 3603 device_notifies.push(PciDevSlotNotify { device_id }); 3604 } 3605 3606 let mut device_notifies_refs: Vec<&dyn aml::Aml> = Vec::new(); 3607 for device_notify in device_notifies.iter() { 3608 device_notifies_refs.push(device_notify); 3609 } 3610 3611 let mut bytes = 3612 aml::Method::new("DVNT".into(), 2, true, device_notifies_refs).to_aml_bytes(); 3613 3614 bytes.extend_from_slice( 3615 &aml::Method::new( 3616 "PCNT".into(), 3617 0, 3618 true, 3619 vec![ 3620 &aml::MethodCall::new( 3621 "DVNT".into(), 3622 vec![&aml::Path::new("\\_SB_.PHPR.PCIU"), &aml::ONE], 3623 ), 3624 &aml::MethodCall::new( 3625 "DVNT".into(), 3626 vec![&aml::Path::new("\\_SB_.PHPR.PCID"), &3usize], 3627 ), 3628 ], 3629 ) 3630 .to_aml_bytes(), 3631 ); 3632 bytes 3633 } 3634 } 3635 3636 #[cfg(feature = "acpi")] 3637 struct PciDsmMethod {} 3638 3639 #[cfg(feature = "acpi")] 3640 impl Aml for PciDsmMethod { 3641 fn to_aml_bytes(&self) -> Vec<u8> { 3642 // Refer to ACPI spec v6.3 Ch 9.1.1 and PCI Firmware spec v3.3 Ch 4.6.1 3643 // _DSM (Device Specific Method), the following is the implementation in ASL. 3644 /* 3645 Method (_DSM, 4, NotSerialized) // _DSM: Device-Specific Method 3646 { 3647 If ((Arg0 == ToUUID ("e5c937d0-3553-4d7a-9117-ea4d19c3434d") /* Device Labeling Interface */)) 3648 { 3649 If ((Arg2 == Zero)) 3650 { 3651 Return (Buffer (One) { 0x21 }) 3652 } 3653 If ((Arg2 == 0x05)) 3654 { 3655 Return (Zero) 3656 } 3657 } 3658 3659 Return (Buffer (One) { 0x00 }) 3660 } 3661 */ 3662 /* 3663 * As per ACPI v6.3 Ch 19.6.142, the UUID is required to be in mixed endian: 3664 * Among the fields of a UUID: 3665 * {d1 (8 digits)} - {d2 (4 digits)} - {d3 (4 digits)} - {d4 (16 digits)} 3666 * d1 ~ d3 need to be little endian, d4 be big endian. 3667 * See https://en.wikipedia.org/wiki/Universally_unique_identifier#Encoding . 
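* For example, d1 = 0xE5C937D0 is emitted as the bytes D0 37 C9 E5.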
3668 */ 3669 let uuid = Uuid::parse_str("E5C937D0-3553-4D7A-9117-EA4D19C3434D").unwrap(); 3670 let (uuid_d1, uuid_d2, uuid_d3, uuid_d4) = uuid.as_fields(); 3671 let mut uuid_buf = vec![]; 3672 uuid_buf.extend(&uuid_d1.to_le_bytes()); 3673 uuid_buf.extend(&uuid_d2.to_le_bytes()); 3674 uuid_buf.extend(&uuid_d3.to_le_bytes()); 3675 uuid_buf.extend(uuid_d4); 3676 aml::Method::new( 3677 "_DSM".into(), 3678 4, 3679 false, 3680 vec![ 3681 &aml::If::new( 3682 &aml::Equal::new(&aml::Arg(0), &aml::Buffer::new(uuid_buf)), 3683 vec![ 3684 &aml::If::new( 3685 &aml::Equal::new(&aml::Arg(2), &aml::ZERO), 3686 vec![&aml::Return::new(&aml::Buffer::new(vec![0x21]))], 3687 ), 3688 &aml::If::new( 3689 &aml::Equal::new(&aml::Arg(2), &0x05u8), 3690 vec![&aml::Return::new(&aml::ZERO)], 3691 ), 3692 ], 3693 ), 3694 &aml::Return::new(&aml::Buffer::new(vec![0])), 3695 ], 3696 ) 3697 .to_aml_bytes() 3698 } 3699 } 3700 3701 #[cfg(feature = "acpi")] 3702 impl Aml for DeviceManager { 3703 fn to_aml_bytes(&self) -> Vec<u8> { 3704 #[cfg(target_arch = "aarch64")] 3705 use arch::aarch64::DeviceInfoForFdt; 3706 3707 let mut bytes = Vec::new(); 3708 // PCI hotplug controller 3709 bytes.extend_from_slice( 3710 &aml::Device::new( 3711 "_SB_.PHPR".into(), 3712 vec![ 3713 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")), 3714 &aml::Name::new("_STA".into(), &0x0bu8), 3715 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"), 3716 &aml::Mutex::new("BLCK".into(), 0), 3717 &aml::Name::new( 3718 "_CRS".into(), 3719 &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory( 3720 aml::AddressSpaceCachable::NotCacheable, 3721 true, 3722 self.acpi_address.0 as u64, 3723 self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1, 3724 )]), 3725 ), 3726 // OpRegion and Fields map MMIO range into individual field values 3727 &aml::OpRegion::new( 3728 "PCST".into(), 3729 aml::OpRegionSpace::SystemMemory, 3730 self.acpi_address.0 as usize, 3731 DEVICE_MANAGER_ACPI_SIZE, 3732 ), 3733 &aml::Field::new( 3734 "PCST".into(), 3735 aml::FieldAccessType::DWord, 3736 aml::FieldUpdateRule::WriteAsZeroes, 3737 vec![ 3738 aml::FieldEntry::Named(*b"PCIU", 32), 3739 aml::FieldEntry::Named(*b"PCID", 32), 3740 aml::FieldEntry::Named(*b"B0EJ", 32), 3741 ], 3742 ), 3743 &aml::Method::new( 3744 "PCEJ".into(), 3745 1, 3746 true, 3747 vec![ 3748 // Take lock defined above 3749 &aml::Acquire::new("BLCK".into(), 0xffff), 3750 // Write PCI bus number (in first argument) to I/O port via field 3751 &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)), 3752 // Release lock 3753 &aml::Release::new("BLCK".into()), 3754 // Return 0 3755 &aml::Return::new(&aml::ZERO), 3756 ], 3757 ), 3758 ], 3759 ) 3760 .to_aml_bytes(), 3761 ); 3762 3763 let start_of_device_area = self.memory_manager.lock().unwrap().start_of_device_area().0; 3764 let end_of_device_area = self.memory_manager.lock().unwrap().end_of_device_area().0; 3765 3766 let mut pci_dsdt_inner_data: Vec<&dyn aml::Aml> = Vec::new(); 3767 let hid = aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A08")); 3768 pci_dsdt_inner_data.push(&hid); 3769 let cid = aml::Name::new("_CID".into(), &aml::EisaName::new("PNP0A03")); 3770 pci_dsdt_inner_data.push(&cid); 3771 let adr = aml::Name::new("_ADR".into(), &aml::ZERO); 3772 pci_dsdt_inner_data.push(&adr); 3773 let seg = aml::Name::new("_SEG".into(), &aml::ZERO); 3774 pci_dsdt_inner_data.push(&seg); 3775 let uid = aml::Name::new("_UID".into(), &aml::ZERO); 3776 pci_dsdt_inner_data.push(&uid); 3777 let supp = 
aml::Name::new("SUPP".into(), &aml::ZERO); 3778 pci_dsdt_inner_data.push(&supp); 3779 3780 // Since Cloud Hypervisor supports only one PCI bus, it can be tied 3781 // to the NUMA node 0. It's up to the user to organize the NUMA nodes 3782 // so that the PCI bus relates to the expected vCPUs and guest RAM. 3783 let proximity_domain = 0u32; 3784 let pxm_return = aml::Return::new(&proximity_domain); 3785 let pxm = aml::Method::new("_PXM".into(), 0, false, vec![&pxm_return]); 3786 pci_dsdt_inner_data.push(&pxm); 3787 3788 let pci_dsm = PciDsmMethod {}; 3789 pci_dsdt_inner_data.push(&pci_dsm); 3790 3791 let crs = aml::Name::new( 3792 "_CRS".into(), 3793 &aml::ResourceTemplate::new(vec![ 3794 &aml::AddressSpace::new_bus_number(0x0u16, 0xffu16), 3795 #[cfg(target_arch = "x86_64")] 3796 &aml::Io::new(0xcf8, 0xcf8, 1, 0x8), 3797 #[cfg(target_arch = "aarch64")] 3798 &aml::Memory32Fixed::new( 3799 true, 3800 layout::PCI_MMCONFIG_START.0 as u32, 3801 layout::PCI_MMCONFIG_SIZE as u32, 3802 ), 3803 &aml::AddressSpace::new_memory( 3804 aml::AddressSpaceCachable::NotCacheable, 3805 true, 3806 layout::MEM_32BIT_DEVICES_START.0 as u32, 3807 (layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE - 1) as u32, 3808 ), 3809 &aml::AddressSpace::new_memory( 3810 aml::AddressSpaceCachable::NotCacheable, 3811 true, 3812 start_of_device_area, 3813 end_of_device_area, 3814 ), 3815 #[cfg(target_arch = "x86_64")] 3816 &aml::AddressSpace::new_io(0u16, 0x0cf7u16), 3817 #[cfg(target_arch = "x86_64")] 3818 &aml::AddressSpace::new_io(0x0d00u16, 0xffffu16), 3819 ]), 3820 ); 3821 pci_dsdt_inner_data.push(&crs); 3822 3823 let mut pci_devices = Vec::new(); 3824 for device_id in 0..32 { 3825 let pci_device = PciDevSlot { device_id }; 3826 pci_devices.push(pci_device); 3827 } 3828 for pci_device in pci_devices.iter() { 3829 pci_dsdt_inner_data.push(pci_device); 3830 } 3831 3832 let pci_device_methods = PciDevSlotMethods {}; 3833 pci_dsdt_inner_data.push(&pci_device_methods); 3834 3835 // Build PCI routing table, listing IRQs assigned to PCI devices. 
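// Per the ACPI spec, each _PRT entry is a package of the form
// { Address, Pin, Source, Source Index }. Here the address is
// (device << 16) | 0xFFFF (any function of the slot), the pin is 0
// (INTA), the source is 0 (use the global interrupt pool) and the
// source index is the GSI taken from pci_irq_slots. For example,
// slot 3 produces (0x3FFFF, 0, 0, pci_irq_slots[3]).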
        let prt_package_list: Vec<(u32, u32)> = self
            .pci_irq_slots
            .iter()
            .enumerate()
            .map(|(i, irq)| (((((i as u32) & 0x1fu32) << 16) | 0xffffu32), *irq as u32))
            .collect();
        let prt_package_list: Vec<aml::Package> = prt_package_list
            .iter()
            .map(|(bdf, irq)| aml::Package::new(vec![bdf, &0u8, &0u8, irq]))
            .collect();
        let prt_package_list: Vec<&dyn Aml> = prt_package_list
            .iter()
            .map(|item| item as &dyn Aml)
            .collect();
        let prt = aml::Name::new("_PRT".into(), &aml::Package::new(prt_package_list));
        pci_dsdt_inner_data.push(&prt);

        let pci_dsdt_data =
            aml::Device::new("_SB_.PCI0".into(), pci_dsdt_inner_data).to_aml_bytes();

        let mbrd_dsdt_data = aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                        true,
                        layout::PCI_MMCONFIG_START.0 as u32,
                        layout::PCI_MMCONFIG_SIZE as u32,
                    )]),
                ),
            ],
        )
        .to_aml_bytes();

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If the serial device is turned off, fall back to a
                // placeholder (invalid) IRQ; the COM1 node is not emitted in
                // that case anyway.
                31
            };
        let com1_dsdt_data = aml::Device::new(
            "_SB_.COM1".into(),
            vec![
                &aml::Name::new(
                    "_HID".into(),
                    #[cfg(target_arch = "x86_64")]
                    &aml::EisaName::new("PNP0501"),
                    #[cfg(target_arch = "aarch64")]
                    &"ARMH0011",
                ),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![
                        &aml::Interrupt::new(true, true, false, false, serial_irq),
                        #[cfg(target_arch = "x86_64")]
                        &aml::Io::new(0x3f8, 0x3f8, 0, 0x8),
                        #[cfg(target_arch = "aarch64")]
                        &aml::Memory32Fixed::new(
                            true,
                            arch::layout::LEGACY_SERIAL_MAPPED_IO_START as u32,
                            MMIO_LEN as u32,
                        ),
                    ]),
                ),
            ],
        )
        .to_aml_bytes();

        let s5_sleep_data =
            aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes();

        let power_button_dsdt_data = aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes();

        let ged_data = self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes();

        bytes.extend_from_slice(pci_dsdt_data.as_slice());
        bytes.extend_from_slice(mbrd_dsdt_data.as_slice());
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            bytes.extend_from_slice(com1_dsdt_data.as_slice());
        }
        bytes.extend_from_slice(s5_sleep_data.as_slice());
        bytes.extend_from_slice(power_button_dsdt_data.as_slice());
        bytes.extend_from_slice(ged_data.as_slice());
        bytes
    }
}
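
// The migration-related trait implementations below (Pausable, Snapshottable
// and Migratable) all follow the same pattern: walk the device tree and
// forward the call to every node that carries a `migratable` handle.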
impl Pausable for DeviceManager {
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }

        Ok(())
    }

    fn resume(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }

        Ok(())
    }
}

impl Snapshottable for DeviceManager {
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::new(DEVICE_MANAGER_SNAPSHOT_ID);

        // Aggregate the snapshots from all the devices.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let device_snapshot = migratable.lock().unwrap().snapshot()?;
                snapshot.add_snapshot(device_snapshot);
            }
        }

        // Then store the DeviceManager state.
        snapshot.add_data_section(SnapshotDataSection::new_from_state(
            DEVICE_MANAGER_SNAPSHOT_ID,
            &self.state(),
        )?);

        Ok(snapshot)
    }

    fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
        // First restore the DeviceManager state.
        self.set_state(&snapshot.to_state(DEVICE_MANAGER_SNAPSHOT_ID)?);

        // Now that the DeviceManager is updated with the right state, create
        // the devices based on the configuration.
        self.create_devices(None, None)
            .map_err(|e| MigratableError::Restore(anyhow!("Could not create devices {:?}", e)))?;

        Ok(())
    }
}

impl Transportable for DeviceManager {}

impl Migratable for DeviceManager {
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }
}

const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;

const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
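
// PCI hotplug register block backing the PCST fields declared in the ACPI
// tables above. The guest reads PCIU (offset 0) to learn which slots have
// been hot-plugged and PCID (offset 4) for pending removals (both bitmaps are
// cleared on read), and writes a slot bitmap to B0EJ (offset 8) to eject
// devices. For example, writing 0x0000_0104 requests ejection of the devices
// in slots 2 and 8.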
impl BusDevice for DeviceManager {
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                data.copy_from_slice(&self.pci_devices_up.to_le_bytes());
                // Clear the PCIU bitmap
                self.pci_devices_up = 0;
            }
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                data.copy_from_slice(&self.pci_devices_down.to_le_bytes());
                // Clear the PCID bitmap
                self.pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty bitmap since the eject is always
                // taken care of right away during a write access.
                data.copy_from_slice(&[0, 0, 0, 0]);
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        )
    }

    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
        match offset {
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let device_bitmap = u32::from_le_bytes(data_array);

                for device_id in 0..32 {
                    let mask = 1u32 << device_id;
                    if (device_bitmap & mask) == mask {
                        if let Err(e) = self.eject_device(device_id as u8) {
                            error!("Failed ejecting device {}: {:?}", device_id, e);
                        }
                    }
                }
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        );

        None
    }
}

impl Drop for DeviceManager {
    fn drop(&mut self) {
        for (device, _, _) in self.virtio_devices.drain(..) {
            device.lock().unwrap().shutdown();
        }
    }
}
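
// A minimal test sketch: these cases only re-state, with plain arithmetic,
// the _PRT address encoding and the B0EJ slot-bitmap convention used above.
// The module and test names are illustrative, not an established convention
// of this crate.
#[cfg(test)]
mod pci_hotplug_encoding_tests {
    #[test]
    fn prt_address_encodes_slot_and_wildcard_function() {
        // Same expression as in `DeviceManager::to_aml_bytes()`: the slot
        // number goes in bits 16..21 and the low word is 0xFFFF (any function).
        let encode = |slot: u32| ((slot & 0x1f) << 16) | 0xffff;
        assert_eq!(encode(0), 0x0000_ffff);
        assert_eq!(encode(3), 0x0003_ffff);
        assert_eq!(encode(31), 0x001f_ffff);
    }

    #[test]
    fn b0ej_bitmap_selects_device_slots() {
        // Writing this value to B0EJ requests ejection of slots 2 and 8,
        // mirroring the bit scan in `BusDevice::write()`.
        let device_bitmap: u32 = (1 << 2) | (1 << 8);
        let slots: Vec<u8> = (0..32u8)
            .filter(|id| device_bitmap & (1u32 << *id) != 0)
            .collect();
        assert_eq!(slots, vec![2, 8]);
    }
}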