xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision d10f20eb718023742143fa847a37f3d6114ead52)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use crate::config::{
13     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
14     VdpaConfig, VhostMode, VmConfig, VsockConfig,
15 };
16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE};
17 use crate::device_tree::{DeviceNode, DeviceTree};
18 use crate::interrupt::LegacyUserspaceInterruptManager;
19 use crate::interrupt::MsiInterruptManager;
20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
21 use crate::pci_segment::PciSegment;
22 use crate::seccomp_filters::{get_seccomp_filter, Thread};
23 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
24 use crate::sigwinch_listener::start_sigwinch_listener;
25 use crate::vm_config::DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT;
26 use crate::GuestRegionMmap;
27 use crate::PciDeviceInfo;
28 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
29 use acpi_tables::sdt::GenericAddress;
30 use acpi_tables::{aml, Aml};
31 use anyhow::anyhow;
32 use arch::layout;
33 #[cfg(target_arch = "x86_64")]
34 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
35 use arch::NumaNodes;
36 #[cfg(target_arch = "aarch64")]
37 use arch::{DeviceType, MmioDeviceInfo};
38 use block::{
39     async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type,
40     fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio,
41     raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType,
42 };
43 #[cfg(feature = "io_uring")]
44 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk};
45 #[cfg(target_arch = "x86_64")]
46 use devices::debug_console::DebugConsole;
47 #[cfg(target_arch = "aarch64")]
48 use devices::gic;
49 #[cfg(target_arch = "x86_64")]
50 use devices::ioapic;
51 #[cfg(target_arch = "aarch64")]
52 use devices::legacy::Pl011;
53 use devices::{
54     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
55 };
56 use hypervisor::{HypervisorType, IoEventAddress};
57 use libc::{
58     cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED,
59     O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW,
60 };
61 use pci::{
62     DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping,
63     VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError,
64 };
65 use rate_limiter::group::RateLimiterGroup;
66 use seccompiler::SeccompAction;
67 use serde::{Deserialize, Serialize};
68 use std::collections::{BTreeMap, BTreeSet, HashMap};
69 use std::fs::{read_link, File, OpenOptions};
70 use std::io::{self, stdout, Seek, SeekFrom};
71 use std::mem::zeroed;
72 use std::num::Wrapping;
73 use std::os::unix::fs::OpenOptionsExt;
74 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
75 use std::path::PathBuf;
76 use std::result;
77 use std::sync::{Arc, Mutex};
78 use std::time::Instant;
79 use tracer::trace_scoped;
80 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd};
81 use virtio_devices::transport::VirtioTransport;
82 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator};
83 use virtio_devices::vhost_user::VhostUserConfig;
84 use virtio_devices::{
85     AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource,
86 };
87 use virtio_devices::{Endpoint, IommuMapping};
88 use vm_allocator::{AddressAllocator, SystemAllocator};
89 use vm_device::dma_mapping::ExternalDmaMapping;
90 use vm_device::interrupt::{
91     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
92 };
93 use vm_device::{Bus, BusDevice, Resource};
94 use vm_memory::guest_memory::FileOffset;
95 use vm_memory::GuestMemoryRegion;
96 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
97 #[cfg(target_arch = "x86_64")]
98 use vm_memory::{GuestAddressSpace, GuestMemory};
99 use vm_migration::{
100     protocol::MemoryRangeTable, snapshot_from_id, state_from_id, Migratable, MigratableError,
101     Pausable, Snapshot, SnapshotData, Snapshottable, Transportable,
102 };
103 use vm_virtio::AccessPlatform;
104 use vm_virtio::VirtioDeviceType;
105 use vmm_sys_util::eventfd::EventFd;
106 #[cfg(target_arch = "x86_64")]
107 use {devices::debug_console, devices::legacy::Serial};
108 
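// 4 KiB MMIO window allocated for each aarch64 legacy MMIO device (e.g. the
// PL011 UART or the GPIO device).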
109 #[cfg(target_arch = "aarch64")]
110 const MMIO_LEN: u64 = 0x1000;
111 
112 // Singleton devices / devices the user cannot name
113 #[cfg(target_arch = "x86_64")]
114 const IOAPIC_DEVICE_NAME: &str = "__ioapic";
115 const SERIAL_DEVICE_NAME: &str = "__serial";
116 #[cfg(target_arch = "x86_64")]
117 const DEBUGCON_DEVICE_NAME: &str = "__debug_console";
118 #[cfg(target_arch = "aarch64")]
119 const GPIO_DEVICE_NAME: &str = "__gpio";
120 const RNG_DEVICE_NAME: &str = "__rng";
121 const IOMMU_DEVICE_NAME: &str = "__iommu";
122 const BALLOON_DEVICE_NAME: &str = "__balloon";
123 const CONSOLE_DEVICE_NAME: &str = "__console";
124 const PVPANIC_DEVICE_NAME: &str = "__pvpanic";
125 
126 // Devices that the user may name and for which we generate
127 // identifiers if the user doesn't give one
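// (e.g. a disk added without an explicit id is typically named "_disk0",
// "_disk1", ..., i.e. the prefix below combined with the device_id_cnt counter)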
128 const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
129 const FS_DEVICE_NAME_PREFIX: &str = "_fs";
130 const NET_DEVICE_NAME_PREFIX: &str = "_net";
131 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
132 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
133 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
134 const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
135 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
136 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
137 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
138 
139 /// Errors associated with device manager
140 #[derive(Debug)]
141 pub enum DeviceManagerError {
142     /// Cannot create EventFd.
143     EventFd(io::Error),
144 
145     /// Cannot open disk path
146     Disk(io::Error),
147 
148     /// Cannot create vhost-user-net device
149     CreateVhostUserNet(virtio_devices::vhost_user::Error),
150 
151     /// Cannot create virtio-blk device
152     CreateVirtioBlock(io::Error),
153 
154     /// Cannot create virtio-net device
155     CreateVirtioNet(virtio_devices::net::Error),
156 
157     /// Cannot create virtio-console device
158     CreateVirtioConsole(io::Error),
159 
160     /// Cannot create virtio-rng device
161     CreateVirtioRng(io::Error),
162 
163     /// Cannot create virtio-fs device
164     CreateVirtioFs(virtio_devices::vhost_user::Error),
165 
166     /// Virtio-fs device was created without a socket.
167     NoVirtioFsSock,
168 
169     /// Cannot create vhost-user-blk device
170     CreateVhostUserBlk(virtio_devices::vhost_user::Error),
171 
172     /// Cannot create virtio-pmem device
173     CreateVirtioPmem(io::Error),
174 
175     /// Cannot create vDPA device
176     CreateVdpa(virtio_devices::vdpa::Error),
177 
178     /// Cannot create virtio-vsock device
179     CreateVirtioVsock(io::Error),
180 
181     /// Cannot create tpm device
182     CreateTpmDevice(anyhow::Error),
183 
184     /// Failed to convert Path to &str for the vDPA device.
185     CreateVdpaConvertPath,
186 
187     /// Failed to convert Path to &str for the virtio-vsock device.
188     CreateVsockConvertPath,
189 
190     /// Cannot create virtio-vsock backend
191     CreateVsockBackend(virtio_devices::vsock::VsockUnixError),
192 
193     /// Cannot create virtio-iommu device
194     CreateVirtioIommu(io::Error),
195 
196     /// Cannot create virtio-balloon device
197     CreateVirtioBalloon(io::Error),
198 
199     /// Cannot create virtio-watchdog device
200     CreateVirtioWatchdog(io::Error),
201 
202     /// Failed to parse disk image format
203     DetectImageType(io::Error),
204 
205     /// Cannot open qcow disk path
206     QcowDeviceCreate(qcow::Error),
207 
208     /// Cannot create serial manager
209     CreateSerialManager(SerialManagerError),
210 
211     /// Cannot spawn the serial manager thread
212     SpawnSerialManager(SerialManagerError),
213 
214     /// Cannot open tap interface
215     OpenTap(net_util::TapError),
216 
217     /// Cannot allocate IRQ.
218     AllocateIrq,
219 
220     /// Cannot configure the IRQ.
221     Irq(vmm_sys_util::errno::Error),
222 
223     /// Cannot allocate PCI BARs
224     AllocateBars(pci::PciDeviceError),
225 
226     /// Could not free the BARs associated with a PCI device.
227     FreePciBars(pci::PciDeviceError),
228 
229     /// Cannot register ioevent.
230     RegisterIoevent(anyhow::Error),
231 
232     /// Cannot unregister ioevent.
233     UnRegisterIoevent(anyhow::Error),
234 
235     /// Cannot create virtio device
236     VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),
237 
238     /// Cannot add PCI device
239     AddPciDevice(pci::PciRootError),
240 
241     /// Cannot open persistent memory file
242     PmemFileOpen(io::Error),
243 
244     /// Cannot set persistent memory file size
245     PmemFileSetLen(io::Error),
246 
247     /// Cannot find a memory range for persistent memory
248     PmemRangeAllocation,
249 
250     /// Cannot find a memory range for virtio-fs
251     FsRangeAllocation,
252 
253     /// Error creating serial output file
254     SerialOutputFileOpen(io::Error),
255 
256     #[cfg(target_arch = "x86_64")]
257     /// Error creating debug-console output file
258     DebugconOutputFileOpen(io::Error),
259 
260     /// Error creating console output file
261     ConsoleOutputFileOpen(io::Error),
262 
263     /// Error creating serial pty
264     SerialPtyOpen(io::Error),
265 
266     /// Error creating console pty
267     ConsolePtyOpen(io::Error),
268 
269     /// Error creating debug-console pty
270     DebugconPtyOpen(io::Error),
271 
272     /// Error setting pty raw mode
273     SetPtyRaw(vmm_sys_util::errno::Error),
274 
275     /// Error getting pty peer
276     GetPtyPeer(vmm_sys_util::errno::Error),
277 
278     /// Cannot create a VFIO device
279     VfioCreate(vfio_ioctls::VfioError),
280 
281     /// Cannot create a VFIO PCI device
282     VfioPciCreate(pci::VfioPciError),
283 
284     /// Failed to map VFIO MMIO region.
285     VfioMapRegion(pci::VfioPciError),
286 
287     /// Failed to DMA map VFIO device.
288     VfioDmaMap(vfio_ioctls::VfioError),
289 
290     /// Failed to DMA unmap VFIO device.
291     VfioDmaUnmap(pci::VfioPciError),
292 
293     /// Failed to create the passthrough device.
294     CreatePassthroughDevice(anyhow::Error),
295 
296     /// Failed to memory map.
297     Mmap(io::Error),
298 
299     /// Cannot add legacy device to Bus.
300     BusError(vm_device::BusError),
301 
302     /// Failed to allocate IO port
303     AllocateIoPort,
304 
305     /// Failed to allocate MMIO address
306     AllocateMmioAddress,
307 
308     /// Failed to make hotplug notification
309     HotPlugNotification(io::Error),
310 
311     /// Error from a memory manager operation
312     MemoryManager(MemoryManagerError),
313 
314     /// Failed to create new interrupt source group.
315     CreateInterruptGroup(io::Error),
316 
317     /// Failed to update interrupt source group.
318     UpdateInterruptGroup(io::Error),
319 
320     /// Failed to create interrupt controller.
321     CreateInterruptController(interrupt_controller::Error),
322 
323     /// Failed to create a new MmapRegion instance.
324     NewMmapRegion(vm_memory::mmap::MmapRegionError),
325 
326     /// Failed to clone a File.
327     CloneFile(io::Error),
328 
329     /// Failed to create socket file
330     CreateSocketFile(io::Error),
331 
332     /// Failed to spawn the network backend
333     SpawnNetBackend(io::Error),
334 
335     /// Failed to spawn the block backend
336     SpawnBlockBackend(io::Error),
337 
338     /// Missing PCI bus.
339     NoPciBus,
340 
341     /// Could not find an available device name.
342     NoAvailableDeviceName,
343 
344     /// Missing PCI device.
345     MissingPciDevice,
346 
347     /// Failed to remove a PCI device from the PCI bus.
348     RemoveDeviceFromPciBus(pci::PciRootError),
349 
350     /// Failed to remove a bus device from the IO bus.
351     RemoveDeviceFromIoBus(vm_device::BusError),
352 
353     /// Failed to remove a bus device from the MMIO bus.
354     RemoveDeviceFromMmioBus(vm_device::BusError),
355 
356     /// Failed to find the device corresponding to a specific PCI b/d/f.
357     UnknownPciBdf(u32),
358 
359     /// Not allowed to remove this type of device from the VM.
360     RemovalNotAllowed(vm_virtio::VirtioDeviceType),
361 
362     /// Failed to find device corresponding to the given identifier.
363     UnknownDeviceId(String),
364 
365     /// Failed to find an available PCI device ID.
366     NextPciDeviceId(pci::PciRootError),
367 
368     /// Could not reserve the PCI device ID.
369     GetPciDeviceId(pci::PciRootError),
370 
371     /// Could not give the PCI device ID back.
372     PutPciDeviceId(pci::PciRootError),
373 
374     /// No disk path was specified when one was expected
375     NoDiskPath,
376 
377     /// Failed to update guest memory for virtio device.
378     UpdateMemoryForVirtioDevice(virtio_devices::Error),
379 
380     /// Cannot create virtio-mem device
381     CreateVirtioMem(io::Error),
382 
383     /// Cannot find a memory range for virtio-mem memory
384     VirtioMemRangeAllocation,
385 
386     /// Failed to update guest memory for VFIO PCI device.
387     UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),
388 
389     /// Trying to use a directory for pmem but no size specified
390     PmemWithDirectorySizeMissing,
391 
392     /// Trying to use a size that is not multiple of 2MiB
393     PmemSizeNotAligned,
394 
395     /// Could not find the node in the device tree.
396     MissingNode,
397 
398     /// Resource was already found.
399     ResourceAlreadyExists,
400 
401     /// Expected resources for virtio-pmem could not be found.
402     MissingVirtioPmemResources,
403 
404     /// Missing PCI b/d/f from the DeviceNode.
405     MissingDeviceNodePciBdf,
406 
407     /// No support for device passthrough
408     NoDevicePassthroughSupport,
409 
410     /// No socket option support for console device
411     NoSocketOptionSupportForConsoleDevice,
412 
413     /// Failed to resize virtio-balloon
414     VirtioBalloonResize(virtio_devices::balloon::Error),
415 
416     /// Missing virtio-balloon, can't proceed as expected.
417     MissingVirtioBalloon,
418 
419     /// Missing virtual IOMMU device
420     MissingVirtualIommu,
421 
422     /// Failed to do power button notification
423     PowerButtonNotification(io::Error),
424 
425     /// Failed to do AArch64 GPIO power button notification
426     #[cfg(target_arch = "aarch64")]
427     AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),
428 
429     /// Failed to set the O_DIRECT flag on the file descriptor
430     SetDirectIo,
431 
432     /// Failed to create FixedVhdDiskAsync
433     CreateFixedVhdDiskAsync(io::Error),
434 
435     /// Failed to create FixedVhdDiskSync
436     CreateFixedVhdDiskSync(io::Error),
437 
438     /// Failed to create QcowDiskSync
439     CreateQcowDiskSync(qcow::Error),
440 
441     /// Failed to create FixedVhdxDiskSync
442     CreateFixedVhdxDiskSync(vhdx::VhdxError),
443 
444     /// Failed to add DMA mapping handler to virtio-mem device.
445     AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
446 
447     /// Failed to remove DMA mapping handler from virtio-mem device.
448     RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
449 
450     /// Failed to create vfio-user client
451     VfioUserCreateClient(vfio_user::Error),
452 
453     /// Failed to create VFIO user device
454     VfioUserCreate(VfioUserPciDeviceError),
455 
456     /// Failed to map region from VFIO user device into guest
457     VfioUserMapRegion(VfioUserPciDeviceError),
458 
459     /// Failed to DMA map VFIO user device.
460     VfioUserDmaMap(VfioUserPciDeviceError),
461 
462     /// Failed to DMA unmap VFIO user device.
463     VfioUserDmaUnmap(VfioUserPciDeviceError),
464 
465     /// Failed to update memory mappings for VFIO user device
466     UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),
467 
468     /// Cannot duplicate file descriptor
469     DupFd(vmm_sys_util::errno::Error),
470 
471     /// Failed to DMA map virtio device.
472     VirtioDmaMap(std::io::Error),
473 
474     /// Failed to DMA unmap virtio device.
475     VirtioDmaUnmap(std::io::Error),
476 
477     /// Cannot hotplug device behind vIOMMU
478     InvalidIommuHotplug,
479 
480     /// Invalid identifier as it is not unique.
481     IdentifierNotUnique(String),
482 
483     /// Invalid identifier
484     InvalidIdentifier(String),
485 
486     /// Error activating virtio device
487     VirtioActivate(ActivateError),
488 
489     /// Failed retrieving device state from snapshot
490     RestoreGetState(MigratableError),
491 
492     /// Cannot create a PvPanic device
493     PvPanicCreate(devices::pvpanic::PvPanicError),
494 
495     /// Cannot create a RateLimiterGroup
496     RateLimiterGroupCreate(rate_limiter::group::Error),
497 }
498 
499 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;
500 
501 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;
502 
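// ioctls used by create_pty() below: TIOCSPTLCK sets/clears the pty lock
// (create_pty() passes 0 to unlock the pair) and TIOCGTPEER opens the peer end
// of the pty and returns a new file descriptor for it (Linux 4.13+). The
// numeric values are the asm-generic Linux encodings.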
503 const TIOCSPTLCK: libc::c_int = 0x4004_5431;
504 const TIOCGTPEER: libc::c_int = 0x5441;
505 
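/// Open a new pseudo-terminal and return the main end, the sub (peer) end and
/// the filesystem path of the sub end.
///
/// Rough usage sketch (illustrative only, not lifted from a caller in this
/// file): `let (main, _sub, path) = create_pty()?;` followed by
/// `PtyPair { main, path }`, with the sub end handed to the emulated serial or
/// console device as its output.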
506 pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
507     // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
508     // This is done to try and use the devpts filesystem that
509     // could be available for use in the process's namespace first.
510     // Ideally these are all the same file though but different
511     // kernels could have things setup differently.
512     // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
513     // for further details.
514 
515     let custom_flags = libc::O_NONBLOCK;
516     let main = match OpenOptions::new()
517         .read(true)
518         .write(true)
519         .custom_flags(custom_flags)
520         .open("/dev/pts/ptmx")
521     {
522         Ok(f) => f,
523         _ => OpenOptions::new()
524             .read(true)
525             .write(true)
526             .custom_flags(custom_flags)
527             .open("/dev/ptmx")?,
528     };
529     let mut unlock: libc::c_ulong = 0;
530     // SAFETY: FFI call into libc, trivially safe
531     unsafe { libc::ioctl(main.as_raw_fd(), TIOCSPTLCK as _, &mut unlock) };
532 
533     // SAFETY: FFI call into libc, trivially safe
534     let sub_fd = unsafe {
535         libc::ioctl(
536             main.as_raw_fd(),
537             TIOCGTPEER as _,
538             libc::O_NOCTTY | libc::O_RDWR,
539         )
540     };
541     if sub_fd == -1 {
542         return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
543     }
544 
545     let proc_path = PathBuf::from(format!("/proc/self/fd/{sub_fd}"));
546     let path = read_link(proc_path)?;
547 
548     // SAFETY: sub_fd is checked to be valid before being wrapped in File
549     Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
550 }
551 
552 #[derive(Default)]
553 pub struct Console {
554     console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
555 }
556 
557 impl Console {
558     pub fn need_resize(&self) -> bool {
559         if let Some(_resizer) = self.console_resizer.as_ref() {
560             return true;
561         }
562 
563         false
564     }
565 
566     pub fn update_console_size(&self) {
567         if let Some(resizer) = self.console_resizer.as_ref() {
568             resizer.update_console_size()
569         }
570     }
571 }
572 
573 pub(crate) struct AddressManager {
574     pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
575     #[cfg(target_arch = "x86_64")]
576     pub(crate) io_bus: Arc<Bus>,
577     pub(crate) mmio_bus: Arc<Bus>,
578     pub(crate) vm: Arc<dyn hypervisor::Vm>,
579     device_tree: Arc<Mutex<DeviceTree>>,
580     pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
581     pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
582 }
583 
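// Moving a PCI BAR involves several steps, all handled here: free the old
// range from the matching allocator (PIO or one of the per-segment MMIO
// allocators), allocate the new range, update the corresponding bus mapping,
// patch the PciBar resource recorded in the device tree and, for virtio-pci
// devices, re-register ioeventfds or remap shared memory regions at the new
// base address.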
584 impl DeviceRelocation for AddressManager {
585     fn move_bar(
586         &self,
587         old_base: u64,
588         new_base: u64,
589         len: u64,
590         pci_dev: &mut dyn PciDevice,
591         region_type: PciBarRegionType,
592     ) -> std::result::Result<(), std::io::Error> {
593         match region_type {
594             PciBarRegionType::IoRegion => {
595                 #[cfg(target_arch = "x86_64")]
596                 {
597                     // Update system allocator
598                     self.allocator
599                         .lock()
600                         .unwrap()
601                         .free_io_addresses(GuestAddress(old_base), len as GuestUsize);
602 
603                     self.allocator
604                         .lock()
605                         .unwrap()
606                         .allocate_io_addresses(
607                             Some(GuestAddress(new_base)),
608                             len as GuestUsize,
609                             None,
610                         )
611                         .ok_or_else(|| {
612                             io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
613                         })?;
614 
615                     // Update PIO bus
616                     self.io_bus
617                         .update_range(old_base, len, new_base, len)
618                         .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
619                 }
620                 #[cfg(target_arch = "aarch64")]
621                 error!("I/O region is not supported");
622             }
623             PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
624                 let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
625                     &self.pci_mmio32_allocators
626                 } else {
627                     &self.pci_mmio64_allocators
628                 };
629 
630                 // Find the specific allocator that this BAR was allocated from and use it for the new one
631                 for allocator in allocators {
632                     let allocator_base = allocator.lock().unwrap().base();
633                     let allocator_end = allocator.lock().unwrap().end();
634 
635                     if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
636                         allocator
637                             .lock()
638                             .unwrap()
639                             .free(GuestAddress(old_base), len as GuestUsize);
640 
641                         allocator
642                             .lock()
643                             .unwrap()
644                             .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
645                             .ok_or_else(|| {
646                                 io::Error::new(
647                                     io::ErrorKind::Other,
648                                     "failed allocating new MMIO range",
649                                 )
650                             })?;
651 
652                         break;
653                     }
654                 }
655 
656                 // Update MMIO bus
657                 self.mmio_bus
658                     .update_range(old_base, len, new_base, len)
659                     .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
660             }
661         }
662 
663         // Update the device_tree resources associated with the device
664         if let Some(id) = pci_dev.id() {
665             if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
666                 let mut resource_updated = false;
667                 for resource in node.resources.iter_mut() {
668                     if let Resource::PciBar { base, type_, .. } = resource {
669                         if PciBarRegionType::from(*type_) == region_type && *base == old_base {
670                             *base = new_base;
671                             resource_updated = true;
672                             break;
673                         }
674                     }
675                 }
676 
677                 if !resource_updated {
678                     return Err(io::Error::new(
679                         io::ErrorKind::Other,
680                         format!(
681                             "Couldn't find a resource with base 0x{old_base:x} for device {id}"
682                         ),
683                     ));
684                 }
685             } else {
686                 return Err(io::Error::new(
687                     io::ErrorKind::Other,
688                     format!("Couldn't find device {id} from device tree"),
689                 ));
690             }
691         }
692 
693         let any_dev = pci_dev.as_any();
694         if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
695             let bar_addr = virtio_pci_dev.config_bar_addr();
696             if bar_addr == new_base {
697                 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
698                     let io_addr = IoEventAddress::Mmio(addr);
699                     self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
700                         io::Error::new(
701                             io::ErrorKind::Other,
702                             format!("failed to unregister ioevent: {e:?}"),
703                         )
704                     })?;
705                 }
706                 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
707                     let io_addr = IoEventAddress::Mmio(addr);
708                     self.vm
709                         .register_ioevent(event, &io_addr, None)
710                         .map_err(|e| {
711                             io::Error::new(
712                                 io::ErrorKind::Other,
713                                 format!("failed to register ioevent: {e:?}"),
714                             )
715                         })?;
716                 }
717             } else {
718                 let virtio_dev = virtio_pci_dev.virtio_device();
719                 let mut virtio_dev = virtio_dev.lock().unwrap();
720                 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
721                     if shm_regions.addr.raw_value() == old_base {
722                         let mem_region = self.vm.make_user_memory_region(
723                             shm_regions.mem_slot,
724                             old_base,
725                             shm_regions.len,
726                             shm_regions.host_addr,
727                             false,
728                             false,
729                         );
730 
731                         self.vm.remove_user_memory_region(mem_region).map_err(|e| {
732                             io::Error::new(
733                                 io::ErrorKind::Other,
734                                 format!("failed to remove user memory region: {e:?}"),
735                             )
736                         })?;
737 
738                         // Create the new mapping by inserting the new region into KVM.
739                         let mem_region = self.vm.make_user_memory_region(
740                             shm_regions.mem_slot,
741                             new_base,
742                             shm_regions.len,
743                             shm_regions.host_addr,
744                             false,
745                             false,
746                         );
747 
748                         self.vm.create_user_memory_region(mem_region).map_err(|e| {
749                             io::Error::new(
750                                 io::ErrorKind::Other,
751                                 format!("failed to create user memory regions: {e:?}"),
752                             )
753                         })?;
754 
755                         // Update shared memory regions to reflect the new mapping.
756                         shm_regions.addr = GuestAddress(new_base);
757                         virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
758                             io::Error::new(
759                                 io::ErrorKind::Other,
760                                 format!("failed to update shared memory regions: {e:?}"),
761                             )
762                         })?;
763                     }
764                 }
765             }
766         }
767 
768         pci_dev.move_bar(old_base, new_base)
769     }
770 }
771 
772 #[derive(Serialize, Deserialize)]
773 struct DeviceManagerState {
774     device_tree: DeviceTree,
775     device_id_cnt: Wrapping<usize>,
776 }
777 
778 #[derive(Debug)]
779 pub struct PtyPair {
780     pub main: File,
781     pub path: PathBuf,
782 }
783 
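// File does not implement Clone, so PtyPair is cloned by duplicating the
// underlying file descriptor with try_clone().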
784 impl Clone for PtyPair {
785     fn clone(&self) -> Self {
786         PtyPair {
787             main: self.main.try_clone().unwrap(),
788             path: self.path.clone(),
789         }
790     }
791 }
792 
793 #[derive(Clone)]
794 pub enum PciDeviceHandle {
795     Vfio(Arc<Mutex<VfioPciDevice>>),
796     Virtio(Arc<Mutex<VirtioPciDevice>>),
797     VfioUser(Arc<Mutex<VfioUserPciDevice>>),
798 }
799 
800 #[derive(Clone)]
801 struct MetaVirtioDevice {
802     virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
803     iommu: bool,
804     id: String,
805     pci_segment: u16,
806     dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
807 }
808 
809 #[derive(Default)]
810 pub struct AcpiPlatformAddresses {
811     pub pm_timer_address: Option<GenericAddress>,
812     pub reset_reg_address: Option<GenericAddress>,
813     pub sleep_control_reg_address: Option<GenericAddress>,
814     pub sleep_status_reg_address: Option<GenericAddress>,
815 }
816 
817 pub struct DeviceManager {
818     // The underlying hypervisor
819     hypervisor_type: HypervisorType,
820 
821     // Manage address space related to devices
822     address_manager: Arc<AddressManager>,
823 
824     // Console abstraction
825     console: Arc<Console>,
826 
827     // console PTY
828     console_pty: Option<Arc<Mutex<PtyPair>>>,
829 
830     // serial PTY
831     serial_pty: Option<Arc<Mutex<PtyPair>>>,
832 
833     // debug-console PTY
834     debug_console_pty: Option<Arc<Mutex<PtyPair>>>,
835 
836     // Serial Manager
837     serial_manager: Option<Arc<SerialManager>>,
838 
839     // pty foreground status (console resize notifications)
840     console_resize_pipe: Option<Arc<File>>,
841 
842     // To restore on exit.
843     original_termios_opt: Arc<Mutex<Option<termios>>>,
844 
845     // Interrupt controller
846     #[cfg(target_arch = "x86_64")]
847     interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
848     #[cfg(target_arch = "aarch64")]
849     interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,
850 
851     // Things to be added to the commandline (e.g. aarch64 early console)
852     #[cfg(target_arch = "aarch64")]
853     cmdline_additions: Vec<String>,
854 
855     // ACPI GED notification device
856     ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,
857 
858     // VM configuration
859     config: Arc<Mutex<VmConfig>>,
860 
861     // Memory Manager
862     memory_manager: Arc<Mutex<MemoryManager>>,
863 
864     // CPU Manager
865     cpu_manager: Arc<Mutex<CpuManager>>,
866 
867     // The virtio devices on the system
868     virtio_devices: Vec<MetaVirtioDevice>,
869 
870     // List of bus devices
871     // Let the DeviceManager keep strong references to the BusDevice devices.
872     // This allows the IO and MMIO buses to be provided with Weak references,
873     // which prevents cyclic dependencies.
874     bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,
875 
876     // Counter to keep track of the consumed device IDs.
877     device_id_cnt: Wrapping<usize>,
878 
879     pci_segments: Vec<PciSegment>,
880 
881     #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
882     // MSI Interrupt Manager
883     msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
884 
885     #[cfg_attr(feature = "mshv", allow(dead_code))]
886     // Legacy Interrupt Manager
887     legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,
888 
889     // Passthrough device handle
890     passthrough_device: Option<VfioDeviceFd>,
891 
892     // VFIO container
893     // Only one container can be created, therefore it is stored as part of the
894     // DeviceManager to be reused.
895     vfio_container: Option<Arc<VfioContainer>>,
896 
897     // Paravirtualized IOMMU
898     iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
899     iommu_mapping: Option<Arc<IommuMapping>>,
900 
901     // PCI information about devices attached to the paravirtualized IOMMU
902     // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
903     // representing the devices attached to the virtual IOMMU. This is useful
904     // information for filling the ACPI VIOT table.
905     iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,
906 
907     // Tree of devices, representing the dependencies between devices.
908     // Useful for introspection, snapshot and restore.
909     device_tree: Arc<Mutex<DeviceTree>>,
910 
911     // Exit event
912     exit_evt: EventFd,
913     reset_evt: EventFd,
914 
915     #[cfg(target_arch = "aarch64")]
916     id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,
917 
918     // seccomp action
919     seccomp_action: SeccompAction,
920 
921     // List of guest NUMA nodes.
922     numa_nodes: NumaNodes,
923 
924     // Possible handle to the virtio-balloon device
925     balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,
926 
927     // Virtio Device activation EventFd to allow the VMM thread to trigger device
928     // activation and thus start the threads from the VMM thread
929     activate_evt: EventFd,
930 
931     acpi_address: GuestAddress,
932 
933     selected_segment: usize,
934 
935     // Possible handles to the virtio-mem devices
936     virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,
937 
938     #[cfg(target_arch = "aarch64")]
939     // GPIO device for AArch64
940     gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,
941 
942     // pvpanic device
943     pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,
944 
945     // Flag to force setting the iommu on virtio devices
946     force_iommu: bool,
947 
948     // io_uring availability if detected
949     io_uring_supported: Option<bool>,
950 
951     // aio availability if detected
952     aio_supported: Option<bool>,
953 
954     // List of unique identifiers provided at boot through the configuration.
955     boot_id_list: BTreeSet<String>,
956 
957     // Start time of the VM
958     timestamp: Instant,
959 
960     // Pending activations
961     pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,
962 
963     // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
964     acpi_platform_addresses: AcpiPlatformAddresses,
965 
966     snapshot: Option<Snapshot>,
967 
968     rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,
969 
970     mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
971 }
972 
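// Split the [start, end] range into one aligned aperture per PCI segment,
// sized proportionally to the per-segment weights.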
973 fn create_mmio_allocators(
974     start: u64,
975     end: u64,
976     num_pci_segments: u16,
977     weights: Vec<u32>,
978     alignment: u64,
979 ) -> Vec<Arc<Mutex<AddressAllocator>>> {
980     let total_weight: u32 = weights.iter().sum();
981 
982     // Start each PCI segment mmio range on an aligned boundary
983     let pci_segment_mmio_size = (end - start + 1) / (alignment * total_weight as u64) * alignment;
984 
985     let mut mmio_allocators = vec![];
986     let mut i = 0;
987     for segment_id in 0..num_pci_segments as u64 {
988         let weight = weights[segment_id as usize] as u64;
989         let mmio_start = start + i * pci_segment_mmio_size;
990         let mmio_size = pci_segment_mmio_size * weight;
991         let allocator = Arc::new(Mutex::new(
992             AddressAllocator::new(GuestAddress(mmio_start), mmio_size).unwrap(),
993         ));
994         mmio_allocators.push(allocator);
995         i += weight;
996     }
997 
998     mmio_allocators
999 }
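// Illustrative sketch (not taken from the project's test suite) of how the
// weighted split above behaves: a 1 GiB range shared by two segments with
// weights 1 and 3 yields a 256 MiB aperture followed by a 768 MiB one.
#[cfg(test)]
mod create_mmio_allocators_example {
    use super::*;

    #[test]
    fn weighted_split() {
        // 1 GiB range starting at 0x1000_0000, two segments weighted 1:3,
        // 4 KiB alignment.
        let allocators =
            create_mmio_allocators(0x1000_0000, 0x4FFF_FFFF, 2, vec![1, 3], 0x1000);
        assert_eq!(allocators.len(), 2);
        // Unit size is (0x4000_0000 / (0x1000 * 4)) * 0x1000 = 0x1000_0000.
        assert_eq!(
            allocators[0].lock().unwrap().base(),
            GuestAddress(0x1000_0000)
        );
        assert_eq!(
            allocators[1].lock().unwrap().base(),
            GuestAddress(0x2000_0000)
        );
    }
}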
1000 
1001 impl DeviceManager {
1002     #[allow(clippy::too_many_arguments)]
1003     pub fn new(
1004         #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
1005         mmio_bus: Arc<Bus>,
1006         hypervisor_type: HypervisorType,
1007         vm: Arc<dyn hypervisor::Vm>,
1008         config: Arc<Mutex<VmConfig>>,
1009         memory_manager: Arc<Mutex<MemoryManager>>,
1010         cpu_manager: Arc<Mutex<CpuManager>>,
1011         exit_evt: EventFd,
1012         reset_evt: EventFd,
1013         seccomp_action: SeccompAction,
1014         numa_nodes: NumaNodes,
1015         activate_evt: &EventFd,
1016         force_iommu: bool,
1017         boot_id_list: BTreeSet<String>,
1018         timestamp: Instant,
1019         snapshot: Option<Snapshot>,
1020         dynamic: bool,
1021     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
1022         trace_scoped!("DeviceManager::new");
1023 
1024         let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
1025             let state: DeviceManagerState = snapshot.to_state().unwrap();
1026             (
1027                 Arc::new(Mutex::new(state.device_tree.clone())),
1028                 state.device_id_cnt,
1029             )
1030         } else {
1031             (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
1032         };
1033 
1034         let num_pci_segments =
1035             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
1036                 platform_config.num_pci_segments
1037             } else {
1038                 1
1039             };
1040 
1041         let mut mmio32_aperture_weights: Vec<u32> =
1042             std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
1043                 .take(num_pci_segments.into())
1044                 .collect();
1045         if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
1046             for pci_segment in pci_segments.iter() {
1047                 mmio32_aperture_weights[pci_segment.pci_segment as usize] =
1048                     pci_segment.mmio32_aperture_weight
1049             }
1050         }
1051 
1052         let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0;
1053         let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE;
1054         let pci_mmio32_allocators = create_mmio_allocators(
1055             start_of_mmio32_area,
1056             end_of_mmio32_area,
1057             num_pci_segments,
1058             mmio32_aperture_weights,
1059             4 << 10,
1060         );
1061 
1062         let mut mmio64_aperture_weights: Vec<u32> =
1063             std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
1064                 .take(num_pci_segments.into())
1065                 .collect();
1066         if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
1067             for pci_segment in pci_segments.iter() {
1068                 mmio64_aperture_weights[pci_segment.pci_segment as usize] =
1069                     pci_segment.mmio64_aperture_weight
1070             }
1071         }
1072 
1073         let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0;
1074         let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0;
1075         let pci_mmio64_allocators = create_mmio_allocators(
1076             start_of_mmio64_area,
1077             end_of_mmio64_area,
1078             num_pci_segments,
1079             mmio64_aperture_weights,
1080             4 << 30,
1081         );
1082 
1083         let address_manager = Arc::new(AddressManager {
1084             allocator: memory_manager.lock().unwrap().allocator(),
1085             #[cfg(target_arch = "x86_64")]
1086             io_bus,
1087             mmio_bus,
1088             vm: vm.clone(),
1089             device_tree: Arc::clone(&device_tree),
1090             pci_mmio32_allocators,
1091             pci_mmio64_allocators,
1092         });
1093 
1094         // First we create the MSI interrupt manager; the legacy one is created
1095         // later, after the IOAPIC device creation.
1096         // The reason we create the MSI one first is that the IOAPIC needs it,
1097         // and the legacy interrupt manager in turn needs the IOAPIC. So we're
1098         // handling a linear dependency chain:
1099         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1100         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1101             Arc::new(MsiInterruptManager::new(
1102                 Arc::clone(&address_manager.allocator),
1103                 vm,
1104             ));
1105 
1106         let acpi_address = address_manager
1107             .allocator
1108             .lock()
1109             .unwrap()
1110             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1111             .ok_or(DeviceManagerError::AllocateIoPort)?;
1112 
1113         let mut pci_irq_slots = [0; 32];
1114         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1115             &address_manager,
1116             &mut pci_irq_slots,
1117         )?;
1118 
1119         let mut pci_segments = vec![PciSegment::new_default_segment(
1120             &address_manager,
1121             Arc::clone(&address_manager.pci_mmio32_allocators[0]),
1122             Arc::clone(&address_manager.pci_mmio64_allocators[0]),
1123             &pci_irq_slots,
1124         )?];
1125 
1126         for i in 1..num_pci_segments as usize {
1127             pci_segments.push(PciSegment::new(
1128                 i as u16,
1129                 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
1130                 &address_manager,
1131                 Arc::clone(&address_manager.pci_mmio32_allocators[i]),
1132                 Arc::clone(&address_manager.pci_mmio64_allocators[i]),
1133                 &pci_irq_slots,
1134             )?);
1135         }
1136 
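        // With dynamic behaviour enabled, the CpuManager is itself exposed on the
        // MMIO bus at a dedicated ACPI address (presumably so the guest's ACPI
        // methods can drive CPU hotplug through it).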
1137         if dynamic {
1138             let acpi_address = address_manager
1139                 .allocator
1140                 .lock()
1141                 .unwrap()
1142                 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
1143                 .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1144 
1145             address_manager
1146                 .mmio_bus
1147                 .insert(
1148                     cpu_manager.clone(),
1149                     acpi_address.0,
1150                     CPU_MANAGER_ACPI_SIZE as u64,
1151                 )
1152                 .map_err(DeviceManagerError::BusError)?;
1153 
1154             cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
1155         }
1156 
1157         let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new();
1158         if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() {
1159             for rate_limit_group_cfg in rate_limit_groups_cfg {
1160                 let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config;
1161                 let bw = rate_limit_cfg.bandwidth.unwrap_or_default();
1162                 let ops = rate_limit_cfg.ops.unwrap_or_default();
1163                 let mut rate_limit_group = RateLimiterGroup::new(
1164                     &rate_limit_group_cfg.id,
1165                     bw.size,
1166                     bw.one_time_burst.unwrap_or(0),
1167                     bw.refill_time,
1168                     ops.size,
1169                     ops.one_time_burst.unwrap_or(0),
1170                     ops.refill_time,
1171                 )
1172                 .map_err(DeviceManagerError::RateLimiterGroupCreate)?;
1173 
1174                 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?;
1175 
1176                 rate_limit_group.start_thread(exit_evt).unwrap();
1177                 rate_limit_groups
1178                     .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group));
1179             }
1180         }
1181 
1182         let device_manager = DeviceManager {
1183             hypervisor_type,
1184             address_manager: Arc::clone(&address_manager),
1185             console: Arc::new(Console::default()),
1186             interrupt_controller: None,
1187             #[cfg(target_arch = "aarch64")]
1188             cmdline_additions: Vec::new(),
1189             ged_notification_device: None,
1190             config,
1191             memory_manager,
1192             cpu_manager,
1193             virtio_devices: Vec::new(),
1194             bus_devices: Vec::new(),
1195             device_id_cnt,
1196             msi_interrupt_manager,
1197             legacy_interrupt_manager: None,
1198             passthrough_device: None,
1199             vfio_container: None,
1200             iommu_device: None,
1201             iommu_mapping: None,
1202             iommu_attached_devices: None,
1203             pci_segments,
1204             device_tree,
1205             exit_evt,
1206             reset_evt,
1207             #[cfg(target_arch = "aarch64")]
1208             id_to_dev_info: HashMap::new(),
1209             seccomp_action,
1210             numa_nodes,
1211             balloon: None,
1212             activate_evt: activate_evt
1213                 .try_clone()
1214                 .map_err(DeviceManagerError::EventFd)?,
1215             acpi_address,
1216             selected_segment: 0,
1217             serial_pty: None,
1218             serial_manager: None,
1219             console_pty: None,
1220             debug_console_pty: None,
1221             console_resize_pipe: None,
1222             original_termios_opt: Arc::new(Mutex::new(None)),
1223             virtio_mem_devices: Vec::new(),
1224             #[cfg(target_arch = "aarch64")]
1225             gpio_device: None,
1226             pvpanic_device: None,
1227             force_iommu,
1228             io_uring_supported: None,
1229             aio_supported: None,
1230             boot_id_list,
1231             timestamp,
1232             pending_activations: Arc::new(Mutex::new(Vec::default())),
1233             acpi_platform_addresses: AcpiPlatformAddresses::default(),
1234             snapshot,
1235             rate_limit_groups,
1236             mmio_regions: Arc::new(Mutex::new(Vec::new())),
1237         };
1238 
1239         let device_manager = Arc::new(Mutex::new(device_manager));
1240 
1241         address_manager
1242             .mmio_bus
1243             .insert(
1244                 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
1245                 acpi_address.0,
1246                 DEVICE_MANAGER_ACPI_SIZE as u64,
1247             )
1248             .map_err(DeviceManagerError::BusError)?;
1249 
1250         Ok(device_manager)
1251     }
1252 
1253     pub fn serial_pty(&self) -> Option<PtyPair> {
1254         self.serial_pty
1255             .as_ref()
1256             .map(|pty| pty.lock().unwrap().clone())
1257     }
1258 
1259     pub fn console_pty(&self) -> Option<PtyPair> {
1260         self.console_pty
1261             .as_ref()
1262             .map(|pty| pty.lock().unwrap().clone())
1263     }
1264 
1265     pub fn debug_console_pty(&self) -> Option<PtyPair> {
1266         self.debug_console_pty
1267             .as_ref()
1268             .map(|pty| pty.lock().unwrap().clone())
1269     }
1270 
1271     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1272         self.console_resize_pipe.clone()
1273     }
1274 
1275     pub fn create_devices(
1276         &mut self,
1277         serial_pty: Option<PtyPair>,
1278         console_pty: Option<PtyPair>,
1279         debug_console_pty: Option<PtyPair>,
1280         console_resize_pipe: Option<File>,
1281         original_termios_opt: Arc<Mutex<Option<termios>>>,
1282     ) -> DeviceManagerResult<()> {
1283         trace_scoped!("create_devices");
1284 
1285         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1286 
1287         let interrupt_controller = self.add_interrupt_controller()?;
1288 
1289         self.cpu_manager
1290             .lock()
1291             .unwrap()
1292             .set_interrupt_controller(interrupt_controller.clone());
1293 
1294         // Now we can create the legacy interrupt manager, which needs the freshly
1295         // formed IOAPIC device.
1296         let legacy_interrupt_manager: Arc<
1297             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1298         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1299             &interrupt_controller,
1300         )));
1301 
1302         {
1303             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1304                 self.address_manager
1305                     .mmio_bus
1306                     .insert(
1307                         Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
1308                         acpi_address.0,
1309                         MEMORY_MANAGER_ACPI_SIZE as u64,
1310                     )
1311                     .map_err(DeviceManagerError::BusError)?;
1312             }
1313         }
1314 
1315         #[cfg(target_arch = "x86_64")]
1316         self.add_legacy_devices(
1317             self.reset_evt
1318                 .try_clone()
1319                 .map_err(DeviceManagerError::EventFd)?,
1320         )?;
1321 
1322         #[cfg(target_arch = "aarch64")]
1323         self.add_legacy_devices(&legacy_interrupt_manager)?;
1324 
1325         {
1326             self.ged_notification_device = self.add_acpi_devices(
1327                 &legacy_interrupt_manager,
1328                 self.reset_evt
1329                     .try_clone()
1330                     .map_err(DeviceManagerError::EventFd)?,
1331                 self.exit_evt
1332                     .try_clone()
1333                     .map_err(DeviceManagerError::EventFd)?,
1334             )?;
1335         }
1336 
1337         self.original_termios_opt = original_termios_opt;
1338 
1339         self.console = self.add_console_devices(
1340             &legacy_interrupt_manager,
1341             &mut virtio_devices,
1342             serial_pty,
1343             console_pty,
1344             debug_console_pty,
1345             console_resize_pipe,
1346         )?;
1347 
1348         if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
1349             let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
1350             self.bus_devices
1351                 .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>)
1352         }
1353         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1354 
1355         virtio_devices.append(&mut self.make_virtio_devices()?);
1356 
1357         self.add_pci_devices(virtio_devices.clone())?;
1358 
1359         self.virtio_devices = virtio_devices;
1360 
1361         if self.config.clone().lock().unwrap().pvpanic {
1362             self.pvpanic_device = self.add_pvpanic_device()?;
1363         }
1364 
1365         Ok(())
1366     }
1367 
1368     fn state(&self) -> DeviceManagerState {
1369         DeviceManagerState {
1370             device_tree: self.device_tree.lock().unwrap().clone(),
1371             device_id_cnt: self.device_id_cnt,
1372         }
1373     }
1374 
1375     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1376         #[cfg(target_arch = "aarch64")]
1377         {
1378             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1379             let vgic_config = gic::Gic::create_default_config(vcpus.into());
1380             (
1381                 vgic_config.msi_addr,
1382                 vgic_config.msi_addr + vgic_config.msi_size - 1,
1383             )
1384         }
1385         #[cfg(target_arch = "x86_64")]
1386         (0xfee0_0000, 0xfeef_ffff)
1387     }
1388 
1389     #[cfg(target_arch = "aarch64")]
1390     /// Gets information about the devices registered so far.
1391     pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
1392         &self.id_to_dev_info
1393     }
1394 
1395     #[allow(unused_variables)]
1396     fn add_pci_devices(
1397         &mut self,
1398         virtio_devices: Vec<MetaVirtioDevice>,
1399     ) -> DeviceManagerResult<()> {
1400         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1401 
1402         let iommu_device = if self.config.lock().unwrap().iommu {
1403             let (device, mapping) = virtio_devices::Iommu::new(
1404                 iommu_id.clone(),
1405                 self.seccomp_action.clone(),
1406                 self.exit_evt
1407                     .try_clone()
1408                     .map_err(DeviceManagerError::EventFd)?,
1409                 self.get_msi_iova_space(),
1410                 state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
1411                     .map_err(DeviceManagerError::RestoreGetState)?,
1412             )
1413             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1414             let device = Arc::new(Mutex::new(device));
1415             self.iommu_device = Some(Arc::clone(&device));
1416             self.iommu_mapping = Some(mapping);
1417 
1418             // Fill the device tree with a new node. In case of restore, we
1419             // know there is nothing to do, so we can simply override the
1420             // existing entry.
1421             self.device_tree
1422                 .lock()
1423                 .unwrap()
1424                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1425 
1426             Some(device)
1427         } else {
1428             None
1429         };
1430 
1431         let mut iommu_attached_devices = Vec::new();
1432         {
1433             for handle in virtio_devices {
1434                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1435                     self.iommu_mapping.clone()
1436                 } else {
1437                     None
1438                 };
1439 
1440                 let dev_id = self.add_virtio_pci_device(
1441                     handle.virtio_device,
1442                     &mapping,
1443                     handle.id,
1444                     handle.pci_segment,
1445                     handle.dma_handler,
1446                 )?;
1447 
1448                 if handle.iommu {
1449                     iommu_attached_devices.push(dev_id);
1450                 }
1451             }
1452 
1453             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1454             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1455 
1456             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1457             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1458 
1459             // Add all devices from forced iommu segments
1460             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1461                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1462                     for segment in iommu_segments {
1463                         for device in 0..32 {
1464                             let bdf = PciBdf::new(*segment, 0, device, 0);
1465                             if !iommu_attached_devices.contains(&bdf) {
1466                                 iommu_attached_devices.push(bdf);
1467                             }
1468                         }
1469                     }
1470                 }
1471             }
1472 
1473             if let Some(iommu_device) = iommu_device {
1474                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1475                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1476             }
1477         }
1478 
1479         for segment in &self.pci_segments {
1480             #[cfg(target_arch = "x86_64")]
1481             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1482                 self.bus_devices
1483                     .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
1484             }
1485 
1486             self.bus_devices
1487                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
1488         }
1489 
1490         Ok(())
1491     }
1492 
1493     #[cfg(target_arch = "aarch64")]
1494     fn add_interrupt_controller(
1495         &mut self,
1496     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
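             // Create the vGIC, sized for the number of boot vCPUs.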
1497         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1498             gic::Gic::new(
1499                 self.config.lock().unwrap().cpus.boot_vcpus,
1500                 Arc::clone(&self.msi_interrupt_manager),
1501                 self.address_manager.vm.clone(),
1502             )
1503             .map_err(DeviceManagerError::CreateInterruptController)?,
1504         ));
1505 
1506         self.interrupt_controller = Some(interrupt_controller.clone());
1507 
1508         // Restore the vGic if the VM is being restored from a snapshot
1509         let id = String::from(gic::GIC_SNAPSHOT_ID);
1510         if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
1511             // PMU support is optional. Nothing should be impacted if the PMU initialization fails.
1512             if self
1513                 .cpu_manager
1514                 .lock()
1515                 .unwrap()
1516                 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
1517                 .is_err()
1518             {
1519                 info!("Failed to initialize PMU");
1520             }
1521 
1522             let vgic_state = vgic_snapshot
1523                 .to_state()
1524                 .map_err(DeviceManagerError::RestoreGetState)?;
1525             let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
1526             interrupt_controller
1527                 .lock()
1528                 .unwrap()
1529                 .restore_vgic(vgic_state, &saved_vcpu_states)
1530                 .unwrap();
1531         }
1532 
1533         self.device_tree
1534             .lock()
1535             .unwrap()
1536             .insert(id.clone(), device_node!(id, interrupt_controller));
1537 
1538         Ok(interrupt_controller)
1539     }
1540 
1541     #[cfg(target_arch = "aarch64")]
1542     pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
1543         self.interrupt_controller.as_ref()
1544     }
1545 
1546     #[cfg(target_arch = "x86_64")]
1547     fn add_interrupt_controller(
1548         &mut self,
1549     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1550         let id = String::from(IOAPIC_DEVICE_NAME);
1551 
1552         // Create IOAPIC
1553         let interrupt_controller = Arc::new(Mutex::new(
1554             ioapic::Ioapic::new(
1555                 id.clone(),
1556                 APIC_START,
1557                 Arc::clone(&self.msi_interrupt_manager),
1558                 state_from_id(self.snapshot.as_ref(), id.as_str())
1559                     .map_err(DeviceManagerError::RestoreGetState)?,
1560             )
1561             .map_err(DeviceManagerError::CreateInterruptController)?,
1562         ));
1563 
1564         self.interrupt_controller = Some(interrupt_controller.clone());
1565 
1566         self.address_manager
1567             .mmio_bus
1568             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1569             .map_err(DeviceManagerError::BusError)?;
1570 
1571         self.bus_devices
1572             .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);
1573 
1574         // Fill the device tree with a new node. In case of restore, we
1575         // know there is nothing to do, so we can simply override the
1576         // existing entry.
1577         self.device_tree
1578             .lock()
1579             .unwrap()
1580             .insert(id.clone(), device_node!(id, interrupt_controller));
1581 
1582         Ok(interrupt_controller)
1583     }
1584 
1585     fn add_acpi_devices(
1586         &mut self,
1587         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1588         reset_evt: EventFd,
1589         exit_evt: EventFd,
1590     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1591         let vcpus_kill_signalled = self
1592             .cpu_manager
1593             .lock()
1594             .unwrap()
1595             .vcpus_kill_signalled()
1596             .clone();
1597         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1598             exit_evt,
1599             reset_evt,
1600             vcpus_kill_signalled,
1601         )));
1602 
1603         self.bus_devices
1604             .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);
1605 
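             // On x86_64 the shutdown device is reachable through a fixed I/O port,
             // which is also advertised to the guest as the ACPI sleep control,
             // sleep status and reset registers.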
1606         #[cfg(target_arch = "x86_64")]
1607         {
1608             let shutdown_pio_address: u16 = 0x600;
1609 
1610             self.address_manager
1611                 .allocator
1612                 .lock()
1613                 .unwrap()
1614                 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
1615                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1616 
1617             self.address_manager
1618                 .io_bus
1619                 .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
1620                 .map_err(DeviceManagerError::BusError)?;
1621 
1622             self.acpi_platform_addresses.sleep_control_reg_address =
1623                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1624             self.acpi_platform_addresses.sleep_status_reg_address =
1625                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1626             self.acpi_platform_addresses.reset_reg_address =
1627                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1628         }
1629 
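             // Create the ACPI GED (Generic Event Device): it gets a legacy IRQ and a
             // platform MMIO window and is used to notify the guest of ACPI events
             // such as device hotplug.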
1630         let ged_irq = self
1631             .address_manager
1632             .allocator
1633             .lock()
1634             .unwrap()
1635             .allocate_irq()
1636             .unwrap();
1637         let interrupt_group = interrupt_manager
1638             .create_group(LegacyIrqGroupConfig {
1639                 irq: ged_irq as InterruptIndex,
1640             })
1641             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1642         let ged_address = self
1643             .address_manager
1644             .allocator
1645             .lock()
1646             .unwrap()
1647             .allocate_platform_mmio_addresses(
1648                 None,
1649                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1650                 None,
1651             )
1652             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1653         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1654             interrupt_group,
1655             ged_irq,
1656             ged_address,
1657         )));
1658         self.address_manager
1659             .mmio_bus
1660             .insert(
1661                 ged_device.clone(),
1662                 ged_address.0,
1663                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1664             )
1665             .map_err(DeviceManagerError::BusError)?;
1666         self.bus_devices
1667             .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);
1668 
1669         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1670 
1671         self.bus_devices
1672             .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);
1673 
1674         #[cfg(target_arch = "x86_64")]
1675         {
1676             let pm_timer_pio_address: u16 = 0x608;
1677 
1678             self.address_manager
1679                 .allocator
1680                 .lock()
1681                 .unwrap()
1682                 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
1683                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1684 
1685             self.address_manager
1686                 .io_bus
1687                 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
1688                 .map_err(DeviceManagerError::BusError)?;
1689 
1690             self.acpi_platform_addresses.pm_timer_address =
1691                 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
1692         }
1693 
1694         Ok(Some(ged_device))
1695     }
1696 
1697     #[cfg(target_arch = "x86_64")]
1698     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1699         let vcpus_kill_signalled = self
1700             .cpu_manager
1701             .lock()
1702             .unwrap()
1703             .vcpus_kill_signalled()
1704             .clone();
1705         // Add a shutdown device (i8042)
1706         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1707             reset_evt.try_clone().unwrap(),
1708             vcpus_kill_signalled.clone(),
1709         )));
1710 
1711         self.bus_devices
1712             .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);
1713 
1714         self.address_manager
1715             .io_bus
1716             .insert(i8042, 0x61, 0x4)
1717             .map_err(DeviceManagerError::BusError)?;
1718         {
1719             // Add an emulated CMOS device
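                 // It exposes the amount of guest RAM below the 32-bit reserved
                 // region and above 4 GiB.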
1720             let mem_size = self
1721                 .memory_manager
1722                 .lock()
1723                 .unwrap()
1724                 .guest_memory()
1725                 .memory()
1726                 .last_addr()
1727                 .0
1728                 + 1;
1729             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1730             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1731 
1732             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1733                 mem_below_4g,
1734                 mem_above_4g,
1735                 reset_evt,
1736                 Some(vcpus_kill_signalled),
1737             )));
1738 
1739             self.bus_devices
1740                 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);
1741 
1742             self.address_manager
1743                 .io_bus
1744                 .insert(cmos, 0x70, 0x2)
1745                 .map_err(DeviceManagerError::BusError)?;
1746 
1747             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1748 
1749             self.bus_devices
1750                 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);
1751 
1752             self.address_manager
1753                 .io_bus
1754                 .insert(fwdebug, 0x402, 0x1)
1755                 .map_err(DeviceManagerError::BusError)?;
1756         }
1757 
1758         // 0x80 debug port
1759         let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
1760         self.bus_devices
1761             .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
1762         self.address_manager
1763             .io_bus
1764             .insert(debug_port, 0x80, 0x1)
1765             .map_err(DeviceManagerError::BusError)?;
1766 
1767         Ok(())
1768     }
1769 
1770     #[cfg(target_arch = "aarch64")]
1771     fn add_legacy_devices(
1772         &mut self,
1773         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1774     ) -> DeviceManagerResult<()> {
1775         // Add an RTC device
1776         let rtc_irq = self
1777             .address_manager
1778             .allocator
1779             .lock()
1780             .unwrap()
1781             .allocate_irq()
1782             .unwrap();
1783 
1784         let interrupt_group = interrupt_manager
1785             .create_group(LegacyIrqGroupConfig {
1786                 irq: rtc_irq as InterruptIndex,
1787             })
1788             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1789 
1790         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1791 
1792         self.bus_devices
1793             .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);
1794 
1795         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1796 
1797         self.address_manager
1798             .mmio_bus
1799             .insert(rtc_device, addr.0, MMIO_LEN)
1800             .map_err(DeviceManagerError::BusError)?;
1801 
1802         self.id_to_dev_info.insert(
1803             (DeviceType::Rtc, "rtc".to_string()),
1804             MmioDeviceInfo {
1805                 addr: addr.0,
1806                 len: MMIO_LEN,
1807                 irq: rtc_irq,
1808             },
1809         );
1810 
1811         // Add a GPIO device
1812         let id = String::from(GPIO_DEVICE_NAME);
1813         let gpio_irq = self
1814             .address_manager
1815             .allocator
1816             .lock()
1817             .unwrap()
1818             .allocate_irq()
1819             .unwrap();
1820 
1821         let interrupt_group = interrupt_manager
1822             .create_group(LegacyIrqGroupConfig {
1823                 irq: gpio_irq as InterruptIndex,
1824             })
1825             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1826 
1827         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1828             id.clone(),
1829             interrupt_group,
1830             state_from_id(self.snapshot.as_ref(), id.as_str())
1831                 .map_err(DeviceManagerError::RestoreGetState)?,
1832         )));
1833 
1834         self.bus_devices
1835             .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);
1836 
1837         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1838 
1839         self.address_manager
1840             .mmio_bus
1841             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1842             .map_err(DeviceManagerError::BusError)?;
1843 
1844         self.gpio_device = Some(gpio_device.clone());
1845 
1846         self.id_to_dev_info.insert(
1847             (DeviceType::Gpio, "gpio".to_string()),
1848             MmioDeviceInfo {
1849                 addr: addr.0,
1850                 len: MMIO_LEN,
1851                 irq: gpio_irq,
1852             },
1853         );
1854 
1855         self.device_tree
1856             .lock()
1857             .unwrap()
1858             .insert(id.clone(), device_node!(id, gpio_device));
1859 
1860         Ok(())
1861     }
1862 
1863     #[cfg(target_arch = "x86_64")]
1864     fn add_debug_console_device(
1865         &mut self,
1866         debug_console_writer: Box<dyn io::Write + Send>,
1867     ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> {
1868         let id = String::from(DEBUGCON_DEVICE_NAME);
1869         let debug_console = Arc::new(Mutex::new(DebugConsole::new(
1870             id.clone(),
1871             debug_console_writer,
1872         )));
1873 
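             // Use the I/O port from the debug-console configuration when provided,
             // otherwise fall back to the default debugcon port.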
1874         let port = self
1875             .config
1876             .lock()
1877             .unwrap()
1878             .debug_console
1879             .clone()
1880             .iobase
1881             .map(|port| port as u64)
1882             .unwrap_or(debug_console::DEFAULT_PORT);
1883 
1884         self.bus_devices
1885             .push(Arc::clone(&debug_console) as Arc<Mutex<dyn BusDevice>>);
1886 
1887         self.address_manager
1888             .allocator
1889             .lock()
1890             .unwrap()
1891             .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None)
1892             .ok_or(DeviceManagerError::AllocateIoPort)?;
1893 
1894         self.address_manager
1895             .io_bus
1896             .insert(debug_console.clone(), port, 0x1)
1897             .map_err(DeviceManagerError::BusError)?;
1898 
1899         // Fill the device tree with a new node. In case of restore, we
1900         // know there is nothing to do, so we can simply override the
1901         // existing entry.
1902         self.device_tree
1903             .lock()
1904             .unwrap()
1905             .insert(id.clone(), device_node!(id, debug_console));
1906 
1907         Ok(debug_console)
1908     }
1909 
1910     #[cfg(target_arch = "x86_64")]
1911     fn add_serial_device(
1912         &mut self,
1913         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1914         serial_writer: Option<Box<dyn io::Write + Send>>,
1915     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1916         // Serial is tied to IRQ #4
1917         let serial_irq = 4;
1918 
1919         let id = String::from(SERIAL_DEVICE_NAME);
1920 
1921         let interrupt_group = interrupt_manager
1922             .create_group(LegacyIrqGroupConfig {
1923                 irq: serial_irq as InterruptIndex,
1924             })
1925             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1926 
1927         let serial = Arc::new(Mutex::new(Serial::new(
1928             id.clone(),
1929             interrupt_group,
1930             serial_writer,
1931             state_from_id(self.snapshot.as_ref(), id.as_str())
1932                 .map_err(DeviceManagerError::RestoreGetState)?,
1933         )));
1934 
1935         self.bus_devices
1936             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1937 
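             // Reserve the standard COM1 I/O range (8 ports starting at 0x3f8)
             // before registering the device on the I/O bus.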
1938         self.address_manager
1939             .allocator
1940             .lock()
1941             .unwrap()
1942             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1943             .ok_or(DeviceManagerError::AllocateIoPort)?;
1944 
1945         self.address_manager
1946             .io_bus
1947             .insert(serial.clone(), 0x3f8, 0x8)
1948             .map_err(DeviceManagerError::BusError)?;
1949 
1950         // Fill the device tree with a new node. In case of restore, we
1951         // know there is nothing to do, so we can simply override the
1952         // existing entry.
1953         self.device_tree
1954             .lock()
1955             .unwrap()
1956             .insert(id.clone(), device_node!(id, serial));
1957 
1958         Ok(serial)
1959     }
1960 
1961     #[cfg(target_arch = "aarch64")]
1962     fn add_serial_device(
1963         &mut self,
1964         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1965         serial_writer: Option<Box<dyn io::Write + Send>>,
1966     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1967         let id = String::from(SERIAL_DEVICE_NAME);
1968 
1969         let serial_irq = self
1970             .address_manager
1971             .allocator
1972             .lock()
1973             .unwrap()
1974             .allocate_irq()
1975             .unwrap();
1976 
1977         let interrupt_group = interrupt_manager
1978             .create_group(LegacyIrqGroupConfig {
1979                 irq: serial_irq as InterruptIndex,
1980             })
1981             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1982 
1983         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1984             id.clone(),
1985             interrupt_group,
1986             serial_writer,
1987             self.timestamp,
1988             state_from_id(self.snapshot.as_ref(), id.as_str())
1989                 .map_err(DeviceManagerError::RestoreGetState)?,
1990         )));
1991 
1992         self.bus_devices
1993             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1994 
1995         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
1996 
1997         self.address_manager
1998             .mmio_bus
1999             .insert(serial.clone(), addr.0, MMIO_LEN)
2000             .map_err(DeviceManagerError::BusError)?;
2001 
2002         self.id_to_dev_info.insert(
2003             (DeviceType::Serial, DeviceType::Serial.to_string()),
2004             MmioDeviceInfo {
2005                 addr: addr.0,
2006                 len: MMIO_LEN,
2007                 irq: serial_irq,
2008             },
2009         );
2010 
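             // Let the guest kernel use this PL011 as an early console.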
2011         self.cmdline_additions
2012             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
2013 
2014         // Fill the device tree with a new node. In case of restore, we
2015         // know there is nothing to do, so we can simply override the
2016         // existing entry.
2017         self.device_tree
2018             .lock()
2019             .unwrap()
2020             .insert(id.clone(), device_node!(id, serial));
2021 
2022         Ok(serial)
2023     }
2024 
2025     fn modify_mode<F: FnOnce(&mut termios)>(
2026         &mut self,
2027         fd: RawFd,
2028         f: F,
2029     ) -> vmm_sys_util::errno::Result<()> {
2030         // SAFETY: safe because we check the return value of isatty.
2031         if unsafe { isatty(fd) } != 1 {
2032             return Ok(());
2033         }
2034 
2035         // SAFETY: The following pair is safe because termios gets totally overwritten by tcgetattr
2036         // and we check the return result.
2037         let mut termios: termios = unsafe { zeroed() };
2038         // SAFETY: see above
2039         let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
2040         if ret < 0 {
2041             return vmm_sys_util::errno::errno_result();
2042         }
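             // Remember the very first termios captured so that the original
             // terminal settings can be restored later on.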
2043         let mut original_termios_opt = self.original_termios_opt.lock().unwrap();
2044         if original_termios_opt.is_none() {
2045             *original_termios_opt = Some(termios);
2046         }
2047         f(&mut termios);
2048         // SAFETY: Safe because the syscall will only read the extent of termios and we check
2049         // the return result.
2050         let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
2051         if ret < 0 {
2052             return vmm_sys_util::errno::errno_result();
2053         }
2054 
2055         Ok(())
2056     }
2057 
2058     fn set_raw_mode(&mut self, f: &dyn AsRawFd) -> vmm_sys_util::errno::Result<()> {
2059         // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
2060         self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
2061     }
2062 
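         // Spawn a seccomp-confined helper that forwards SIGWINCH (terminal resize)
         // notifications for the given PTY through the console resize pipe.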
2063     fn listen_for_sigwinch_on_tty(&mut self, pty_sub: File) -> std::io::Result<()> {
2064         let seccomp_filter = get_seccomp_filter(
2065             &self.seccomp_action,
2066             Thread::PtyForeground,
2067             self.hypervisor_type,
2068         )
2069         .unwrap();
2070 
2071         self.console_resize_pipe =
2072             Some(Arc::new(start_sigwinch_listener(seccomp_filter, pty_sub)?));
2073 
2074         Ok(())
2075     }
2076 
2077     fn add_virtio_console_device(
2078         &mut self,
2079         virtio_devices: &mut Vec<MetaVirtioDevice>,
2080         console_pty: Option<PtyPair>,
2081         resize_pipe: Option<File>,
2082     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
2083         let console_config = self.config.lock().unwrap().console.clone();
2084         let endpoint = match console_config.mode {
2085             ConsoleOutputMode::File => {
2086                 let file = File::create(console_config.file.as_ref().unwrap())
2087                     .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
2088                 Endpoint::File(file)
2089             }
2090             ConsoleOutputMode::Pty => {
2091                 if let Some(pty) = console_pty {
2092                     self.config.lock().unwrap().console.file = Some(pty.path.clone());
2093                     let file = pty.main.try_clone().unwrap();
2094                     self.console_pty = Some(Arc::new(Mutex::new(pty)));
2095                     self.console_resize_pipe = resize_pipe.map(Arc::new);
2096                     Endpoint::PtyPair(file.try_clone().unwrap(), file)
2097                 } else {
2098                     let (main, sub, path) =
2099                         create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?;
2100                     self.set_raw_mode(&sub)
2101                         .map_err(DeviceManagerError::SetPtyRaw)?;
2102                     self.config.lock().unwrap().console.file = Some(path.clone());
2103                     let file = main.try_clone().unwrap();
2104                     assert!(resize_pipe.is_none());
2105                     self.listen_for_sigwinch_on_tty(sub).unwrap();
2106                     self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
2107                     Endpoint::PtyPair(file.try_clone().unwrap(), file)
2108                 }
2109             }
2110             ConsoleOutputMode::Tty => {
2111                 // The file descriptors must be duplicated here, otherwise they would be
2112                 // closed on a reboot and their numbers reused
2113 
2114                 // SAFETY: FFI call to dup. Trivially safe.
2115                 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
2116                 if stdout == -1 {
2117                     return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
2118                 }
2119                 // SAFETY: stdout is valid and owned solely by us.
2120                 let stdout = unsafe { File::from_raw_fd(stdout) };
2121 
2122                 // Make sure stdout is in raw mode, if it's a terminal.
2123                 let _ = self.set_raw_mode(&stdout);
2124 
2125                 // SAFETY: FFI call. Trivially safe.
2126                 if unsafe { libc::isatty(libc::STDOUT_FILENO) } == 1 {
2127                     self.listen_for_sigwinch_on_tty(stdout.try_clone().unwrap())
2128                         .unwrap();
2129                 }
2130 
2131                 // If stdin is an interactive TTY then we can also accept input
2132                 // SAFETY: FFI call. Trivially safe.
2133                 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
2134                     // SAFETY: FFI call to dup. Trivially safe.
2135                     let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
2136                     if stdin == -1 {
2137                         return vmm_sys_util::errno::errno_result()
2138                             .map_err(DeviceManagerError::DupFd);
2139                     }
2140                     // SAFETY: stdin is valid and owned solely by us.
2141                     let stdin = unsafe { File::from_raw_fd(stdin) };
2142 
2143                     Endpoint::FilePair(stdout, stdin)
2144                 } else {
2145                     Endpoint::File(stdout)
2146                 }
2147             }
2148             ConsoleOutputMode::Socket => {
2149                 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
2150             }
2151             ConsoleOutputMode::Null => Endpoint::Null,
2152             ConsoleOutputMode::Off => return Ok(None),
2153         };
2154         let id = String::from(CONSOLE_DEVICE_NAME);
2155 
2156         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
2157             id.clone(),
2158             endpoint,
2159             self.console_resize_pipe
2160                 .as_ref()
2161                 .map(|p| p.try_clone().unwrap()),
2162             self.force_iommu | console_config.iommu,
2163             self.seccomp_action.clone(),
2164             self.exit_evt
2165                 .try_clone()
2166                 .map_err(DeviceManagerError::EventFd)?,
2167             state_from_id(self.snapshot.as_ref(), id.as_str())
2168                 .map_err(DeviceManagerError::RestoreGetState)?,
2169         )
2170         .map_err(DeviceManagerError::CreateVirtioConsole)?;
2171         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
2172         virtio_devices.push(MetaVirtioDevice {
2173             virtio_device: Arc::clone(&virtio_console_device)
2174                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2175             iommu: console_config.iommu,
2176             id: id.clone(),
2177             pci_segment: 0,
2178             dma_handler: None,
2179         });
2180 
2181         // Fill the device tree with a new node. In case of restore, we
2182         // know there is nothing to do, so we can simply override the
2183         // existing entry.
2184         self.device_tree
2185             .lock()
2186             .unwrap()
2187             .insert(id.clone(), device_node!(id, virtio_console_device));
2188 
2189         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
2190         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
2191             Some(console_resizer)
2192         } else {
2193             None
2194         })
2195     }
2196 
2197     /// Adds all devices that behave like a console with respect to the VM
2198     /// configuration. This includes:
2199     /// - debug-console
2200     /// - serial-console
2201     /// - virtio-console
2202     fn add_console_devices(
2203         &mut self,
2204         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
2205         virtio_devices: &mut Vec<MetaVirtioDevice>,
2206         serial_pty: Option<PtyPair>,
2207         console_pty: Option<PtyPair>,
2208         #[cfg(target_arch = "x86_64")] debug_console_pty: Option<PtyPair>,
2209         #[cfg(not(target_arch = "x86_64"))] _: Option<PtyPair>,
2210         console_resize_pipe: Option<File>,
2211     ) -> DeviceManagerResult<Arc<Console>> {
2212         let serial_config = self.config.lock().unwrap().serial.clone();
2213         let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
2214             ConsoleOutputMode::File => Some(Box::new(
2215                 File::create(serial_config.file.as_ref().unwrap())
2216                     .map_err(DeviceManagerError::SerialOutputFileOpen)?,
2217             )),
2218             ConsoleOutputMode::Pty => {
2219                 if let Some(pty) = serial_pty.clone() {
2220                     self.config.lock().unwrap().serial.file = Some(pty.path.clone());
2221                     self.serial_pty = Some(Arc::new(Mutex::new(pty)));
2222                 } else {
2223                     let (main, sub, path) =
2224                         create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
2225                     self.set_raw_mode(&sub)
2226                         .map_err(DeviceManagerError::SetPtyRaw)?;
2227                     self.config.lock().unwrap().serial.file = Some(path.clone());
2228                     self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
2229                 }
2230                 None
2231             }
2232             ConsoleOutputMode::Tty => {
2233                 let out = stdout();
2234                 let _ = self.set_raw_mode(&out);
2235                 Some(Box::new(out))
2236             }
2237             ConsoleOutputMode::Off | ConsoleOutputMode::Null | ConsoleOutputMode::Socket => None,
2238         };
2239         if serial_config.mode != ConsoleOutputMode::Off {
2240             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
2241             self.serial_manager = match serial_config.mode {
2242                 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty | ConsoleOutputMode::Socket => {
2243                     let serial_manager = SerialManager::new(
2244                         serial,
2245                         self.serial_pty.clone(),
2246                         serial_config.mode,
2247                         serial_config.socket,
2248                     )
2249                     .map_err(DeviceManagerError::CreateSerialManager)?;
2250                     if let Some(mut serial_manager) = serial_manager {
2251                         serial_manager
2252                             .start_thread(
2253                                 self.exit_evt
2254                                     .try_clone()
2255                                     .map_err(DeviceManagerError::EventFd)?,
2256                             )
2257                             .map_err(DeviceManagerError::SpawnSerialManager)?;
2258                         Some(Arc::new(serial_manager))
2259                     } else {
2260                         None
2261                     }
2262                 }
2263                 _ => None,
2264             };
2265         }
2266 
2267         #[cfg(target_arch = "x86_64")]
2268         {
2269             let debug_console_config = self.config.lock().unwrap().debug_console.clone();
2270             let debug_console_writer: Option<Box<dyn io::Write + Send>> = match debug_console_config
2271                 .mode
2272             {
2273                 ConsoleOutputMode::File => Some(Box::new(
2274                     File::create(debug_console_config.file.as_ref().unwrap())
2275                         .map_err(DeviceManagerError::DebugconOutputFileOpen)?,
2276                 )),
2277                 ConsoleOutputMode::Pty => {
2278                     if let Some(pty) = debug_console_pty {
2279                         self.config.lock().unwrap().debug_console.file = Some(pty.path.clone());
2280                         self.debug_console_pty = Some(Arc::new(Mutex::new(pty)));
2281                     } else {
2282                         let (main, sub, path) =
2283                             create_pty().map_err(DeviceManagerError::DebugconPtyOpen)?;
2284                         self.set_raw_mode(&sub)
2285                             .map_err(DeviceManagerError::SetPtyRaw)?;
2286                         self.config.lock().unwrap().debug_console.file = Some(path.clone());
2287                         self.debug_console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
2288                     }
2289                     None
2290                 }
2291                 ConsoleOutputMode::Tty => {
2292                     let out = stdout();
2293                     let _ = self.set_raw_mode(&out);
2294                     Some(Box::new(out))
2295                 }
2296                 ConsoleOutputMode::Off | ConsoleOutputMode::Null | ConsoleOutputMode::Socket => {
2297                     None
2298                 }
2299             };
2300             if let Some(writer) = debug_console_writer {
2301                 let _ = self.add_debug_console_device(writer)?;
2302             }
2303         }
2304 
2305         let console_resizer =
2306             self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;
2307 
2308         Ok(Arc::new(Console { console_resizer }))
2309     }
2310 
2311     fn add_tpm_device(
2312         &mut self,
2313         tpm_path: PathBuf,
2314     ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
2315         // Create TPM Device
2316         let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
2317             DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
2318         })?;
2319         let tpm = Arc::new(Mutex::new(tpm));
2320 
2321         // Add TPM Device to mmio
2322         self.address_manager
2323             .mmio_bus
2324             .insert(
2325                 tpm.clone(),
2326                 arch::layout::TPM_START.0,
2327                 arch::layout::TPM_SIZE,
2328             )
2329             .map_err(DeviceManagerError::BusError)?;
2330 
2331         Ok(tpm)
2332     }
2333 
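     // Build the full list of virtio devices requested by the VM configuration.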
2334     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2335         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
2336 
2337         // Create "standard" virtio devices (block/net/rng)
2338         devices.append(&mut self.make_virtio_block_devices()?);
2339         devices.append(&mut self.make_virtio_net_devices()?);
2340         devices.append(&mut self.make_virtio_rng_devices()?);
2341 
2342         // Add virtio-fs if required
2343         devices.append(&mut self.make_virtio_fs_devices()?);
2344 
2345         // Add virtio-pmem if required
2346         devices.append(&mut self.make_virtio_pmem_devices()?);
2347 
2348         // Add virtio-vsock if required
2349         devices.append(&mut self.make_virtio_vsock_devices()?);
2350 
2351         devices.append(&mut self.make_virtio_mem_devices()?);
2352 
2353         // Add virtio-balloon if required
2354         devices.append(&mut self.make_virtio_balloon_devices()?);
2355 
2356         // Add virtio-watchdog device
2357         devices.append(&mut self.make_virtio_watchdog_devices()?);
2358 
2359         // Add vDPA devices if required
2360         devices.append(&mut self.make_vdpa_devices()?);
2361 
2362         Ok(devices)
2363     }
2364 
2365     // Cache whether aio is supported to avoid checking for every block device
2366     fn aio_is_supported(&mut self) -> bool {
2367         if let Some(supported) = self.aio_supported {
2368             return supported;
2369         }
2370 
2371         let supported = block_aio_is_supported();
2372         self.aio_supported = Some(supported);
2373         supported
2374     }
2375 
2376     // Cache whether io_uring is supported to avoid probing for very block device
2377     // Cache whether io_uring is supported to avoid probing for every block device
2378         if let Some(supported) = self.io_uring_supported {
2379             return supported;
2380         }
2381 
2382         let supported = block_io_uring_is_supported();
2383         self.io_uring_supported = Some(supported);
2384         supported
2385     }
2386 
2387     fn make_virtio_block_device(
2388         &mut self,
2389         disk_cfg: &mut DiskConfig,
2390     ) -> DeviceManagerResult<MetaVirtioDevice> {
2391         let id = if let Some(id) = &disk_cfg.id {
2392             id.clone()
2393         } else {
2394             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
2395             disk_cfg.id = Some(id.clone());
2396             id
2397         };
2398 
2399         info!("Creating virtio-block device: {:?}", disk_cfg);
2400 
2401         let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
2402             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
2403             let vu_cfg = VhostUserConfig {
2404                 socket,
2405                 num_queues: disk_cfg.num_queues,
2406                 queue_size: disk_cfg.queue_size,
2407             };
2408             let vhost_user_block = Arc::new(Mutex::new(
2409                 match virtio_devices::vhost_user::Blk::new(
2410                     id.clone(),
2411                     vu_cfg,
2412                     self.seccomp_action.clone(),
2413                     self.exit_evt
2414                         .try_clone()
2415                         .map_err(DeviceManagerError::EventFd)?,
2416                     self.force_iommu,
2417                     state_from_id(self.snapshot.as_ref(), id.as_str())
2418                         .map_err(DeviceManagerError::RestoreGetState)?,
2419                 ) {
2420                     Ok(vub_device) => vub_device,
2421                     Err(e) => {
2422                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2423                     }
2424                 },
2425             ));
2426 
2427             (
2428                 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2429                 vhost_user_block as Arc<Mutex<dyn Migratable>>,
2430             )
2431         } else {
2432             let mut options = OpenOptions::new();
2433             options.read(true);
2434             options.write(!disk_cfg.readonly);
2435             if disk_cfg.direct {
2436                 options.custom_flags(libc::O_DIRECT);
2437             }
2438             // Open block device path
2439             let mut file: File = options
2440                 .open(
2441                     disk_cfg
2442                         .path
2443                         .as_ref()
2444                         .ok_or(DeviceManagerError::NoDiskPath)?
2445                         .clone(),
2446                 )
2447                 .map_err(DeviceManagerError::Disk)?;
2448             let image_type =
2449                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2450 
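                 // Pick the disk backend matching the detected image format,
                 // preferring asynchronous I/O (io_uring, then aio for raw files)
                 // and falling back to the synchronous implementations.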
2451             let image = match image_type {
2452                 ImageType::FixedVhd => {
2453                     // Use asynchronous backend relying on io_uring if the
2454                     // syscalls are supported.
2455                     if cfg!(feature = "io_uring")
2456                         && !disk_cfg.disable_io_uring
2457                         && self.io_uring_is_supported()
2458                     {
2459                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2460 
2461                         #[cfg(not(feature = "io_uring"))]
2462                         unreachable!("Checked in if statement above");
2463                         #[cfg(feature = "io_uring")]
2464                         {
2465                             Box::new(
2466                                 FixedVhdDiskAsync::new(file)
2467                                     .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2468                             ) as Box<dyn DiskFile>
2469                         }
2470                     } else {
2471                         info!("Using synchronous fixed VHD disk file");
2472                         Box::new(
2473                             FixedVhdDiskSync::new(file)
2474                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2475                         ) as Box<dyn DiskFile>
2476                     }
2477                 }
2478                 ImageType::Raw => {
2479                     // Use asynchronous backend relying on io_uring if the
2480                     // syscalls are supported.
2481                     if cfg!(feature = "io_uring")
2482                         && !disk_cfg.disable_io_uring
2483                         && self.io_uring_is_supported()
2484                     {
2485                         info!("Using asynchronous RAW disk file (io_uring)");
2486 
2487                         #[cfg(not(feature = "io_uring"))]
2488                         unreachable!("Checked in if statement above");
2489                         #[cfg(feature = "io_uring")]
2490                         {
2491                             Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2492                         }
2493                     } else if !disk_cfg.disable_aio && self.aio_is_supported() {
2494                         info!("Using asynchronous RAW disk file (aio)");
2495                         Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
2496                     } else {
2497                         info!("Using synchronous RAW disk file");
2498                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2499                     }
2500                 }
2501                 ImageType::Qcow2 => {
2502                     info!("Using synchronous QCOW disk file");
2503                     Box::new(
2504                         QcowDiskSync::new(file, disk_cfg.direct)
2505                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2506                     ) as Box<dyn DiskFile>
2507                 }
2508                 ImageType::Vhdx => {
2509                     info!("Using synchronous VHDX disk file");
2510                     Box::new(
2511                         VhdxDiskSync::new(file)
2512                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2513                     ) as Box<dyn DiskFile>
2514                 }
2515             };
2516 
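                 // Either build a dedicated rate limiter from the inline configuration,
                 // or reuse a named RateLimiterGroup shared with other disks.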
2517             let rate_limit_group =
2518                 if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
2519                     // Create an anonymous RateLimiterGroup that is dropped when the Disk
2520                     // is dropped.
2521                     let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
2522                     let ops = rate_limiter_cfg.ops.unwrap_or_default();
2523                     let mut rate_limit_group = RateLimiterGroup::new(
2524                         disk_cfg.id.as_ref().unwrap(),
2525                         bw.size,
2526                         bw.one_time_burst.unwrap_or(0),
2527                         bw.refill_time,
2528                         ops.size,
2529                         ops.one_time_burst.unwrap_or(0),
2530                         ops.refill_time,
2531                     )
2532                     .map_err(DeviceManagerError::RateLimiterGroupCreate)?;
2533 
2534                     rate_limit_group
2535                         .start_thread(
2536                             self.exit_evt
2537                                 .try_clone()
2538                                 .map_err(DeviceManagerError::EventFd)?,
2539                         )
2540                         .unwrap();
2541 
2542                     Some(Arc::new(rate_limit_group))
2543                 } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
2544                     self.rate_limit_groups.get(rate_limit_group).cloned()
2545                 } else {
2546                     None
2547                 };
2548 
2549             let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() {
2550                 queue_affinity
2551                     .iter()
2552                     .map(|a| (a.queue_index, a.host_cpus.clone()))
2553                     .collect()
2554             } else {
2555                 BTreeMap::new()
2556             };
2557 
2558             let virtio_block = Arc::new(Mutex::new(
2559                 virtio_devices::Block::new(
2560                     id.clone(),
2561                     image,
2562                     disk_cfg
2563                         .path
2564                         .as_ref()
2565                         .ok_or(DeviceManagerError::NoDiskPath)?
2566                         .clone(),
2567                     disk_cfg.readonly,
2568                     self.force_iommu | disk_cfg.iommu,
2569                     disk_cfg.num_queues,
2570                     disk_cfg.queue_size,
2571                     disk_cfg.serial.clone(),
2572                     self.seccomp_action.clone(),
2573                     rate_limit_group,
2574                     self.exit_evt
2575                         .try_clone()
2576                         .map_err(DeviceManagerError::EventFd)?,
2577                     state_from_id(self.snapshot.as_ref(), id.as_str())
2578                         .map_err(DeviceManagerError::RestoreGetState)?,
2579                     queue_affinity,
2580                 )
2581                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2582             ));
2583 
2584             (
2585                 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2586                 virtio_block as Arc<Mutex<dyn Migratable>>,
2587             )
2588         };
2589 
2590         // Fill the device tree with a new node. In case of restore, we
2591         // know there is nothing to do, so we can simply override the
2592         // existing entry.
2593         self.device_tree
2594             .lock()
2595             .unwrap()
2596             .insert(id.clone(), device_node!(id, migratable_device));
2597 
2598         Ok(MetaVirtioDevice {
2599             virtio_device,
2600             iommu: disk_cfg.iommu,
2601             id,
2602             pci_segment: disk_cfg.pci_segment,
2603             dma_handler: None,
2604         })
2605     }
2606 
2607     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2608         let mut devices = Vec::new();
2609 
2610         let mut block_devices = self.config.lock().unwrap().disks.clone();
2611         if let Some(disk_list_cfg) = &mut block_devices {
2612             for disk_cfg in disk_list_cfg.iter_mut() {
2613                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2614             }
2615         }
2616         self.config.lock().unwrap().disks = block_devices;
2617 
2618         Ok(devices)
2619     }
2620 
2621     fn make_virtio_net_device(
2622         &mut self,
2623         net_cfg: &mut NetConfig,
2624     ) -> DeviceManagerResult<MetaVirtioDevice> {
2625         let id = if let Some(id) = &net_cfg.id {
2626             id.clone()
2627         } else {
2628             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2629             net_cfg.id = Some(id.clone());
2630             id
2631         };
2632         info!("Creating virtio-net device: {:?}", net_cfg);
2633 
2634         let (virtio_device, migratable_device) = if net_cfg.vhost_user {
2635             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2636             let vu_cfg = VhostUserConfig {
2637                 socket,
2638                 num_queues: net_cfg.num_queues,
2639                 queue_size: net_cfg.queue_size,
2640             };
2641             let server = match net_cfg.vhost_mode {
2642                 VhostMode::Client => false,
2643                 VhostMode::Server => true,
2644             };
2645             let vhost_user_net = Arc::new(Mutex::new(
2646                 match virtio_devices::vhost_user::Net::new(
2647                     id.clone(),
2648                     net_cfg.mac,
2649                     net_cfg.mtu,
2650                     vu_cfg,
2651                     server,
2652                     self.seccomp_action.clone(),
2653                     self.exit_evt
2654                         .try_clone()
2655                         .map_err(DeviceManagerError::EventFd)?,
2656                     self.force_iommu,
2657                     state_from_id(self.snapshot.as_ref(), id.as_str())
2658                         .map_err(DeviceManagerError::RestoreGetState)?,
2659                     net_cfg.offload_tso,
2660                     net_cfg.offload_ufo,
2661                     net_cfg.offload_csum,
2662                 ) {
2663                     Ok(vun_device) => vun_device,
2664                     Err(e) => {
2665                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2666                     }
2667                 },
2668             ));
2669 
2670             (
2671                 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2672                 vhost_user_net as Arc<Mutex<dyn Migratable>>,
2673             )
2674         } else {
2675             let state = state_from_id(self.snapshot.as_ref(), id.as_str())
2676                 .map_err(DeviceManagerError::RestoreGetState)?;
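                 // A virtio-net device can be backed by an existing TAP interface
                 // (referenced by name), by pre-opened TAP file descriptors, or by
                 // a TAP created from the ip/mask configuration.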
2677             let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
2678                 Arc::new(Mutex::new(
2679                     virtio_devices::Net::new(
2680                         id.clone(),
2681                         Some(tap_if_name),
2682                         Some(net_cfg.ip),
2683                         Some(net_cfg.mask),
2684                         Some(net_cfg.mac),
2685                         &mut net_cfg.host_mac,
2686                         net_cfg.mtu,
2687                         self.force_iommu | net_cfg.iommu,
2688                         net_cfg.num_queues,
2689                         net_cfg.queue_size,
2690                         self.seccomp_action.clone(),
2691                         net_cfg.rate_limiter_config,
2692                         self.exit_evt
2693                             .try_clone()
2694                             .map_err(DeviceManagerError::EventFd)?,
2695                         state,
2696                         net_cfg.offload_tso,
2697                         net_cfg.offload_ufo,
2698                         net_cfg.offload_csum,
2699                     )
2700                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2701                 ))
2702             } else if let Some(fds) = &net_cfg.fds {
2703                 let net = virtio_devices::Net::from_tap_fds(
2704                     id.clone(),
2705                     fds,
2706                     Some(net_cfg.mac),
2707                     net_cfg.mtu,
2708                     self.force_iommu | net_cfg.iommu,
2709                     net_cfg.queue_size,
2710                     self.seccomp_action.clone(),
2711                     net_cfg.rate_limiter_config,
2712                     self.exit_evt
2713                         .try_clone()
2714                         .map_err(DeviceManagerError::EventFd)?,
2715                     state,
2716                     net_cfg.offload_tso,
2717                     net_cfg.offload_ufo,
2718                     net_cfg.offload_csum,
2719                 )
2720                 .map_err(DeviceManagerError::CreateVirtioNet)?;
2721 
2722                 // SAFETY: 'fds' are valid because the TAP devices were created successfully
2723                 unsafe {
2724                     self.config.lock().unwrap().add_preserved_fds(fds.clone());
2725                 }
2726 
2727                 Arc::new(Mutex::new(net))
2728             } else {
2729                 Arc::new(Mutex::new(
2730                     virtio_devices::Net::new(
2731                         id.clone(),
2732                         None,
2733                         Some(net_cfg.ip),
2734                         Some(net_cfg.mask),
2735                         Some(net_cfg.mac),
2736                         &mut net_cfg.host_mac,
2737                         net_cfg.mtu,
2738                         self.force_iommu | net_cfg.iommu,
2739                         net_cfg.num_queues,
2740                         net_cfg.queue_size,
2741                         self.seccomp_action.clone(),
2742                         net_cfg.rate_limiter_config,
2743                         self.exit_evt
2744                             .try_clone()
2745                             .map_err(DeviceManagerError::EventFd)?,
2746                         state,
2747                         net_cfg.offload_tso,
2748                         net_cfg.offload_ufo,
2749                         net_cfg.offload_csum,
2750                     )
2751                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2752                 ))
2753             };
2754 
2755             (
2756                 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2757                 virtio_net as Arc<Mutex<dyn Migratable>>,
2758             )
2759         };
2760 
2761         // Fill the device tree with a new node. In case of restore, we
2762         // know there is nothing to do, so we can simply override the
2763         // existing entry.
2764         self.device_tree
2765             .lock()
2766             .unwrap()
2767             .insert(id.clone(), device_node!(id, migratable_device));
2768 
2769         Ok(MetaVirtioDevice {
2770             virtio_device,
2771             iommu: net_cfg.iommu,
2772             id,
2773             pci_segment: net_cfg.pci_segment,
2774             dma_handler: None,
2775         })
2776     }
2777 
2778     /// Add virtio-net and vhost-user-net devices
2779     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2780         let mut devices = Vec::new();
2781         let mut net_devices = self.config.lock().unwrap().net.clone();
2782         if let Some(net_list_cfg) = &mut net_devices {
2783             for net_cfg in net_list_cfg.iter_mut() {
2784                 devices.push(self.make_virtio_net_device(net_cfg)?);
2785             }
2786         }
2787         self.config.lock().unwrap().net = net_devices;
2788 
2789         Ok(devices)
2790     }
2791 
2792     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2793         let mut devices = Vec::new();
2794 
2795         // Add virtio-rng if required
2796         let rng_config = self.config.lock().unwrap().rng.clone();
2797         if let Some(rng_path) = rng_config.src.to_str() {
2798             info!("Creating virtio-rng device: {:?}", rng_config);
2799             let id = String::from(RNG_DEVICE_NAME);
2800 
2801             let virtio_rng_device = Arc::new(Mutex::new(
2802                 virtio_devices::Rng::new(
2803                     id.clone(),
2804                     rng_path,
2805                     self.force_iommu | rng_config.iommu,
2806                     self.seccomp_action.clone(),
2807                     self.exit_evt
2808                         .try_clone()
2809                         .map_err(DeviceManagerError::EventFd)?,
2810                     state_from_id(self.snapshot.as_ref(), id.as_str())
2811                         .map_err(DeviceManagerError::RestoreGetState)?,
2812                 )
2813                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2814             ));
2815             devices.push(MetaVirtioDevice {
2816                 virtio_device: Arc::clone(&virtio_rng_device)
2817                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2818                 iommu: rng_config.iommu,
2819                 id: id.clone(),
2820                 pci_segment: 0,
2821                 dma_handler: None,
2822             });
2823 
2824             // Fill the device tree with a new node. In case of restore, we
2825             // know there is nothing to do, so we can simply override the
2826             // existing entry.
2827             self.device_tree
2828                 .lock()
2829                 .unwrap()
2830                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2831         }
2832 
2833         Ok(devices)
2834     }
2835 
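     /// Create a vhost-user-fs (virtio-fs) device from `fs_cfg`. The device is
     /// backed by the vhost-user socket given in the configuration; a missing
     /// socket path results in `NoVirtioFsSock`.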
2836     fn make_virtio_fs_device(
2837         &mut self,
2838         fs_cfg: &mut FsConfig,
2839     ) -> DeviceManagerResult<MetaVirtioDevice> {
2840         let id = if let Some(id) = &fs_cfg.id {
2841             id.clone()
2842         } else {
2843             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2844             fs_cfg.id = Some(id.clone());
2845             id
2846         };
2847 
2848         info!("Creating virtio-fs device: {:?}", fs_cfg);
2849 
2850         let mut node = device_node!(id);
2851 
2852         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2853             let virtio_fs_device = Arc::new(Mutex::new(
2854                 virtio_devices::vhost_user::Fs::new(
2855                     id.clone(),
2856                     fs_socket,
2857                     &fs_cfg.tag,
2858                     fs_cfg.num_queues,
2859                     fs_cfg.queue_size,
2860                     None,
2861                     self.seccomp_action.clone(),
2862                     self.exit_evt
2863                         .try_clone()
2864                         .map_err(DeviceManagerError::EventFd)?,
2865                     self.force_iommu,
2866                     state_from_id(self.snapshot.as_ref(), id.as_str())
2867                         .map_err(DeviceManagerError::RestoreGetState)?,
2868                 )
2869                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2870             ));
2871 
2872             // Update the device tree with the migratable device.
2873             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2874             self.device_tree.lock().unwrap().insert(id.clone(), node);
2875 
2876             Ok(MetaVirtioDevice {
2877                 virtio_device: Arc::clone(&virtio_fs_device)
2878                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2879                 iommu: false,
2880                 id,
2881                 pci_segment: fs_cfg.pci_segment,
2882                 dma_handler: None,
2883             })
2884         } else {
2885             Err(DeviceManagerError::NoVirtioFsSock)
2886         }
2887     }
2888 
2889     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2890         let mut devices = Vec::new();
2891 
2892         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2893         if let Some(fs_list_cfg) = &mut fs_devices {
2894             for fs_cfg in fs_list_cfg.iter_mut() {
2895                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2896             }
2897         }
2898         self.config.lock().unwrap().fs = fs_devices;
2899 
2900         Ok(devices)
2901     }
2902 
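     /// Create a virtio-pmem device backed by the file (or by an anonymous
     /// temporary file when a directory is given) from `pmem_cfg`. The backing
     /// region is mapped into the guest from the segment's 64-bit MMIO
     /// allocator and recorded in the device tree so a restore reuses the same
     /// address range.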
2903     fn make_virtio_pmem_device(
2904         &mut self,
2905         pmem_cfg: &mut PmemConfig,
2906     ) -> DeviceManagerResult<MetaVirtioDevice> {
2907         let id = if let Some(id) = &pmem_cfg.id {
2908             id.clone()
2909         } else {
2910             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2911             pmem_cfg.id = Some(id.clone());
2912             id
2913         };
2914 
2915         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2916 
2917         let mut node = device_node!(id);
2918 
2919         // Look for the id in the device tree. If it can be found, that means
2920         // the device is being restored, otherwise it's created from scratch.
2921         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2922             info!("Restoring virtio-pmem {} resources", id);
2923 
2924             let mut region_range: Option<(u64, u64)> = None;
2925             for resource in node.resources.iter() {
2926                 match resource {
2927                     Resource::MmioAddressRange { base, size } => {
2928                         if region_range.is_some() {
2929                             return Err(DeviceManagerError::ResourceAlreadyExists);
2930                         }
2931 
2932                         region_range = Some((*base, *size));
2933                     }
2934                     _ => {
2935                         error!("Unexpected resource {:?} for {}", resource, id);
2936                     }
2937                 }
2938             }
2939 
2940             if region_range.is_none() {
2941                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2942             }
2943 
2944             region_range
2945         } else {
2946             None
2947         };
2948 
2949         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2950             if pmem_cfg.size.is_none() {
2951                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2952             }
2953             (O_TMPFILE, true)
2954         } else {
2955             (0, false)
2956         };
2957 
2958         let mut file = OpenOptions::new()
2959             .read(true)
2960             .write(!pmem_cfg.discard_writes)
2961             .custom_flags(custom_flags)
2962             .open(&pmem_cfg.file)
2963             .map_err(DeviceManagerError::PmemFileOpen)?;
2964 
2965         let size = if let Some(size) = pmem_cfg.size {
2966             if set_len {
2967                 file.set_len(size)
2968                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2969             }
2970             size
2971         } else {
2972             file.seek(SeekFrom::End(0))
2973                 .map_err(DeviceManagerError::PmemFileSetLen)?
2974         };
2975 
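         // The check below mirrors the allocator constraint further down: the
         // backing size must be a multiple of 2 MiB so the region can be placed
         // on a 2 MiB boundary (required for hugepages support).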
2976         if size % 0x20_0000 != 0 {
2977             return Err(DeviceManagerError::PmemSizeNotAligned);
2978         }
2979 
2980         let (region_base, region_size) = if let Some((base, size)) = region_range {
2981             // The memory needs to be 2MiB aligned in order to support
2982             // hugepages.
2983             self.pci_segments[pmem_cfg.pci_segment as usize]
2984                 .mem64_allocator
2985                 .lock()
2986                 .unwrap()
2987                 .allocate(
2988                     Some(GuestAddress(base)),
2989                     size as GuestUsize,
2990                     Some(0x0020_0000),
2991                 )
2992                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2993 
2994             (base, size)
2995         } else {
2996             // The memory needs to be 2MiB aligned in order to support
2997             // hugepages.
2998             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2999                 .mem64_allocator
3000                 .lock()
3001                 .unwrap()
3002                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
3003                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
3004 
3005             (base.raw_value(), size)
3006         };
3007 
3008         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
3009         let mmap_region = MmapRegion::build(
3010             Some(FileOffset::new(cloned_file, 0)),
3011             region_size as usize,
3012             PROT_READ | PROT_WRITE,
3013             MAP_NORESERVE
3014                 | if pmem_cfg.discard_writes {
3015                     MAP_PRIVATE
3016                 } else {
3017                     MAP_SHARED
3018                 },
3019         )
3020         .map_err(DeviceManagerError::NewMmapRegion)?;
3021         let host_addr: u64 = mmap_region.as_ptr() as u64;
3022 
3023         let mem_slot = self
3024             .memory_manager
3025             .lock()
3026             .unwrap()
3027             .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
3028             .map_err(DeviceManagerError::MemoryManager)?;
3029 
3030         let mapping = virtio_devices::UserspaceMapping {
3031             host_addr,
3032             mem_slot,
3033             addr: GuestAddress(region_base),
3034             len: region_size,
3035             mergeable: false,
3036         };
3037 
3038         let virtio_pmem_device = Arc::new(Mutex::new(
3039             virtio_devices::Pmem::new(
3040                 id.clone(),
3041                 file,
3042                 GuestAddress(region_base),
3043                 mapping,
3044                 mmap_region,
3045                 self.force_iommu | pmem_cfg.iommu,
3046                 self.seccomp_action.clone(),
3047                 self.exit_evt
3048                     .try_clone()
3049                     .map_err(DeviceManagerError::EventFd)?,
3050                 state_from_id(self.snapshot.as_ref(), id.as_str())
3051                     .map_err(DeviceManagerError::RestoreGetState)?,
3052             )
3053             .map_err(DeviceManagerError::CreateVirtioPmem)?,
3054         ));
3055 
3056         // Update the device tree with correct resource information and with
3057         // the migratable device.
3058         node.resources.push(Resource::MmioAddressRange {
3059             base: region_base,
3060             size: region_size,
3061         });
3062         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
3063         self.device_tree.lock().unwrap().insert(id.clone(), node);
3064 
3065         Ok(MetaVirtioDevice {
3066             virtio_device: Arc::clone(&virtio_pmem_device)
3067                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3068             iommu: pmem_cfg.iommu,
3069             id,
3070             pci_segment: pmem_cfg.pci_segment,
3071             dma_handler: None,
3072         })
3073     }
3074 
3075     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3076         let mut devices = Vec::new();
3077         // Add virtio-pmem if required
3078         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
3079         if let Some(pmem_list_cfg) = &mut pmem_devices {
3080             for pmem_cfg in pmem_list_cfg.iter_mut() {
3081                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
3082             }
3083         }
3084         self.config.lock().unwrap().pmem = pmem_devices;
3085 
3086         Ok(devices)
3087     }
3088 
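     /// Create a virtio-vsock device from `vsock_cfg`, backed by a Unix socket
     /// on the host (`VsockUnixBackend`).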
3089     fn make_virtio_vsock_device(
3090         &mut self,
3091         vsock_cfg: &mut VsockConfig,
3092     ) -> DeviceManagerResult<MetaVirtioDevice> {
3093         let id = if let Some(id) = &vsock_cfg.id {
3094             id.clone()
3095         } else {
3096             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
3097             vsock_cfg.id = Some(id.clone());
3098             id
3099         };
3100 
3101         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
3102 
3103         let socket_path = vsock_cfg
3104             .socket
3105             .to_str()
3106             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
3107         let backend =
3108             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
3109                 .map_err(DeviceManagerError::CreateVsockBackend)?;
3110 
3111         let vsock_device = Arc::new(Mutex::new(
3112             virtio_devices::Vsock::new(
3113                 id.clone(),
3114                 vsock_cfg.cid,
3115                 vsock_cfg.socket.clone(),
3116                 backend,
3117                 self.force_iommu | vsock_cfg.iommu,
3118                 self.seccomp_action.clone(),
3119                 self.exit_evt
3120                     .try_clone()
3121                     .map_err(DeviceManagerError::EventFd)?,
3122                 state_from_id(self.snapshot.as_ref(), id.as_str())
3123                     .map_err(DeviceManagerError::RestoreGetState)?,
3124             )
3125             .map_err(DeviceManagerError::CreateVirtioVsock)?,
3126         ));
3127 
3128         // Fill the device tree with a new node. In case of restore, we
3129         // know there is nothing to do, so we can simply override the
3130         // existing entry.
3131         self.device_tree
3132             .lock()
3133             .unwrap()
3134             .insert(id.clone(), device_node!(id, vsock_device));
3135 
3136         Ok(MetaVirtioDevice {
3137             virtio_device: Arc::clone(&vsock_device)
3138                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3139             iommu: vsock_cfg.iommu,
3140             id,
3141             pci_segment: vsock_cfg.pci_segment,
3142             dma_handler: None,
3143         })
3144     }
3145 
3146     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3147         let mut devices = Vec::new();
3148 
3149         let mut vsock = self.config.lock().unwrap().vsock.clone();
3150         if let Some(ref mut vsock_cfg) = &mut vsock {
3151             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
3152         }
3153         self.config.lock().unwrap().vsock = vsock;
3154 
3155         Ok(devices)
3156     }
3157 
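     /// Create one virtio-mem device per memory zone configured with a
     /// hotpluggable (virtio-mem) region. Each device is handed back to its
     /// zone so that later memory resize requests can be forwarded to it.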
3158     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3159         let mut devices = Vec::new();
3160 
3161         let mm = self.memory_manager.clone();
3162         let mut mm = mm.lock().unwrap();
3163         for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
3164             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
3165                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
3166 
3167                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
3168                     .map(|i| i as u16);
3169 
3170                 let virtio_mem_device = Arc::new(Mutex::new(
3171                     virtio_devices::Mem::new(
3172                         memory_zone_id.clone(),
3173                         virtio_mem_zone.region(),
3174                         self.seccomp_action.clone(),
3175                         node_id,
3176                         virtio_mem_zone.hotplugged_size(),
3177                         virtio_mem_zone.hugepages(),
3178                         self.exit_evt
3179                             .try_clone()
3180                             .map_err(DeviceManagerError::EventFd)?,
3181                         virtio_mem_zone.blocks_state().clone(),
3182                         state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
3183                             .map_err(DeviceManagerError::RestoreGetState)?,
3184                     )
3185                     .map_err(DeviceManagerError::CreateVirtioMem)?,
3186                 ));
3187 
3188                 // Update the virtio-mem zone so that it has a handle onto the
3189                 // virtio-mem device, which will be used for triggering a resize
3190                 // if needed.
3191                 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));
3192 
3193                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
3194 
3195                 devices.push(MetaVirtioDevice {
3196                     virtio_device: Arc::clone(&virtio_mem_device)
3197                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3198                     iommu: false,
3199                     id: memory_zone_id.clone(),
3200                     pci_segment: 0,
3201                     dma_handler: None,
3202                 });
3203 
3204                 // Fill the device tree with a new node. In case of restore, we
3205                 // know there is nothing to do, so we can simply override the
3206                 // existing entry.
3207                 self.device_tree.lock().unwrap().insert(
3208                     memory_zone_id.clone(),
3209                     device_node!(memory_zone_id, virtio_mem_device),
3210                 );
3211             }
3212         }
3213 
3214         Ok(devices)
3215     }
3216 
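     /// Create the virtio-balloon device, if ballooning was requested in the VM
     /// configuration. A handle is kept in `self.balloon` for later resizing.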
3217     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3218         let mut devices = Vec::new();
3219 
3220         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
3221             let id = String::from(BALLOON_DEVICE_NAME);
3222             info!("Creating virtio-balloon device: id = {}", id);
3223 
3224             let virtio_balloon_device = Arc::new(Mutex::new(
3225                 virtio_devices::Balloon::new(
3226                     id.clone(),
3227                     balloon_config.size,
3228                     balloon_config.deflate_on_oom,
3229                     balloon_config.free_page_reporting,
3230                     self.seccomp_action.clone(),
3231                     self.exit_evt
3232                         .try_clone()
3233                         .map_err(DeviceManagerError::EventFd)?,
3234                     state_from_id(self.snapshot.as_ref(), id.as_str())
3235                         .map_err(DeviceManagerError::RestoreGetState)?,
3236                 )
3237                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
3238             ));
3239 
3240             self.balloon = Some(virtio_balloon_device.clone());
3241 
3242             devices.push(MetaVirtioDevice {
3243                 virtio_device: Arc::clone(&virtio_balloon_device)
3244                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3245                 iommu: false,
3246                 id: id.clone(),
3247                 pci_segment: 0,
3248                 dma_handler: None,
3249             });
3250 
3251             self.device_tree
3252                 .lock()
3253                 .unwrap()
3254                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
3255         }
3256 
3257         Ok(devices)
3258     }
3259 
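     /// Create the virtio-watchdog device when `watchdog` is enabled. The
     /// device is wired to the VM reset event so that an expired watchdog
     /// resets the guest.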
3260     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3261         let mut devices = Vec::new();
3262 
3263         if !self.config.lock().unwrap().watchdog {
3264             return Ok(devices);
3265         }
3266 
3267         let id = String::from(WATCHDOG_DEVICE_NAME);
3268         info!("Creating virtio-watchdog device: id = {}", id);
3269 
3270         let virtio_watchdog_device = Arc::new(Mutex::new(
3271             virtio_devices::Watchdog::new(
3272                 id.clone(),
3273                 self.reset_evt.try_clone().unwrap(),
3274                 self.seccomp_action.clone(),
3275                 self.exit_evt
3276                     .try_clone()
3277                     .map_err(DeviceManagerError::EventFd)?,
3278                 state_from_id(self.snapshot.as_ref(), id.as_str())
3279                     .map_err(DeviceManagerError::RestoreGetState)?,
3280             )
3281             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
3282         ));
3283         devices.push(MetaVirtioDevice {
3284             virtio_device: Arc::clone(&virtio_watchdog_device)
3285                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3286             iommu: false,
3287             id: id.clone(),
3288             pci_segment: 0,
3289             dma_handler: None,
3290         });
3291 
3292         self.device_tree
3293             .lock()
3294             .unwrap()
3295             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
3296 
3297         Ok(devices)
3298     }
3299 
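     /// Create a vDPA device from `vdpa_cfg`. A dedicated DMA handler is built
     /// around the device so that guest memory mappings can be relayed to the
     /// vDPA backend.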
3300     fn make_vdpa_device(
3301         &mut self,
3302         vdpa_cfg: &mut VdpaConfig,
3303     ) -> DeviceManagerResult<MetaVirtioDevice> {
3304         let id = if let Some(id) = &vdpa_cfg.id {
3305             id.clone()
3306         } else {
3307             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
3308             vdpa_cfg.id = Some(id.clone());
3309             id
3310         };
3311 
3312         info!("Creating vDPA device: {:?}", vdpa_cfg);
3313 
3314         let device_path = vdpa_cfg
3315             .path
3316             .to_str()
3317             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
3318 
3319         let vdpa_device = Arc::new(Mutex::new(
3320             virtio_devices::Vdpa::new(
3321                 id.clone(),
3322                 device_path,
3323                 self.memory_manager.lock().unwrap().guest_memory(),
3324                 vdpa_cfg.num_queues as u16,
3325                 state_from_id(self.snapshot.as_ref(), id.as_str())
3326                     .map_err(DeviceManagerError::RestoreGetState)?,
3327             )
3328             .map_err(DeviceManagerError::CreateVdpa)?,
3329         ));
3330 
3331         // Create the DMA handler that is required by the vDPA device
3332         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
3333             Arc::clone(&vdpa_device),
3334             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3335         ));
3336 
3337         self.device_tree
3338             .lock()
3339             .unwrap()
3340             .insert(id.clone(), device_node!(id, vdpa_device));
3341 
3342         Ok(MetaVirtioDevice {
3343             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3344             iommu: vdpa_cfg.iommu,
3345             id,
3346             pci_segment: vdpa_cfg.pci_segment,
3347             dma_handler: Some(vdpa_mapping),
3348         })
3349     }
3350 
3351     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3352         let mut devices = Vec::new();
3353         // Add vdpa if required
3354         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
3355         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
3356             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
3357                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
3358             }
3359         }
3360         self.config.lock().unwrap().vdpa = vdpa_devices;
3361 
3362         Ok(devices)
3363     }
3364 
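     /// Generate the next available device name for the given prefix. The
     /// wrapping counter is advanced until a name is found that is neither in
     /// the boot-time id list nor already present in the device tree; if the
     /// counter wraps all the way around, no name is available.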
3365     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
3366         let start_id = self.device_id_cnt;
3367         loop {
3368             // Generate the temporary name.
3369             let name = format!("{}{}", prefix, self.device_id_cnt);
3370             // Increment the counter.
3371             self.device_id_cnt += Wrapping(1);
3372             // Check if the name is already in use.
3373             if !self.boot_id_list.contains(&name)
3374                 && !self.device_tree.lock().unwrap().contains_key(&name)
3375             {
3376                 return Ok(name);
3377             }
3378 
3379             if self.device_id_cnt == start_id {
3380                 // We went through a full loop and there's nothing else we can
3381                 // do.
3382                 break;
3383             }
3384         }
3385         Err(DeviceManagerError::NoAvailableDeviceName)
3386     }
3387 
3388     fn add_passthrough_device(
3389         &mut self,
3390         device_cfg: &mut DeviceConfig,
3391     ) -> DeviceManagerResult<(PciBdf, String)> {
3392         // If the passthrough device has not been created yet, it is created
3393         // here and stored in the DeviceManager structure for future needs.
3394         if self.passthrough_device.is_none() {
3395             self.passthrough_device = Some(
3396                 self.address_manager
3397                     .vm
3398                     .create_passthrough_device()
3399                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
3400             );
3401         }
3402 
3403         self.add_vfio_device(device_cfg)
3404     }
3405 
3406     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
3407         let passthrough_device = self
3408             .passthrough_device
3409             .as_ref()
3410             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
3411 
3412         let dup = passthrough_device
3413             .try_clone()
3414             .map_err(DeviceManagerError::VfioCreate)?;
3415 
3416         Ok(Arc::new(
3417             VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
3418         ))
3419     }
3420 
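     /// Add a VFIO passthrough device described by `device_cfg`: create or
     /// reuse a VFIO container as needed, map guest memory for DMA, and plug
     /// the resulting PCI device into the requested segment.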
3421     fn add_vfio_device(
3422         &mut self,
3423         device_cfg: &mut DeviceConfig,
3424     ) -> DeviceManagerResult<(PciBdf, String)> {
3425         let vfio_name = if let Some(id) = &device_cfg.id {
3426             id.clone()
3427         } else {
3428             let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
3429             device_cfg.id = Some(id.clone());
3430             id
3431         };
3432 
3433         let (pci_segment_id, pci_device_bdf, resources) =
3434             self.pci_resources(&vfio_name, device_cfg.pci_segment)?;
3435 
3436         let mut needs_dma_mapping = false;
3437 
3438         // Here we create a new VFIO container for two reasons. Either this is
3439         // the first VFIO device, meaning we need a new VFIO container, which
3440         // will be shared with other VFIO devices. Or the new VFIO device is
3441         // attached to a vIOMMU, meaning we must create a dedicated VFIO
3442         // container. In the vIOMMU use case, we can't put all devices under
3443         // the same VFIO container since we couldn't map/unmap memory for each
3444         // device. That's simply because the map/unmap operations happen at the
3445         // VFIO container level.
3446         let vfio_container = if device_cfg.iommu {
3447             let vfio_container = self.create_vfio_container()?;
3448 
3449             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3450                 Arc::clone(&vfio_container),
3451                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3452                 Arc::clone(&self.mmio_regions),
3453             ));
3454 
3455             if let Some(iommu) = &self.iommu_device {
3456                 iommu
3457                     .lock()
3458                     .unwrap()
3459                     .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
3460             } else {
3461                 return Err(DeviceManagerError::MissingVirtualIommu);
3462             }
3463 
3464             vfio_container
3465         } else if let Some(vfio_container) = &self.vfio_container {
3466             Arc::clone(vfio_container)
3467         } else {
3468             let vfio_container = self.create_vfio_container()?;
3469             needs_dma_mapping = true;
3470             self.vfio_container = Some(Arc::clone(&vfio_container));
3471 
3472             vfio_container
3473         };
3474 
3475         let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
3476             .map_err(DeviceManagerError::VfioCreate)?;
3477 
3478         if needs_dma_mapping {
3479             // Register DMA mapping in IOMMU.
3480             // Do not register virtio-mem regions, as they are handled directly by
3481             // the virtio-mem devices themselves.
3482             for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3483                 for region in zone.regions() {
3484                     vfio_container
3485                         .vfio_dma_map(
3486                             region.start_addr().raw_value(),
3487                             region.len(),
3488                             region.as_ptr() as u64,
3489                         )
3490                         .map_err(DeviceManagerError::VfioDmaMap)?;
3491                 }
3492             }
3493 
3494             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3495                 Arc::clone(&vfio_container),
3496                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3497                 Arc::clone(&self.mmio_regions),
3498             ));
3499 
3500             for virtio_mem_device in self.virtio_mem_devices.iter() {
3501                 virtio_mem_device
3502                     .lock()
3503                     .unwrap()
3504                     .add_dma_mapping_handler(
3505                         VirtioMemMappingSource::Container,
3506                         vfio_mapping.clone(),
3507                     )
3508                     .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3509             }
3510         }
3511 
3512         let legacy_interrupt_group =
3513             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3514                 Some(
3515                     legacy_interrupt_manager
3516                         .create_group(LegacyIrqGroupConfig {
3517                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3518                                 [pci_device_bdf.device() as usize]
3519                                 as InterruptIndex,
3520                         })
3521                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3522                 )
3523             } else {
3524                 None
3525             };
3526 
3527         let memory_manager = self.memory_manager.clone();
3528 
3529         let vfio_pci_device = VfioPciDevice::new(
3530             vfio_name.clone(),
3531             &self.address_manager.vm,
3532             vfio_device,
3533             vfio_container,
3534             self.msi_interrupt_manager.clone(),
3535             legacy_interrupt_group,
3536             device_cfg.iommu,
3537             pci_device_bdf,
3538             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3539             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
3540             device_cfg.x_nv_gpudirect_clique,
3541         )
3542         .map_err(DeviceManagerError::VfioPciCreate)?;
3543 
3544         let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));
3545 
3546         let new_resources = self.add_pci_device(
3547             vfio_pci_device.clone(),
3548             vfio_pci_device.clone(),
3549             pci_segment_id,
3550             pci_device_bdf,
3551             resources,
3552         )?;
3553 
3554         vfio_pci_device
3555             .lock()
3556             .unwrap()
3557             .map_mmio_regions()
3558             .map_err(DeviceManagerError::VfioMapRegion)?;
3559 
3560         for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
3561             self.mmio_regions.lock().unwrap().push(mmio_region);
3562         }
3563 
3564         let mut node = device_node!(vfio_name, vfio_pci_device);
3565 
3566         // Update the device tree with correct resource information.
3567         node.resources = new_resources;
3568         node.pci_bdf = Some(pci_device_bdf);
3569         node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));
3570 
3571         self.device_tree
3572             .lock()
3573             .unwrap()
3574             .insert(vfio_name.clone(), node);
3575 
3576         Ok((pci_device_bdf, vfio_name))
3577     }
3578 
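     /// Common helper to plug a PCI device on the given segment: allocate its
     /// BARs from the segment's 32-bit and 64-bit MMIO allocators, register the
     /// device on the PCI bus, map its BARs onto the VM address buses, and
     /// return the per-BAR resources so callers can record them in the device
     /// tree.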
3579     fn add_pci_device(
3580         &mut self,
3581         bus_device: Arc<Mutex<dyn BusDevice>>,
3582         pci_device: Arc<Mutex<dyn PciDevice>>,
3583         segment_id: u16,
3584         bdf: PciBdf,
3585         resources: Option<Vec<Resource>>,
3586     ) -> DeviceManagerResult<Vec<Resource>> {
3587         let bars = pci_device
3588             .lock()
3589             .unwrap()
3590             .allocate_bars(
3591                 &self.address_manager.allocator,
3592                 &mut self.pci_segments[segment_id as usize]
3593                     .mem32_allocator
3594                     .lock()
3595                     .unwrap(),
3596                 &mut self.pci_segments[segment_id as usize]
3597                     .mem64_allocator
3598                     .lock()
3599                     .unwrap(),
3600                 resources,
3601             )
3602             .map_err(DeviceManagerError::AllocateBars)?;
3603 
3604         let mut pci_bus = self.pci_segments[segment_id as usize]
3605             .pci_bus
3606             .lock()
3607             .unwrap();
3608 
3609         pci_bus
3610             .add_device(bdf.device() as u32, pci_device)
3611             .map_err(DeviceManagerError::AddPciDevice)?;
3612 
3613         self.bus_devices.push(Arc::clone(&bus_device));
3614 
3615         pci_bus
3616             .register_mapping(
3617                 bus_device,
3618                 #[cfg(target_arch = "x86_64")]
3619                 self.address_manager.io_bus.as_ref(),
3620                 self.address_manager.mmio_bus.as_ref(),
3621                 bars.clone(),
3622             )
3623             .map_err(DeviceManagerError::AddPciDevice)?;
3624 
3625         let mut new_resources = Vec::new();
3626         for bar in bars {
3627             new_resources.push(Resource::PciBar {
3628                 index: bar.idx(),
3629                 base: bar.addr(),
3630                 size: bar.size(),
3631                 type_: bar.region_type().into(),
3632                 prefetchable: bar.prefetchable().into(),
3633             });
3634         }
3635 
3636         Ok(new_resources)
3637     }
3638 
3639     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3640         let mut iommu_attached_device_ids = Vec::new();
3641         let mut devices = self.config.lock().unwrap().devices.clone();
3642 
3643         if let Some(device_list_cfg) = &mut devices {
3644             for device_cfg in device_list_cfg.iter_mut() {
3645                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3646                 if device_cfg.iommu && self.iommu_device.is_some() {
3647                     iommu_attached_device_ids.push(device_id);
3648                 }
3649             }
3650         }
3651 
3652         // Update the list of devices
3653         self.config.lock().unwrap().devices = devices;
3654 
3655         Ok(iommu_attached_device_ids)
3656     }
3657 
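     /// Add a vfio-user device, i.e. a PCI device implemented by an external
     /// process reachable over the configured socket, including DMA mappings
     /// for every guest memory region and for any virtio-mem device present.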
3658     fn add_vfio_user_device(
3659         &mut self,
3660         device_cfg: &mut UserDeviceConfig,
3661     ) -> DeviceManagerResult<(PciBdf, String)> {
3662         let vfio_user_name = if let Some(id) = &device_cfg.id {
3663             id.clone()
3664         } else {
3665             let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
3666             device_cfg.id = Some(id.clone());
3667             id
3668         };
3669 
3670         let (pci_segment_id, pci_device_bdf, resources) =
3671             self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;
3672 
3673         let legacy_interrupt_group =
3674             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3675                 Some(
3676                     legacy_interrupt_manager
3677                         .create_group(LegacyIrqGroupConfig {
3678                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3679                                 [pci_device_bdf.device() as usize]
3680                                 as InterruptIndex,
3681                         })
3682                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3683                 )
3684             } else {
3685                 None
3686             };
3687 
3688         let client = Arc::new(Mutex::new(
3689             vfio_user::Client::new(&device_cfg.socket)
3690                 .map_err(DeviceManagerError::VfioUserCreateClient)?,
3691         ));
3692 
3693         let memory_manager = self.memory_manager.clone();
3694 
3695         let mut vfio_user_pci_device = VfioUserPciDevice::new(
3696             vfio_user_name.clone(),
3697             &self.address_manager.vm,
3698             client.clone(),
3699             self.msi_interrupt_manager.clone(),
3700             legacy_interrupt_group,
3701             pci_device_bdf,
3702             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3703             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
3704         )
3705         .map_err(DeviceManagerError::VfioUserCreate)?;
3706 
3707         let memory = self.memory_manager.lock().unwrap().guest_memory();
3708         let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
3709         for virtio_mem_device in self.virtio_mem_devices.iter() {
3710             virtio_mem_device
3711                 .lock()
3712                 .unwrap()
3713                 .add_dma_mapping_handler(
3714                     VirtioMemMappingSource::Device(pci_device_bdf.into()),
3715                     vfio_user_mapping.clone(),
3716                 )
3717                 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3718         }
3719 
3720         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3721             for region in zone.regions() {
3722                 vfio_user_pci_device
3723                     .dma_map(region)
3724                     .map_err(DeviceManagerError::VfioUserDmaMap)?;
3725             }
3726         }
3727 
3728         let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));
3729 
3730         let new_resources = self.add_pci_device(
3731             vfio_user_pci_device.clone(),
3732             vfio_user_pci_device.clone(),
3733             pci_segment_id,
3734             pci_device_bdf,
3735             resources,
3736         )?;
3737 
3738         // Note that 'add_pci_device()' must be called beforehand so that the list of
3739         // MMIO regions is provisioned correctly.
3740         vfio_user_pci_device
3741             .lock()
3742             .unwrap()
3743             .map_mmio_regions()
3744             .map_err(DeviceManagerError::VfioUserMapRegion)?;
3745 
3746         let mut node = device_node!(vfio_user_name, vfio_user_pci_device);
3747 
3748         // Update the device tree with correct resource information.
3749         node.resources = new_resources;
3750         node.pci_bdf = Some(pci_device_bdf);
3751         node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));
3752 
3753         self.device_tree
3754             .lock()
3755             .unwrap()
3756             .insert(vfio_user_name.clone(), node);
3757 
3758         Ok((pci_device_bdf, vfio_user_name))
3759     }
3760 
3761     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3762         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3763 
3764         if let Some(device_list_cfg) = &mut user_devices {
3765             for device_cfg in device_list_cfg.iter_mut() {
3766                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3767             }
3768         }
3769 
3770         // Update the list of devices
3771         self.config.lock().unwrap().user_devices = user_devices;
3772 
3773         Ok(vec![])
3774     }
3775 
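     /// Wrap a virtio device into a virtio-pci transport, place it on the
     /// requested PCI segment, set up its MSI-X interrupts, ioeventfds and
     /// optional DMA/IOMMU handling, and link the new virtio-pci node to the
     /// underlying virtio device node in the device tree.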
3776     fn add_virtio_pci_device(
3777         &mut self,
3778         virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3779         iommu_mapping: &Option<Arc<IommuMapping>>,
3780         virtio_device_id: String,
3781         pci_segment_id: u16,
3782         dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
3783     ) -> DeviceManagerResult<PciBdf> {
3784         let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");
3785 
3786         // Add the new virtio-pci node to the device tree.
3787         let mut node = device_node!(id);
3788         node.children = vec![virtio_device_id.clone()];
3789 
3790         let (pci_segment_id, pci_device_bdf, resources) =
3791             self.pci_resources(&id, pci_segment_id)?;
3792 
3793         // Update the existing virtio node by setting the parent.
3794         if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
3795             node.parent = Some(id.clone());
3796         } else {
3797             return Err(DeviceManagerError::MissingNode);
3798         }
3799 
3800         // Allow one MSI-X vector per queue. One more vector is added on
3801         // top of that, to account for the dedicated vector used to notify
3802         // the guest about a virtio config change.
3803         let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;
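         // For example (hypothetical values): a device exposing 2 queues gets
         // 2 queue vectors plus the config-change vector, i.e. msix_num == 3.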
3804 
3805         // Create the AccessPlatform trait from the implementation IommuMapping.
3806         // This will provide address translation for any virtio device sitting
3807         // behind a vIOMMU.
3808         let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
3809         {
3810             Some(Arc::new(AccessPlatformMapping::new(
3811                 pci_device_bdf.into(),
3812                 mapping.clone(),
3813             )))
3814         } else {
3815             None
3816         };
3817 
3818         let memory = self.memory_manager.lock().unwrap().guest_memory();
3819 
3820         // Map DMA ranges if a DMA handler is available and if the device is
3821         // not attached to a virtual IOMMU.
3822         if let Some(dma_handler) = &dma_handler {
3823             if iommu_mapping.is_some() {
3824                 if let Some(iommu) = &self.iommu_device {
3825                     iommu
3826                         .lock()
3827                         .unwrap()
3828                         .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
3829                 } else {
3830                     return Err(DeviceManagerError::MissingVirtualIommu);
3831                 }
3832             } else {
3833                 // Let every virtio-mem device handle the DMA map/unmap through the
3834                 // DMA handler provided.
3835                 for virtio_mem_device in self.virtio_mem_devices.iter() {
3836                     virtio_mem_device
3837                         .lock()
3838                         .unwrap()
3839                         .add_dma_mapping_handler(
3840                             VirtioMemMappingSource::Device(pci_device_bdf.into()),
3841                             dma_handler.clone(),
3842                         )
3843                         .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3844                 }
3845 
3846                 // Do not register virtio-mem regions, as they are handled directly by
3847                 // virtio-mem devices.
3848                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3849                     for region in zone.regions() {
3850                         let gpa = region.start_addr().0;
3851                         let size = region.len();
3852                         dma_handler
3853                             .map(gpa, gpa, size)
3854                             .map_err(DeviceManagerError::VirtioDmaMap)?;
3855                     }
3856                 }
3857             }
3858         }
3859 
3860         let device_type = virtio_device.lock().unwrap().device_type();
3861         let virtio_pci_device = Arc::new(Mutex::new(
3862             VirtioPciDevice::new(
3863                 id.clone(),
3864                 memory,
3865                 virtio_device,
3866                 msix_num,
3867                 access_platform,
3868                 &self.msi_interrupt_manager,
3869                 pci_device_bdf.into(),
3870                 self.activate_evt
3871                     .try_clone()
3872                     .map_err(DeviceManagerError::EventFd)?,
3873                 // All device types *except* virtio block devices should be allocated a 64-bit BAR.
3874                 // Block devices are given a 32-bit BAR so that they are easily accessible
3875                 // to firmware without requiring excessive identity mapping.
3876                 // The exception is devices that are not on the default PCI segment.
3877                 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
3878                 dma_handler,
3879                 self.pending_activations.clone(),
3880                 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
3881             )
3882             .map_err(DeviceManagerError::VirtioDevice)?,
3883         ));
3884 
3885         let new_resources = self.add_pci_device(
3886             virtio_pci_device.clone(),
3887             virtio_pci_device.clone(),
3888             pci_segment_id,
3889             pci_device_bdf,
3890             resources,
3891         )?;
3892 
3893         let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3894         for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3895             let io_addr = IoEventAddress::Mmio(addr);
3896             self.address_manager
3897                 .vm
3898                 .register_ioevent(event, &io_addr, None)
3899                 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
3900         }
3901 
3902         // Update the device tree with correct resource information.
3903         node.resources = new_resources;
3904         node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
3905         node.pci_bdf = Some(pci_device_bdf);
3906         node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
3907         self.device_tree.lock().unwrap().insert(id, node);
3908 
3909         Ok(pci_device_bdf)
3910     }
3911 
3912     fn add_pvpanic_device(
3913         &mut self,
3914     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
3915         let id = String::from(PVPANIC_DEVICE_NAME);
3916         let pci_segment_id = 0x0_u16;
3917 
3918         info!("Creating pvpanic device {}", id);
3919 
3920         let (pci_segment_id, pci_device_bdf, resources) =
3921             self.pci_resources(&id, pci_segment_id)?;
3922 
3923         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
3924 
3925         let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
3926             .map_err(DeviceManagerError::PvPanicCreate)?;
3927 
3928         let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));
3929 
3930         let new_resources = self.add_pci_device(
3931             pvpanic_device.clone(),
3932             pvpanic_device.clone(),
3933             pci_segment_id,
3934             pci_device_bdf,
3935             resources,
3936         )?;
3937 
3938         let mut node = device_node!(id, pvpanic_device);
3939 
3940         node.resources = new_resources;
3941         node.pci_bdf = Some(pci_device_bdf);
3942         node.pci_device_handle = None;
3943 
3944         self.device_tree.lock().unwrap().insert(id, node);
3945 
3946         Ok(Some(pvpanic_device))
3947     }
3948 
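     /// Resolve the PCI segment, BDF and resources for a device id: when the id
     /// is already in the device tree the device is being restored and its
     /// previous placement is reused, otherwise a fresh BDF is taken from the
     /// requested segment.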
3949     fn pci_resources(
3950         &self,
3951         id: &str,
3952         pci_segment_id: u16,
3953     ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
3954         // Look for the id in the device tree. If it can be found, that means
3955         // the device is being restored, otherwise it's created from scratch.
3956         Ok(
3957             if let Some(node) = self.device_tree.lock().unwrap().get(id) {
3958                 info!("Restoring virtio-pci {} resources", id);
3959                 let pci_device_bdf: PciBdf = node
3960                     .pci_bdf
3961                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3962                 let pci_segment_id = pci_device_bdf.segment();
3963 
3964                 self.pci_segments[pci_segment_id as usize]
3965                     .pci_bus
3966                     .lock()
3967                     .unwrap()
3968                     .get_device_id(pci_device_bdf.device() as usize)
3969                     .map_err(DeviceManagerError::GetPciDeviceId)?;
3970 
3971                 (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
3972             } else {
3973                 let pci_device_bdf =
3974                     self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3975 
3976                 (pci_segment_id, pci_device_bdf, None)
3977             },
3978         )
3979     }
3980 
3981     #[cfg(target_arch = "x86_64")]
3982     pub fn io_bus(&self) -> &Arc<Bus> {
3983         &self.address_manager.io_bus
3984     }
3985 
3986     pub fn mmio_bus(&self) -> &Arc<Bus> {
3987         &self.address_manager.mmio_bus
3988     }
3989 
3990     pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
3991         &self.address_manager.allocator
3992     }
3993 
3994     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3995         self.interrupt_controller
3996             .as_ref()
3997             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3998     }
3999 
4000     pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
4001         &self.pci_segments
4002     }
4003 
4004     pub fn console(&self) -> &Arc<Console> {
4005         &self.console
4006     }
4007 
4008     #[cfg(target_arch = "aarch64")]
4009     pub fn cmdline_additions(&self) -> &[String] {
4010         self.cmdline_additions.as_slice()
4011     }
4012 
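     /// Propagate a newly added guest memory region to every component that
     /// keeps its own view of guest memory: virtio devices, their DMA handlers,
     /// the shared VFIO container, and any vfio-user devices.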
4013     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
4014         for handle in self.virtio_devices.iter() {
4015             handle
4016                 .virtio_device
4017                 .lock()
4018                 .unwrap()
4019                 .add_memory_region(new_region)
4020                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
4021 
4022             if let Some(dma_handler) = &handle.dma_handler {
4023                 if !handle.iommu {
4024                     let gpa = new_region.start_addr().0;
4025                     let size = new_region.len();
4026                     dma_handler
4027                         .map(gpa, gpa, size)
4028                         .map_err(DeviceManagerError::VirtioDmaMap)?;
4029                 }
4030             }
4031         }
4032 
4033         // Take care of updating the memory for VFIO PCI devices.
4034         if let Some(vfio_container) = &self.vfio_container {
4035             vfio_container
4036                 .vfio_dma_map(
4037                     new_region.start_addr().raw_value(),
4038                     new_region.len(),
4039                     new_region.as_ptr() as u64,
4040                 )
4041                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
4042         }
4043 
4044         // Take care of updating the memory for vfio-user devices.
4045         {
4046             let device_tree = self.device_tree.lock().unwrap();
4047             for pci_device_node in device_tree.pci_devices() {
4048                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
4049                     .pci_device_handle
4050                     .as_ref()
4051                     .ok_or(DeviceManagerError::MissingPciDevice)?
4052                 {
4053                     vfio_user_pci_device
4054                         .lock()
4055                         .unwrap()
4056                         .dma_map(new_region)
4057                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
4058                 }
4059             }
4060         }
4061 
4062         Ok(())
4063     }
4064 
4065     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
4066         for mut activator in self.pending_activations.lock().unwrap().drain(..) {
4067             activator
4068                 .activate()
4069                 .map_err(DeviceManagerError::VirtioActivate)?;
4070         }
4071         Ok(())
4072     }
4073 
4074     pub fn notify_hotplug(
4075         &self,
4076         _notification_type: AcpiNotificationFlags,
4077     ) -> DeviceManagerResult<()> {
4078         return self
4079             .ged_notification_device
4080             .as_ref()
4081             .unwrap()
4082             .lock()
4083             .unwrap()
4084             .notify(_notification_type)
4085             .map_err(DeviceManagerError::HotPlugNotification);
4086     }
4087 
4088     pub fn add_device(
4089         &mut self,
4090         device_cfg: &mut DeviceConfig,
4091     ) -> DeviceManagerResult<PciDeviceInfo> {
4092         self.validate_identifier(&device_cfg.id)?;
4093 
4094         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
4095             return Err(DeviceManagerError::InvalidIommuHotplug);
4096         }
4097 
4098         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
4099 
4100         // Update the PCIU bitmap
4101         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4102 
4103         Ok(PciDeviceInfo {
4104             id: device_name,
4105             bdf,
4106         })
4107     }
4108 
4109     pub fn add_user_device(
4110         &mut self,
4111         device_cfg: &mut UserDeviceConfig,
4112     ) -> DeviceManagerResult<PciDeviceInfo> {
4113         self.validate_identifier(&device_cfg.id)?;
4114 
4115         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
4116 
4117         // Update the PCIU bitmap
4118         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4119 
4120         Ok(PciDeviceInfo {
4121             id: device_name,
4122             bdf,
4123         })
4124     }
4125 
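     /// Request removal of a hotpluggable device. This only marks the device as
     /// pending removal in the PCID bitmap of its segment; the actual teardown
     /// happens once the guest acknowledges the eject through `eject_device()`.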
4126     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
4127         // The node can be directly a PCI node in case the 'id' refers to a
4128         // VFIO device or a virtio-pci one.
4129         // In case the 'id' refers to a virtio device, we must find the PCI
4130         // node by looking at the parent.
4131         let device_tree = self.device_tree.lock().unwrap();
4132         let node = device_tree
4133             .get(&id)
4134             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
4135 
4136         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
4137             node
4138         } else {
4139             let parent = node
4140                 .parent
4141                 .as_ref()
4142                 .ok_or(DeviceManagerError::MissingNode)?;
4143             device_tree
4144                 .get(parent)
4145                 .ok_or(DeviceManagerError::MissingNode)?
4146         };
4147 
4148         let pci_device_bdf: PciBdf = pci_device_node
4149             .pci_bdf
4150             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
4151         let pci_segment_id = pci_device_bdf.segment();
4152 
4153         let pci_device_handle = pci_device_node
4154             .pci_device_handle
4155             .as_ref()
4156             .ok_or(DeviceManagerError::MissingPciDevice)?;
4157         #[allow(irrefutable_let_patterns)]
4158         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
4159             let device_type = VirtioDeviceType::from(
4160                 virtio_pci_device
4161                     .lock()
4162                     .unwrap()
4163                     .virtio_device()
4164                     .lock()
4165                     .unwrap()
4166                     .device_type(),
4167             );
4168             match device_type {
4169                 VirtioDeviceType::Net
4170                 | VirtioDeviceType::Block
4171                 | VirtioDeviceType::Pmem
4172                 | VirtioDeviceType::Fs
4173                 | VirtioDeviceType::Vsock => {}
4174                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
4175             }
4176         }
4177 
4178         // Update the PCID bitmap
4179         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
4180 
4181         Ok(())
4182     }
4183 
4184     pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
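        // Reached when the guest writes a slot bitmap to the B0EJ field of the
        // hotplug register block (see the BusDevice::write implementation
        // below), i.e. when the ACPI PCEJ method runs to eject a device.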
4185         info!(
4186             "Ejecting device_id = {} on segment_id={}",
4187             device_id, pci_segment_id
4188         );
4189 
4190         // Convert the device ID into the corresponding b/d/f.
4191         let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);
4192 
4193         // Give the PCI device ID back to the PCI bus.
4194         self.pci_segments[pci_segment_id as usize]
4195             .pci_bus
4196             .lock()
4197             .unwrap()
4198             .put_device_id(device_id as usize)
4199             .map_err(DeviceManagerError::PutPciDeviceId)?;
4200 
4201         // Remove the device from the device tree along with its children.
4202         let mut device_tree = self.device_tree.lock().unwrap();
4203         let pci_device_node = device_tree
4204             .remove_node_by_pci_bdf(pci_device_bdf)
4205             .ok_or(DeviceManagerError::MissingPciDevice)?;
4206 
4207         // For VFIO and vfio-user devices, the PCI device node id is the id we report.
4208         // For virtio devices we overwrite it below with the id of the
4209         // underlying virtio device.
4210         let mut id = pci_device_node.id;
4211         let pci_device_handle = pci_device_node
4212             .pci_device_handle
4213             .ok_or(DeviceManagerError::MissingPciDevice)?;
4214         if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
4215             // The virtio-pci device has a single child
4216             if !pci_device_node.children.is_empty() {
4217                 assert_eq!(pci_device_node.children.len(), 1);
4218                 let child_id = &pci_device_node.children[0];
4219                 id.clone_from(child_id);
4220             }
4221         }
4222         for child in pci_device_node.children.iter() {
4223             device_tree.remove(child);
4224         }
4225 
4226         let mut iommu_attached = false;
4227         if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
4228             if iommu_attached_devices.contains(&pci_device_bdf) {
4229                 iommu_attached = true;
4230             }
4231         }
4232 
4233         let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
4234             // No need to remove any virtio-mem mapping here as the container outlives all devices
4235             PciDeviceHandle::Vfio(vfio_pci_device) => {
4236                 for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
4237                     self.mmio_regions
4238                         .lock()
4239                         .unwrap()
4240                         .retain(|x| x.start != mmio_region.start)
4241                 }
4242 
4243                 (
4244                     Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
4245                     Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
4246                     None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
4247                     false,
4248                 )
4249             }
4250             PciDeviceHandle::Virtio(virtio_pci_device) => {
4251                 let dev = virtio_pci_device.lock().unwrap();
4252                 let bar_addr = dev.config_bar_addr();
4253                 for (event, addr) in dev.ioeventfds(bar_addr) {
4254                     let io_addr = IoEventAddress::Mmio(addr);
4255                     self.address_manager
4256                         .vm
4257                         .unregister_ioevent(event, &io_addr)
4258                         .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
4259                 }
4260 
4261                 if let Some(dma_handler) = dev.dma_handler() {
4262                     if !iommu_attached {
4263                         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
4264                             for region in zone.regions() {
4265                                 let iova = region.start_addr().0;
4266                                 let size = region.len();
4267                                 dma_handler
4268                                     .unmap(iova, size)
4269                                     .map_err(DeviceManagerError::VirtioDmaUnmap)?;
4270                             }
4271                         }
4272                     }
4273                 }
4274 
4275                 (
4276                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
4277                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
4278                     Some(dev.virtio_device()),
4279                     dev.dma_handler().is_some() && !iommu_attached,
4280                 )
4281             }
4282             PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
4283                 let mut dev = vfio_user_pci_device.lock().unwrap();
4284                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
4285                     for region in zone.regions() {
4286                         dev.dma_unmap(region)
4287                             .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
4288                     }
4289                 }
4290 
4291                 (
4292                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
4293                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
4294                     None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
4295                     true,
4296                 )
4297             }
4298         };
4299 
4300         if remove_dma_handler {
4301             for virtio_mem_device in self.virtio_mem_devices.iter() {
4302                 virtio_mem_device
4303                     .lock()
4304                     .unwrap()
4305                     .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
4306                         pci_device_bdf.into(),
4307                     ))
4308                     .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
4309             }
4310         }
4311 
4312         // Free the allocated BARs
4313         pci_device
4314             .lock()
4315             .unwrap()
4316             .free_bars(
4317                 &mut self.address_manager.allocator.lock().unwrap(),
4318                 &mut self.pci_segments[pci_segment_id as usize]
4319                     .mem32_allocator
4320                     .lock()
4321                     .unwrap(),
4322                 &mut self.pci_segments[pci_segment_id as usize]
4323                     .mem64_allocator
4324                     .lock()
4325                     .unwrap(),
4326             )
4327             .map_err(DeviceManagerError::FreePciBars)?;
4328 
4329         // Remove the device from the PCI bus
4330         self.pci_segments[pci_segment_id as usize]
4331             .pci_bus
4332             .lock()
4333             .unwrap()
4334             .remove_by_device(&pci_device)
4335             .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;
4336 
4337         #[cfg(target_arch = "x86_64")]
4338         // Remove the device from the IO bus
4339         self.io_bus()
4340             .remove_by_device(&bus_device)
4341             .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;
4342 
4343         // Remove the device from the MMIO bus
4344         self.mmio_bus()
4345             .remove_by_device(&bus_device)
4346             .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;
4347 
4348         // Remove the device from the list of BusDevice held by the
4349         // DeviceManager.
4350         self.bus_devices
4351             .retain(|dev| !Arc::ptr_eq(dev, &bus_device));
4352 
4353         // Shutdown and remove the underlying virtio-device if present
4354         if let Some(virtio_device) = virtio_device {
4355             for mapping in virtio_device.lock().unwrap().userspace_mappings() {
4356                 self.memory_manager
4357                     .lock()
4358                     .unwrap()
4359                     .remove_userspace_mapping(
4360                         mapping.addr.raw_value(),
4361                         mapping.len,
4362                         mapping.host_addr,
4363                         mapping.mergeable,
4364                         mapping.mem_slot,
4365                     )
4366                     .map_err(DeviceManagerError::MemoryManager)?;
4367             }
4368 
4369             virtio_device.lock().unwrap().shutdown();
4370 
4371             self.virtio_devices
4372                 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
4373         }
4374 
4375         event!(
4376             "vm",
4377             "device-removed",
4378             "id",
4379             &id,
4380             "bdf",
4381             pci_device_bdf.to_string()
4382         );
4383 
4384         // At this point, the device has been removed from all the lists and
4385         // buses where it was stored. At the end of this function, once the
4386         // remaining references (pci_device, bus_device and virtio_device) are
4387         // released, the actual device will be dropped.
4388         Ok(())
4389     }
4390 
4391     fn hotplug_virtio_pci_device(
4392         &mut self,
4393         handle: MetaVirtioDevice,
4394     ) -> DeviceManagerResult<PciDeviceInfo> {
4395         // Add the virtio device to the device manager list. This is important
4396         // because this list is used, for instance, to notify virtio devices
4397         // about memory updates.
4398         self.virtio_devices.push(handle.clone());
4399 
4400         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
4401             self.iommu_mapping.clone()
4402         } else {
4403             None
4404         };
4405 
4406         let bdf = self.add_virtio_pci_device(
4407             handle.virtio_device,
4408             &mapping,
4409             handle.id.clone(),
4410             handle.pci_segment,
4411             handle.dma_handler,
4412         )?;
4413 
4414         // Update the PCIU bitmap
4415         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4416 
4417         Ok(PciDeviceInfo { id: handle.id, bdf })
4418     }
4419 
4420     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
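        // A segment accepts vIOMMU-attached hotplug only if it is listed in
        // the platform configuration's `iommu_segments`.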
4421         self.config
4422             .lock()
4423             .as_ref()
4424             .unwrap()
4425             .platform
4426             .as_ref()
4427             .map(|pc| {
4428                 pc.iommu_segments
4429                     .as_ref()
4430                     .map(|v| v.contains(&pci_segment_id))
4431                     .unwrap_or_default()
4432             })
4433             .unwrap_or_default()
4434     }
4435 
4436     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
4437         self.validate_identifier(&disk_cfg.id)?;
4438 
4439         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
4440             return Err(DeviceManagerError::InvalidIommuHotplug);
4441         }
4442 
4443         let device = self.make_virtio_block_device(disk_cfg)?;
4444         self.hotplug_virtio_pci_device(device)
4445     }
4446 
4447     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
4448         self.validate_identifier(&fs_cfg.id)?;
4449 
4450         let device = self.make_virtio_fs_device(fs_cfg)?;
4451         self.hotplug_virtio_pci_device(device)
4452     }
4453 
4454     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
4455         self.validate_identifier(&pmem_cfg.id)?;
4456 
4457         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
4458             return Err(DeviceManagerError::InvalidIommuHotplug);
4459         }
4460 
4461         let device = self.make_virtio_pmem_device(pmem_cfg)?;
4462         self.hotplug_virtio_pci_device(device)
4463     }
4464 
4465     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
4466         self.validate_identifier(&net_cfg.id)?;
4467 
4468         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
4469             return Err(DeviceManagerError::InvalidIommuHotplug);
4470         }
4471 
4472         let device = self.make_virtio_net_device(net_cfg)?;
4473         self.hotplug_virtio_pci_device(device)
4474     }
4475 
4476     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
4477         self.validate_identifier(&vdpa_cfg.id)?;
4478 
4479         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
4480             return Err(DeviceManagerError::InvalidIommuHotplug);
4481         }
4482 
4483         let device = self.make_vdpa_device(vdpa_cfg)?;
4484         self.hotplug_virtio_pci_device(device)
4485     }
4486 
4487     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4488         self.validate_identifier(&vsock_cfg.id)?;
4489 
4490         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4491             return Err(DeviceManagerError::InvalidIommuHotplug);
4492         }
4493 
4494         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4495         self.hotplug_virtio_pci_device(device)
4496     }
4497 
4498     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4499         let mut counters = HashMap::new();
4500 
4501         for handle in &self.virtio_devices {
4502             let virtio_device = handle.virtio_device.lock().unwrap();
4503             if let Some(device_counters) = virtio_device.counters() {
4504                 counters.insert(handle.id.clone(), device_counters.clone());
4505             }
4506         }
4507 
4508         counters
4509     }
4510 
4511     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4512         if let Some(balloon) = &self.balloon {
4513             return balloon
4514                 .lock()
4515                 .unwrap()
4516                 .resize(size)
4517                 .map_err(DeviceManagerError::VirtioBalloonResize);
4518         }
4519 
4520         warn!("No balloon setup: Can't resize the balloon");
4521         Err(DeviceManagerError::MissingVirtioBalloon)
4522     }
4523 
4524     pub fn balloon_size(&self) -> u64 {
4525         if let Some(balloon) = &self.balloon {
4526             return balloon.lock().unwrap().get_actual();
4527         }
4528 
4529         0
4530     }
4531 
4532     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4533         self.device_tree.clone()
4534     }
4535 
4536     #[cfg(target_arch = "x86_64")]
4537     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4538         self.ged_notification_device
4539             .as_ref()
4540             .unwrap()
4541             .lock()
4542             .unwrap()
4543             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4544             .map_err(DeviceManagerError::PowerButtonNotification)
4545     }
4546 
4547     #[cfg(target_arch = "aarch64")]
4548     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4549         // There are two use cases:
4550         // 1. Direct kernel boot with a device tree.
4551         // 2. ACPI + UEFI boot.
4552 
4553         // Trigger a GPIO pin 3 event to satisfy use case 1.
4554         self.gpio_device
4555             .as_ref()
4556             .unwrap()
4557             .lock()
4558             .unwrap()
4559             .trigger_key(3)
4560             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4561         // Trigger a GED power button event to satisfy use case 2.
4562         self
4563             .ged_notification_device
4564             .as_ref()
4565             .unwrap()
4566             .lock()
4567             .unwrap()
4568             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4569             .map_err(DeviceManagerError::PowerButtonNotification)
4570     }
4571 
4572     pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
4573         &self.iommu_attached_devices
4574     }
4575 
4576     fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4577         if let Some(id) = id {
4578             if id.starts_with("__") {
4579                 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4580             }
4581 
4582             if self.device_tree.lock().unwrap().contains_key(id) {
4583                 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4584             }
4585         }
4586 
4587         Ok(())
4588     }
4589 
4590     pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
4591         &self.acpi_platform_addresses
4592     }
4593 }
4594 
4595 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4596     for (numa_node_id, numa_node) in numa_nodes.iter() {
4597         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4598             return Some(*numa_node_id);
4599         }
4600     }
4601 
4602     None
4603 }
4604 
4605 fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
4606     for (numa_node_id, numa_node) in numa_nodes.iter() {
4607         if numa_node.pci_segments.contains(&pci_segment_id) {
4608             return *numa_node_id;
4609         }
4610     }
4611 
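    // Default to NUMA node 0 when the segment is not explicitly assigned.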
4612     0
4613 }
4614 
4615 struct TpmDevice {}
4616 
4617 impl Aml for TpmDevice {
4618     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4619         aml::Device::new(
4620             "TPM2".into(),
4621             vec![
4622                 &aml::Name::new("_HID".into(), &"MSFT0101"),
4623                 &aml::Name::new("_STA".into(), &(0xF_usize)),
4624                 &aml::Name::new(
4625                     "_CRS".into(),
4626                     &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
4627                         true,
4628                         layout::TPM_START.0 as u32,
4629                         layout::TPM_SIZE as u32,
4630                     )]),
4631                 ),
4632             ],
4633         )
4634         .to_aml_bytes(sink)
4635     }
4636 }
4637 
4638 impl Aml for DeviceManager {
4639     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4640         #[cfg(target_arch = "aarch64")]
4641         use arch::aarch64::DeviceInfoForFdt;
4642 
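        // Emit one \_SB_.PCxx.PCNT method call per PCI segment; the PSCN
        // method built below chains them so a single PSCN invocation covers
        // every segment.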
4643         let mut pci_scan_methods = Vec::new();
4644         for i in 0..self.pci_segments.len() {
4645             pci_scan_methods.push(aml::MethodCall::new(
4646                 format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
4647                 vec![],
4648             ));
4649         }
4650         let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
4651         for method in &pci_scan_methods {
4652             pci_scan_inner.push(method)
4653         }
4654 
4655         // PCI hotplug controller
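        // It exposes a small MMIO register block (PCIU, PCID, B0EJ and PSEG,
        // one 32-bit field each) backed by the BusDevice implementation of
        // DeviceManager further down in this file.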
4656         aml::Device::new(
4657             "_SB_.PHPR".into(),
4658             vec![
4659                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
4660                 &aml::Name::new("_STA".into(), &0x0bu8),
4661                 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
4662                 &aml::Mutex::new("BLCK".into(), 0),
4663                 &aml::Name::new(
4664                     "_CRS".into(),
4665                     &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
4666                         aml::AddressSpaceCacheable::NotCacheable,
4667                         true,
4668                         self.acpi_address.0,
4669                         self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
4670                         None,
4671                     )]),
4672                 ),
4673                 // OpRegion and Fields map MMIO range into individual field values
4674                 &aml::OpRegion::new(
4675                     "PCST".into(),
4676                     aml::OpRegionSpace::SystemMemory,
4677                     &(self.acpi_address.0 as usize),
4678                     &DEVICE_MANAGER_ACPI_SIZE,
4679                 ),
4680                 &aml::Field::new(
4681                     "PCST".into(),
4682                     aml::FieldAccessType::DWord,
4683                     aml::FieldLockRule::NoLock,
4684                     aml::FieldUpdateRule::WriteAsZeroes,
4685                     vec![
4686                         aml::FieldEntry::Named(*b"PCIU", 32),
4687                         aml::FieldEntry::Named(*b"PCID", 32),
4688                         aml::FieldEntry::Named(*b"B0EJ", 32),
4689                         aml::FieldEntry::Named(*b"PSEG", 32),
4690                     ],
4691                 ),
4692                 &aml::Method::new(
4693                     "PCEJ".into(),
4694                     2,
4695                     true,
4696                     vec![
4697                         // Take lock defined above
4698                         &aml::Acquire::new("BLCK".into(), 0xffff),
4699                         // Select the PCI segment to operate on (second argument)
4700                         &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
4701                         // Set the bit matching the device slot (first argument) in the B0EJ field to request the eject
4702                         &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
4703                         // Release lock
4704                         &aml::Release::new("BLCK".into()),
4705                         // Return 0
4706                         &aml::Return::new(&aml::ZERO),
4707                     ],
4708                 ),
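                // The PCEJ method above corresponds, roughly, to the following
                // ASL (an approximate sketch, not generated output):
                //
                //     Method (PCEJ, 2, Serialized) {
                //         Acquire (BLCK, 0xFFFF)
                //         PSEG = Arg1
                //         B0EJ = (One << Arg0)
                //         Release (BLCK)
                //         Return (Zero)
                //     }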
4709                 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
4710             ],
4711         )
4712         .to_aml_bytes(sink);
4713 
4714         for segment in &self.pci_segments {
4715             segment.to_aml_bytes(sink);
4716         }
4717 
4718         let mut mbrd_memory = Vec::new();
4719 
4720         for segment in &self.pci_segments {
4721             mbrd_memory.push(aml::Memory32Fixed::new(
4722                 true,
4723                 segment.mmio_config_address as u32,
4724                 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
4725             ))
4726         }
4727 
4728         let mut mbrd_memory_refs = Vec::new();
4729         for mbrd_memory_ref in &mbrd_memory {
4730             mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
4731         }
4732 
4733         aml::Device::new(
4734             "_SB_.MBRD".into(),
4735             vec![
4736                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
4737                 &aml::Name::new("_UID".into(), &aml::ZERO),
4738                 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
4739             ],
4740         )
4741         .to_aml_bytes(sink);
4742 
4743         // Serial device
4744         #[cfg(target_arch = "x86_64")]
4745         let serial_irq = 4;
4746         #[cfg(target_arch = "aarch64")]
4747         let serial_irq =
4748             if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
4749                 self.get_device_info()
4750                     .clone()
4751                     .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
4752                     .unwrap()
4753                     .irq()
4754             } else {
4755                 // Serial is turned off: use a placeholder IRQ (the COM1 device below is not created in this case).
4756                 31
4757             };
4758         if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
4759             aml::Device::new(
4760                 "_SB_.COM1".into(),
4761                 vec![
4762                     &aml::Name::new(
4763                         "_HID".into(),
4764                         #[cfg(target_arch = "x86_64")]
4765                         &aml::EISAName::new("PNP0501"),
4766                         #[cfg(target_arch = "aarch64")]
4767                         &"ARMH0011",
4768                     ),
4769                     &aml::Name::new("_UID".into(), &aml::ZERO),
4770                     &aml::Name::new("_DDN".into(), &"COM1"),
4771                     &aml::Name::new(
4772                         "_CRS".into(),
4773                         &aml::ResourceTemplate::new(vec![
4774                             &aml::Interrupt::new(true, true, false, false, serial_irq),
4775                             #[cfg(target_arch = "x86_64")]
4776                             &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
4777                             #[cfg(target_arch = "aarch64")]
4778                             &aml::Memory32Fixed::new(
4779                                 true,
4780                                 arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
4781                                 MMIO_LEN as u32,
4782                             ),
4783                         ]),
4784                     ),
4785                 ],
4786             )
4787             .to_aml_bytes(sink);
4788         }
4789 
4790         aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);
4791 
4792         aml::Device::new(
4793             "_SB_.PWRB".into(),
4794             vec![
4795                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
4796                 &aml::Name::new("_UID".into(), &aml::ZERO),
4797             ],
4798         )
4799         .to_aml_bytes(sink);
4800 
4801         if self.config.lock().unwrap().tpm.is_some() {
4802             // Add tpm device
4803             TpmDevice {}.to_aml_bytes(sink);
4804         }
4805 
4806         self.ged_notification_device
4807             .as_ref()
4808             .unwrap()
4809             .lock()
4810             .unwrap()
4811             .to_aml_bytes(sink)
4812     }
4813 }
4814 
4815 impl Pausable for DeviceManager {
4816     fn pause(&mut self) -> result::Result<(), MigratableError> {
4817         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4818             if let Some(migratable) = &device_node.migratable {
4819                 migratable.lock().unwrap().pause()?;
4820             }
4821         }
4822         // On AArch64, pausing the device manager must also trigger a
4823         // "pause" of the GIC, which flushes the GIC pending tables and
4824         // ITS tables to guest RAM.
4825         #[cfg(target_arch = "aarch64")]
4826         {
4827             self.get_interrupt_controller()
4828                 .unwrap()
4829                 .lock()
4830                 .unwrap()
4831                 .pause()?;
4832         };
4833 
4834         Ok(())
4835     }
4836 
4837     fn resume(&mut self) -> result::Result<(), MigratableError> {
4838         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4839             if let Some(migratable) = &device_node.migratable {
4840                 migratable.lock().unwrap().resume()?;
4841             }
4842         }
4843 
4844         Ok(())
4845     }
4846 }
4847 
4848 impl Snapshottable for DeviceManager {
4849     fn id(&self) -> String {
4850         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4851     }
4852 
4853     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4854         let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);
4855 
4856         // We aggregate all devices snapshots.
4857         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4858             if let Some(migratable) = &device_node.migratable {
4859                 let mut migratable = migratable.lock().unwrap();
4860                 snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
4861             }
4862         }
4863 
4864         Ok(snapshot)
4865     }
4866 }
4867 
4868 impl Transportable for DeviceManager {}
4869 
4870 impl Migratable for DeviceManager {
4871     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4872         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4873             if let Some(migratable) = &device_node.migratable {
4874                 migratable.lock().unwrap().start_dirty_log()?;
4875             }
4876         }
4877         Ok(())
4878     }
4879 
4880     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4881         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4882             if let Some(migratable) = &device_node.migratable {
4883                 migratable.lock().unwrap().stop_dirty_log()?;
4884             }
4885         }
4886         Ok(())
4887     }
4888 
4889     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4890         let mut tables = Vec::new();
4891         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4892             if let Some(migratable) = &device_node.migratable {
4893                 tables.push(migratable.lock().unwrap().dirty_log()?);
4894             }
4895         }
4896         Ok(MemoryRangeTable::new_from_tables(tables))
4897     }
4898 
4899     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4900         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4901             if let Some(migratable) = &device_node.migratable {
4902                 migratable.lock().unwrap().start_migration()?;
4903             }
4904         }
4905         Ok(())
4906     }
4907 
4908     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4909         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4910             if let Some(migratable) = &device_node.migratable {
4911                 migratable.lock().unwrap().complete_migration()?;
4912             }
4913         }
4914         Ok(())
4915     }
4916 }
4917 
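// Layout of the PCI hotplug MMIO register block. These offsets and sizes must
// match the OpRegion/Field entries emitted in the AML above.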
4918 const PCIU_FIELD_OFFSET: u64 = 0;
4919 const PCID_FIELD_OFFSET: u64 = 4;
4920 const B0EJ_FIELD_OFFSET: u64 = 8;
4921 const PSEG_FIELD_OFFSET: u64 = 12;
4922 const PCIU_FIELD_SIZE: usize = 4;
4923 const PCID_FIELD_SIZE: usize = 4;
4924 const B0EJ_FIELD_SIZE: usize = 4;
4925 const PSEG_FIELD_SIZE: usize = 4;
4926 
4927 impl BusDevice for DeviceManager {
4928     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
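        // MMIO read handler for the hotplug register block. PCIU and PCID are
        // read-to-clear: returning the bitmap also resets it.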
4929         match offset {
4930             PCIU_FIELD_OFFSET => {
4931                 assert!(data.len() == PCIU_FIELD_SIZE);
4932                 data.copy_from_slice(
4933                     &self.pci_segments[self.selected_segment]
4934                         .pci_devices_up
4935                         .to_le_bytes(),
4936                 );
4937                 // Clear the PCIU bitmap
4938                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4939             }
4940             PCID_FIELD_OFFSET => {
4941                 assert!(data.len() == PCID_FIELD_SIZE);
4942                 data.copy_from_slice(
4943                     &self.pci_segments[self.selected_segment]
4944                         .pci_devices_down
4945                         .to_le_bytes(),
4946                 );
4947                 // Clear the PCID bitmap
4948                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4949             }
4950             B0EJ_FIELD_OFFSET => {
4951                 assert!(data.len() == B0EJ_FIELD_SIZE);
4952                 // Always return an empty bitmap since the eject is always
4953                 // taken care of right away during a write access.
4954                 data.fill(0);
4955             }
4956             PSEG_FIELD_OFFSET => {
4957                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4958                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4959             }
4960             _ => error!(
4961                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4962                 base, offset
4963             ),
4964         }
4965 
4966         debug!(
4967             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4968             base, offset, data
4969         )
4970     }
4971 
4972     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4973         match offset {
4974             B0EJ_FIELD_OFFSET => {
4975                 assert!(data.len() == B0EJ_FIELD_SIZE);
4976                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4977                 data_array.copy_from_slice(data);
4978                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4979 
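                // Each set bit in the written bitmap identifies a device slot
                // to eject on the currently selected segment.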
4980                 while slot_bitmap > 0 {
4981                     let slot_id = slot_bitmap.trailing_zeros();
4982                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4983                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4984                     }
4985                     slot_bitmap &= !(1 << slot_id);
4986                 }
4987             }
4988             PSEG_FIELD_OFFSET => {
4989                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4990                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4991                 data_array.copy_from_slice(data);
4992                 let selected_segment = u32::from_le_bytes(data_array) as usize;
4993                 if selected_segment >= self.pci_segments.len() {
4994                     error!(
4995                         "Segment selection out of range: {} >= {}",
4996                         selected_segment,
4997                         self.pci_segments.len()
4998                     );
4999                     return None;
5000                 }
5001                 self.selected_segment = selected_segment;
5002             }
5003             _ => error!(
5004                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
5005                 base, offset
5006             ),
5007         }
5008 
5009         debug!(
5010             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
5011             base, offset, data
5012         );
5013 
5014         None
5015     }
5016 }
5017 
5018 impl Drop for DeviceManager {
5019     fn drop(&mut self) {
5020         // Wake up the DeviceManager threads (mainly virtio device workers)
5021         // so we don't deadlock waiting on paused/parked worker threads.
5022         if let Err(e) = self.resume() {
5023             error!("Error resuming DeviceManager: {:?}", e);
5024         }
5025 
5026         for handle in self.virtio_devices.drain(..) {
5027             handle.virtio_device.lock().unwrap().shutdown();
5028         }
5029 
5030         if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
5031             // SAFETY: FFI call
5032             let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
5033         }
5034     }
5035 }
5036 
5037 #[cfg(test)]
5038 mod tests {
5039     use super::*;
5040 
5041     #[test]
5042     fn test_create_mmio_allocators() {
5043         let res = create_mmio_allocators(0x100000, 0x400000, 1, vec![1], 4 << 10);
5044         assert_eq!(res.len(), 1);
5045         assert_eq!(
5046             res[0].lock().unwrap().base(),
5047             vm_memory::GuestAddress(0x100000)
5048         );
5049         assert_eq!(
5050             res[0].lock().unwrap().end(),
5051             vm_memory::GuestAddress(0x3fffff)
5052         );
5053 
5054         let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![1, 1], 4 << 10);
5055         assert_eq!(res.len(), 2);
5056         assert_eq!(
5057             res[0].lock().unwrap().base(),
5058             vm_memory::GuestAddress(0x100000)
5059         );
5060         assert_eq!(
5061             res[0].lock().unwrap().end(),
5062             vm_memory::GuestAddress(0x27ffff)
5063         );
5064         assert_eq!(
5065             res[1].lock().unwrap().base(),
5066             vm_memory::GuestAddress(0x280000)
5067         );
5068         assert_eq!(
5069             res[1].lock().unwrap().end(),
5070             vm_memory::GuestAddress(0x3fffff)
5071         );
5072 
5073         let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![2, 1], 4 << 10);
5074         assert_eq!(res.len(), 2);
5075         assert_eq!(
5076             res[0].lock().unwrap().base(),
5077             vm_memory::GuestAddress(0x100000)
5078         );
5079         assert_eq!(
5080             res[0].lock().unwrap().end(),
5081             vm_memory::GuestAddress(0x2fffff)
5082         );
5083         assert_eq!(
5084             res[1].lock().unwrap().base(),
5085             vm_memory::GuestAddress(0x300000)
5086         );
5087         assert_eq!(
5088             res[1].lock().unwrap().end(),
5089             vm_memory::GuestAddress(0x3fffff)
5090         );
5091     }
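
    // A small illustrative test (not derived from the existing suite): it
    // mirrors the slot-bitmap decomposition loop used by the B0EJ write
    // handler in `BusDevice::write` above.
    #[test]
    fn test_slot_bitmap_decomposition_sketch() {
        // Guest requests ejection of the devices in slots 3 and 7.
        let mut slot_bitmap: u32 = (1 << 3) | (1 << 7);
        let mut slots = Vec::new();
        while slot_bitmap > 0 {
            let slot_id = slot_bitmap.trailing_zeros();
            slots.push(slot_id as u8);
            slot_bitmap &= !(1 << slot_id);
        }
        assert_eq!(slots, vec![3, 7]);
    }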
5092 }
5093