xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision 274f1aa2e738d579ffff9d4cfd7ed7c45293af31)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use crate::config::{
13     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
14     VdpaConfig, VhostMode, VmConfig, VsockConfig,
15 };
16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE};
17 use crate::device_tree::{DeviceNode, DeviceTree};
18 use crate::interrupt::LegacyUserspaceInterruptManager;
19 use crate::interrupt::MsiInterruptManager;
20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
21 use crate::pci_segment::PciSegment;
22 use crate::seccomp_filters::{get_seccomp_filter, Thread};
23 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
24 use crate::sigwinch_listener::start_sigwinch_listener;
25 use crate::GuestRegionMmap;
26 use crate::PciDeviceInfo;
27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
28 use acpi_tables::sdt::GenericAddress;
29 use acpi_tables::{aml, Aml};
30 use anyhow::anyhow;
31 use arch::layout;
32 #[cfg(target_arch = "x86_64")]
33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
34 use arch::NumaNodes;
35 #[cfg(target_arch = "aarch64")]
36 use arch::{DeviceType, MmioDeviceInfo};
37 use block::{
38     async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
39     fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_sync::RawFileDiskSync,
40     vhdx, vhdx_sync::VhdxDiskSync, ImageType,
41 };
42 #[cfg(feature = "io_uring")]
43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk};
44 #[cfg(target_arch = "aarch64")]
45 use devices::gic;
46 #[cfg(target_arch = "x86_64")]
47 use devices::ioapic;
48 #[cfg(target_arch = "aarch64")]
49 use devices::legacy::Pl011;
50 #[cfg(target_arch = "x86_64")]
51 use devices::legacy::Serial;
52 use devices::{
53     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
54 };
55 use hypervisor::{HypervisorType, IoEventAddress};
56 use libc::{
57     cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED,
58     O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW,
59 };
60 use pci::{
61     DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping,
62     VfioUserPciDevice, VfioUserPciDeviceError,
63 };
64 use seccompiler::SeccompAction;
65 use serde::{Deserialize, Serialize};
66 use std::collections::{BTreeSet, HashMap};
67 use std::fs::{read_link, File, OpenOptions};
68 use std::io::{self, stdout, Seek, SeekFrom};
69 use std::mem::zeroed;
70 use std::num::Wrapping;
71 use std::os::unix::fs::OpenOptionsExt;
72 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
73 use std::path::PathBuf;
74 use std::result;
75 use std::sync::{Arc, Mutex};
76 use std::time::Instant;
77 use tracer::trace_scoped;
78 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd};
79 use virtio_devices::transport::VirtioTransport;
80 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator};
81 use virtio_devices::vhost_user::VhostUserConfig;
82 use virtio_devices::{
83     AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource,
84 };
85 use virtio_devices::{Endpoint, IommuMapping};
86 use vm_allocator::{AddressAllocator, SystemAllocator};
87 use vm_device::dma_mapping::vfio::VfioDmaMapping;
88 use vm_device::dma_mapping::ExternalDmaMapping;
89 use vm_device::interrupt::{
90     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
91 };
92 use vm_device::{Bus, BusDevice, Resource};
93 use vm_memory::guest_memory::FileOffset;
94 use vm_memory::GuestMemoryRegion;
95 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
96 #[cfg(target_arch = "x86_64")]
97 use vm_memory::{GuestAddressSpace, GuestMemory};
98 use vm_migration::{
99     protocol::MemoryRangeTable, snapshot_from_id, versioned_state_from_id, Migratable,
100     MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, Transportable,
101 };
102 use vm_virtio::AccessPlatform;
103 use vm_virtio::VirtioDeviceType;
104 use vmm_sys_util::eventfd::EventFd;
105 
// NOTE(review): presumably the length of a single MMIO device window on
// aarch64 — confirm at the use sites.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Fixed identifiers: singleton devices / devices the user cannot name.
// Every one of them is a complete name starting with a double underscore.
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
const PVPANIC_DEVICE_NAME: &str = "__pvpanic";
// Kept with the fixed names because it is a full identifier, not a prefix.
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";

// Identifier prefixes: devices the user may name, and for which an
// identifier is generated when the user does not provide one.
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
133 
/// Errors associated with the device manager.
///
/// Each variant names one failing operation; variants with a payload carry
/// the underlying error (or the offending value) reported for that operation.
/// Only `Debug` is derived, so that is the only formatting available here.
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path.
    Disk(io::Error),

    /// Cannot create vhost-user-net device.
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device.
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device.
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device.
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device.
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device.
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device.
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device.
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device.
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device.
    CreateVirtioVsock(io::Error),

    /// Cannot create TPM device.
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend.
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device.
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device.
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device.
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format.
    DetectImageType(io::Error),

    /// Cannot open qcow disk path.
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager.
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread.
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface.
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs.
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device.
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device.
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file.
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size.
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory.
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs.
    FsRangeAllocation,

    /// Error creating serial output file.
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file.
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty.
    SerialPtyOpen(io::Error),

    /// Error creating console pty.
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode.
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer.
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device.
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device.
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port.
    AllocateIoPort,

    /// Failed to allocate MMIO address.
    AllocateMmioAddress,

    /// Failed to make hotplug notification.
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation.
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file.
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend.
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend.
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    /// NOTE(review): the `u32` is presumably the packed b/d/f that was looked
    /// up — confirm at the construction sites.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected.
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device.
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory.
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified.
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not a multiple of 2MiB.
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough.
    NoDevicePassthroughSupport,

    /// No socket option support for console device.
    NoSocketOptionSupportForConsoleDevice,

    /// Failed to resize virtio-balloon.
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device.
    MissingVirtualIommu,

    /// Failed to do power button notification.
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification.
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor.
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync.
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync.
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync.
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync.
    /// NOTE(review): the payload is a `vhdx::VhdxError`, so this presumably
    /// refers to `VhdxDiskSync` — confirm at the construction sites.
    CreateFixedVhdxDiskSync(vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client.
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device.
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest.
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device.
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor.
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU.
    InvalidIommuHotplug,

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier.
    InvalidIdentifier(String),

    /// Error activating virtio device.
    VirtioActivate(ActivateError),

    /// Failed retrieving device state from snapshot.
    RestoreGetState(MigratableError),

    /// Cannot create a PvPanic device.
    PvPanicCreate(devices::pvpanic::PvPanicError),
}
/// Result alias whose error type is always [`DeviceManagerError`].
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// NOTE(review): presumably the size in bytes of the device manager's ACPI
// register window — confirm at the use sites.
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

// ioctl request numbers used by `create_pty()` on the pty main descriptor.
// TIOCSPTLCK is issued there with a pointer to a zero value (named `unlock`).
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
// TIOCGTPEER is issued there with open flags; its return value is used as the
// file descriptor of the peer (sub) end of the pty.
const TIOCGTPEER: libc::c_int = 0x5441;
489 
490 pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
491     // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
492     // This is done to try and use the devpts filesystem that
493     // could be available for use in the process's namespace first.
494     // Ideally these are all the same file though but different
495     // kernels could have things setup differently.
496     // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
497     // for further details.
498 
499     let custom_flags = libc::O_NONBLOCK;
500     let main = match OpenOptions::new()
501         .read(true)
502         .write(true)
503         .custom_flags(custom_flags)
504         .open("/dev/pts/ptmx")
505     {
506         Ok(f) => f,
507         _ => OpenOptions::new()
508             .read(true)
509             .write(true)
510             .custom_flags(custom_flags)
511             .open("/dev/ptmx")?,
512     };
513     let mut unlock: libc::c_ulong = 0;
514     // SAFETY: FFI call into libc, trivially safe
515     unsafe { libc::ioctl(main.as_raw_fd(), TIOCSPTLCK as _, &mut unlock) };
516 
517     // SAFETY: FFI call into libc, trivially safe
518     let sub_fd = unsafe {
519         libc::ioctl(
520             main.as_raw_fd(),
521             TIOCGTPEER as _,
522             libc::O_NOCTTY | libc::O_RDWR,
523         )
524     };
525     if sub_fd == -1 {
526         return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
527     }
528 
529     let proc_path = PathBuf::from(format!("/proc/self/fd/{sub_fd}"));
530     let path = read_link(proc_path)?;
531 
532     // SAFETY: sub_fd is checked to be valid before being wrapped in File
533     Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
534 }
535 
536 #[derive(Default)]
537 pub struct Console {
538     console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
539 }
540 
541 impl Console {
542     pub fn need_resize(&self) -> bool {
543         if let Some(_resizer) = self.console_resizer.as_ref() {
544             return true;
545         }
546 
547         false
548     }
549 
550     pub fn update_console_size(&self) {
551         if let Some(resizer) = self.console_resizer.as_ref() {
552             resizer.update_console_size()
553         }
554     }
555 }
556 
/// Groups the allocators, buses, VM handle and device tree that the
/// `DeviceRelocation::move_bar` implementation below works on.
pub(crate) struct AddressManager {
    // System allocator; `move_bar()` uses it for I/O port ranges and for
    // 32-bit MMIO hole ranges.
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    // Port I/O bus, only present on x86_64.
    #[cfg(target_arch = "x86_64")]
    pub(crate) io_bus: Arc<Bus>,
    // MMIO bus.
    pub(crate) mmio_bus: Arc<Bus>,
    // Hypervisor VM handle; `move_bar()` uses it to (un)register ioevents and
    // to remove/create user memory regions.
    pub(crate) vm: Arc<dyn hypervisor::Vm>,
    // Device tree whose `Resource::PciBar` entries are updated on a BAR move.
    device_tree: Arc<Mutex<DeviceTree>>,
    // Allocators searched for 64-bit memory BARs: the first one whose
    // [base, end] range contains the old BAR base is used.
    pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}
566 
impl DeviceRelocation for AddressManager {
    /// Moves a BAR of `pci_dev` from `old_base` to `new_base`.
    ///
    /// The work is done in this order:
    /// 1. In the allocator matching `region_type`, the range at `old_base`
    ///    is freed and `len` bytes are allocated at `new_base`; the range is
    ///    then updated on the PIO bus (I/O BARs) or MMIO bus (memory BARs).
    /// 2. If the device reports an id, the matching `Resource::PciBar` entry
    ///    of its device tree node gets `new_base` as its base.
    /// 3. If the device is a `VirtioPciDevice`: when its config BAR address
    ///    equals `new_base`, its ioeventfds are unregistered at the old
    ///    addresses and registered at the new ones; otherwise, when its
    ///    shared memory regions start at `old_base`, the backing user memory
    ///    region is removed and re-created at `new_base`.
    /// 4. `pci_dev.move_bar(old_base, new_base)` is called and its result
    ///    returned.
    ///
    /// # Errors
    ///
    /// Any failing step returns an `io::Error` of kind `Other` immediately.
    /// Steps that already completed are not rolled back.
    ///
    /// # Panics
    ///
    /// Panics if one of the mutexes taken here is poisoned.
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                // On aarch64 the condition is only logged; execution still
                // continues with the device tree and device updates below.
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                // Update system allocator
                if region_type == PciBarRegionType::Memory32BitRegion {
                    // 32-bit BARs are taken from the system allocator's MMIO hole.
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_mmio_hole_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            Some(len),
                        )
                        .ok_or_else(|| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                "failed allocating new 32 bits MMIO range",
                            )
                        })?;
                } else {
                    // Find the specific allocator that this BAR was allocated from and use it for new one
                    // If no allocator range contains `old_base`, nothing is
                    // freed or allocated and the MMIO bus is still updated.
                    for allocator in &self.pci_mmio_allocators {
                        let allocator_base = allocator.lock().unwrap().base();
                        let allocator_end = allocator.lock().unwrap().end();

                        if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                            allocator
                                .lock()
                                .unwrap()
                                .free(GuestAddress(old_base), len as GuestUsize);

                            allocator
                                .lock()
                                .unwrap()
                                .allocate(
                                    Some(GuestAddress(new_base)),
                                    len as GuestUsize,
                                    Some(len),
                                )
                                .ok_or_else(|| {
                                    io::Error::new(
                                        io::ErrorKind::Other,
                                        "failed allocating new 64 bits MMIO range",
                                    )
                                })?;

                            break;
                        }
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        // Devices that report no id are skipped here.
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        // Only the first BAR resource matching both the
                        // region type and the old base is rewritten.
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {id} from device tree"),
                ));
            }
        }

        // Extra bookkeeping that only applies to virtio-pci devices.
        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The moved BAR is the config BAR: drop the ioeventfds
                // computed from the old base, then register the ones computed
                // from the new base.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {e:?}"),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {e:?}"),
                            )
                        })?;
                }
            } else {
                // Another BAR moved: only act when the device's shared memory
                // regions start at the old base.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        // Remove the existing mapping, described with the old
                        // guest address.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {e:?}"),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        // Same slot, length and host address; only the guest
                        // address changes.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {e:?}"),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {e:?}"),
                            )
                        })?;
                    }
                }
            }
        }

        // Let the device itself record the move; its result is the result of
        // the whole operation.
        pci_dev.move_bar(old_base, new_base)
    }
}
775 
/// Serializable state made of the device tree and the device id counter.
/// NOTE(review): presumably the `DeviceManager` snapshot payload — confirm
/// against the snapshot/restore code.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    // Tree of device nodes.
    device_tree: DeviceTree,
    // Wrapping counter of device ids.
    device_id_cnt: Wrapping<usize>,
}
781 
/// An open pty file handle together with a filesystem path.
/// NOTE(review): `path` is presumably the path of the pty's peer end as
/// returned by `create_pty()` — confirm at the construction sites.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    /// Returns a pair holding a duplicated file handle and a copy of the path.
    ///
    /// # Panics
    ///
    /// Panics when the file handle cannot be duplicated, since `Clone` has no
    /// way to report the failure.
    fn clone(&self) -> Self {
        let main = self.main.try_clone().unwrap();
        let path = self.path.to_path_buf();
        Self { main, path }
    }
}
796 
/// Shared handle to a PCI device of one of three concrete kinds.
///
/// Each variant holds an `Arc<Mutex<_>>`, so cloning the handle clones the
/// `Arc` and refers to the same device.
#[derive(Clone)]
pub enum PciDeviceHandle {
    /// A VFIO PCI device.
    Vfio(Arc<Mutex<VfioPciDevice>>),
    /// A virtio PCI device.
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    /// A vfio-user PCI device.
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}
803 
/// A virtio device together with the metadata kept alongside it.
#[derive(Clone)]
struct MetaVirtioDevice {
    // The virtio device itself, shared behind a mutex.
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // NOTE(review): presumably whether the device is placed behind the
    // virtual IOMMU — confirm at the use sites.
    iommu: bool,
    // Identifier of the device.
    id: String,
    // NOTE(review): presumably the id of the PCI segment the device is
    // attached to — confirm at the use sites.
    pci_segment: u16,
    // Optional external DMA mapping handler associated with the device.
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}
812 
/// Optional ACPI `GenericAddress` values for platform registers.
///
/// Every field is `None` in the `Default` value.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    /// Address of the PM timer, if any.
    pub pm_timer_address: Option<GenericAddress>,
    /// Address of the reset register, if any.
    pub reset_reg_address: Option<GenericAddress>,
    /// Address of the sleep control register, if any.
    pub sleep_control_reg_address: Option<GenericAddress>,
    /// Address of the sleep status register, if any.
    pub sleep_status_reg_address: Option<GenericAddress>,
}
820 
/// Owns the devices of a VM along with the resources (buses, allocators,
/// interrupt managers, PCI segments) needed to create and register them.
pub struct DeviceManager {
    // The underlying hypervisor
    hypervisor_type: HypervisorType,

    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // PTY pair backing the console, if any
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // PTY pair backing the serial port, if any
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // Pipe associated with console resizing, if any; shared with callers
    // through `console_resize_pipe()`.
    console_resize_pipe: Option<Arc<File>>,

    // Original terminal settings, kept so they can be restored on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller (IOAPIC on x86_64, GIC on aarch64); `None` until
    // `add_interrupt_controller()` has run.
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs. It is part of the
    // saved `DeviceManagerState` and restored from a snapshot when present.
    device_id_cnt: Wrapping<usize>,

    // PCI segments; index 0 is the default segment.
    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager; `None` until `create_devices()` has built it
    // on top of the interrupt controller.
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU device and its mapping; both are set together
    // when the IOMMU is enabled in the configuration.
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit and reset events; clones are handed to the devices that need to
    // signal them (e.g. the ACPI shutdown device).
    exit_evt: EventFd,
    reset_evt: EventFd,

    // Information (address, length, IRQ) about the MMIO devices registered so
    // far, keyed by device type and identifier.
    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    // MMIO address at which the DeviceManager itself is registered on the
    // MMIO bus.
    acpi_address: GuestAddress,

    // Initialised to 0 by `new()`.
    // NOTE(review): presumably the PCI segment currently selected through the
    // DeviceManager's MMIO interface; confirm against the BusDevice impl.
    selected_segment: usize,

    // Handles to the virtio-mem devices
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // pvpanic device, created only when enabled in the configuration
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    // Snapshot from which device state is restored, if any.
    snapshot: Option<Snapshot>,
}
966 
967 impl DeviceManager {
968     #[allow(clippy::too_many_arguments)]
969     pub fn new(
970         #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
971         mmio_bus: Arc<Bus>,
972         hypervisor_type: HypervisorType,
973         vm: Arc<dyn hypervisor::Vm>,
974         config: Arc<Mutex<VmConfig>>,
975         memory_manager: Arc<Mutex<MemoryManager>>,
976         cpu_manager: Arc<Mutex<CpuManager>>,
977         exit_evt: EventFd,
978         reset_evt: EventFd,
979         seccomp_action: SeccompAction,
980         numa_nodes: NumaNodes,
981         activate_evt: &EventFd,
982         force_iommu: bool,
983         boot_id_list: BTreeSet<String>,
984         timestamp: Instant,
985         snapshot: Option<Snapshot>,
986         dynamic: bool,
987     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
988         trace_scoped!("DeviceManager::new");
989 
990         let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
991             let state: DeviceManagerState = snapshot.to_state().unwrap();
992             (
993                 Arc::new(Mutex::new(state.device_tree.clone())),
994                 state.device_id_cnt,
995             )
996         } else {
997             (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
998         };
999 
1000         let num_pci_segments =
1001             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
1002                 platform_config.num_pci_segments
1003             } else {
1004                 1
1005             };
1006 
1007         let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
1008         let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;
1009 
1010         // Start each PCI segment range on a 4GiB boundary
1011         let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
1012             / ((4 << 30) * num_pci_segments as u64)
1013             * (4 << 30);
1014 
1015         let mut pci_mmio_allocators = vec![];
1016         for i in 0..num_pci_segments as u64 {
1017             let mmio_start = start_of_device_area + i * pci_segment_size;
1018             let allocator = Arc::new(Mutex::new(
1019                 AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
1020             ));
1021             pci_mmio_allocators.push(allocator)
1022         }
1023 
1024         let address_manager = Arc::new(AddressManager {
1025             allocator: memory_manager.lock().unwrap().allocator(),
1026             #[cfg(target_arch = "x86_64")]
1027             io_bus,
1028             mmio_bus,
1029             vm: vm.clone(),
1030             device_tree: Arc::clone(&device_tree),
1031             pci_mmio_allocators,
1032         });
1033 
1034         // First we create the MSI interrupt manager, the legacy one is created
1035         // later, after the IOAPIC device creation.
1036         // The reason we create the MSI one first is because the IOAPIC needs it,
1037         // and then the legacy interrupt manager needs an IOAPIC. So we're
1038         // handling a linear dependency chain:
1039         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1040         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1041             Arc::new(MsiInterruptManager::new(
1042                 Arc::clone(&address_manager.allocator),
1043                 vm,
1044             ));
1045 
1046         let acpi_address = address_manager
1047             .allocator
1048             .lock()
1049             .unwrap()
1050             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1051             .ok_or(DeviceManagerError::AllocateIoPort)?;
1052 
1053         let mut pci_irq_slots = [0; 32];
1054         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1055             &address_manager,
1056             &mut pci_irq_slots,
1057         )?;
1058 
1059         let mut pci_segments = vec![PciSegment::new_default_segment(
1060             &address_manager,
1061             Arc::clone(&address_manager.pci_mmio_allocators[0]),
1062             &pci_irq_slots,
1063         )?];
1064 
1065         for i in 1..num_pci_segments as usize {
1066             pci_segments.push(PciSegment::new(
1067                 i as u16,
1068                 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
1069                 &address_manager,
1070                 Arc::clone(&address_manager.pci_mmio_allocators[i]),
1071                 &pci_irq_slots,
1072             )?);
1073         }
1074 
1075         if dynamic {
1076             let acpi_address = address_manager
1077                 .allocator
1078                 .lock()
1079                 .unwrap()
1080                 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
1081                 .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1082 
1083             address_manager
1084                 .mmio_bus
1085                 .insert(
1086                     cpu_manager.clone(),
1087                     acpi_address.0,
1088                     CPU_MANAGER_ACPI_SIZE as u64,
1089                 )
1090                 .map_err(DeviceManagerError::BusError)?;
1091 
1092             cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
1093         }
1094 
1095         let device_manager = DeviceManager {
1096             hypervisor_type,
1097             address_manager: Arc::clone(&address_manager),
1098             console: Arc::new(Console::default()),
1099             interrupt_controller: None,
1100             #[cfg(target_arch = "aarch64")]
1101             cmdline_additions: Vec::new(),
1102             ged_notification_device: None,
1103             config,
1104             memory_manager,
1105             cpu_manager,
1106             virtio_devices: Vec::new(),
1107             bus_devices: Vec::new(),
1108             device_id_cnt,
1109             msi_interrupt_manager,
1110             legacy_interrupt_manager: None,
1111             passthrough_device: None,
1112             vfio_container: None,
1113             iommu_device: None,
1114             iommu_mapping: None,
1115             iommu_attached_devices: None,
1116             pci_segments,
1117             device_tree,
1118             exit_evt,
1119             reset_evt,
1120             #[cfg(target_arch = "aarch64")]
1121             id_to_dev_info: HashMap::new(),
1122             seccomp_action,
1123             numa_nodes,
1124             balloon: None,
1125             activate_evt: activate_evt
1126                 .try_clone()
1127                 .map_err(DeviceManagerError::EventFd)?,
1128             acpi_address,
1129             selected_segment: 0,
1130             serial_pty: None,
1131             serial_manager: None,
1132             console_pty: None,
1133             console_resize_pipe: None,
1134             original_termios_opt: Arc::new(Mutex::new(None)),
1135             virtio_mem_devices: Vec::new(),
1136             #[cfg(target_arch = "aarch64")]
1137             gpio_device: None,
1138             pvpanic_device: None,
1139             force_iommu,
1140             io_uring_supported: None,
1141             boot_id_list,
1142             timestamp,
1143             pending_activations: Arc::new(Mutex::new(Vec::default())),
1144             acpi_platform_addresses: AcpiPlatformAddresses::default(),
1145             snapshot,
1146         };
1147 
1148         let device_manager = Arc::new(Mutex::new(device_manager));
1149 
1150         address_manager
1151             .mmio_bus
1152             .insert(
1153                 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
1154                 acpi_address.0,
1155                 DEVICE_MANAGER_ACPI_SIZE as u64,
1156             )
1157             .map_err(DeviceManagerError::BusError)?;
1158 
1159         Ok(device_manager)
1160     }
1161 
1162     pub fn serial_pty(&self) -> Option<PtyPair> {
1163         self.serial_pty
1164             .as_ref()
1165             .map(|pty| pty.lock().unwrap().clone())
1166     }
1167 
1168     pub fn console_pty(&self) -> Option<PtyPair> {
1169         self.console_pty
1170             .as_ref()
1171             .map(|pty| pty.lock().unwrap().clone())
1172     }
1173 
1174     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1175         self.console_resize_pipe.as_ref().map(Arc::clone)
1176     }
1177 
1178     pub fn create_devices(
1179         &mut self,
1180         serial_pty: Option<PtyPair>,
1181         console_pty: Option<PtyPair>,
1182         console_resize_pipe: Option<File>,
1183         original_termios_opt: Arc<Mutex<Option<termios>>>,
1184     ) -> DeviceManagerResult<()> {
1185         trace_scoped!("create_devices");
1186 
1187         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1188 
1189         let interrupt_controller = self.add_interrupt_controller()?;
1190 
1191         self.cpu_manager
1192             .lock()
1193             .unwrap()
1194             .set_interrupt_controller(interrupt_controller.clone());
1195 
1196         // Now we can create the legacy interrupt manager, which needs the freshly
1197         // formed IOAPIC device.
1198         let legacy_interrupt_manager: Arc<
1199             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1200         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1201             &interrupt_controller,
1202         )));
1203 
1204         {
1205             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1206                 self.address_manager
1207                     .mmio_bus
1208                     .insert(
1209                         Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
1210                         acpi_address.0,
1211                         MEMORY_MANAGER_ACPI_SIZE as u64,
1212                     )
1213                     .map_err(DeviceManagerError::BusError)?;
1214             }
1215         }
1216 
1217         #[cfg(target_arch = "x86_64")]
1218         self.add_legacy_devices(
1219             self.reset_evt
1220                 .try_clone()
1221                 .map_err(DeviceManagerError::EventFd)?,
1222         )?;
1223 
1224         #[cfg(target_arch = "aarch64")]
1225         self.add_legacy_devices(&legacy_interrupt_manager)?;
1226 
1227         {
1228             self.ged_notification_device = self.add_acpi_devices(
1229                 &legacy_interrupt_manager,
1230                 self.reset_evt
1231                     .try_clone()
1232                     .map_err(DeviceManagerError::EventFd)?,
1233                 self.exit_evt
1234                     .try_clone()
1235                     .map_err(DeviceManagerError::EventFd)?,
1236             )?;
1237         }
1238 
1239         self.original_termios_opt = original_termios_opt;
1240 
1241         self.console = self.add_console_device(
1242             &legacy_interrupt_manager,
1243             &mut virtio_devices,
1244             serial_pty,
1245             console_pty,
1246             console_resize_pipe,
1247         )?;
1248 
1249         if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
1250             let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
1251             self.bus_devices
1252                 .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>)
1253         }
1254         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1255 
1256         virtio_devices.append(&mut self.make_virtio_devices()?);
1257 
1258         self.add_pci_devices(virtio_devices.clone())?;
1259 
1260         self.virtio_devices = virtio_devices;
1261 
1262         if self.config.clone().lock().unwrap().pvpanic {
1263             self.pvpanic_device = self.add_pvpanic_device()?;
1264         }
1265 
1266         Ok(())
1267     }
1268 
1269     fn state(&self) -> DeviceManagerState {
1270         DeviceManagerState {
1271             device_tree: self.device_tree.lock().unwrap().clone(),
1272             device_id_cnt: self.device_id_cnt,
1273         }
1274     }
1275 
1276     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1277         #[cfg(target_arch = "aarch64")]
1278         {
1279             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1280             let vgic_config = gic::Gic::create_default_config(vcpus.into());
1281             (
1282                 vgic_config.msi_addr,
1283                 vgic_config.msi_addr + vgic_config.msi_size - 1,
1284             )
1285         }
1286         #[cfg(target_arch = "x86_64")]
1287         (0xfee0_0000, 0xfeef_ffff)
1288     }
1289 
1290     #[cfg(target_arch = "aarch64")]
1291     /// Gets the information of the devices registered up to some point in time.
1292     pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
1293         &self.id_to_dev_info
1294     }
1295 
1296     #[allow(unused_variables)]
1297     fn add_pci_devices(
1298         &mut self,
1299         virtio_devices: Vec<MetaVirtioDevice>,
1300     ) -> DeviceManagerResult<()> {
1301         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1302 
1303         let iommu_device = if self.config.lock().unwrap().iommu {
1304             let (device, mapping) = virtio_devices::Iommu::new(
1305                 iommu_id.clone(),
1306                 self.seccomp_action.clone(),
1307                 self.exit_evt
1308                     .try_clone()
1309                     .map_err(DeviceManagerError::EventFd)?,
1310                 self.get_msi_iova_space(),
1311                 versioned_state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
1312                     .map_err(DeviceManagerError::RestoreGetState)?,
1313             )
1314             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1315             let device = Arc::new(Mutex::new(device));
1316             self.iommu_device = Some(Arc::clone(&device));
1317             self.iommu_mapping = Some(mapping);
1318 
1319             // Fill the device tree with a new node. In case of restore, we
1320             // know there is nothing to do, so we can simply override the
1321             // existing entry.
1322             self.device_tree
1323                 .lock()
1324                 .unwrap()
1325                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1326 
1327             Some(device)
1328         } else {
1329             None
1330         };
1331 
1332         let mut iommu_attached_devices = Vec::new();
1333         {
1334             for handle in virtio_devices {
1335                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1336                     self.iommu_mapping.clone()
1337                 } else {
1338                     None
1339                 };
1340 
1341                 let dev_id = self.add_virtio_pci_device(
1342                     handle.virtio_device,
1343                     &mapping,
1344                     handle.id,
1345                     handle.pci_segment,
1346                     handle.dma_handler,
1347                 )?;
1348 
1349                 if handle.iommu {
1350                     iommu_attached_devices.push(dev_id);
1351                 }
1352             }
1353 
1354             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1355             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1356 
1357             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1358             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1359 
1360             // Add all devices from forced iommu segments
1361             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1362                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1363                     for segment in iommu_segments {
1364                         for device in 0..32 {
1365                             let bdf = PciBdf::new(*segment, 0, device, 0);
1366                             if !iommu_attached_devices.contains(&bdf) {
1367                                 iommu_attached_devices.push(bdf);
1368                             }
1369                         }
1370                     }
1371                 }
1372             }
1373 
1374             if let Some(iommu_device) = iommu_device {
1375                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1376                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1377             }
1378         }
1379 
1380         for segment in &self.pci_segments {
1381             #[cfg(target_arch = "x86_64")]
1382             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1383                 self.bus_devices
1384                     .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
1385             }
1386 
1387             self.bus_devices
1388                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
1389         }
1390 
1391         Ok(())
1392     }
1393 
1394     #[cfg(target_arch = "aarch64")]
1395     fn add_interrupt_controller(
1396         &mut self,
1397     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1398         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1399             gic::Gic::new(
1400                 self.config.lock().unwrap().cpus.boot_vcpus,
1401                 Arc::clone(&self.msi_interrupt_manager),
1402                 self.address_manager.vm.clone(),
1403             )
1404             .map_err(DeviceManagerError::CreateInterruptController)?,
1405         ));
1406 
1407         self.interrupt_controller = Some(interrupt_controller.clone());
1408 
1409         // Restore the vGic if this is in the process of restoration
1410         let id = String::from(gic::GIC_SNAPSHOT_ID);
1411         if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
1412             // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
1413             if self
1414                 .cpu_manager
1415                 .lock()
1416                 .unwrap()
1417                 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
1418                 .is_err()
1419             {
1420                 info!("Failed to initialize PMU");
1421             }
1422 
1423             let vgic_state = vgic_snapshot
1424                 .to_state()
1425                 .map_err(DeviceManagerError::RestoreGetState)?;
1426             let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
1427             interrupt_controller
1428                 .lock()
1429                 .unwrap()
1430                 .restore_vgic(vgic_state, &saved_vcpu_states)
1431                 .unwrap();
1432         }
1433 
1434         self.device_tree
1435             .lock()
1436             .unwrap()
1437             .insert(id.clone(), device_node!(id, interrupt_controller));
1438 
1439         Ok(interrupt_controller)
1440     }
1441 
1442     #[cfg(target_arch = "aarch64")]
1443     pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
1444         self.interrupt_controller.as_ref()
1445     }
1446 
1447     #[cfg(target_arch = "x86_64")]
1448     fn add_interrupt_controller(
1449         &mut self,
1450     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1451         let id = String::from(IOAPIC_DEVICE_NAME);
1452 
1453         // Create IOAPIC
1454         let interrupt_controller = Arc::new(Mutex::new(
1455             ioapic::Ioapic::new(
1456                 id.clone(),
1457                 APIC_START,
1458                 Arc::clone(&self.msi_interrupt_manager),
1459                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1460                     .map_err(DeviceManagerError::RestoreGetState)?,
1461             )
1462             .map_err(DeviceManagerError::CreateInterruptController)?,
1463         ));
1464 
1465         self.interrupt_controller = Some(interrupt_controller.clone());
1466 
1467         self.address_manager
1468             .mmio_bus
1469             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1470             .map_err(DeviceManagerError::BusError)?;
1471 
1472         self.bus_devices
1473             .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);
1474 
1475         // Fill the device tree with a new node. In case of restore, we
1476         // know there is nothing to do, so we can simply override the
1477         // existing entry.
1478         self.device_tree
1479             .lock()
1480             .unwrap()
1481             .insert(id.clone(), device_node!(id, interrupt_controller));
1482 
1483         Ok(interrupt_controller)
1484     }
1485 
1486     fn add_acpi_devices(
1487         &mut self,
1488         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1489         reset_evt: EventFd,
1490         exit_evt: EventFd,
1491     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1492         let vcpus_kill_signalled = self
1493             .cpu_manager
1494             .lock()
1495             .unwrap()
1496             .vcpus_kill_signalled()
1497             .clone();
1498         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1499             exit_evt,
1500             reset_evt,
1501             vcpus_kill_signalled,
1502         )));
1503 
1504         self.bus_devices
1505             .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);
1506 
1507         #[cfg(target_arch = "x86_64")]
1508         {
1509             let shutdown_pio_address: u16 = 0x600;
1510 
1511             self.address_manager
1512                 .allocator
1513                 .lock()
1514                 .unwrap()
1515                 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
1516                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1517 
1518             self.address_manager
1519                 .io_bus
1520                 .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
1521                 .map_err(DeviceManagerError::BusError)?;
1522 
1523             self.acpi_platform_addresses.sleep_control_reg_address =
1524                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1525             self.acpi_platform_addresses.sleep_status_reg_address =
1526                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1527             self.acpi_platform_addresses.reset_reg_address =
1528                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1529         }
1530 
1531         let ged_irq = self
1532             .address_manager
1533             .allocator
1534             .lock()
1535             .unwrap()
1536             .allocate_irq()
1537             .unwrap();
1538         let interrupt_group = interrupt_manager
1539             .create_group(LegacyIrqGroupConfig {
1540                 irq: ged_irq as InterruptIndex,
1541             })
1542             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1543         let ged_address = self
1544             .address_manager
1545             .allocator
1546             .lock()
1547             .unwrap()
1548             .allocate_platform_mmio_addresses(
1549                 None,
1550                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1551                 None,
1552             )
1553             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1554         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1555             interrupt_group,
1556             ged_irq,
1557             ged_address,
1558         )));
1559         self.address_manager
1560             .mmio_bus
1561             .insert(
1562                 ged_device.clone(),
1563                 ged_address.0,
1564                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1565             )
1566             .map_err(DeviceManagerError::BusError)?;
1567         self.bus_devices
1568             .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);
1569 
1570         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1571 
1572         self.bus_devices
1573             .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);
1574 
1575         #[cfg(target_arch = "x86_64")]
1576         {
1577             let pm_timer_pio_address: u16 = 0x608;
1578 
1579             self.address_manager
1580                 .allocator
1581                 .lock()
1582                 .unwrap()
1583                 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
1584                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1585 
1586             self.address_manager
1587                 .io_bus
1588                 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
1589                 .map_err(DeviceManagerError::BusError)?;
1590 
1591             self.acpi_platform_addresses.pm_timer_address =
1592                 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
1593         }
1594 
1595         Ok(Some(ged_device))
1596     }
1597 
1598     #[cfg(target_arch = "x86_64")]
1599     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1600         let vcpus_kill_signalled = self
1601             .cpu_manager
1602             .lock()
1603             .unwrap()
1604             .vcpus_kill_signalled()
1605             .clone();
1606         // Add a shutdown device (i8042)
1607         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1608             reset_evt.try_clone().unwrap(),
1609             vcpus_kill_signalled.clone(),
1610         )));
1611 
1612         self.bus_devices
1613             .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);
1614 
1615         self.address_manager
1616             .io_bus
1617             .insert(i8042, 0x61, 0x4)
1618             .map_err(DeviceManagerError::BusError)?;
1619         {
1620             // Add a CMOS emulated device
1621             let mem_size = self
1622                 .memory_manager
1623                 .lock()
1624                 .unwrap()
1625                 .guest_memory()
1626                 .memory()
1627                 .last_addr()
1628                 .0
1629                 + 1;
1630             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1631             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1632 
1633             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1634                 mem_below_4g,
1635                 mem_above_4g,
1636                 reset_evt,
1637                 Some(vcpus_kill_signalled),
1638             )));
1639 
1640             self.bus_devices
1641                 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);
1642 
1643             self.address_manager
1644                 .io_bus
1645                 .insert(cmos, 0x70, 0x2)
1646                 .map_err(DeviceManagerError::BusError)?;
1647 
1648             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1649 
1650             self.bus_devices
1651                 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);
1652 
1653             self.address_manager
1654                 .io_bus
1655                 .insert(fwdebug, 0x402, 0x1)
1656                 .map_err(DeviceManagerError::BusError)?;
1657         }
1658 
1659         // 0x80 debug port
1660         let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
1661         self.bus_devices
1662             .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
1663         self.address_manager
1664             .io_bus
1665             .insert(debug_port, 0x80, 0x1)
1666             .map_err(DeviceManagerError::BusError)?;
1667 
1668         Ok(())
1669     }
1670 
1671     #[cfg(target_arch = "aarch64")]
1672     fn add_legacy_devices(
1673         &mut self,
1674         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1675     ) -> DeviceManagerResult<()> {
1676         // Add a RTC device
1677         let rtc_irq = self
1678             .address_manager
1679             .allocator
1680             .lock()
1681             .unwrap()
1682             .allocate_irq()
1683             .unwrap();
1684 
1685         let interrupt_group = interrupt_manager
1686             .create_group(LegacyIrqGroupConfig {
1687                 irq: rtc_irq as InterruptIndex,
1688             })
1689             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1690 
1691         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1692 
1693         self.bus_devices
1694             .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);
1695 
1696         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1697 
1698         self.address_manager
1699             .mmio_bus
1700             .insert(rtc_device, addr.0, MMIO_LEN)
1701             .map_err(DeviceManagerError::BusError)?;
1702 
1703         self.id_to_dev_info.insert(
1704             (DeviceType::Rtc, "rtc".to_string()),
1705             MmioDeviceInfo {
1706                 addr: addr.0,
1707                 len: MMIO_LEN,
1708                 irq: rtc_irq,
1709             },
1710         );
1711 
1712         // Add a GPIO device
1713         let id = String::from(GPIO_DEVICE_NAME);
1714         let gpio_irq = self
1715             .address_manager
1716             .allocator
1717             .lock()
1718             .unwrap()
1719             .allocate_irq()
1720             .unwrap();
1721 
1722         let interrupt_group = interrupt_manager
1723             .create_group(LegacyIrqGroupConfig {
1724                 irq: gpio_irq as InterruptIndex,
1725             })
1726             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1727 
1728         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1729             id.clone(),
1730             interrupt_group,
1731             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1732                 .map_err(DeviceManagerError::RestoreGetState)?,
1733         )));
1734 
1735         self.bus_devices
1736             .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);
1737 
1738         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1739 
1740         self.address_manager
1741             .mmio_bus
1742             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1743             .map_err(DeviceManagerError::BusError)?;
1744 
1745         self.gpio_device = Some(gpio_device.clone());
1746 
1747         self.id_to_dev_info.insert(
1748             (DeviceType::Gpio, "gpio".to_string()),
1749             MmioDeviceInfo {
1750                 addr: addr.0,
1751                 len: MMIO_LEN,
1752                 irq: gpio_irq,
1753             },
1754         );
1755 
1756         self.device_tree
1757             .lock()
1758             .unwrap()
1759             .insert(id.clone(), device_node!(id, gpio_device));
1760 
1761         Ok(())
1762     }
1763 
1764     #[cfg(target_arch = "x86_64")]
1765     fn add_serial_device(
1766         &mut self,
1767         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1768         serial_writer: Option<Box<dyn io::Write + Send>>,
1769     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1770         // Serial is tied to IRQ #4
1771         let serial_irq = 4;
1772 
1773         let id = String::from(SERIAL_DEVICE_NAME);
1774 
1775         let interrupt_group = interrupt_manager
1776             .create_group(LegacyIrqGroupConfig {
1777                 irq: serial_irq as InterruptIndex,
1778             })
1779             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1780 
1781         let serial = Arc::new(Mutex::new(Serial::new(
1782             id.clone(),
1783             interrupt_group,
1784             serial_writer,
1785             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1786                 .map_err(DeviceManagerError::RestoreGetState)?,
1787         )));
1788 
1789         self.bus_devices
1790             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1791 
1792         self.address_manager
1793             .allocator
1794             .lock()
1795             .unwrap()
1796             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1797             .ok_or(DeviceManagerError::AllocateIoPort)?;
1798 
1799         self.address_manager
1800             .io_bus
1801             .insert(serial.clone(), 0x3f8, 0x8)
1802             .map_err(DeviceManagerError::BusError)?;
1803 
1804         // Fill the device tree with a new node. In case of restore, we
1805         // know there is nothing to do, so we can simply override the
1806         // existing entry.
1807         self.device_tree
1808             .lock()
1809             .unwrap()
1810             .insert(id.clone(), device_node!(id, serial));
1811 
1812         Ok(serial)
1813     }
1814 
1815     #[cfg(target_arch = "aarch64")]
1816     fn add_serial_device(
1817         &mut self,
1818         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1819         serial_writer: Option<Box<dyn io::Write + Send>>,
1820     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1821         let id = String::from(SERIAL_DEVICE_NAME);
1822 
1823         let serial_irq = self
1824             .address_manager
1825             .allocator
1826             .lock()
1827             .unwrap()
1828             .allocate_irq()
1829             .unwrap();
1830 
1831         let interrupt_group = interrupt_manager
1832             .create_group(LegacyIrqGroupConfig {
1833                 irq: serial_irq as InterruptIndex,
1834             })
1835             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1836 
1837         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1838             id.clone(),
1839             interrupt_group,
1840             serial_writer,
1841             self.timestamp,
1842             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1843                 .map_err(DeviceManagerError::RestoreGetState)?,
1844         )));
1845 
1846         self.bus_devices
1847             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1848 
1849         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
1850 
1851         self.address_manager
1852             .mmio_bus
1853             .insert(serial.clone(), addr.0, MMIO_LEN)
1854             .map_err(DeviceManagerError::BusError)?;
1855 
1856         self.id_to_dev_info.insert(
1857             (DeviceType::Serial, DeviceType::Serial.to_string()),
1858             MmioDeviceInfo {
1859                 addr: addr.0,
1860                 len: MMIO_LEN,
1861                 irq: serial_irq,
1862             },
1863         );
1864 
1865         self.cmdline_additions
1866             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1867 
1868         // Fill the device tree with a new node. In case of restore, we
1869         // know there is nothing to do, so we can simply override the
1870         // existing entry.
1871         self.device_tree
1872             .lock()
1873             .unwrap()
1874             .insert(id.clone(), device_node!(id, serial));
1875 
1876         Ok(serial)
1877     }
1878 
1879     fn modify_mode<F: FnOnce(&mut termios)>(
1880         &mut self,
1881         fd: RawFd,
1882         f: F,
1883     ) -> vmm_sys_util::errno::Result<()> {
1884         // SAFETY: safe because we check the return value of isatty.
1885         if unsafe { isatty(fd) } != 1 {
1886             return Ok(());
1887         }
1888 
1889         // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr
1890         // and we check the return result.
1891         let mut termios: termios = unsafe { zeroed() };
1892         // SAFETY: see above
1893         let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
1894         if ret < 0 {
1895             return vmm_sys_util::errno::errno_result();
1896         }
1897         let mut original_termios_opt = self.original_termios_opt.lock().unwrap();
1898         if original_termios_opt.is_none() {
1899             *original_termios_opt = Some(termios);
1900         }
1901         f(&mut termios);
1902         // SAFETY: Safe because the syscall will only read the extent of termios and we check
1903         // the return result.
1904         let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
1905         if ret < 0 {
1906             return vmm_sys_util::errno::errno_result();
1907         }
1908 
1909         Ok(())
1910     }
1911 
1912     fn set_raw_mode(&mut self, f: &dyn AsRawFd) -> vmm_sys_util::errno::Result<()> {
1913         // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
1914         self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
1915     }
1916 
1917     fn listen_for_sigwinch_on_tty(&mut self, pty_sub: File) -> std::io::Result<()> {
1918         let seccomp_filter = get_seccomp_filter(
1919             &self.seccomp_action,
1920             Thread::PtyForeground,
1921             self.hypervisor_type,
1922         )
1923         .unwrap();
1924 
1925         self.console_resize_pipe =
1926             Some(Arc::new(start_sigwinch_listener(seccomp_filter, pty_sub)?));
1927 
1928         Ok(())
1929     }
1930 
1931     fn add_virtio_console_device(
1932         &mut self,
1933         virtio_devices: &mut Vec<MetaVirtioDevice>,
1934         console_pty: Option<PtyPair>,
1935         resize_pipe: Option<File>,
1936     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
1937         let console_config = self.config.lock().unwrap().console.clone();
1938         let endpoint = match console_config.mode {
1939             ConsoleOutputMode::File => {
1940                 let file = File::create(console_config.file.as_ref().unwrap())
1941                     .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
1942                 Endpoint::File(file)
1943             }
1944             ConsoleOutputMode::Pty => {
1945                 if let Some(pty) = console_pty {
1946                     self.config.lock().unwrap().console.file = Some(pty.path.clone());
1947                     let file = pty.main.try_clone().unwrap();
1948                     self.console_pty = Some(Arc::new(Mutex::new(pty)));
1949                     self.console_resize_pipe = resize_pipe.map(Arc::new);
1950                     Endpoint::PtyPair(file.try_clone().unwrap(), file)
1951                 } else {
1952                     let (main, sub, path) =
1953                         create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?;
1954                     self.set_raw_mode(&sub)
1955                         .map_err(DeviceManagerError::SetPtyRaw)?;
1956                     self.config.lock().unwrap().console.file = Some(path.clone());
1957                     let file = main.try_clone().unwrap();
1958                     assert!(resize_pipe.is_none());
1959                     self.listen_for_sigwinch_on_tty(sub).unwrap();
1960                     self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
1961                     Endpoint::PtyPair(file.try_clone().unwrap(), file)
1962                 }
1963             }
1964             ConsoleOutputMode::Tty => {
1965                 // Duplicating the file descriptors like this is needed as otherwise
1966                 // they will be closed on a reboot and the numbers reused
1967 
1968                 // SAFETY: FFI call to dup. Trivially safe.
1969                 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
1970                 if stdout == -1 {
1971                     return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
1972                 }
1973                 // SAFETY: stdout is valid and owned solely by us.
1974                 let stdout = unsafe { File::from_raw_fd(stdout) };
1975 
1976                 // Make sure stdout is in raw mode, if it's a terminal.
1977                 let _ = self.set_raw_mode(&stdout);
1978 
1979                 // SAFETY: FFI call. Trivially safe.
1980                 if unsafe { libc::isatty(libc::STDOUT_FILENO) } == 1 {
1981                     self.listen_for_sigwinch_on_tty(stdout.try_clone().unwrap())
1982                         .unwrap();
1983                 }
1984 
1985                 // If an interactive TTY then we can accept input
1986                 // SAFETY: FFI call. Trivially safe.
1987                 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
1988                     // SAFETY: FFI call to dup. Trivially safe.
1989                     let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
1990                     if stdin == -1 {
1991                         return vmm_sys_util::errno::errno_result()
1992                             .map_err(DeviceManagerError::DupFd);
1993                     }
1994                     // SAFETY: stdin is valid and owned solely by us.
1995                     let stdin = unsafe { File::from_raw_fd(stdin) };
1996 
1997                     Endpoint::FilePair(stdout, stdin)
1998                 } else {
1999                     Endpoint::File(stdout)
2000                 }
2001             }
2002             ConsoleOutputMode::Socket => {
2003                 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
2004             }
2005             ConsoleOutputMode::Null => Endpoint::Null,
2006             ConsoleOutputMode::Off => return Ok(None),
2007         };
2008         let id = String::from(CONSOLE_DEVICE_NAME);
2009 
2010         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
2011             id.clone(),
2012             endpoint,
2013             self.console_resize_pipe
2014                 .as_ref()
2015                 .map(|p| p.try_clone().unwrap()),
2016             self.force_iommu | console_config.iommu,
2017             self.seccomp_action.clone(),
2018             self.exit_evt
2019                 .try_clone()
2020                 .map_err(DeviceManagerError::EventFd)?,
2021             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2022                 .map_err(DeviceManagerError::RestoreGetState)?,
2023         )
2024         .map_err(DeviceManagerError::CreateVirtioConsole)?;
2025         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
2026         virtio_devices.push(MetaVirtioDevice {
2027             virtio_device: Arc::clone(&virtio_console_device)
2028                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2029             iommu: console_config.iommu,
2030             id: id.clone(),
2031             pci_segment: 0,
2032             dma_handler: None,
2033         });
2034 
2035         // Fill the device tree with a new node. In case of restore, we
2036         // know there is nothing to do, so we can simply override the
2037         // existing entry.
2038         self.device_tree
2039             .lock()
2040             .unwrap()
2041             .insert(id.clone(), device_node!(id, virtio_console_device));
2042 
2043         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
2044         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
2045             Some(console_resizer)
2046         } else {
2047             None
2048         })
2049     }
2050 
2051     fn add_console_device(
2052         &mut self,
2053         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
2054         virtio_devices: &mut Vec<MetaVirtioDevice>,
2055         serial_pty: Option<PtyPair>,
2056         console_pty: Option<PtyPair>,
2057         console_resize_pipe: Option<File>,
2058     ) -> DeviceManagerResult<Arc<Console>> {
2059         let serial_config = self.config.lock().unwrap().serial.clone();
2060         let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
2061             ConsoleOutputMode::File => Some(Box::new(
2062                 File::create(serial_config.file.as_ref().unwrap())
2063                     .map_err(DeviceManagerError::SerialOutputFileOpen)?,
2064             )),
2065             ConsoleOutputMode::Pty => {
2066                 if let Some(pty) = serial_pty {
2067                     self.config.lock().unwrap().serial.file = Some(pty.path.clone());
2068                     self.serial_pty = Some(Arc::new(Mutex::new(pty)));
2069                 } else {
2070                     let (main, sub, path) =
2071                         create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
2072                     self.set_raw_mode(&sub)
2073                         .map_err(DeviceManagerError::SetPtyRaw)?;
2074                     self.config.lock().unwrap().serial.file = Some(path.clone());
2075                     self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
2076                 }
2077                 None
2078             }
2079             ConsoleOutputMode::Tty => {
2080                 let out = stdout();
2081                 let _ = self.set_raw_mode(&out);
2082                 Some(Box::new(out))
2083             }
2084             ConsoleOutputMode::Off | ConsoleOutputMode::Null | ConsoleOutputMode::Socket => None,
2085         };
2086         if serial_config.mode != ConsoleOutputMode::Off {
2087             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
2088             self.serial_manager = match serial_config.mode {
2089                 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty | ConsoleOutputMode::Socket => {
2090                     let serial_manager = SerialManager::new(
2091                         serial,
2092                         self.serial_pty.clone(),
2093                         serial_config.mode,
2094                         serial_config.socket,
2095                     )
2096                     .map_err(DeviceManagerError::CreateSerialManager)?;
2097                     if let Some(mut serial_manager) = serial_manager {
2098                         serial_manager
2099                             .start_thread(
2100                                 self.exit_evt
2101                                     .try_clone()
2102                                     .map_err(DeviceManagerError::EventFd)?,
2103                             )
2104                             .map_err(DeviceManagerError::SpawnSerialManager)?;
2105                         Some(Arc::new(serial_manager))
2106                     } else {
2107                         None
2108                     }
2109                 }
2110                 _ => None,
2111             };
2112         }
2113 
2114         let console_resizer =
2115             self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;
2116 
2117         Ok(Arc::new(Console { console_resizer }))
2118     }
2119 
2120     fn add_tpm_device(
2121         &mut self,
2122         tpm_path: PathBuf,
2123     ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
2124         // Create TPM Device
2125         let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
2126             DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
2127         })?;
2128         let tpm = Arc::new(Mutex::new(tpm));
2129 
2130         // Add TPM Device to mmio
2131         self.address_manager
2132             .mmio_bus
2133             .insert(
2134                 tpm.clone(),
2135                 arch::layout::TPM_START.0,
2136                 arch::layout::TPM_SIZE,
2137             )
2138             .map_err(DeviceManagerError::BusError)?;
2139 
2140         Ok(tpm)
2141     }
2142 
2143     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2144         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
2145 
2146         // Create "standard" virtio devices (net/block/rng)
2147         devices.append(&mut self.make_virtio_block_devices()?);
2148         devices.append(&mut self.make_virtio_net_devices()?);
2149         devices.append(&mut self.make_virtio_rng_devices()?);
2150 
2151         // Add virtio-fs if required
2152         devices.append(&mut self.make_virtio_fs_devices()?);
2153 
2154         // Add virtio-pmem if required
2155         devices.append(&mut self.make_virtio_pmem_devices()?);
2156 
2157         // Add virtio-vsock if required
2158         devices.append(&mut self.make_virtio_vsock_devices()?);
2159 
2160         devices.append(&mut self.make_virtio_mem_devices()?);
2161 
2162         // Add virtio-balloon if required
2163         devices.append(&mut self.make_virtio_balloon_devices()?);
2164 
2165         // Add virtio-watchdog device
2166         devices.append(&mut self.make_virtio_watchdog_devices()?);
2167 
2168         // Add vDPA devices if required
2169         devices.append(&mut self.make_vdpa_devices()?);
2170 
2171         Ok(devices)
2172     }
2173 
2174     // Cache whether io_uring is supported to avoid probing for very block device
2175     fn io_uring_is_supported(&mut self) -> bool {
2176         if let Some(supported) = self.io_uring_supported {
2177             return supported;
2178         }
2179 
2180         let supported = block_io_uring_is_supported();
2181         self.io_uring_supported = Some(supported);
2182         supported
2183     }
2184 
2185     fn make_virtio_block_device(
2186         &mut self,
2187         disk_cfg: &mut DiskConfig,
2188     ) -> DeviceManagerResult<MetaVirtioDevice> {
2189         let id = if let Some(id) = &disk_cfg.id {
2190             id.clone()
2191         } else {
2192             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
2193             disk_cfg.id = Some(id.clone());
2194             id
2195         };
2196 
2197         info!("Creating virtio-block device: {:?}", disk_cfg);
2198 
2199         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
2200 
2201         let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
2202             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
2203             let vu_cfg = VhostUserConfig {
2204                 socket,
2205                 num_queues: disk_cfg.num_queues,
2206                 queue_size: disk_cfg.queue_size,
2207             };
2208             let vhost_user_block = Arc::new(Mutex::new(
2209                 match virtio_devices::vhost_user::Blk::new(
2210                     id.clone(),
2211                     vu_cfg,
2212                     self.seccomp_action.clone(),
2213                     self.exit_evt
2214                         .try_clone()
2215                         .map_err(DeviceManagerError::EventFd)?,
2216                     self.force_iommu,
2217                     snapshot
2218                         .map(|s| s.to_versioned_state())
2219                         .transpose()
2220                         .map_err(DeviceManagerError::RestoreGetState)?,
2221                 ) {
2222                     Ok(vub_device) => vub_device,
2223                     Err(e) => {
2224                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2225                     }
2226                 },
2227             ));
2228 
2229             (
2230                 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2231                 vhost_user_block as Arc<Mutex<dyn Migratable>>,
2232             )
2233         } else {
2234             let mut options = OpenOptions::new();
2235             options.read(true);
2236             options.write(!disk_cfg.readonly);
2237             if disk_cfg.direct {
2238                 options.custom_flags(libc::O_DIRECT);
2239             }
2240             // Open block device path
2241             let mut file: File = options
2242                 .open(
2243                     disk_cfg
2244                         .path
2245                         .as_ref()
2246                         .ok_or(DeviceManagerError::NoDiskPath)?
2247                         .clone(),
2248                 )
2249                 .map_err(DeviceManagerError::Disk)?;
2250             let image_type =
2251                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2252 
2253             let image = match image_type {
2254                 ImageType::FixedVhd => {
2255                     // Use asynchronous backend relying on io_uring if the
2256                     // syscalls are supported.
2257                     if cfg!(feature = "io_uring")
2258                         && !disk_cfg.disable_io_uring
2259                         && self.io_uring_is_supported()
2260                     {
2261                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2262 
2263                         #[cfg(not(feature = "io_uring"))]
2264                         unreachable!("Checked in if statement above");
2265                         #[cfg(feature = "io_uring")]
2266                         {
2267                             Box::new(
2268                                 FixedVhdDiskAsync::new(file)
2269                                     .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2270                             ) as Box<dyn DiskFile>
2271                         }
2272                     } else {
2273                         info!("Using synchronous fixed VHD disk file");
2274                         Box::new(
2275                             FixedVhdDiskSync::new(file)
2276                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2277                         ) as Box<dyn DiskFile>
2278                     }
2279                 }
2280                 ImageType::Raw => {
2281                     // Use asynchronous backend relying on io_uring if the
2282                     // syscalls are supported.
2283                     if cfg!(feature = "io_uring")
2284                         && !disk_cfg.disable_io_uring
2285                         && self.io_uring_is_supported()
2286                     {
2287                         info!("Using asynchronous RAW disk file (io_uring)");
2288 
2289                         #[cfg(not(feature = "io_uring"))]
2290                         unreachable!("Checked in if statement above");
2291                         #[cfg(feature = "io_uring")]
2292                         {
2293                             Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2294                         }
2295                     } else {
2296                         info!("Using synchronous RAW disk file");
2297                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2298                     }
2299                 }
2300                 ImageType::Qcow2 => {
2301                     info!("Using synchronous QCOW disk file");
2302                     Box::new(
2303                         QcowDiskSync::new(file, disk_cfg.direct)
2304                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2305                     ) as Box<dyn DiskFile>
2306                 }
2307                 ImageType::Vhdx => {
2308                     info!("Using synchronous VHDX disk file");
2309                     Box::new(
2310                         VhdxDiskSync::new(file)
2311                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2312                     ) as Box<dyn DiskFile>
2313                 }
2314             };
2315 
2316             let virtio_block = Arc::new(Mutex::new(
2317                 virtio_devices::Block::new(
2318                     id.clone(),
2319                     image,
2320                     disk_cfg
2321                         .path
2322                         .as_ref()
2323                         .ok_or(DeviceManagerError::NoDiskPath)?
2324                         .clone(),
2325                     disk_cfg.readonly,
2326                     self.force_iommu | disk_cfg.iommu,
2327                     disk_cfg.num_queues,
2328                     disk_cfg.queue_size,
2329                     disk_cfg.serial.clone(),
2330                     self.seccomp_action.clone(),
2331                     disk_cfg.rate_limiter_config,
2332                     self.exit_evt
2333                         .try_clone()
2334                         .map_err(DeviceManagerError::EventFd)?,
2335                     snapshot
2336                         .map(|s| s.to_versioned_state())
2337                         .transpose()
2338                         .map_err(DeviceManagerError::RestoreGetState)?,
2339                 )
2340                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2341             ));
2342 
2343             (
2344                 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2345                 virtio_block as Arc<Mutex<dyn Migratable>>,
2346             )
2347         };
2348 
2349         // Fill the device tree with a new node. In case of restore, we
2350         // know there is nothing to do, so we can simply override the
2351         // existing entry.
2352         self.device_tree
2353             .lock()
2354             .unwrap()
2355             .insert(id.clone(), device_node!(id, migratable_device));
2356 
2357         Ok(MetaVirtioDevice {
2358             virtio_device,
2359             iommu: disk_cfg.iommu,
2360             id,
2361             pci_segment: disk_cfg.pci_segment,
2362             dma_handler: None,
2363         })
2364     }
2365 
2366     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2367         let mut devices = Vec::new();
2368 
2369         let mut block_devices = self.config.lock().unwrap().disks.clone();
2370         if let Some(disk_list_cfg) = &mut block_devices {
2371             for disk_cfg in disk_list_cfg.iter_mut() {
2372                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2373             }
2374         }
2375         self.config.lock().unwrap().disks = block_devices;
2376 
2377         Ok(devices)
2378     }
2379 
2380     fn make_virtio_net_device(
2381         &mut self,
2382         net_cfg: &mut NetConfig,
2383     ) -> DeviceManagerResult<MetaVirtioDevice> {
2384         let id = if let Some(id) = &net_cfg.id {
2385             id.clone()
2386         } else {
2387             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2388             net_cfg.id = Some(id.clone());
2389             id
2390         };
2391         info!("Creating virtio-net device: {:?}", net_cfg);
2392 
2393         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
2394 
2395         let (virtio_device, migratable_device) = if net_cfg.vhost_user {
2396             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2397             let vu_cfg = VhostUserConfig {
2398                 socket,
2399                 num_queues: net_cfg.num_queues,
2400                 queue_size: net_cfg.queue_size,
2401             };
2402             let server = match net_cfg.vhost_mode {
2403                 VhostMode::Client => false,
2404                 VhostMode::Server => true,
2405             };
2406             let vhost_user_net = Arc::new(Mutex::new(
2407                 match virtio_devices::vhost_user::Net::new(
2408                     id.clone(),
2409                     net_cfg.mac,
2410                     net_cfg.mtu,
2411                     vu_cfg,
2412                     server,
2413                     self.seccomp_action.clone(),
2414                     self.exit_evt
2415                         .try_clone()
2416                         .map_err(DeviceManagerError::EventFd)?,
2417                     self.force_iommu,
2418                     snapshot
2419                         .map(|s| s.to_versioned_state())
2420                         .transpose()
2421                         .map_err(DeviceManagerError::RestoreGetState)?,
2422                     net_cfg.offload_tso,
2423                     net_cfg.offload_ufo,
2424                     net_cfg.offload_csum,
2425                 ) {
2426                     Ok(vun_device) => vun_device,
2427                     Err(e) => {
2428                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2429                     }
2430                 },
2431             ));
2432 
2433             (
2434                 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2435                 vhost_user_net as Arc<Mutex<dyn Migratable>>,
2436             )
2437         } else {
2438             let state = snapshot
2439                 .map(|s| s.to_versioned_state())
2440                 .transpose()
2441                 .map_err(DeviceManagerError::RestoreGetState)?;
2442 
2443             let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
2444                 Arc::new(Mutex::new(
2445                     virtio_devices::Net::new(
2446                         id.clone(),
2447                         Some(tap_if_name),
2448                         None,
2449                         None,
2450                         Some(net_cfg.mac),
2451                         &mut net_cfg.host_mac,
2452                         net_cfg.mtu,
2453                         self.force_iommu | net_cfg.iommu,
2454                         net_cfg.num_queues,
2455                         net_cfg.queue_size,
2456                         self.seccomp_action.clone(),
2457                         net_cfg.rate_limiter_config,
2458                         self.exit_evt
2459                             .try_clone()
2460                             .map_err(DeviceManagerError::EventFd)?,
2461                         state,
2462                         net_cfg.offload_tso,
2463                         net_cfg.offload_ufo,
2464                         net_cfg.offload_csum,
2465                     )
2466                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2467                 ))
2468             } else if let Some(fds) = &net_cfg.fds {
2469                 let net = virtio_devices::Net::from_tap_fds(
2470                     id.clone(),
2471                     fds,
2472                     Some(net_cfg.mac),
2473                     net_cfg.mtu,
2474                     self.force_iommu | net_cfg.iommu,
2475                     net_cfg.queue_size,
2476                     self.seccomp_action.clone(),
2477                     net_cfg.rate_limiter_config,
2478                     self.exit_evt
2479                         .try_clone()
2480                         .map_err(DeviceManagerError::EventFd)?,
2481                     state,
2482                     net_cfg.offload_tso,
2483                     net_cfg.offload_ufo,
2484                     net_cfg.offload_csum,
2485                 )
2486                 .map_err(DeviceManagerError::CreateVirtioNet)?;
2487 
2488                 // SAFETY: 'fds' are valid because TAP devices are created successfully
2489                 unsafe {
2490                     self.config.lock().unwrap().add_preserved_fds(fds.clone());
2491                 }
2492 
2493                 Arc::new(Mutex::new(net))
2494             } else {
2495                 Arc::new(Mutex::new(
2496                     virtio_devices::Net::new(
2497                         id.clone(),
2498                         None,
2499                         Some(net_cfg.ip),
2500                         Some(net_cfg.mask),
2501                         Some(net_cfg.mac),
2502                         &mut net_cfg.host_mac,
2503                         net_cfg.mtu,
2504                         self.force_iommu | net_cfg.iommu,
2505                         net_cfg.num_queues,
2506                         net_cfg.queue_size,
2507                         self.seccomp_action.clone(),
2508                         net_cfg.rate_limiter_config,
2509                         self.exit_evt
2510                             .try_clone()
2511                             .map_err(DeviceManagerError::EventFd)?,
2512                         state,
2513                         net_cfg.offload_tso,
2514                         net_cfg.offload_ufo,
2515                         net_cfg.offload_csum,
2516                     )
2517                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2518                 ))
2519             };
2520 
2521             (
2522                 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2523                 virtio_net as Arc<Mutex<dyn Migratable>>,
2524             )
2525         };
2526 
2527         // Fill the device tree with a new node. In case of restore, we
2528         // know there is nothing to do, so we can simply override the
2529         // existing entry.
2530         self.device_tree
2531             .lock()
2532             .unwrap()
2533             .insert(id.clone(), device_node!(id, migratable_device));
2534 
2535         Ok(MetaVirtioDevice {
2536             virtio_device,
2537             iommu: net_cfg.iommu,
2538             id,
2539             pci_segment: net_cfg.pci_segment,
2540             dma_handler: None,
2541         })
2542     }
2543 
2544     /// Add virto-net and vhost-user-net devices
2545     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2546         let mut devices = Vec::new();
2547         let mut net_devices = self.config.lock().unwrap().net.clone();
2548         if let Some(net_list_cfg) = &mut net_devices {
2549             for net_cfg in net_list_cfg.iter_mut() {
2550                 devices.push(self.make_virtio_net_device(net_cfg)?);
2551             }
2552         }
2553         self.config.lock().unwrap().net = net_devices;
2554 
2555         Ok(devices)
2556     }
2557 
2558     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2559         let mut devices = Vec::new();
2560 
2561         // Add virtio-rng if required
2562         let rng_config = self.config.lock().unwrap().rng.clone();
2563         if let Some(rng_path) = rng_config.src.to_str() {
2564             info!("Creating virtio-rng device: {:?}", rng_config);
2565             let id = String::from(RNG_DEVICE_NAME);
2566 
2567             let virtio_rng_device = Arc::new(Mutex::new(
2568                 virtio_devices::Rng::new(
2569                     id.clone(),
2570                     rng_path,
2571                     self.force_iommu | rng_config.iommu,
2572                     self.seccomp_action.clone(),
2573                     self.exit_evt
2574                         .try_clone()
2575                         .map_err(DeviceManagerError::EventFd)?,
2576                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2577                         .map_err(DeviceManagerError::RestoreGetState)?,
2578                 )
2579                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2580             ));
2581             devices.push(MetaVirtioDevice {
2582                 virtio_device: Arc::clone(&virtio_rng_device)
2583                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2584                 iommu: rng_config.iommu,
2585                 id: id.clone(),
2586                 pci_segment: 0,
2587                 dma_handler: None,
2588             });
2589 
2590             // Fill the device tree with a new node. In case of restore, we
2591             // know there is nothing to do, so we can simply override the
2592             // existing entry.
2593             self.device_tree
2594                 .lock()
2595                 .unwrap()
2596                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2597         }
2598 
2599         Ok(devices)
2600     }
2601 
2602     fn make_virtio_fs_device(
2603         &mut self,
2604         fs_cfg: &mut FsConfig,
2605     ) -> DeviceManagerResult<MetaVirtioDevice> {
2606         let id = if let Some(id) = &fs_cfg.id {
2607             id.clone()
2608         } else {
2609             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2610             fs_cfg.id = Some(id.clone());
2611             id
2612         };
2613 
2614         info!("Creating virtio-fs device: {:?}", fs_cfg);
2615 
2616         let mut node = device_node!(id);
2617 
2618         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2619             let virtio_fs_device = Arc::new(Mutex::new(
2620                 virtio_devices::vhost_user::Fs::new(
2621                     id.clone(),
2622                     fs_socket,
2623                     &fs_cfg.tag,
2624                     fs_cfg.num_queues,
2625                     fs_cfg.queue_size,
2626                     None,
2627                     self.seccomp_action.clone(),
2628                     self.exit_evt
2629                         .try_clone()
2630                         .map_err(DeviceManagerError::EventFd)?,
2631                     self.force_iommu,
2632                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2633                         .map_err(DeviceManagerError::RestoreGetState)?,
2634                 )
2635                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2636             ));
2637 
2638             // Update the device tree with the migratable device.
2639             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2640             self.device_tree.lock().unwrap().insert(id.clone(), node);
2641 
2642             Ok(MetaVirtioDevice {
2643                 virtio_device: Arc::clone(&virtio_fs_device)
2644                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2645                 iommu: false,
2646                 id,
2647                 pci_segment: fs_cfg.pci_segment,
2648                 dma_handler: None,
2649             })
2650         } else {
2651             Err(DeviceManagerError::NoVirtioFsSock)
2652         }
2653     }
2654 
2655     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2656         let mut devices = Vec::new();
2657 
2658         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2659         if let Some(fs_list_cfg) = &mut fs_devices {
2660             for fs_cfg in fs_list_cfg.iter_mut() {
2661                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2662             }
2663         }
2664         self.config.lock().unwrap().fs = fs_devices;
2665 
2666         Ok(devices)
2667     }
2668 
2669     fn make_virtio_pmem_device(
2670         &mut self,
2671         pmem_cfg: &mut PmemConfig,
2672     ) -> DeviceManagerResult<MetaVirtioDevice> {
2673         let id = if let Some(id) = &pmem_cfg.id {
2674             id.clone()
2675         } else {
2676             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2677             pmem_cfg.id = Some(id.clone());
2678             id
2679         };
2680 
2681         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2682 
2683         let mut node = device_node!(id);
2684 
2685         // Look for the id in the device tree. If it can be found, that means
2686         // the device is being restored, otherwise it's created from scratch.
2687         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2688             info!("Restoring virtio-pmem {} resources", id);
2689 
2690             let mut region_range: Option<(u64, u64)> = None;
2691             for resource in node.resources.iter() {
2692                 match resource {
2693                     Resource::MmioAddressRange { base, size } => {
2694                         if region_range.is_some() {
2695                             return Err(DeviceManagerError::ResourceAlreadyExists);
2696                         }
2697 
2698                         region_range = Some((*base, *size));
2699                     }
2700                     _ => {
2701                         error!("Unexpected resource {:?} for {}", resource, id);
2702                     }
2703                 }
2704             }
2705 
2706             if region_range.is_none() {
2707                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2708             }
2709 
2710             region_range
2711         } else {
2712             None
2713         };
2714 
2715         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2716             if pmem_cfg.size.is_none() {
2717                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2718             }
2719             (O_TMPFILE, true)
2720         } else {
2721             (0, false)
2722         };
2723 
2724         let mut file = OpenOptions::new()
2725             .read(true)
2726             .write(!pmem_cfg.discard_writes)
2727             .custom_flags(custom_flags)
2728             .open(&pmem_cfg.file)
2729             .map_err(DeviceManagerError::PmemFileOpen)?;
2730 
2731         let size = if let Some(size) = pmem_cfg.size {
2732             if set_len {
2733                 file.set_len(size)
2734                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2735             }
2736             size
2737         } else {
2738             file.seek(SeekFrom::End(0))
2739                 .map_err(DeviceManagerError::PmemFileSetLen)?
2740         };
2741 
2742         if size % 0x20_0000 != 0 {
2743             return Err(DeviceManagerError::PmemSizeNotAligned);
2744         }
2745 
2746         let (region_base, region_size) = if let Some((base, size)) = region_range {
2747             // The memory needs to be 2MiB aligned in order to support
2748             // hugepages.
2749             self.pci_segments[pmem_cfg.pci_segment as usize]
2750                 .allocator
2751                 .lock()
2752                 .unwrap()
2753                 .allocate(
2754                     Some(GuestAddress(base)),
2755                     size as GuestUsize,
2756                     Some(0x0020_0000),
2757                 )
2758                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2759 
2760             (base, size)
2761         } else {
2762             // The memory needs to be 2MiB aligned in order to support
2763             // hugepages.
2764             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2765                 .allocator
2766                 .lock()
2767                 .unwrap()
2768                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
2769                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2770 
2771             (base.raw_value(), size)
2772         };
2773 
2774         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2775         let mmap_region = MmapRegion::build(
2776             Some(FileOffset::new(cloned_file, 0)),
2777             region_size as usize,
2778             PROT_READ | PROT_WRITE,
2779             MAP_NORESERVE
2780                 | if pmem_cfg.discard_writes {
2781                     MAP_PRIVATE
2782                 } else {
2783                     MAP_SHARED
2784                 },
2785         )
2786         .map_err(DeviceManagerError::NewMmapRegion)?;
2787         let host_addr: u64 = mmap_region.as_ptr() as u64;
2788 
2789         let mem_slot = self
2790             .memory_manager
2791             .lock()
2792             .unwrap()
2793             .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
2794             .map_err(DeviceManagerError::MemoryManager)?;
2795 
2796         let mapping = virtio_devices::UserspaceMapping {
2797             host_addr,
2798             mem_slot,
2799             addr: GuestAddress(region_base),
2800             len: region_size,
2801             mergeable: false,
2802         };
2803 
2804         let virtio_pmem_device = Arc::new(Mutex::new(
2805             virtio_devices::Pmem::new(
2806                 id.clone(),
2807                 file,
2808                 GuestAddress(region_base),
2809                 mapping,
2810                 mmap_region,
2811                 self.force_iommu | pmem_cfg.iommu,
2812                 self.seccomp_action.clone(),
2813                 self.exit_evt
2814                     .try_clone()
2815                     .map_err(DeviceManagerError::EventFd)?,
2816                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2817                     .map_err(DeviceManagerError::RestoreGetState)?,
2818             )
2819             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2820         ));
2821 
2822         // Update the device tree with correct resource information and with
2823         // the migratable device.
2824         node.resources.push(Resource::MmioAddressRange {
2825             base: region_base,
2826             size: region_size,
2827         });
2828         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2829         self.device_tree.lock().unwrap().insert(id.clone(), node);
2830 
2831         Ok(MetaVirtioDevice {
2832             virtio_device: Arc::clone(&virtio_pmem_device)
2833                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2834             iommu: pmem_cfg.iommu,
2835             id,
2836             pci_segment: pmem_cfg.pci_segment,
2837             dma_handler: None,
2838         })
2839     }
2840 
2841     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2842         let mut devices = Vec::new();
2843         // Add virtio-pmem if required
2844         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2845         if let Some(pmem_list_cfg) = &mut pmem_devices {
2846             for pmem_cfg in pmem_list_cfg.iter_mut() {
2847                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2848             }
2849         }
2850         self.config.lock().unwrap().pmem = pmem_devices;
2851 
2852         Ok(devices)
2853     }
2854 
2855     fn make_virtio_vsock_device(
2856         &mut self,
2857         vsock_cfg: &mut VsockConfig,
2858     ) -> DeviceManagerResult<MetaVirtioDevice> {
2859         let id = if let Some(id) = &vsock_cfg.id {
2860             id.clone()
2861         } else {
2862             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2863             vsock_cfg.id = Some(id.clone());
2864             id
2865         };
2866 
2867         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2868 
2869         let socket_path = vsock_cfg
2870             .socket
2871             .to_str()
2872             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2873         let backend =
2874             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2875                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2876 
2877         let vsock_device = Arc::new(Mutex::new(
2878             virtio_devices::Vsock::new(
2879                 id.clone(),
2880                 vsock_cfg.cid,
2881                 vsock_cfg.socket.clone(),
2882                 backend,
2883                 self.force_iommu | vsock_cfg.iommu,
2884                 self.seccomp_action.clone(),
2885                 self.exit_evt
2886                     .try_clone()
2887                     .map_err(DeviceManagerError::EventFd)?,
2888                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2889                     .map_err(DeviceManagerError::RestoreGetState)?,
2890             )
2891             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2892         ));
2893 
2894         // Fill the device tree with a new node. In case of restore, we
2895         // know there is nothing to do, so we can simply override the
2896         // existing entry.
2897         self.device_tree
2898             .lock()
2899             .unwrap()
2900             .insert(id.clone(), device_node!(id, vsock_device));
2901 
2902         Ok(MetaVirtioDevice {
2903             virtio_device: Arc::clone(&vsock_device)
2904                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2905             iommu: vsock_cfg.iommu,
2906             id,
2907             pci_segment: vsock_cfg.pci_segment,
2908             dma_handler: None,
2909         })
2910     }
2911 
2912     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2913         let mut devices = Vec::new();
2914 
2915         let mut vsock = self.config.lock().unwrap().vsock.clone();
2916         if let Some(ref mut vsock_cfg) = &mut vsock {
2917             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
2918         }
2919         self.config.lock().unwrap().vsock = vsock;
2920 
2921         Ok(devices)
2922     }
2923 
2924     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2925         let mut devices = Vec::new();
2926 
2927         let mm = self.memory_manager.clone();
2928         let mut mm = mm.lock().unwrap();
2929         for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
2930             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
2931                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
2932 
2933                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
2934                     .map(|i| i as u16);
2935 
2936                 let virtio_mem_device = Arc::new(Mutex::new(
2937                     virtio_devices::Mem::new(
2938                         memory_zone_id.clone(),
2939                         virtio_mem_zone.region(),
2940                         self.seccomp_action.clone(),
2941                         node_id,
2942                         virtio_mem_zone.hotplugged_size(),
2943                         virtio_mem_zone.hugepages(),
2944                         self.exit_evt
2945                             .try_clone()
2946                             .map_err(DeviceManagerError::EventFd)?,
2947                         virtio_mem_zone.blocks_state().clone(),
2948                         versioned_state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
2949                             .map_err(DeviceManagerError::RestoreGetState)?,
2950                     )
2951                     .map_err(DeviceManagerError::CreateVirtioMem)?,
2952                 ));
2953 
2954                 // Update the virtio-mem zone so that it has a handle onto the
2955                 // virtio-mem device, which will be used for triggering a resize
2956                 // if needed.
2957                 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));
2958 
2959                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
2960 
2961                 devices.push(MetaVirtioDevice {
2962                     virtio_device: Arc::clone(&virtio_mem_device)
2963                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2964                     iommu: false,
2965                     id: memory_zone_id.clone(),
2966                     pci_segment: 0,
2967                     dma_handler: None,
2968                 });
2969 
2970                 // Fill the device tree with a new node. In case of restore, we
2971                 // know there is nothing to do, so we can simply override the
2972                 // existing entry.
2973                 self.device_tree.lock().unwrap().insert(
2974                     memory_zone_id.clone(),
2975                     device_node!(memory_zone_id, virtio_mem_device),
2976                 );
2977             }
2978         }
2979 
2980         Ok(devices)
2981     }
2982 
2983     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2984         let mut devices = Vec::new();
2985 
2986         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
2987             let id = String::from(BALLOON_DEVICE_NAME);
2988             info!("Creating virtio-balloon device: id = {}", id);
2989 
2990             let virtio_balloon_device = Arc::new(Mutex::new(
2991                 virtio_devices::Balloon::new(
2992                     id.clone(),
2993                     balloon_config.size,
2994                     balloon_config.deflate_on_oom,
2995                     balloon_config.free_page_reporting,
2996                     self.seccomp_action.clone(),
2997                     self.exit_evt
2998                         .try_clone()
2999                         .map_err(DeviceManagerError::EventFd)?,
3000                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
3001                         .map_err(DeviceManagerError::RestoreGetState)?,
3002                 )
3003                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
3004             ));
3005 
3006             self.balloon = Some(virtio_balloon_device.clone());
3007 
3008             devices.push(MetaVirtioDevice {
3009                 virtio_device: Arc::clone(&virtio_balloon_device)
3010                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3011                 iommu: false,
3012                 id: id.clone(),
3013                 pci_segment: 0,
3014                 dma_handler: None,
3015             });
3016 
3017             self.device_tree
3018                 .lock()
3019                 .unwrap()
3020                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
3021         }
3022 
3023         Ok(devices)
3024     }
3025 
3026     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3027         let mut devices = Vec::new();
3028 
3029         if !self.config.lock().unwrap().watchdog {
3030             return Ok(devices);
3031         }
3032 
3033         let id = String::from(WATCHDOG_DEVICE_NAME);
3034         info!("Creating virtio-watchdog device: id = {}", id);
3035 
3036         let virtio_watchdog_device = Arc::new(Mutex::new(
3037             virtio_devices::Watchdog::new(
3038                 id.clone(),
3039                 self.reset_evt.try_clone().unwrap(),
3040                 self.seccomp_action.clone(),
3041                 self.exit_evt
3042                     .try_clone()
3043                     .map_err(DeviceManagerError::EventFd)?,
3044                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
3045                     .map_err(DeviceManagerError::RestoreGetState)?,
3046             )
3047             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
3048         ));
3049         devices.push(MetaVirtioDevice {
3050             virtio_device: Arc::clone(&virtio_watchdog_device)
3051                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3052             iommu: false,
3053             id: id.clone(),
3054             pci_segment: 0,
3055             dma_handler: None,
3056         });
3057 
3058         self.device_tree
3059             .lock()
3060             .unwrap()
3061             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
3062 
3063         Ok(devices)
3064     }
3065 
3066     fn make_vdpa_device(
3067         &mut self,
3068         vdpa_cfg: &mut VdpaConfig,
3069     ) -> DeviceManagerResult<MetaVirtioDevice> {
3070         let id = if let Some(id) = &vdpa_cfg.id {
3071             id.clone()
3072         } else {
3073             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
3074             vdpa_cfg.id = Some(id.clone());
3075             id
3076         };
3077 
3078         info!("Creating vDPA device: {:?}", vdpa_cfg);
3079 
3080         let device_path = vdpa_cfg
3081             .path
3082             .to_str()
3083             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
3084 
3085         let vdpa_device = Arc::new(Mutex::new(
3086             virtio_devices::Vdpa::new(
3087                 id.clone(),
3088                 device_path,
3089                 self.memory_manager.lock().unwrap().guest_memory(),
3090                 vdpa_cfg.num_queues as u16,
3091                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
3092                     .map_err(DeviceManagerError::RestoreGetState)?,
3093             )
3094             .map_err(DeviceManagerError::CreateVdpa)?,
3095         ));
3096 
3097         // Create the DMA handler that is required by the vDPA device
3098         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
3099             Arc::clone(&vdpa_device),
3100             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3101         ));
3102 
3103         self.device_tree
3104             .lock()
3105             .unwrap()
3106             .insert(id.clone(), device_node!(id, vdpa_device));
3107 
3108         Ok(MetaVirtioDevice {
3109             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3110             iommu: vdpa_cfg.iommu,
3111             id,
3112             pci_segment: vdpa_cfg.pci_segment,
3113             dma_handler: Some(vdpa_mapping),
3114         })
3115     }
3116 
3117     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3118         let mut devices = Vec::new();
3119         // Add vdpa if required
3120         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
3121         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
3122             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
3123                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
3124             }
3125         }
3126         self.config.lock().unwrap().vdpa = vdpa_devices;
3127 
3128         Ok(devices)
3129     }
3130 
3131     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
3132         let start_id = self.device_id_cnt;
3133         loop {
3134             // Generate the temporary name.
3135             let name = format!("{}{}", prefix, self.device_id_cnt);
3136             // Increment the counter.
3137             self.device_id_cnt += Wrapping(1);
3138             // Check if the name is already in use.
3139             if !self.boot_id_list.contains(&name)
3140                 && !self.device_tree.lock().unwrap().contains_key(&name)
3141             {
3142                 return Ok(name);
3143             }
3144 
3145             if self.device_id_cnt == start_id {
3146                 // We went through a full loop and there's nothing else we can
3147                 // do.
3148                 break;
3149             }
3150         }
3151         Err(DeviceManagerError::NoAvailableDeviceName)
3152     }
3153 
3154     fn add_passthrough_device(
3155         &mut self,
3156         device_cfg: &mut DeviceConfig,
3157     ) -> DeviceManagerResult<(PciBdf, String)> {
3158         // If the passthrough device has not been created yet, it is created
3159         // here and stored in the DeviceManager structure for future needs.
3160         if self.passthrough_device.is_none() {
3161             self.passthrough_device = Some(
3162                 self.address_manager
3163                     .vm
3164                     .create_passthrough_device()
3165                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
3166             );
3167         }
3168 
3169         self.add_vfio_device(device_cfg)
3170     }
3171 
3172     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
3173         let passthrough_device = self
3174             .passthrough_device
3175             .as_ref()
3176             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
3177 
3178         let dup = passthrough_device
3179             .try_clone()
3180             .map_err(DeviceManagerError::VfioCreate)?;
3181 
3182         Ok(Arc::new(
3183             VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
3184         ))
3185     }
3186 
    /// Creates a VFIO PCI device from `device_cfg`, plugs it onto its PCI
    /// segment and records it in the device tree.
    ///
    /// If `device_cfg.id` is not set, a name is generated with
    /// `next_device_name()` and written back into `device_cfg`.
    ///
    /// Devices with `device_cfg.iommu` set get a dedicated VFIO container
    /// that is registered with the virtual IOMMU as an external mapping; all
    /// other devices share `self.vfio_container`, which is created (and has
    /// the guest memory zones DMA-mapped) when the first such device is added.
    ///
    /// Returns the PCI BDF and the name of the device.
    ///
    /// # Errors
    ///
    /// Returns `MissingVirtualIommu` when `device_cfg.iommu` is set but no
    /// virtual IOMMU device exists, and otherwise propagates the failure of
    /// whichever step failed (container/device creation, DMA mapping,
    /// interrupt group creation, PCI registration, MMIO region mapping).
    fn add_vfio_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // Use the configured identifier, or generate one and store it back
        // into the configuration.
        let vfio_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

        // Only set when the shared container is created by this call.
        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't let all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device. That's simply because the map/unmap operations happen at the
        // VFIO container level.
        let vfio_container = if device_cfg.iommu {
            let vfio_container = self.create_vfio_container()?;

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
            ));

            // Hand the mapping over to the virtual IOMMU, keyed by the
            // device's BDF.
            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }

            vfio_container
        } else if let Some(vfio_container) = &self.vfio_container {
            // Reuse the container shared by the non-vIOMMU VFIO devices.
            Arc::clone(vfio_container)
        } else {
            let vfio_container = self.create_vfio_container()?;
            needs_dma_mapping = true;
            self.vfio_container = Some(Arc::clone(&vfio_container));

            vfio_container
        };

        let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
            .map_err(DeviceManagerError::VfioCreate)?;

        if needs_dma_mapping {
            // Register DMA mapping in IOMMU.
            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem device itself.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    vfio_container
                        .vfio_dma_map(
                            region.start_addr().raw_value(),
                            region.len(),
                            region.as_ptr() as u64,
                        )
                        .map_err(DeviceManagerError::VfioDmaMap)?;
                }
            }

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
            ));

            // Register the container mapping as a DMA mapping handler with
            // every virtio-mem device.
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Container,
                        vfio_mapping.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }
        }

        // A legacy interrupt group is only created when a legacy interrupt
        // manager is available; the IRQ comes from the segment's slot table,
        // indexed by the PCI device number.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        // Cloned so the memory slot allocation closure below can own it.
        let memory_manager = self.memory_manager.clone();

        let vfio_pci_device = VfioPciDevice::new(
            vfio_name.clone(),
            &self.address_manager.vm,
            vfio_device,
            vfio_container,
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            device_cfg.iommu,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
        )
        .map_err(DeviceManagerError::VfioPciCreate)?;

        let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

        // The same object is registered both as the bus device and as the
        // PCI device.
        let new_resources = self.add_pci_device(
            vfio_pci_device.clone(),
            vfio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // MMIO regions are mapped only after the device has been added to
        // the PCI bus.
        vfio_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioMapRegion)?;

        let mut node = device_node!(vfio_name, vfio_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_name.clone(), node);

        Ok((pci_device_bdf, vfio_name))
    }
3337 
3338     fn add_pci_device(
3339         &mut self,
3340         bus_device: Arc<Mutex<dyn BusDevice>>,
3341         pci_device: Arc<Mutex<dyn PciDevice>>,
3342         segment_id: u16,
3343         bdf: PciBdf,
3344         resources: Option<Vec<Resource>>,
3345     ) -> DeviceManagerResult<Vec<Resource>> {
3346         let bars = pci_device
3347             .lock()
3348             .unwrap()
3349             .allocate_bars(
3350                 &self.address_manager.allocator,
3351                 &mut self.pci_segments[segment_id as usize]
3352                     .allocator
3353                     .lock()
3354                     .unwrap(),
3355                 resources,
3356             )
3357             .map_err(DeviceManagerError::AllocateBars)?;
3358 
3359         let mut pci_bus = self.pci_segments[segment_id as usize]
3360             .pci_bus
3361             .lock()
3362             .unwrap();
3363 
3364         pci_bus
3365             .add_device(bdf.device() as u32, pci_device)
3366             .map_err(DeviceManagerError::AddPciDevice)?;
3367 
3368         self.bus_devices.push(Arc::clone(&bus_device));
3369 
3370         pci_bus
3371             .register_mapping(
3372                 bus_device,
3373                 #[cfg(target_arch = "x86_64")]
3374                 self.address_manager.io_bus.as_ref(),
3375                 self.address_manager.mmio_bus.as_ref(),
3376                 bars.clone(),
3377             )
3378             .map_err(DeviceManagerError::AddPciDevice)?;
3379 
3380         let mut new_resources = Vec::new();
3381         for bar in bars {
3382             new_resources.push(Resource::PciBar {
3383                 index: bar.idx(),
3384                 base: bar.addr(),
3385                 size: bar.size(),
3386                 type_: bar.region_type().into(),
3387                 prefetchable: bar.prefetchable().into(),
3388             });
3389         }
3390 
3391         Ok(new_resources)
3392     }
3393 
3394     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3395         let mut iommu_attached_device_ids = Vec::new();
3396         let mut devices = self.config.lock().unwrap().devices.clone();
3397 
3398         if let Some(device_list_cfg) = &mut devices {
3399             for device_cfg in device_list_cfg.iter_mut() {
3400                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3401                 if device_cfg.iommu && self.iommu_device.is_some() {
3402                     iommu_attached_device_ids.push(device_id);
3403                 }
3404             }
3405         }
3406 
3407         // Update the list of devices
3408         self.config.lock().unwrap().devices = devices;
3409 
3410         Ok(iommu_attached_device_ids)
3411     }
3412 
3413     fn add_vfio_user_device(
3414         &mut self,
3415         device_cfg: &mut UserDeviceConfig,
3416     ) -> DeviceManagerResult<(PciBdf, String)> {
3417         let vfio_user_name = if let Some(id) = &device_cfg.id {
3418             id.clone()
3419         } else {
3420             let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
3421             device_cfg.id = Some(id.clone());
3422             id
3423         };
3424 
3425         let (pci_segment_id, pci_device_bdf, resources) =
3426             self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;
3427 
3428         let legacy_interrupt_group =
3429             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3430                 Some(
3431                     legacy_interrupt_manager
3432                         .create_group(LegacyIrqGroupConfig {
3433                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3434                                 [pci_device_bdf.device() as usize]
3435                                 as InterruptIndex,
3436                         })
3437                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3438                 )
3439             } else {
3440                 None
3441             };
3442 
3443         let client = Arc::new(Mutex::new(
3444             vfio_user::Client::new(&device_cfg.socket)
3445                 .map_err(DeviceManagerError::VfioUserCreateClient)?,
3446         ));
3447 
3448         let memory_manager = self.memory_manager.clone();
3449 
3450         let mut vfio_user_pci_device = VfioUserPciDevice::new(
3451             vfio_user_name.clone(),
3452             &self.address_manager.vm,
3453             client.clone(),
3454             self.msi_interrupt_manager.clone(),
3455             legacy_interrupt_group,
3456             pci_device_bdf,
3457             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3458             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
3459         )
3460         .map_err(DeviceManagerError::VfioUserCreate)?;
3461 
3462         let memory = self.memory_manager.lock().unwrap().guest_memory();
3463         let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
3464         for virtio_mem_device in self.virtio_mem_devices.iter() {
3465             virtio_mem_device
3466                 .lock()
3467                 .unwrap()
3468                 .add_dma_mapping_handler(
3469                     VirtioMemMappingSource::Device(pci_device_bdf.into()),
3470                     vfio_user_mapping.clone(),
3471                 )
3472                 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3473         }
3474 
3475         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3476             for region in zone.regions() {
3477                 vfio_user_pci_device
3478                     .dma_map(region)
3479                     .map_err(DeviceManagerError::VfioUserDmaMap)?;
3480             }
3481         }
3482 
3483         let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));
3484 
3485         let new_resources = self.add_pci_device(
3486             vfio_user_pci_device.clone(),
3487             vfio_user_pci_device.clone(),
3488             pci_segment_id,
3489             pci_device_bdf,
3490             resources,
3491         )?;
3492 
3493         // Note it is required to call 'add_pci_device()' in advance to have the list of
3494         // mmio regions provisioned correctly
3495         vfio_user_pci_device
3496             .lock()
3497             .unwrap()
3498             .map_mmio_regions()
3499             .map_err(DeviceManagerError::VfioUserMapRegion)?;
3500 
3501         let mut node = device_node!(vfio_user_name, vfio_user_pci_device);
3502 
3503         // Update the device tree with correct resource information.
3504         node.resources = new_resources;
3505         node.pci_bdf = Some(pci_device_bdf);
3506         node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));
3507 
3508         self.device_tree
3509             .lock()
3510             .unwrap()
3511             .insert(vfio_user_name.clone(), node);
3512 
3513         Ok((pci_device_bdf, vfio_user_name))
3514     }
3515 
3516     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3517         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3518 
3519         if let Some(device_list_cfg) = &mut user_devices {
3520             for device_cfg in device_list_cfg.iter_mut() {
3521                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3522             }
3523         }
3524 
3525         // Update the list of devices
3526         self.config.lock().unwrap().user_devices = user_devices;
3527 
3528         Ok(vec![])
3529     }
3530 
    /// Wraps `virtio_device` into a virtio-pci device, plugs it onto the
    /// requested PCI segment and records it in the device tree.
    ///
    /// The PCI node is named `<VIRTIO_PCI_DEVICE_NAME_PREFIX>-<virtio_device_id>`
    /// and becomes the parent of the existing `virtio_device_id` node.
    ///
    /// * `iommu_mapping` - when present, the device gets an `AccessPlatform`
    ///   built from this mapping.
    /// * `dma_handler` - optional external DMA mapping handler; see the DMA
    ///   section in the body for how it is wired up.
    ///
    /// Returns the PCI BDF assigned to the virtio-pci device.
    ///
    /// # Errors
    ///
    /// Returns `MissingNode` if no device tree node exists for
    /// `virtio_device_id`, `MissingVirtualIommu` if both a DMA handler and an
    /// IOMMU mapping are given but no virtual IOMMU device exists, and
    /// otherwise propagates the failure of whichever step failed.
    fn add_virtio_pci_device(
        &mut self,
        virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu_mapping: &Option<Arc<IommuMapping>>,
        virtio_device_id: String,
        pci_segment_id: u16,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> DeviceManagerResult<PciBdf> {
        let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");

        // Add the new virtio-pci node to the device tree.
        // (The node is only inserted into the tree at the end of this
        // function, once its resources are known.)
        let mut node = device_node!(id);
        node.children = vec![virtio_device_id.clone()];

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        // Update the existing virtio node by setting the parent.
        if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
            node.parent = Some(id.clone());
        } else {
            return Err(DeviceManagerError::MissingNode);
        }

        // Allows support for one MSI-X vector per queue. It also adds 1
        // as we need to take into account the dedicated vector to notify
        // about a virtio config change.
        let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;

        // Create the AccessPlatform trait from the implementation IommuMapping.
        // This will provide address translation for any virtio device sitting
        // behind a vIOMMU.
        let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
        {
            Some(Arc::new(AccessPlatformMapping::new(
                pci_device_bdf.into(),
                mapping.clone(),
            )))
        } else {
            None
        };

        let memory = self.memory_manager.lock().unwrap().guest_memory();

        // When a DMA handler is available: if the device is attached to a
        // virtual IOMMU, register the handler with the IOMMU as an external
        // mapping; otherwise map the DMA ranges directly through the handler.
        if let Some(dma_handler) = &dma_handler {
            if iommu_mapping.is_some() {
                if let Some(iommu) = &self.iommu_device {
                    iommu
                        .lock()
                        .unwrap()
                        .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
                } else {
                    return Err(DeviceManagerError::MissingVirtualIommu);
                }
            } else {
                // Let every virtio-mem device handle the DMA map/unmap through the
                // DMA handler provided.
                for virtio_mem_device in self.virtio_mem_devices.iter() {
                    virtio_mem_device
                        .lock()
                        .unwrap()
                        .add_dma_mapping_handler(
                            VirtioMemMappingSource::Device(pci_device_bdf.into()),
                            dma_handler.clone(),
                        )
                        .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
                }

                // Do not register virtio-mem regions, as they are handled directly by
                // virtio-mem devices.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        // The same value is passed for the first two
                        // arguments of map().
                        let gpa = region.start_addr().0;
                        let size = region.len();
                        dma_handler
                            .map(gpa, gpa, size)
                            .map_err(DeviceManagerError::VirtioDmaMap)?;
                    }
                }
            }
        }

        let device_type = virtio_device.lock().unwrap().device_type();
        let virtio_pci_device = Arc::new(Mutex::new(
            VirtioPciDevice::new(
                id.clone(),
                memory,
                virtio_device,
                msix_num,
                access_platform,
                &self.msi_interrupt_manager,
                pci_device_bdf.into(),
                self.activate_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                // All device types *except* virtio block devices should be allocated a 64-bit bar
                // The block devices should be given a 32-bit BAR so that they are easily accessible
                // to firmware without requiring excessive identity mapping.
                // The exception being if not on the default PCI segment.
                pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
                dma_handler,
                self.pending_activations.clone(),
                vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
            )
            .map_err(DeviceManagerError::VirtioDevice)?,
        ));

        // The same object is registered both as the bus device and as the
        // PCI device.
        let new_resources = self.add_pci_device(
            virtio_pci_device.clone(),
            virtio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Register every ioeventfd exposed by the device as an MMIO ioevent
        // with the VM.
        let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
        for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
            let io_addr = IoEventAddress::Mmio(addr);
            self.address_manager
                .vm
                .register_ioevent(event, &io_addr, None)
                .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
        }

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
        self.device_tree.lock().unwrap().insert(id, node);

        Ok(pci_device_bdf)
    }
3666 
3667     fn add_pvpanic_device(
3668         &mut self,
3669     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
3670         let id = String::from(PVPANIC_DEVICE_NAME);
3671         let pci_segment_id = 0x0_u16;
3672 
3673         info!("Creating pvpanic device {}", id);
3674 
3675         let (pci_segment_id, pci_device_bdf, resources) =
3676             self.pci_resources(&id, pci_segment_id)?;
3677 
3678         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
3679 
3680         let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
3681             .map_err(DeviceManagerError::PvPanicCreate)?;
3682 
3683         let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));
3684 
3685         let new_resources = self.add_pci_device(
3686             pvpanic_device.clone(),
3687             pvpanic_device.clone(),
3688             pci_segment_id,
3689             pci_device_bdf,
3690             resources,
3691         )?;
3692 
3693         let mut node = device_node!(id, pvpanic_device);
3694 
3695         node.resources = new_resources;
3696         node.pci_bdf = Some(pci_device_bdf);
3697         node.pci_device_handle = None;
3698 
3699         self.device_tree.lock().unwrap().insert(id, node);
3700 
3701         Ok(Some(pvpanic_device))
3702     }
3703 
3704     fn pci_resources(
3705         &self,
3706         id: &str,
3707         pci_segment_id: u16,
3708     ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
3709         // Look for the id in the device tree. If it can be found, that means
3710         // the device is being restored, otherwise it's created from scratch.
3711         Ok(
3712             if let Some(node) = self.device_tree.lock().unwrap().get(id) {
3713                 info!("Restoring virtio-pci {} resources", id);
3714                 let pci_device_bdf: PciBdf = node
3715                     .pci_bdf
3716                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3717                 let pci_segment_id = pci_device_bdf.segment();
3718 
3719                 self.pci_segments[pci_segment_id as usize]
3720                     .pci_bus
3721                     .lock()
3722                     .unwrap()
3723                     .get_device_id(pci_device_bdf.device() as usize)
3724                     .map_err(DeviceManagerError::GetPciDeviceId)?;
3725 
3726                 (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
3727             } else {
3728                 let pci_device_bdf =
3729                     self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3730 
3731                 (pci_segment_id, pci_device_bdf, None)
3732             },
3733         )
3734     }
3735 
    /// Returns the I/O bus held by the address manager.
    ///
    /// Only compiled for x86_64 targets.
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }
3740 
    /// Returns the MMIO bus held by the address manager.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }
3744 
    /// Returns the system allocator held by the address manager.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }
3748 
3749     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3750         self.interrupt_controller
3751             .as_ref()
3752             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3753     }
3754 
    /// Returns the list of PCI segments managed by this device manager.
    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }
3758 
    /// Returns the console owned by this device manager.
    pub fn console(&self) -> &Arc<Console> {
        &self.console
    }
3762 
    /// Returns the stored `cmdline_additions` strings as a slice.
    ///
    /// Only compiled for aarch64 targets.
    #[cfg(target_arch = "aarch64")]
    pub fn cmdline_additions(&self) -> &[String] {
        self.cmdline_additions.as_slice()
    }
3767 
3768     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3769         for handle in self.virtio_devices.iter() {
3770             handle
3771                 .virtio_device
3772                 .lock()
3773                 .unwrap()
3774                 .add_memory_region(new_region)
3775                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3776 
3777             if let Some(dma_handler) = &handle.dma_handler {
3778                 if !handle.iommu {
3779                     let gpa = new_region.start_addr().0;
3780                     let size = new_region.len();
3781                     dma_handler
3782                         .map(gpa, gpa, size)
3783                         .map_err(DeviceManagerError::VirtioDmaMap)?;
3784                 }
3785             }
3786         }
3787 
3788         // Take care of updating the memory for VFIO PCI devices.
3789         if let Some(vfio_container) = &self.vfio_container {
3790             vfio_container
3791                 .vfio_dma_map(
3792                     new_region.start_addr().raw_value(),
3793                     new_region.len(),
3794                     new_region.as_ptr() as u64,
3795                 )
3796                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3797         }
3798 
3799         // Take care of updating the memory for vfio-user devices.
3800         {
3801             let device_tree = self.device_tree.lock().unwrap();
3802             for pci_device_node in device_tree.pci_devices() {
3803                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
3804                     .pci_device_handle
3805                     .as_ref()
3806                     .ok_or(DeviceManagerError::MissingPciDevice)?
3807                 {
3808                     vfio_user_pci_device
3809                         .lock()
3810                         .unwrap()
3811                         .dma_map(new_region)
3812                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
3813                 }
3814             }
3815         }
3816 
3817         Ok(())
3818     }
3819 
3820     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3821         for mut activator in self.pending_activations.lock().unwrap().drain(..) {
3822             activator
3823                 .activate()
3824                 .map_err(DeviceManagerError::VirtioActivate)?;
3825         }
3826         Ok(())
3827     }
3828 
3829     pub fn notify_hotplug(
3830         &self,
3831         _notification_type: AcpiNotificationFlags,
3832     ) -> DeviceManagerResult<()> {
3833         return self
3834             .ged_notification_device
3835             .as_ref()
3836             .unwrap()
3837             .lock()
3838             .unwrap()
3839             .notify(_notification_type)
3840             .map_err(DeviceManagerError::HotPlugNotification);
3841     }
3842 
3843     pub fn add_device(
3844         &mut self,
3845         device_cfg: &mut DeviceConfig,
3846     ) -> DeviceManagerResult<PciDeviceInfo> {
3847         self.validate_identifier(&device_cfg.id)?;
3848 
3849         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
3850             return Err(DeviceManagerError::InvalidIommuHotplug);
3851         }
3852 
3853         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
3854 
3855         // Update the PCIU bitmap
3856         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3857 
3858         Ok(PciDeviceInfo {
3859             id: device_name,
3860             bdf,
3861         })
3862     }
3863 
3864     pub fn add_user_device(
3865         &mut self,
3866         device_cfg: &mut UserDeviceConfig,
3867     ) -> DeviceManagerResult<PciDeviceInfo> {
3868         self.validate_identifier(&device_cfg.id)?;
3869 
3870         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
3871 
3872         // Update the PCIU bitmap
3873         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3874 
3875         Ok(PciDeviceInfo {
3876             id: device_name,
3877             bdf,
3878         })
3879     }
3880 
3881     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
3882         // The node can be directly a PCI node in case the 'id' refers to a
3883         // VFIO device or a virtio-pci one.
3884         // In case the 'id' refers to a virtio device, we must find the PCI
3885         // node by looking at the parent.
3886         let device_tree = self.device_tree.lock().unwrap();
3887         let node = device_tree
3888             .get(&id)
3889             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
3890 
3891         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
3892             node
3893         } else {
3894             let parent = node
3895                 .parent
3896                 .as_ref()
3897                 .ok_or(DeviceManagerError::MissingNode)?;
3898             device_tree
3899                 .get(parent)
3900                 .ok_or(DeviceManagerError::MissingNode)?
3901         };
3902 
3903         let pci_device_bdf: PciBdf = pci_device_node
3904             .pci_bdf
3905             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3906         let pci_segment_id = pci_device_bdf.segment();
3907 
3908         let pci_device_handle = pci_device_node
3909             .pci_device_handle
3910             .as_ref()
3911             .ok_or(DeviceManagerError::MissingPciDevice)?;
3912         #[allow(irrefutable_let_patterns)]
3913         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
3914             let device_type = VirtioDeviceType::from(
3915                 virtio_pci_device
3916                     .lock()
3917                     .unwrap()
3918                     .virtio_device()
3919                     .lock()
3920                     .unwrap()
3921                     .device_type(),
3922             );
3923             match device_type {
3924                 VirtioDeviceType::Net
3925                 | VirtioDeviceType::Block
3926                 | VirtioDeviceType::Pmem
3927                 | VirtioDeviceType::Fs
3928                 | VirtioDeviceType::Vsock => {}
3929                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
3930             }
3931         }
3932 
3933         // Update the PCID bitmap
3934         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
3935 
3936         Ok(())
3937     }
3938 
3939     pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
3940         info!(
3941             "Ejecting device_id = {} on segment_id={}",
3942             device_id, pci_segment_id
3943         );
3944 
3945         // Convert the device ID into the corresponding b/d/f.
3946         let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);
3947 
3948         // Give the PCI device ID back to the PCI bus.
3949         self.pci_segments[pci_segment_id as usize]
3950             .pci_bus
3951             .lock()
3952             .unwrap()
3953             .put_device_id(device_id as usize)
3954             .map_err(DeviceManagerError::PutPciDeviceId)?;
3955 
3956         // Remove the device from the device tree along with its children.
3957         let mut device_tree = self.device_tree.lock().unwrap();
3958         let pci_device_node = device_tree
3959             .remove_node_by_pci_bdf(pci_device_bdf)
3960             .ok_or(DeviceManagerError::MissingPciDevice)?;
3961 
3962         // For VFIO and vfio-user the PCI device id is the id.
3963         // For virtio we overwrite it later as we want the id of the
3964         // underlying device.
3965         let mut id = pci_device_node.id;
3966         let pci_device_handle = pci_device_node
3967             .pci_device_handle
3968             .ok_or(DeviceManagerError::MissingPciDevice)?;
3969         if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
3970             // The virtio-pci device has a single child
3971             if !pci_device_node.children.is_empty() {
3972                 assert_eq!(pci_device_node.children.len(), 1);
3973                 let child_id = &pci_device_node.children[0];
3974                 id = child_id.clone();
3975             }
3976         }
3977         for child in pci_device_node.children.iter() {
3978             device_tree.remove(child);
3979         }
3980 
3981         let mut iommu_attached = false;
3982         if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
3983             if iommu_attached_devices.contains(&pci_device_bdf) {
3984                 iommu_attached = true;
3985             }
3986         }
3987 
3988         let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
3989             // No need to remove any virtio-mem mapping here as the container outlives all devices
3990             PciDeviceHandle::Vfio(vfio_pci_device) => (
3991                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
3992                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
3993                 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
3994                 false,
3995             ),
3996             PciDeviceHandle::Virtio(virtio_pci_device) => {
3997                 let dev = virtio_pci_device.lock().unwrap();
3998                 let bar_addr = dev.config_bar_addr();
3999                 for (event, addr) in dev.ioeventfds(bar_addr) {
4000                     let io_addr = IoEventAddress::Mmio(addr);
4001                     self.address_manager
4002                         .vm
4003                         .unregister_ioevent(event, &io_addr)
4004                         .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
4005                 }
4006 
4007                 if let Some(dma_handler) = dev.dma_handler() {
4008                     if !iommu_attached {
4009                         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
4010                             for region in zone.regions() {
4011                                 let iova = region.start_addr().0;
4012                                 let size = region.len();
4013                                 dma_handler
4014                                     .unmap(iova, size)
4015                                     .map_err(DeviceManagerError::VirtioDmaUnmap)?;
4016                             }
4017                         }
4018                     }
4019                 }
4020 
4021                 (
4022                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
4023                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
4024                     Some(dev.virtio_device()),
4025                     dev.dma_handler().is_some() && !iommu_attached,
4026                 )
4027             }
4028             PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
4029                 let mut dev = vfio_user_pci_device.lock().unwrap();
4030                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
4031                     for region in zone.regions() {
4032                         dev.dma_unmap(region)
4033                             .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
4034                     }
4035                 }
4036 
4037                 (
4038                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
4039                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
4040                     None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
4041                     true,
4042                 )
4043             }
4044         };
4045 
4046         if remove_dma_handler {
4047             for virtio_mem_device in self.virtio_mem_devices.iter() {
4048                 virtio_mem_device
4049                     .lock()
4050                     .unwrap()
4051                     .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
4052                         pci_device_bdf.into(),
4053                     ))
4054                     .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
4055             }
4056         }
4057 
4058         // Free the allocated BARs
4059         pci_device
4060             .lock()
4061             .unwrap()
4062             .free_bars(
4063                 &mut self.address_manager.allocator.lock().unwrap(),
4064                 &mut self.pci_segments[pci_segment_id as usize]
4065                     .allocator
4066                     .lock()
4067                     .unwrap(),
4068             )
4069             .map_err(DeviceManagerError::FreePciBars)?;
4070 
4071         // Remove the device from the PCI bus
4072         self.pci_segments[pci_segment_id as usize]
4073             .pci_bus
4074             .lock()
4075             .unwrap()
4076             .remove_by_device(&pci_device)
4077             .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;
4078 
4079         #[cfg(target_arch = "x86_64")]
4080         // Remove the device from the IO bus
4081         self.io_bus()
4082             .remove_by_device(&bus_device)
4083             .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;
4084 
4085         // Remove the device from the MMIO bus
4086         self.mmio_bus()
4087             .remove_by_device(&bus_device)
4088             .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;
4089 
4090         // Remove the device from the list of BusDevice held by the
4091         // DeviceManager.
4092         self.bus_devices
4093             .retain(|dev| !Arc::ptr_eq(dev, &bus_device));
4094 
4095         // Shutdown and remove the underlying virtio-device if present
4096         if let Some(virtio_device) = virtio_device {
4097             for mapping in virtio_device.lock().unwrap().userspace_mappings() {
4098                 self.memory_manager
4099                     .lock()
4100                     .unwrap()
4101                     .remove_userspace_mapping(
4102                         mapping.addr.raw_value(),
4103                         mapping.len,
4104                         mapping.host_addr,
4105                         mapping.mergeable,
4106                         mapping.mem_slot,
4107                     )
4108                     .map_err(DeviceManagerError::MemoryManager)?;
4109             }
4110 
4111             virtio_device.lock().unwrap().shutdown();
4112 
4113             self.virtio_devices
4114                 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
4115         }
4116 
4117         event!(
4118             "vm",
4119             "device-removed",
4120             "id",
4121             &id,
4122             "bdf",
4123             pci_device_bdf.to_string()
4124         );
4125 
4126         // At this point, the device has been removed from all the list and
4127         // buses where it was stored. At the end of this function, after
4128         // any_device, bus_device and pci_device are released, the actual
4129         // device will be dropped.
4130         Ok(())
4131     }
4132 
4133     fn hotplug_virtio_pci_device(
4134         &mut self,
4135         handle: MetaVirtioDevice,
4136     ) -> DeviceManagerResult<PciDeviceInfo> {
4137         // Add the virtio device to the device manager list. This is important
4138         // as the list is used to notify virtio devices about memory updates
4139         // for instance.
4140         self.virtio_devices.push(handle.clone());
4141 
4142         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
4143             self.iommu_mapping.clone()
4144         } else {
4145             None
4146         };
4147 
4148         let bdf = self.add_virtio_pci_device(
4149             handle.virtio_device,
4150             &mapping,
4151             handle.id.clone(),
4152             handle.pci_segment,
4153             handle.dma_handler,
4154         )?;
4155 
4156         // Update the PCIU bitmap
4157         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4158 
4159         Ok(PciDeviceInfo { id: handle.id, bdf })
4160     }
4161 
4162     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
4163         self.config
4164             .lock()
4165             .as_ref()
4166             .unwrap()
4167             .platform
4168             .as_ref()
4169             .map(|pc| {
4170                 pc.iommu_segments
4171                     .as_ref()
4172                     .map(|v| v.contains(&pci_segment_id))
4173                     .unwrap_or_default()
4174             })
4175             .unwrap_or_default()
4176     }
4177 
4178     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
4179         self.validate_identifier(&disk_cfg.id)?;
4180 
4181         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
4182             return Err(DeviceManagerError::InvalidIommuHotplug);
4183         }
4184 
4185         let device = self.make_virtio_block_device(disk_cfg)?;
4186         self.hotplug_virtio_pci_device(device)
4187     }
4188 
4189     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
4190         self.validate_identifier(&fs_cfg.id)?;
4191 
4192         let device = self.make_virtio_fs_device(fs_cfg)?;
4193         self.hotplug_virtio_pci_device(device)
4194     }
4195 
4196     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
4197         self.validate_identifier(&pmem_cfg.id)?;
4198 
4199         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
4200             return Err(DeviceManagerError::InvalidIommuHotplug);
4201         }
4202 
4203         let device = self.make_virtio_pmem_device(pmem_cfg)?;
4204         self.hotplug_virtio_pci_device(device)
4205     }
4206 
4207     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
4208         self.validate_identifier(&net_cfg.id)?;
4209 
4210         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
4211             return Err(DeviceManagerError::InvalidIommuHotplug);
4212         }
4213 
4214         let device = self.make_virtio_net_device(net_cfg)?;
4215         self.hotplug_virtio_pci_device(device)
4216     }
4217 
4218     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
4219         self.validate_identifier(&vdpa_cfg.id)?;
4220 
4221         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
4222             return Err(DeviceManagerError::InvalidIommuHotplug);
4223         }
4224 
4225         let device = self.make_vdpa_device(vdpa_cfg)?;
4226         self.hotplug_virtio_pci_device(device)
4227     }
4228 
4229     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4230         self.validate_identifier(&vsock_cfg.id)?;
4231 
4232         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4233             return Err(DeviceManagerError::InvalidIommuHotplug);
4234         }
4235 
4236         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4237         self.hotplug_virtio_pci_device(device)
4238     }
4239 
4240     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4241         let mut counters = HashMap::new();
4242 
4243         for handle in &self.virtio_devices {
4244             let virtio_device = handle.virtio_device.lock().unwrap();
4245             if let Some(device_counters) = virtio_device.counters() {
4246                 counters.insert(handle.id.clone(), device_counters.clone());
4247             }
4248         }
4249 
4250         counters
4251     }
4252 
4253     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4254         if let Some(balloon) = &self.balloon {
4255             return balloon
4256                 .lock()
4257                 .unwrap()
4258                 .resize(size)
4259                 .map_err(DeviceManagerError::VirtioBalloonResize);
4260         }
4261 
4262         warn!("No balloon setup: Can't resize the balloon");
4263         Err(DeviceManagerError::MissingVirtioBalloon)
4264     }
4265 
4266     pub fn balloon_size(&self) -> u64 {
4267         if let Some(balloon) = &self.balloon {
4268             return balloon.lock().unwrap().get_actual();
4269         }
4270 
4271         0
4272     }
4273 
4274     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4275         self.device_tree.clone()
4276     }
4277 
4278     #[cfg(target_arch = "x86_64")]
4279     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4280         self.ged_notification_device
4281             .as_ref()
4282             .unwrap()
4283             .lock()
4284             .unwrap()
4285             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4286             .map_err(DeviceManagerError::PowerButtonNotification)
4287     }
4288 
4289     #[cfg(target_arch = "aarch64")]
4290     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4291         // There are two use cases:
4292         // 1. Users will use direct kernel boot with device tree.
4293         // 2. Users will use ACPI+UEFI boot.
4294 
4295         // Trigger a GPIO pin 3 event to satisfy use case 1.
4296         self.gpio_device
4297             .as_ref()
4298             .unwrap()
4299             .lock()
4300             .unwrap()
4301             .trigger_key(3)
4302             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4303         // Trigger a GED power button event to satisfy use case 2.
4304         return self
4305             .ged_notification_device
4306             .as_ref()
4307             .unwrap()
4308             .lock()
4309             .unwrap()
4310             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4311             .map_err(DeviceManagerError::PowerButtonNotification);
4312     }
4313 
4314     pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
4315         &self.iommu_attached_devices
4316     }
4317 
4318     fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4319         if let Some(id) = id {
4320             if id.starts_with("__") {
4321                 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4322             }
4323 
4324             if self.device_tree.lock().unwrap().contains_key(id) {
4325                 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4326             }
4327         }
4328 
4329         Ok(())
4330     }
4331 
4332     pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
4333         &self.acpi_platform_addresses
4334     }
4335 }
4336 
4337 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4338     for (numa_node_id, numa_node) in numa_nodes.iter() {
4339         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4340             return Some(*numa_node_id);
4341         }
4342     }
4343 
4344     None
4345 }
4346 
4347 fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
4348     for (numa_node_id, numa_node) in numa_nodes.iter() {
4349         if numa_node.pci_segments.contains(&pci_segment_id) {
4350             return *numa_node_id;
4351         }
4352     }
4353 
4354     0
4355 }
4356 
4357 struct TpmDevice {}
4358 
4359 impl Aml for TpmDevice {
4360     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4361         aml::Device::new(
4362             "TPM2".into(),
4363             vec![
4364                 &aml::Name::new("_HID".into(), &"MSFT0101"),
4365                 &aml::Name::new("_STA".into(), &(0xF_usize)),
4366                 &aml::Name::new(
4367                     "_CRS".into(),
4368                     &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
4369                         true,
4370                         layout::TPM_START.0 as u32,
4371                         layout::TPM_SIZE as u32,
4372                     )]),
4373                 ),
4374             ],
4375         )
4376         .to_aml_bytes(sink)
4377     }
4378 }
4379 
4380 impl Aml for DeviceManager {
4381     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4382         #[cfg(target_arch = "aarch64")]
4383         use arch::aarch64::DeviceInfoForFdt;
4384 
4385         let mut pci_scan_methods = Vec::new();
4386         for i in 0..self.pci_segments.len() {
4387             pci_scan_methods.push(aml::MethodCall::new(
4388                 format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
4389                 vec![],
4390             ));
4391         }
4392         let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
4393         for method in &pci_scan_methods {
4394             pci_scan_inner.push(method)
4395         }
4396 
4397         // PCI hotplug controller
4398         aml::Device::new(
4399             "_SB_.PHPR".into(),
4400             vec![
4401                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
4402                 &aml::Name::new("_STA".into(), &0x0bu8),
4403                 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
4404                 &aml::Mutex::new("BLCK".into(), 0),
4405                 &aml::Name::new(
4406                     "_CRS".into(),
4407                     &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
4408                         aml::AddressSpaceCacheable::NotCacheable,
4409                         true,
4410                         self.acpi_address.0,
4411                         self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
4412                         None,
4413                     )]),
4414                 ),
4415                 // OpRegion and Fields map MMIO range into individual field values
4416                 &aml::OpRegion::new(
4417                     "PCST".into(),
4418                     aml::OpRegionSpace::SystemMemory,
4419                     &(self.acpi_address.0 as usize),
4420                     &DEVICE_MANAGER_ACPI_SIZE,
4421                 ),
4422                 &aml::Field::new(
4423                     "PCST".into(),
4424                     aml::FieldAccessType::DWord,
4425                     aml::FieldLockRule::NoLock,
4426                     aml::FieldUpdateRule::WriteAsZeroes,
4427                     vec![
4428                         aml::FieldEntry::Named(*b"PCIU", 32),
4429                         aml::FieldEntry::Named(*b"PCID", 32),
4430                         aml::FieldEntry::Named(*b"B0EJ", 32),
4431                         aml::FieldEntry::Named(*b"PSEG", 32),
4432                     ],
4433                 ),
4434                 &aml::Method::new(
4435                     "PCEJ".into(),
4436                     2,
4437                     true,
4438                     vec![
4439                         // Take lock defined above
4440                         &aml::Acquire::new("BLCK".into(), 0xffff),
4441                         // Choose the current segment
4442                         &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
4443                         // Write PCI bus number (in first argument) to I/O port via field
4444                         &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
4445                         // Release lock
4446                         &aml::Release::new("BLCK".into()),
4447                         // Return 0
4448                         &aml::Return::new(&aml::ZERO),
4449                     ],
4450                 ),
4451                 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
4452             ],
4453         )
4454         .to_aml_bytes(sink);
4455 
4456         for segment in &self.pci_segments {
4457             segment.to_aml_bytes(sink);
4458         }
4459 
4460         let mut mbrd_memory = Vec::new();
4461 
4462         for segment in &self.pci_segments {
4463             mbrd_memory.push(aml::Memory32Fixed::new(
4464                 true,
4465                 segment.mmio_config_address as u32,
4466                 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
4467             ))
4468         }
4469 
4470         let mut mbrd_memory_refs = Vec::new();
4471         for mbrd_memory_ref in &mbrd_memory {
4472             mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
4473         }
4474 
4475         aml::Device::new(
4476             "_SB_.MBRD".into(),
4477             vec![
4478                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
4479                 &aml::Name::new("_UID".into(), &aml::ZERO),
4480                 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
4481             ],
4482         )
4483         .to_aml_bytes(sink);
4484 
4485         // Serial device
4486         #[cfg(target_arch = "x86_64")]
4487         let serial_irq = 4;
4488         #[cfg(target_arch = "aarch64")]
4489         let serial_irq =
4490             if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
4491                 self.get_device_info()
4492                     .clone()
4493                     .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
4494                     .unwrap()
4495                     .irq()
4496             } else {
4497                 // If serial is turned off, add a fake device with invalid irq.
4498                 31
4499             };
4500         if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
4501             aml::Device::new(
4502                 "_SB_.COM1".into(),
4503                 vec![
4504                     &aml::Name::new(
4505                         "_HID".into(),
4506                         #[cfg(target_arch = "x86_64")]
4507                         &aml::EISAName::new("PNP0501"),
4508                         #[cfg(target_arch = "aarch64")]
4509                         &"ARMH0011",
4510                     ),
4511                     &aml::Name::new("_UID".into(), &aml::ZERO),
4512                     &aml::Name::new("_DDN".into(), &"COM1"),
4513                     &aml::Name::new(
4514                         "_CRS".into(),
4515                         &aml::ResourceTemplate::new(vec![
4516                             &aml::Interrupt::new(true, true, false, false, serial_irq),
4517                             #[cfg(target_arch = "x86_64")]
4518                             &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
4519                             #[cfg(target_arch = "aarch64")]
4520                             &aml::Memory32Fixed::new(
4521                                 true,
4522                                 arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
4523                                 MMIO_LEN as u32,
4524                             ),
4525                         ]),
4526                     ),
4527                 ],
4528             )
4529             .to_aml_bytes(sink);
4530         }
4531 
4532         aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);
4533 
4534         aml::Device::new(
4535             "_SB_.PWRB".into(),
4536             vec![
4537                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
4538                 &aml::Name::new("_UID".into(), &aml::ZERO),
4539             ],
4540         )
4541         .to_aml_bytes(sink);
4542 
4543         if self.config.lock().unwrap().tpm.is_some() {
4544             // Add tpm device
4545             TpmDevice {}.to_aml_bytes(sink);
4546         }
4547 
4548         self.ged_notification_device
4549             .as_ref()
4550             .unwrap()
4551             .lock()
4552             .unwrap()
4553             .to_aml_bytes(sink)
4554     }
4555 }
4556 
4557 impl Pausable for DeviceManager {
4558     fn pause(&mut self) -> result::Result<(), MigratableError> {
4559         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4560             if let Some(migratable) = &device_node.migratable {
4561                 migratable.lock().unwrap().pause()?;
4562             }
4563         }
4564         // On AArch64, the pause of device manager needs to trigger
4565         // a "pause" of GIC, which will flush the GIC pending tables
4566         // and ITS tables to guest RAM.
4567         #[cfg(target_arch = "aarch64")]
4568         {
4569             self.get_interrupt_controller()
4570                 .unwrap()
4571                 .lock()
4572                 .unwrap()
4573                 .pause()?;
4574         };
4575 
4576         Ok(())
4577     }
4578 
4579     fn resume(&mut self) -> result::Result<(), MigratableError> {
4580         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4581             if let Some(migratable) = &device_node.migratable {
4582                 migratable.lock().unwrap().resume()?;
4583             }
4584         }
4585 
4586         Ok(())
4587     }
4588 }
4589 
4590 impl Snapshottable for DeviceManager {
4591     fn id(&self) -> String {
4592         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4593     }
4594 
4595     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4596         let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);
4597 
4598         // We aggregate all devices snapshots.
4599         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4600             if let Some(migratable) = &device_node.migratable {
4601                 let mut migratable = migratable.lock().unwrap();
4602                 snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
4603             }
4604         }
4605 
4606         Ok(snapshot)
4607     }
4608 }
4609 
4610 impl Transportable for DeviceManager {}
4611 
4612 impl Migratable for DeviceManager {
4613     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4614         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4615             if let Some(migratable) = &device_node.migratable {
4616                 migratable.lock().unwrap().start_dirty_log()?;
4617             }
4618         }
4619         Ok(())
4620     }
4621 
4622     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4623         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4624             if let Some(migratable) = &device_node.migratable {
4625                 migratable.lock().unwrap().stop_dirty_log()?;
4626             }
4627         }
4628         Ok(())
4629     }
4630 
4631     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4632         let mut tables = Vec::new();
4633         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4634             if let Some(migratable) = &device_node.migratable {
4635                 tables.push(migratable.lock().unwrap().dirty_log()?);
4636             }
4637         }
4638         Ok(MemoryRangeTable::new_from_tables(tables))
4639     }
4640 
4641     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4642         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4643             if let Some(migratable) = &device_node.migratable {
4644                 migratable.lock().unwrap().start_migration()?;
4645             }
4646         }
4647         Ok(())
4648     }
4649 
4650     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4651         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4652             if let Some(migratable) = &device_node.migratable {
4653                 migratable.lock().unwrap().complete_migration()?;
4654             }
4655         }
4656         Ok(())
4657     }
4658 }
4659 
// Layout of the PCI hotplug register block handled by the `BusDevice`
// implementation of `DeviceManager`: four consecutive registers, in the same
// order as the PCIU/PCID/B0EJ/PSEG fields declared in the ACPI `PCST` field.

// Width, in bytes, of each register.
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;

// Offset of each register: every one starts right where the previous ends.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = PCIU_FIELD_OFFSET + PCIU_FIELD_SIZE as u64;
const B0EJ_FIELD_OFFSET: u64 = PCID_FIELD_OFFSET + PCID_FIELD_SIZE as u64;
const PSEG_FIELD_OFFSET: u64 = B0EJ_FIELD_OFFSET + B0EJ_FIELD_SIZE as u64;
4668 
4669 impl BusDevice for DeviceManager {
4670     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4671         match offset {
4672             PCIU_FIELD_OFFSET => {
4673                 assert!(data.len() == PCIU_FIELD_SIZE);
4674                 data.copy_from_slice(
4675                     &self.pci_segments[self.selected_segment]
4676                         .pci_devices_up
4677                         .to_le_bytes(),
4678                 );
4679                 // Clear the PCIU bitmap
4680                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4681             }
4682             PCID_FIELD_OFFSET => {
4683                 assert!(data.len() == PCID_FIELD_SIZE);
4684                 data.copy_from_slice(
4685                     &self.pci_segments[self.selected_segment]
4686                         .pci_devices_down
4687                         .to_le_bytes(),
4688                 );
4689                 // Clear the PCID bitmap
4690                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4691             }
4692             B0EJ_FIELD_OFFSET => {
4693                 assert!(data.len() == B0EJ_FIELD_SIZE);
4694                 // Always return an empty bitmap since the eject is always
4695                 // taken care of right away during a write access.
4696                 data.fill(0);
4697             }
4698             PSEG_FIELD_OFFSET => {
4699                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4700                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4701             }
4702             _ => error!(
4703                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4704                 base, offset
4705             ),
4706         }
4707 
4708         debug!(
4709             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4710             base, offset, data
4711         )
4712     }
4713 
4714     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4715         match offset {
4716             B0EJ_FIELD_OFFSET => {
4717                 assert!(data.len() == B0EJ_FIELD_SIZE);
4718                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4719                 data_array.copy_from_slice(data);
4720                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4721 
4722                 while slot_bitmap > 0 {
4723                     let slot_id = slot_bitmap.trailing_zeros();
4724                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4725                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4726                     }
4727                     slot_bitmap &= !(1 << slot_id);
4728                 }
4729             }
4730             PSEG_FIELD_OFFSET => {
4731                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4732                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4733                 data_array.copy_from_slice(data);
4734                 let selected_segment = u32::from_le_bytes(data_array) as usize;
4735                 if selected_segment >= self.pci_segments.len() {
4736                     error!(
4737                         "Segment selection out of range: {} >= {}",
4738                         selected_segment,
4739                         self.pci_segments.len()
4740                     );
4741                     return None;
4742                 }
4743                 self.selected_segment = selected_segment;
4744             }
4745             _ => error!(
4746                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4747                 base, offset
4748             ),
4749         }
4750 
4751         debug!(
4752             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4753             base, offset, data
4754         );
4755 
4756         None
4757     }
4758 }
4759 
4760 impl Drop for DeviceManager {
4761     fn drop(&mut self) {
4762         for handle in self.virtio_devices.drain(..) {
4763             handle.virtio_device.lock().unwrap().shutdown();
4764         }
4765 
4766         if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
4767             // SAFETY: FFI call
4768             let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
4769         }
4770     }
4771 }
4772