xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision 4d7a4c598ac247aaf770b00dfb057cdac891f67d)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use crate::config::{
13     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
14     VdpaConfig, VhostMode, VmConfig, VsockConfig,
15 };
16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE};
17 use crate::device_tree::{DeviceNode, DeviceTree};
18 use crate::interrupt::LegacyUserspaceInterruptManager;
19 use crate::interrupt::MsiInterruptManager;
20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
21 use crate::pci_segment::PciSegment;
22 use crate::seccomp_filters::{get_seccomp_filter, Thread};
23 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
24 use crate::sigwinch_listener::start_sigwinch_listener;
25 use crate::GuestRegionMmap;
26 use crate::PciDeviceInfo;
27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
28 use acpi_tables::sdt::GenericAddress;
29 use acpi_tables::{aml, Aml};
30 use anyhow::anyhow;
31 use arch::layout;
32 #[cfg(target_arch = "x86_64")]
33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
34 use arch::NumaNodes;
35 #[cfg(target_arch = "aarch64")]
36 use arch::{DeviceType, MmioDeviceInfo};
37 use block::{
38     async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type,
39     fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio,
40     raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType,
41 };
42 #[cfg(feature = "io_uring")]
43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk};
44 #[cfg(target_arch = "aarch64")]
45 use devices::gic;
46 #[cfg(target_arch = "x86_64")]
47 use devices::ioapic;
48 #[cfg(target_arch = "aarch64")]
49 use devices::legacy::Pl011;
50 #[cfg(target_arch = "x86_64")]
51 use devices::legacy::Serial;
52 use devices::{
53     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
54 };
55 use hypervisor::{HypervisorType, IoEventAddress};
56 use libc::{
57     cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED,
58     O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW,
59 };
60 use pci::{
61     DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping,
62     VfioUserPciDevice, VfioUserPciDeviceError,
63 };
64 use seccompiler::SeccompAction;
65 use serde::{Deserialize, Serialize};
66 use std::collections::{BTreeSet, HashMap};
67 use std::fs::{read_link, File, OpenOptions};
68 use std::io::{self, stdout, Seek, SeekFrom};
69 use std::mem::zeroed;
70 use std::num::Wrapping;
71 use std::os::unix::fs::OpenOptionsExt;
72 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
73 use std::path::PathBuf;
74 use std::result;
75 use std::sync::{Arc, Mutex};
76 use std::time::Instant;
77 use tracer::trace_scoped;
78 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd};
79 use virtio_devices::transport::VirtioTransport;
80 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator};
81 use virtio_devices::vhost_user::VhostUserConfig;
82 use virtio_devices::{
83     AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource,
84 };
85 use virtio_devices::{Endpoint, IommuMapping};
86 use vm_allocator::{AddressAllocator, SystemAllocator};
87 use vm_device::dma_mapping::vfio::VfioDmaMapping;
88 use vm_device::dma_mapping::ExternalDmaMapping;
89 use vm_device::interrupt::{
90     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
91 };
92 use vm_device::{Bus, BusDevice, Resource};
93 use vm_memory::guest_memory::FileOffset;
94 use vm_memory::GuestMemoryRegion;
95 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
96 #[cfg(target_arch = "x86_64")]
97 use vm_memory::{GuestAddressSpace, GuestMemory};
98 use vm_migration::{
99     protocol::MemoryRangeTable, snapshot_from_id, versioned_state_from_id, Migratable,
100     MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, Transportable,
101 };
102 use vm_virtio::AccessPlatform;
103 use vm_virtio::VirtioDeviceType;
104 use vmm_sys_util::eventfd::EventFd;
105 
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Fixed identifiers: singleton devices / devices the user cannot name.
// These all carry a double-underscore prefix.
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
const PVPANIC_DEVICE_NAME: &str = "__pvpanic";
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";

// Identifier prefixes: devices that the user may name and for which we
// generate identifiers if the user doesn't give one.
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
133 
/// Errors associated with the device manager.
///
/// Most variants wrap the underlying error reported by the failing
/// subsystem; unit variants describe conditions for which no lower-level
/// error is carried.
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Cannot create TPM device
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a pmem size that is not a multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// No socket option support for console device
    NoSocketOptionSupportForConsoleDevice,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier
    InvalidIdentifier(String),

    /// Error activating virtio device
    VirtioActivate(ActivateError),

    /// Failed retrieving device state from snapshot
    RestoreGetState(MigratableError),

    /// Cannot create a PvPanic device
    PvPanicCreate(devices::pvpanic::PvPanicError),
}

/// Result type whose error is a [`DeviceManagerError`].
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;
484 
// NOTE(review): presumably the size in bytes of the device manager's ACPI
// register window; its use is outside this part of the file — confirm.
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

// ioctl request numbers used by `create_pty` on the pty main fd.
// TIOCSPTLCK is issued there with a zero argument before the peer is opened.
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
// TIOCGTPEER is issued there with open flags and returns the peer's fd.
const TIOCGTPEER: libc::c_int = 0x5441;
489 
490 pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
491     // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
492     // This is done to try and use the devpts filesystem that
493     // could be available for use in the process's namespace first.
494     // Ideally these are all the same file though but different
495     // kernels could have things setup differently.
496     // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
497     // for further details.
498 
499     let custom_flags = libc::O_NONBLOCK;
500     let main = match OpenOptions::new()
501         .read(true)
502         .write(true)
503         .custom_flags(custom_flags)
504         .open("/dev/pts/ptmx")
505     {
506         Ok(f) => f,
507         _ => OpenOptions::new()
508             .read(true)
509             .write(true)
510             .custom_flags(custom_flags)
511             .open("/dev/ptmx")?,
512     };
513     let mut unlock: libc::c_ulong = 0;
514     // SAFETY: FFI call into libc, trivially safe
515     unsafe { libc::ioctl(main.as_raw_fd(), TIOCSPTLCK as _, &mut unlock) };
516 
517     // SAFETY: FFI call into libc, trivially safe
518     let sub_fd = unsafe {
519         libc::ioctl(
520             main.as_raw_fd(),
521             TIOCGTPEER as _,
522             libc::O_NOCTTY | libc::O_RDWR,
523         )
524     };
525     if sub_fd == -1 {
526         return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
527     }
528 
529     let proc_path = PathBuf::from(format!("/proc/self/fd/{sub_fd}"));
530     let path = read_link(proc_path)?;
531 
532     // SAFETY: sub_fd is checked to be valid before being wrapped in File
533     Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
534 }
535 
536 #[derive(Default)]
537 pub struct Console {
538     console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
539 }
540 
541 impl Console {
542     pub fn need_resize(&self) -> bool {
543         if let Some(_resizer) = self.console_resizer.as_ref() {
544             return true;
545         }
546 
547         false
548     }
549 
550     pub fn update_console_size(&self) {
551         if let Some(resizer) = self.console_resizer.as_ref() {
552             resizer.update_console_size()
553         }
554     }
555 }
556 
/// Shared state needed to relocate PCI BARs: the address allocators, the
/// buses on which devices are registered, the hypervisor VM handle and the
/// device tree.
pub(crate) struct AddressManager {
    /// System allocator; `move_bar` uses it for I/O port ranges and for
    /// 32-bit MMIO hole ranges.
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    /// Port I/O bus (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub(crate) io_bus: Arc<Bus>,
    /// MMIO bus.
    pub(crate) mmio_bus: Arc<Bus>,
    /// Hypervisor VM handle; `move_bar` uses it for ioevents and user
    /// memory regions.
    pub(crate) vm: Arc<dyn hypervisor::Vm>,
    /// Device tree whose `PciBar` resources are updated when a BAR moves.
    device_tree: Arc<Mutex<DeviceTree>>,
    /// Allocators searched by `move_bar` for 64-bit memory BARs.
    /// NOTE(review): presumably one per PCI segment — confirm.
    pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}
566 
567 impl DeviceRelocation for AddressManager {
568     fn move_bar(
569         &self,
570         old_base: u64,
571         new_base: u64,
572         len: u64,
573         pci_dev: &mut dyn PciDevice,
574         region_type: PciBarRegionType,
575     ) -> std::result::Result<(), std::io::Error> {
576         match region_type {
577             PciBarRegionType::IoRegion => {
578                 #[cfg(target_arch = "x86_64")]
579                 {
580                     // Update system allocator
581                     self.allocator
582                         .lock()
583                         .unwrap()
584                         .free_io_addresses(GuestAddress(old_base), len as GuestUsize);
585 
586                     self.allocator
587                         .lock()
588                         .unwrap()
589                         .allocate_io_addresses(
590                             Some(GuestAddress(new_base)),
591                             len as GuestUsize,
592                             None,
593                         )
594                         .ok_or_else(|| {
595                             io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
596                         })?;
597 
598                     // Update PIO bus
599                     self.io_bus
600                         .update_range(old_base, len, new_base, len)
601                         .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
602                 }
603                 #[cfg(target_arch = "aarch64")]
604                 error!("I/O region is not supported");
605             }
606             PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
607                 // Update system allocator
608                 if region_type == PciBarRegionType::Memory32BitRegion {
609                     self.allocator
610                         .lock()
611                         .unwrap()
612                         .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);
613 
614                     self.allocator
615                         .lock()
616                         .unwrap()
617                         .allocate_mmio_hole_addresses(
618                             Some(GuestAddress(new_base)),
619                             len as GuestUsize,
620                             Some(len),
621                         )
622                         .ok_or_else(|| {
623                             io::Error::new(
624                                 io::ErrorKind::Other,
625                                 "failed allocating new 32 bits MMIO range",
626                             )
627                         })?;
628                 } else {
629                     // Find the specific allocator that this BAR was allocated from and use it for new one
630                     for allocator in &self.pci_mmio_allocators {
631                         let allocator_base = allocator.lock().unwrap().base();
632                         let allocator_end = allocator.lock().unwrap().end();
633 
634                         if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
635                             allocator
636                                 .lock()
637                                 .unwrap()
638                                 .free(GuestAddress(old_base), len as GuestUsize);
639 
640                             allocator
641                                 .lock()
642                                 .unwrap()
643                                 .allocate(
644                                     Some(GuestAddress(new_base)),
645                                     len as GuestUsize,
646                                     Some(len),
647                                 )
648                                 .ok_or_else(|| {
649                                     io::Error::new(
650                                         io::ErrorKind::Other,
651                                         "failed allocating new 64 bits MMIO range",
652                                     )
653                                 })?;
654 
655                             break;
656                         }
657                     }
658                 }
659 
660                 // Update MMIO bus
661                 self.mmio_bus
662                     .update_range(old_base, len, new_base, len)
663                     .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
664             }
665         }
666 
667         // Update the device_tree resources associated with the device
668         if let Some(id) = pci_dev.id() {
669             if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
670                 let mut resource_updated = false;
671                 for resource in node.resources.iter_mut() {
672                     if let Resource::PciBar { base, type_, .. } = resource {
673                         if PciBarRegionType::from(*type_) == region_type && *base == old_base {
674                             *base = new_base;
675                             resource_updated = true;
676                             break;
677                         }
678                     }
679                 }
680 
681                 if !resource_updated {
682                     return Err(io::Error::new(
683                         io::ErrorKind::Other,
684                         format!(
685                             "Couldn't find a resource with base 0x{old_base:x} for device {id}"
686                         ),
687                     ));
688                 }
689             } else {
690                 return Err(io::Error::new(
691                     io::ErrorKind::Other,
692                     format!("Couldn't find device {id} from device tree"),
693                 ));
694             }
695         }
696 
697         let any_dev = pci_dev.as_any();
698         if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
699             let bar_addr = virtio_pci_dev.config_bar_addr();
700             if bar_addr == new_base {
701                 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
702                     let io_addr = IoEventAddress::Mmio(addr);
703                     self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
704                         io::Error::new(
705                             io::ErrorKind::Other,
706                             format!("failed to unregister ioevent: {e:?}"),
707                         )
708                     })?;
709                 }
710                 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
711                     let io_addr = IoEventAddress::Mmio(addr);
712                     self.vm
713                         .register_ioevent(event, &io_addr, None)
714                         .map_err(|e| {
715                             io::Error::new(
716                                 io::ErrorKind::Other,
717                                 format!("failed to register ioevent: {e:?}"),
718                             )
719                         })?;
720                 }
721             } else {
722                 let virtio_dev = virtio_pci_dev.virtio_device();
723                 let mut virtio_dev = virtio_dev.lock().unwrap();
724                 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
725                     if shm_regions.addr.raw_value() == old_base {
726                         let mem_region = self.vm.make_user_memory_region(
727                             shm_regions.mem_slot,
728                             old_base,
729                             shm_regions.len,
730                             shm_regions.host_addr,
731                             false,
732                             false,
733                         );
734 
735                         self.vm.remove_user_memory_region(mem_region).map_err(|e| {
736                             io::Error::new(
737                                 io::ErrorKind::Other,
738                                 format!("failed to remove user memory region: {e:?}"),
739                             )
740                         })?;
741 
742                         // Create new mapping by inserting new region to KVM.
743                         let mem_region = self.vm.make_user_memory_region(
744                             shm_regions.mem_slot,
745                             new_base,
746                             shm_regions.len,
747                             shm_regions.host_addr,
748                             false,
749                             false,
750                         );
751 
752                         self.vm.create_user_memory_region(mem_region).map_err(|e| {
753                             io::Error::new(
754                                 io::ErrorKind::Other,
755                                 format!("failed to create user memory regions: {e:?}"),
756                             )
757                         })?;
758 
759                         // Update shared memory regions to reflect the new mapping.
760                         shm_regions.addr = GuestAddress(new_base);
761                         virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
762                             io::Error::new(
763                                 io::ErrorKind::Other,
764                                 format!("failed to update shared memory regions: {e:?}"),
765                             )
766                         })?;
767                     }
768                 }
769             }
770         }
771 
772         pci_dev.move_bar(old_base, new_base)
773     }
774 }
775 
/// Serializable state of the device manager: the device tree plus the
/// device id counter.
// NOTE(review): presumably the payload saved and restored for
// snapshot/migration — the code using it is outside this part of the file.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}
781 
/// The main end of a pty together with a filesystem path.
// NOTE(review): `path` is presumably the peer path returned by
// `create_pty` — confirm against the code that builds a `PtyPair`.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    /// Duplicates the underlying file handle and copies the path.
    ///
    /// # Panics
    ///
    /// Panics if the file handle cannot be duplicated.
    fn clone(&self) -> Self {
        let main = self.main.try_clone().unwrap();
        let path = self.path.to_path_buf();
        Self { main, path }
    }
}
796 
/// Shared handle to one of the three supported kinds of PCI device.
///
/// Cloning copies the `Arc`, so every clone refers to the same device.
#[derive(Clone)]
pub enum PciDeviceHandle {
    /// A VFIO PCI device.
    Vfio(Arc<Mutex<VfioPciDevice>>),
    /// A virtio device exposed over PCI.
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    /// A vfio-user PCI device.
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}
803 
/// A virtio device bundled with the metadata kept alongside it.
#[derive(Clone)]
struct MetaVirtioDevice {
    /// The virtio device itself.
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // NOTE(review): presumably whether the device sits behind the virtual
    // IOMMU — confirm against the code that reads this flag.
    iommu: bool,
    /// Identifier of the device.
    id: String,
    /// PCI segment number recorded for the device.
    pci_segment: u16,
    /// Optional external DMA mapping handler for the device.
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}
812 
/// Optional ACPI generic addresses of platform registers.
///
/// Every field defaults to `None`.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    /// Address of the PM timer, if any.
    pub pm_timer_address: Option<GenericAddress>,
    /// Address of the reset register, if any.
    pub reset_reg_address: Option<GenericAddress>,
    /// Address of the sleep control register, if any.
    pub sleep_control_reg_address: Option<GenericAddress>,
    /// Address of the sleep status register, if any.
    pub sleep_status_reg_address: Option<GenericAddress>,
}
820 
/// Owner of the VM's devices.
///
/// The manager keeps strong references to every device it creates, the
/// address and interrupt managers used to wire them up, and the device tree
/// used for snapshot and restore. It is built by `DeviceManager::new()` and
/// populated by `DeviceManager::create_devices()`.
pub struct DeviceManager {
    // The underlying hypervisor
    hypervisor_type: HypervisorType,

    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // console PTY
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // serial PTY
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // Pipe handed out by `console_resize_pipe()`.
    // NOTE(review): presumably used to signal console resize events; confirm
    // against the console device code.
    console_resize_pipe: Option<Arc<File>>,

    // Original termios settings, kept so they can be restored on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    // PCI segments; index 0 is the default segment created in `new()`.
    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit and reset events. Clones of both are handed to the ACPI shutdown
    // device; on x86_64 a clone of the reset event also goes to the legacy
    // devices.
    exit_evt: EventFd,
    reset_evt: EventFd,

    // Information about the MMIO devices registered so far, keyed by device
    // type and name.
    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    // Guest address of the DeviceManager's own ACPI MMIO region, allocated
    // in `new()`.
    acpi_address: GuestAddress,

    // NOTE(review): presumably the index of the PCI segment currently
    // selected by the guest through the ACPI region; confirm against the
    // BusDevice implementation.
    selected_segment: usize,

    // Handles to the virtio-mem devices, if any
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // aio availability if detected
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    // Snapshot to restore the devices' state from, if any.
    snapshot: Option<Snapshot>,
}
969 
970 impl DeviceManager {
971     #[allow(clippy::too_many_arguments)]
972     pub fn new(
973         #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
974         mmio_bus: Arc<Bus>,
975         hypervisor_type: HypervisorType,
976         vm: Arc<dyn hypervisor::Vm>,
977         config: Arc<Mutex<VmConfig>>,
978         memory_manager: Arc<Mutex<MemoryManager>>,
979         cpu_manager: Arc<Mutex<CpuManager>>,
980         exit_evt: EventFd,
981         reset_evt: EventFd,
982         seccomp_action: SeccompAction,
983         numa_nodes: NumaNodes,
984         activate_evt: &EventFd,
985         force_iommu: bool,
986         boot_id_list: BTreeSet<String>,
987         timestamp: Instant,
988         snapshot: Option<Snapshot>,
989         dynamic: bool,
990     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
991         trace_scoped!("DeviceManager::new");
992 
993         let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
994             let state: DeviceManagerState = snapshot.to_state().unwrap();
995             (
996                 Arc::new(Mutex::new(state.device_tree.clone())),
997                 state.device_id_cnt,
998             )
999         } else {
1000             (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
1001         };
1002 
1003         let num_pci_segments =
1004             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
1005                 platform_config.num_pci_segments
1006             } else {
1007                 1
1008             };
1009 
1010         let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
1011         let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;
1012 
1013         // Start each PCI segment range on a 4GiB boundary
1014         let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
1015             / ((4 << 30) * num_pci_segments as u64)
1016             * (4 << 30);
1017 
1018         let mut pci_mmio_allocators = vec![];
1019         for i in 0..num_pci_segments as u64 {
1020             let mmio_start = start_of_device_area + i * pci_segment_size;
1021             let allocator = Arc::new(Mutex::new(
1022                 AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
1023             ));
1024             pci_mmio_allocators.push(allocator)
1025         }
1026 
1027         let address_manager = Arc::new(AddressManager {
1028             allocator: memory_manager.lock().unwrap().allocator(),
1029             #[cfg(target_arch = "x86_64")]
1030             io_bus,
1031             mmio_bus,
1032             vm: vm.clone(),
1033             device_tree: Arc::clone(&device_tree),
1034             pci_mmio_allocators,
1035         });
1036 
1037         // First we create the MSI interrupt manager, the legacy one is created
1038         // later, after the IOAPIC device creation.
1039         // The reason we create the MSI one first is because the IOAPIC needs it,
1040         // and then the legacy interrupt manager needs an IOAPIC. So we're
1041         // handling a linear dependency chain:
1042         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1043         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1044             Arc::new(MsiInterruptManager::new(
1045                 Arc::clone(&address_manager.allocator),
1046                 vm,
1047             ));
1048 
1049         let acpi_address = address_manager
1050             .allocator
1051             .lock()
1052             .unwrap()
1053             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1054             .ok_or(DeviceManagerError::AllocateIoPort)?;
1055 
1056         let mut pci_irq_slots = [0; 32];
1057         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1058             &address_manager,
1059             &mut pci_irq_slots,
1060         )?;
1061 
1062         let mut pci_segments = vec![PciSegment::new_default_segment(
1063             &address_manager,
1064             Arc::clone(&address_manager.pci_mmio_allocators[0]),
1065             &pci_irq_slots,
1066         )?];
1067 
1068         for i in 1..num_pci_segments as usize {
1069             pci_segments.push(PciSegment::new(
1070                 i as u16,
1071                 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
1072                 &address_manager,
1073                 Arc::clone(&address_manager.pci_mmio_allocators[i]),
1074                 &pci_irq_slots,
1075             )?);
1076         }
1077 
1078         if dynamic {
1079             let acpi_address = address_manager
1080                 .allocator
1081                 .lock()
1082                 .unwrap()
1083                 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
1084                 .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1085 
1086             address_manager
1087                 .mmio_bus
1088                 .insert(
1089                     cpu_manager.clone(),
1090                     acpi_address.0,
1091                     CPU_MANAGER_ACPI_SIZE as u64,
1092                 )
1093                 .map_err(DeviceManagerError::BusError)?;
1094 
1095             cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
1096         }
1097 
1098         let device_manager = DeviceManager {
1099             hypervisor_type,
1100             address_manager: Arc::clone(&address_manager),
1101             console: Arc::new(Console::default()),
1102             interrupt_controller: None,
1103             #[cfg(target_arch = "aarch64")]
1104             cmdline_additions: Vec::new(),
1105             ged_notification_device: None,
1106             config,
1107             memory_manager,
1108             cpu_manager,
1109             virtio_devices: Vec::new(),
1110             bus_devices: Vec::new(),
1111             device_id_cnt,
1112             msi_interrupt_manager,
1113             legacy_interrupt_manager: None,
1114             passthrough_device: None,
1115             vfio_container: None,
1116             iommu_device: None,
1117             iommu_mapping: None,
1118             iommu_attached_devices: None,
1119             pci_segments,
1120             device_tree,
1121             exit_evt,
1122             reset_evt,
1123             #[cfg(target_arch = "aarch64")]
1124             id_to_dev_info: HashMap::new(),
1125             seccomp_action,
1126             numa_nodes,
1127             balloon: None,
1128             activate_evt: activate_evt
1129                 .try_clone()
1130                 .map_err(DeviceManagerError::EventFd)?,
1131             acpi_address,
1132             selected_segment: 0,
1133             serial_pty: None,
1134             serial_manager: None,
1135             console_pty: None,
1136             console_resize_pipe: None,
1137             original_termios_opt: Arc::new(Mutex::new(None)),
1138             virtio_mem_devices: Vec::new(),
1139             #[cfg(target_arch = "aarch64")]
1140             gpio_device: None,
1141             pvpanic_device: None,
1142             force_iommu,
1143             io_uring_supported: None,
1144             aio_supported: None,
1145             boot_id_list,
1146             timestamp,
1147             pending_activations: Arc::new(Mutex::new(Vec::default())),
1148             acpi_platform_addresses: AcpiPlatformAddresses::default(),
1149             snapshot,
1150         };
1151 
1152         let device_manager = Arc::new(Mutex::new(device_manager));
1153 
1154         address_manager
1155             .mmio_bus
1156             .insert(
1157                 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
1158                 acpi_address.0,
1159                 DEVICE_MANAGER_ACPI_SIZE as u64,
1160             )
1161             .map_err(DeviceManagerError::BusError)?;
1162 
1163         Ok(device_manager)
1164     }
1165 
1166     pub fn serial_pty(&self) -> Option<PtyPair> {
1167         self.serial_pty
1168             .as_ref()
1169             .map(|pty| pty.lock().unwrap().clone())
1170     }
1171 
1172     pub fn console_pty(&self) -> Option<PtyPair> {
1173         self.console_pty
1174             .as_ref()
1175             .map(|pty| pty.lock().unwrap().clone())
1176     }
1177 
1178     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1179         self.console_resize_pipe.as_ref().map(Arc::clone)
1180     }
1181 
1182     pub fn create_devices(
1183         &mut self,
1184         serial_pty: Option<PtyPair>,
1185         console_pty: Option<PtyPair>,
1186         console_resize_pipe: Option<File>,
1187         original_termios_opt: Arc<Mutex<Option<termios>>>,
1188     ) -> DeviceManagerResult<()> {
1189         trace_scoped!("create_devices");
1190 
1191         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1192 
1193         let interrupt_controller = self.add_interrupt_controller()?;
1194 
1195         self.cpu_manager
1196             .lock()
1197             .unwrap()
1198             .set_interrupt_controller(interrupt_controller.clone());
1199 
1200         // Now we can create the legacy interrupt manager, which needs the freshly
1201         // formed IOAPIC device.
1202         let legacy_interrupt_manager: Arc<
1203             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1204         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1205             &interrupt_controller,
1206         )));
1207 
1208         {
1209             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1210                 self.address_manager
1211                     .mmio_bus
1212                     .insert(
1213                         Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
1214                         acpi_address.0,
1215                         MEMORY_MANAGER_ACPI_SIZE as u64,
1216                     )
1217                     .map_err(DeviceManagerError::BusError)?;
1218             }
1219         }
1220 
1221         #[cfg(target_arch = "x86_64")]
1222         self.add_legacy_devices(
1223             self.reset_evt
1224                 .try_clone()
1225                 .map_err(DeviceManagerError::EventFd)?,
1226         )?;
1227 
1228         #[cfg(target_arch = "aarch64")]
1229         self.add_legacy_devices(&legacy_interrupt_manager)?;
1230 
1231         {
1232             self.ged_notification_device = self.add_acpi_devices(
1233                 &legacy_interrupt_manager,
1234                 self.reset_evt
1235                     .try_clone()
1236                     .map_err(DeviceManagerError::EventFd)?,
1237                 self.exit_evt
1238                     .try_clone()
1239                     .map_err(DeviceManagerError::EventFd)?,
1240             )?;
1241         }
1242 
1243         self.original_termios_opt = original_termios_opt;
1244 
1245         self.console = self.add_console_device(
1246             &legacy_interrupt_manager,
1247             &mut virtio_devices,
1248             serial_pty,
1249             console_pty,
1250             console_resize_pipe,
1251         )?;
1252 
1253         if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
1254             let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
1255             self.bus_devices
1256                 .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>)
1257         }
1258         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1259 
1260         virtio_devices.append(&mut self.make_virtio_devices()?);
1261 
1262         self.add_pci_devices(virtio_devices.clone())?;
1263 
1264         self.virtio_devices = virtio_devices;
1265 
1266         if self.config.clone().lock().unwrap().pvpanic {
1267             self.pvpanic_device = self.add_pvpanic_device()?;
1268         }
1269 
1270         Ok(())
1271     }
1272 
1273     fn state(&self) -> DeviceManagerState {
1274         DeviceManagerState {
1275             device_tree: self.device_tree.lock().unwrap().clone(),
1276             device_id_cnt: self.device_id_cnt,
1277         }
1278     }
1279 
1280     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1281         #[cfg(target_arch = "aarch64")]
1282         {
1283             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1284             let vgic_config = gic::Gic::create_default_config(vcpus.into());
1285             (
1286                 vgic_config.msi_addr,
1287                 vgic_config.msi_addr + vgic_config.msi_size - 1,
1288             )
1289         }
1290         #[cfg(target_arch = "x86_64")]
1291         (0xfee0_0000, 0xfeef_ffff)
1292     }
1293 
    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    ///
    /// The returned map is keyed by device type and device name, and each
    /// entry is the `MmioDeviceInfo` recorded when the device was registered.
    /// Only available on aarch64.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }
1299 
1300     #[allow(unused_variables)]
1301     fn add_pci_devices(
1302         &mut self,
1303         virtio_devices: Vec<MetaVirtioDevice>,
1304     ) -> DeviceManagerResult<()> {
1305         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1306 
1307         let iommu_device = if self.config.lock().unwrap().iommu {
1308             let (device, mapping) = virtio_devices::Iommu::new(
1309                 iommu_id.clone(),
1310                 self.seccomp_action.clone(),
1311                 self.exit_evt
1312                     .try_clone()
1313                     .map_err(DeviceManagerError::EventFd)?,
1314                 self.get_msi_iova_space(),
1315                 versioned_state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
1316                     .map_err(DeviceManagerError::RestoreGetState)?,
1317             )
1318             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1319             let device = Arc::new(Mutex::new(device));
1320             self.iommu_device = Some(Arc::clone(&device));
1321             self.iommu_mapping = Some(mapping);
1322 
1323             // Fill the device tree with a new node. In case of restore, we
1324             // know there is nothing to do, so we can simply override the
1325             // existing entry.
1326             self.device_tree
1327                 .lock()
1328                 .unwrap()
1329                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1330 
1331             Some(device)
1332         } else {
1333             None
1334         };
1335 
1336         let mut iommu_attached_devices = Vec::new();
1337         {
1338             for handle in virtio_devices {
1339                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1340                     self.iommu_mapping.clone()
1341                 } else {
1342                     None
1343                 };
1344 
1345                 let dev_id = self.add_virtio_pci_device(
1346                     handle.virtio_device,
1347                     &mapping,
1348                     handle.id,
1349                     handle.pci_segment,
1350                     handle.dma_handler,
1351                 )?;
1352 
1353                 if handle.iommu {
1354                     iommu_attached_devices.push(dev_id);
1355                 }
1356             }
1357 
1358             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1359             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1360 
1361             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1362             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1363 
1364             // Add all devices from forced iommu segments
1365             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1366                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1367                     for segment in iommu_segments {
1368                         for device in 0..32 {
1369                             let bdf = PciBdf::new(*segment, 0, device, 0);
1370                             if !iommu_attached_devices.contains(&bdf) {
1371                                 iommu_attached_devices.push(bdf);
1372                             }
1373                         }
1374                     }
1375                 }
1376             }
1377 
1378             if let Some(iommu_device) = iommu_device {
1379                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1380                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1381             }
1382         }
1383 
1384         for segment in &self.pci_segments {
1385             #[cfg(target_arch = "x86_64")]
1386             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1387                 self.bus_devices
1388                     .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
1389             }
1390 
1391             self.bus_devices
1392                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
1393         }
1394 
1395         Ok(())
1396     }
1397 
    /// Creates the GIC interrupt controller (aarch64), stores it in the
    /// manager and records it in the device tree.
    ///
    /// When the manager's snapshot contains an entry for the GIC, the PMU is
    /// initialized (best effort) and the vGIC state is restored from that
    /// entry together with the saved vCPU states.
    ///
    /// # Errors
    ///
    /// * `CreateInterruptController` if the GIC cannot be created.
    /// * `RestoreGetState` if the vGIC state cannot be extracted from the
    ///   snapshot.
    ///
    /// # Panics
    ///
    /// Panics if restoring the vGIC state fails.
    #[cfg(target_arch = "aarch64")]
    fn add_interrupt_controller(
        &mut self,
    ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
        let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
            gic::Gic::new(
                self.config.lock().unwrap().cpus.boot_vcpus,
                Arc::clone(&self.msi_interrupt_manager),
                self.address_manager.vm.clone(),
            )
            .map_err(DeviceManagerError::CreateInterruptController)?,
        ));

        // Keep the concrete GIC handle; callers get it back through
        // `get_interrupt_controller()`.
        self.interrupt_controller = Some(interrupt_controller.clone());

        // Restore the vGic if this is in the process of restoration
        let id = String::from(gic::GIC_SNAPSHOT_ID);
        if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
            // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
            if self
                .cpu_manager
                .lock()
                .unwrap()
                .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
                .is_err()
            {
                info!("Failed to initialize PMU");
            }

            let vgic_state = vgic_snapshot
                .to_state()
                .map_err(DeviceManagerError::RestoreGetState)?;
            let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
            // NOTE(review): a failed vGIC restore panics here instead of
            // being reported through `DeviceManagerResult`; consider mapping
            // it to an error.
            interrupt_controller
                .lock()
                .unwrap()
                .restore_vgic(vgic_state, &saved_vcpu_states)
                .unwrap();
        }

        // Record the controller in the device tree under the GIC snapshot ID.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, interrupt_controller));

        Ok(interrupt_controller)
    }
1445 
1446     #[cfg(target_arch = "aarch64")]
1447     pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
1448         self.interrupt_controller.as_ref()
1449     }
1450 
1451     #[cfg(target_arch = "x86_64")]
1452     fn add_interrupt_controller(
1453         &mut self,
1454     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1455         let id = String::from(IOAPIC_DEVICE_NAME);
1456 
1457         // Create IOAPIC
1458         let interrupt_controller = Arc::new(Mutex::new(
1459             ioapic::Ioapic::new(
1460                 id.clone(),
1461                 APIC_START,
1462                 Arc::clone(&self.msi_interrupt_manager),
1463                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1464                     .map_err(DeviceManagerError::RestoreGetState)?,
1465             )
1466             .map_err(DeviceManagerError::CreateInterruptController)?,
1467         ));
1468 
1469         self.interrupt_controller = Some(interrupt_controller.clone());
1470 
1471         self.address_manager
1472             .mmio_bus
1473             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1474             .map_err(DeviceManagerError::BusError)?;
1475 
1476         self.bus_devices
1477             .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);
1478 
1479         // Fill the device tree with a new node. In case of restore, we
1480         // know there is nothing to do, so we can simply override the
1481         // existing entry.
1482         self.device_tree
1483             .lock()
1484             .unwrap()
1485             .insert(id.clone(), device_node!(id, interrupt_controller));
1486 
1487         Ok(interrupt_controller)
1488     }
1489 
1490     fn add_acpi_devices(
1491         &mut self,
1492         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1493         reset_evt: EventFd,
1494         exit_evt: EventFd,
1495     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1496         let vcpus_kill_signalled = self
1497             .cpu_manager
1498             .lock()
1499             .unwrap()
1500             .vcpus_kill_signalled()
1501             .clone();
1502         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1503             exit_evt,
1504             reset_evt,
1505             vcpus_kill_signalled,
1506         )));
1507 
1508         self.bus_devices
1509             .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);
1510 
1511         #[cfg(target_arch = "x86_64")]
1512         {
1513             let shutdown_pio_address: u16 = 0x600;
1514 
1515             self.address_manager
1516                 .allocator
1517                 .lock()
1518                 .unwrap()
1519                 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
1520                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1521 
1522             self.address_manager
1523                 .io_bus
1524                 .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
1525                 .map_err(DeviceManagerError::BusError)?;
1526 
1527             self.acpi_platform_addresses.sleep_control_reg_address =
1528                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1529             self.acpi_platform_addresses.sleep_status_reg_address =
1530                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1531             self.acpi_platform_addresses.reset_reg_address =
1532                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1533         }
1534 
1535         let ged_irq = self
1536             .address_manager
1537             .allocator
1538             .lock()
1539             .unwrap()
1540             .allocate_irq()
1541             .unwrap();
1542         let interrupt_group = interrupt_manager
1543             .create_group(LegacyIrqGroupConfig {
1544                 irq: ged_irq as InterruptIndex,
1545             })
1546             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1547         let ged_address = self
1548             .address_manager
1549             .allocator
1550             .lock()
1551             .unwrap()
1552             .allocate_platform_mmio_addresses(
1553                 None,
1554                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1555                 None,
1556             )
1557             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1558         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1559             interrupt_group,
1560             ged_irq,
1561             ged_address,
1562         )));
1563         self.address_manager
1564             .mmio_bus
1565             .insert(
1566                 ged_device.clone(),
1567                 ged_address.0,
1568                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1569             )
1570             .map_err(DeviceManagerError::BusError)?;
1571         self.bus_devices
1572             .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);
1573 
1574         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1575 
1576         self.bus_devices
1577             .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);
1578 
1579         #[cfg(target_arch = "x86_64")]
1580         {
1581             let pm_timer_pio_address: u16 = 0x608;
1582 
1583             self.address_manager
1584                 .allocator
1585                 .lock()
1586                 .unwrap()
1587                 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
1588                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1589 
1590             self.address_manager
1591                 .io_bus
1592                 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
1593                 .map_err(DeviceManagerError::BusError)?;
1594 
1595             self.acpi_platform_addresses.pm_timer_address =
1596                 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
1597         }
1598 
1599         Ok(Some(ged_device))
1600     }
1601 
1602     #[cfg(target_arch = "x86_64")]
1603     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1604         let vcpus_kill_signalled = self
1605             .cpu_manager
1606             .lock()
1607             .unwrap()
1608             .vcpus_kill_signalled()
1609             .clone();
1610         // Add a shutdown device (i8042)
1611         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1612             reset_evt.try_clone().unwrap(),
1613             vcpus_kill_signalled.clone(),
1614         )));
1615 
1616         self.bus_devices
1617             .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);
1618 
1619         self.address_manager
1620             .io_bus
1621             .insert(i8042, 0x61, 0x4)
1622             .map_err(DeviceManagerError::BusError)?;
1623         {
1624             // Add a CMOS emulated device
1625             let mem_size = self
1626                 .memory_manager
1627                 .lock()
1628                 .unwrap()
1629                 .guest_memory()
1630                 .memory()
1631                 .last_addr()
1632                 .0
1633                 + 1;
1634             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1635             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1636 
1637             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1638                 mem_below_4g,
1639                 mem_above_4g,
1640                 reset_evt,
1641                 Some(vcpus_kill_signalled),
1642             )));
1643 
1644             self.bus_devices
1645                 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);
1646 
1647             self.address_manager
1648                 .io_bus
1649                 .insert(cmos, 0x70, 0x2)
1650                 .map_err(DeviceManagerError::BusError)?;
1651 
1652             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1653 
1654             self.bus_devices
1655                 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);
1656 
1657             self.address_manager
1658                 .io_bus
1659                 .insert(fwdebug, 0x402, 0x1)
1660                 .map_err(DeviceManagerError::BusError)?;
1661         }
1662 
1663         // 0x80 debug port
1664         let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
1665         self.bus_devices
1666             .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
1667         self.address_manager
1668             .io_bus
1669             .insert(debug_port, 0x80, 0x1)
1670             .map_err(DeviceManagerError::BusError)?;
1671 
1672         Ok(())
1673     }
1674 
1675     #[cfg(target_arch = "aarch64")]
1676     fn add_legacy_devices(
1677         &mut self,
1678         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1679     ) -> DeviceManagerResult<()> {
1680         // Add a RTC device
1681         let rtc_irq = self
1682             .address_manager
1683             .allocator
1684             .lock()
1685             .unwrap()
1686             .allocate_irq()
1687             .unwrap();
1688 
1689         let interrupt_group = interrupt_manager
1690             .create_group(LegacyIrqGroupConfig {
1691                 irq: rtc_irq as InterruptIndex,
1692             })
1693             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1694 
1695         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1696 
1697         self.bus_devices
1698             .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);
1699 
1700         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1701 
1702         self.address_manager
1703             .mmio_bus
1704             .insert(rtc_device, addr.0, MMIO_LEN)
1705             .map_err(DeviceManagerError::BusError)?;
1706 
1707         self.id_to_dev_info.insert(
1708             (DeviceType::Rtc, "rtc".to_string()),
1709             MmioDeviceInfo {
1710                 addr: addr.0,
1711                 len: MMIO_LEN,
1712                 irq: rtc_irq,
1713             },
1714         );
1715 
1716         // Add a GPIO device
1717         let id = String::from(GPIO_DEVICE_NAME);
1718         let gpio_irq = self
1719             .address_manager
1720             .allocator
1721             .lock()
1722             .unwrap()
1723             .allocate_irq()
1724             .unwrap();
1725 
1726         let interrupt_group = interrupt_manager
1727             .create_group(LegacyIrqGroupConfig {
1728                 irq: gpio_irq as InterruptIndex,
1729             })
1730             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1731 
1732         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1733             id.clone(),
1734             interrupt_group,
1735             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1736                 .map_err(DeviceManagerError::RestoreGetState)?,
1737         )));
1738 
1739         self.bus_devices
1740             .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);
1741 
1742         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1743 
1744         self.address_manager
1745             .mmio_bus
1746             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1747             .map_err(DeviceManagerError::BusError)?;
1748 
1749         self.gpio_device = Some(gpio_device.clone());
1750 
1751         self.id_to_dev_info.insert(
1752             (DeviceType::Gpio, "gpio".to_string()),
1753             MmioDeviceInfo {
1754                 addr: addr.0,
1755                 len: MMIO_LEN,
1756                 irq: gpio_irq,
1757             },
1758         );
1759 
1760         self.device_tree
1761             .lock()
1762             .unwrap()
1763             .insert(id.clone(), device_node!(id, gpio_device));
1764 
1765         Ok(())
1766     }
1767 
1768     #[cfg(target_arch = "x86_64")]
1769     fn add_serial_device(
1770         &mut self,
1771         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1772         serial_writer: Option<Box<dyn io::Write + Send>>,
1773     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1774         // Serial is tied to IRQ #4
1775         let serial_irq = 4;
1776 
1777         let id = String::from(SERIAL_DEVICE_NAME);
1778 
1779         let interrupt_group = interrupt_manager
1780             .create_group(LegacyIrqGroupConfig {
1781                 irq: serial_irq as InterruptIndex,
1782             })
1783             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1784 
1785         let serial = Arc::new(Mutex::new(Serial::new(
1786             id.clone(),
1787             interrupt_group,
1788             serial_writer,
1789             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1790                 .map_err(DeviceManagerError::RestoreGetState)?,
1791         )));
1792 
1793         self.bus_devices
1794             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1795 
1796         self.address_manager
1797             .allocator
1798             .lock()
1799             .unwrap()
1800             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1801             .ok_or(DeviceManagerError::AllocateIoPort)?;
1802 
1803         self.address_manager
1804             .io_bus
1805             .insert(serial.clone(), 0x3f8, 0x8)
1806             .map_err(DeviceManagerError::BusError)?;
1807 
1808         // Fill the device tree with a new node. In case of restore, we
1809         // know there is nothing to do, so we can simply override the
1810         // existing entry.
1811         self.device_tree
1812             .lock()
1813             .unwrap()
1814             .insert(id.clone(), device_node!(id, serial));
1815 
1816         Ok(serial)
1817     }
1818 
1819     #[cfg(target_arch = "aarch64")]
1820     fn add_serial_device(
1821         &mut self,
1822         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1823         serial_writer: Option<Box<dyn io::Write + Send>>,
1824     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1825         let id = String::from(SERIAL_DEVICE_NAME);
1826 
1827         let serial_irq = self
1828             .address_manager
1829             .allocator
1830             .lock()
1831             .unwrap()
1832             .allocate_irq()
1833             .unwrap();
1834 
1835         let interrupt_group = interrupt_manager
1836             .create_group(LegacyIrqGroupConfig {
1837                 irq: serial_irq as InterruptIndex,
1838             })
1839             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1840 
1841         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1842             id.clone(),
1843             interrupt_group,
1844             serial_writer,
1845             self.timestamp,
1846             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1847                 .map_err(DeviceManagerError::RestoreGetState)?,
1848         )));
1849 
1850         self.bus_devices
1851             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1852 
1853         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
1854 
1855         self.address_manager
1856             .mmio_bus
1857             .insert(serial.clone(), addr.0, MMIO_LEN)
1858             .map_err(DeviceManagerError::BusError)?;
1859 
1860         self.id_to_dev_info.insert(
1861             (DeviceType::Serial, DeviceType::Serial.to_string()),
1862             MmioDeviceInfo {
1863                 addr: addr.0,
1864                 len: MMIO_LEN,
1865                 irq: serial_irq,
1866             },
1867         );
1868 
1869         self.cmdline_additions
1870             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1871 
1872         // Fill the device tree with a new node. In case of restore, we
1873         // know there is nothing to do, so we can simply override the
1874         // existing entry.
1875         self.device_tree
1876             .lock()
1877             .unwrap()
1878             .insert(id.clone(), device_node!(id, serial));
1879 
1880         Ok(serial)
1881     }
1882 
1883     fn modify_mode<F: FnOnce(&mut termios)>(
1884         &mut self,
1885         fd: RawFd,
1886         f: F,
1887     ) -> vmm_sys_util::errno::Result<()> {
1888         // SAFETY: safe because we check the return value of isatty.
1889         if unsafe { isatty(fd) } != 1 {
1890             return Ok(());
1891         }
1892 
1893         // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr
1894         // and we check the return result.
1895         let mut termios: termios = unsafe { zeroed() };
1896         // SAFETY: see above
1897         let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
1898         if ret < 0 {
1899             return vmm_sys_util::errno::errno_result();
1900         }
1901         let mut original_termios_opt = self.original_termios_opt.lock().unwrap();
1902         if original_termios_opt.is_none() {
1903             *original_termios_opt = Some(termios);
1904         }
1905         f(&mut termios);
1906         // SAFETY: Safe because the syscall will only read the extent of termios and we check
1907         // the return result.
1908         let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
1909         if ret < 0 {
1910             return vmm_sys_util::errno::errno_result();
1911         }
1912 
1913         Ok(())
1914     }
1915 
1916     fn set_raw_mode(&mut self, f: &dyn AsRawFd) -> vmm_sys_util::errno::Result<()> {
1917         // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
1918         self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
1919     }
1920 
1921     fn listen_for_sigwinch_on_tty(&mut self, pty_sub: File) -> std::io::Result<()> {
1922         let seccomp_filter = get_seccomp_filter(
1923             &self.seccomp_action,
1924             Thread::PtyForeground,
1925             self.hypervisor_type,
1926         )
1927         .unwrap();
1928 
1929         self.console_resize_pipe =
1930             Some(Arc::new(start_sigwinch_listener(seccomp_filter, pty_sub)?));
1931 
1932         Ok(())
1933     }
1934 
1935     fn add_virtio_console_device(
1936         &mut self,
1937         virtio_devices: &mut Vec<MetaVirtioDevice>,
1938         console_pty: Option<PtyPair>,
1939         resize_pipe: Option<File>,
1940     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
1941         let console_config = self.config.lock().unwrap().console.clone();
1942         let endpoint = match console_config.mode {
1943             ConsoleOutputMode::File => {
1944                 let file = File::create(console_config.file.as_ref().unwrap())
1945                     .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
1946                 Endpoint::File(file)
1947             }
1948             ConsoleOutputMode::Pty => {
1949                 if let Some(pty) = console_pty {
1950                     self.config.lock().unwrap().console.file = Some(pty.path.clone());
1951                     let file = pty.main.try_clone().unwrap();
1952                     self.console_pty = Some(Arc::new(Mutex::new(pty)));
1953                     self.console_resize_pipe = resize_pipe.map(Arc::new);
1954                     Endpoint::PtyPair(file.try_clone().unwrap(), file)
1955                 } else {
1956                     let (main, sub, path) =
1957                         create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?;
1958                     self.set_raw_mode(&sub)
1959                         .map_err(DeviceManagerError::SetPtyRaw)?;
1960                     self.config.lock().unwrap().console.file = Some(path.clone());
1961                     let file = main.try_clone().unwrap();
1962                     assert!(resize_pipe.is_none());
1963                     self.listen_for_sigwinch_on_tty(sub).unwrap();
1964                     self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
1965                     Endpoint::PtyPair(file.try_clone().unwrap(), file)
1966                 }
1967             }
1968             ConsoleOutputMode::Tty => {
1969                 // Duplicating the file descriptors like this is needed as otherwise
1970                 // they will be closed on a reboot and the numbers reused
1971 
1972                 // SAFETY: FFI call to dup. Trivially safe.
1973                 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
1974                 if stdout == -1 {
1975                     return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
1976                 }
1977                 // SAFETY: stdout is valid and owned solely by us.
1978                 let stdout = unsafe { File::from_raw_fd(stdout) };
1979 
1980                 // Make sure stdout is in raw mode, if it's a terminal.
1981                 let _ = self.set_raw_mode(&stdout);
1982 
1983                 // SAFETY: FFI call. Trivially safe.
1984                 if unsafe { libc::isatty(libc::STDOUT_FILENO) } == 1 {
1985                     self.listen_for_sigwinch_on_tty(stdout.try_clone().unwrap())
1986                         .unwrap();
1987                 }
1988 
1989                 // If an interactive TTY then we can accept input
1990                 // SAFETY: FFI call. Trivially safe.
1991                 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
1992                     // SAFETY: FFI call to dup. Trivially safe.
1993                     let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
1994                     if stdin == -1 {
1995                         return vmm_sys_util::errno::errno_result()
1996                             .map_err(DeviceManagerError::DupFd);
1997                     }
1998                     // SAFETY: stdin is valid and owned solely by us.
1999                     let stdin = unsafe { File::from_raw_fd(stdin) };
2000 
2001                     Endpoint::FilePair(stdout, stdin)
2002                 } else {
2003                     Endpoint::File(stdout)
2004                 }
2005             }
2006             ConsoleOutputMode::Socket => {
2007                 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
2008             }
2009             ConsoleOutputMode::Null => Endpoint::Null,
2010             ConsoleOutputMode::Off => return Ok(None),
2011         };
2012         let id = String::from(CONSOLE_DEVICE_NAME);
2013 
2014         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
2015             id.clone(),
2016             endpoint,
2017             self.console_resize_pipe
2018                 .as_ref()
2019                 .map(|p| p.try_clone().unwrap()),
2020             self.force_iommu | console_config.iommu,
2021             self.seccomp_action.clone(),
2022             self.exit_evt
2023                 .try_clone()
2024                 .map_err(DeviceManagerError::EventFd)?,
2025             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2026                 .map_err(DeviceManagerError::RestoreGetState)?,
2027         )
2028         .map_err(DeviceManagerError::CreateVirtioConsole)?;
2029         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
2030         virtio_devices.push(MetaVirtioDevice {
2031             virtio_device: Arc::clone(&virtio_console_device)
2032                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2033             iommu: console_config.iommu,
2034             id: id.clone(),
2035             pci_segment: 0,
2036             dma_handler: None,
2037         });
2038 
2039         // Fill the device tree with a new node. In case of restore, we
2040         // know there is nothing to do, so we can simply override the
2041         // existing entry.
2042         self.device_tree
2043             .lock()
2044             .unwrap()
2045             .insert(id.clone(), device_node!(id, virtio_console_device));
2046 
2047         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
2048         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
2049             Some(console_resizer)
2050         } else {
2051             None
2052         })
2053     }
2054 
2055     fn add_console_device(
2056         &mut self,
2057         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
2058         virtio_devices: &mut Vec<MetaVirtioDevice>,
2059         serial_pty: Option<PtyPair>,
2060         console_pty: Option<PtyPair>,
2061         console_resize_pipe: Option<File>,
2062     ) -> DeviceManagerResult<Arc<Console>> {
2063         let serial_config = self.config.lock().unwrap().serial.clone();
2064         let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
2065             ConsoleOutputMode::File => Some(Box::new(
2066                 File::create(serial_config.file.as_ref().unwrap())
2067                     .map_err(DeviceManagerError::SerialOutputFileOpen)?,
2068             )),
2069             ConsoleOutputMode::Pty => {
2070                 if let Some(pty) = serial_pty {
2071                     self.config.lock().unwrap().serial.file = Some(pty.path.clone());
2072                     self.serial_pty = Some(Arc::new(Mutex::new(pty)));
2073                 } else {
2074                     let (main, sub, path) =
2075                         create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
2076                     self.set_raw_mode(&sub)
2077                         .map_err(DeviceManagerError::SetPtyRaw)?;
2078                     self.config.lock().unwrap().serial.file = Some(path.clone());
2079                     self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
2080                 }
2081                 None
2082             }
2083             ConsoleOutputMode::Tty => {
2084                 let out = stdout();
2085                 let _ = self.set_raw_mode(&out);
2086                 Some(Box::new(out))
2087             }
2088             ConsoleOutputMode::Off | ConsoleOutputMode::Null | ConsoleOutputMode::Socket => None,
2089         };
2090         if serial_config.mode != ConsoleOutputMode::Off {
2091             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
2092             self.serial_manager = match serial_config.mode {
2093                 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty | ConsoleOutputMode::Socket => {
2094                     let serial_manager = SerialManager::new(
2095                         serial,
2096                         self.serial_pty.clone(),
2097                         serial_config.mode,
2098                         serial_config.socket,
2099                     )
2100                     .map_err(DeviceManagerError::CreateSerialManager)?;
2101                     if let Some(mut serial_manager) = serial_manager {
2102                         serial_manager
2103                             .start_thread(
2104                                 self.exit_evt
2105                                     .try_clone()
2106                                     .map_err(DeviceManagerError::EventFd)?,
2107                             )
2108                             .map_err(DeviceManagerError::SpawnSerialManager)?;
2109                         Some(Arc::new(serial_manager))
2110                     } else {
2111                         None
2112                     }
2113                 }
2114                 _ => None,
2115             };
2116         }
2117 
2118         let console_resizer =
2119             self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;
2120 
2121         Ok(Arc::new(Console { console_resizer }))
2122     }
2123 
2124     fn add_tpm_device(
2125         &mut self,
2126         tpm_path: PathBuf,
2127     ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
2128         // Create TPM Device
2129         let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
2130             DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
2131         })?;
2132         let tpm = Arc::new(Mutex::new(tpm));
2133 
2134         // Add TPM Device to mmio
2135         self.address_manager
2136             .mmio_bus
2137             .insert(
2138                 tpm.clone(),
2139                 arch::layout::TPM_START.0,
2140                 arch::layout::TPM_SIZE,
2141             )
2142             .map_err(DeviceManagerError::BusError)?;
2143 
2144         Ok(tpm)
2145     }
2146 
2147     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2148         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
2149 
2150         // Create "standard" virtio devices (net/block/rng)
2151         devices.append(&mut self.make_virtio_block_devices()?);
2152         devices.append(&mut self.make_virtio_net_devices()?);
2153         devices.append(&mut self.make_virtio_rng_devices()?);
2154 
2155         // Add virtio-fs if required
2156         devices.append(&mut self.make_virtio_fs_devices()?);
2157 
2158         // Add virtio-pmem if required
2159         devices.append(&mut self.make_virtio_pmem_devices()?);
2160 
2161         // Add virtio-vsock if required
2162         devices.append(&mut self.make_virtio_vsock_devices()?);
2163 
2164         devices.append(&mut self.make_virtio_mem_devices()?);
2165 
2166         // Add virtio-balloon if required
2167         devices.append(&mut self.make_virtio_balloon_devices()?);
2168 
2169         // Add virtio-watchdog device
2170         devices.append(&mut self.make_virtio_watchdog_devices()?);
2171 
2172         // Add vDPA devices if required
2173         devices.append(&mut self.make_vdpa_devices()?);
2174 
2175         Ok(devices)
2176     }
2177 
2178     // Cache whether aio is supported to avoid checking for very block device
2179     fn aio_is_supported(&mut self) -> bool {
2180         if let Some(supported) = self.aio_supported {
2181             return supported;
2182         }
2183 
2184         let supported = block_aio_is_supported();
2185         self.aio_supported = Some(supported);
2186         supported
2187     }
2188 
2189     // Cache whether io_uring is supported to avoid probing for very block device
2190     fn io_uring_is_supported(&mut self) -> bool {
2191         if let Some(supported) = self.io_uring_supported {
2192             return supported;
2193         }
2194 
2195         let supported = block_io_uring_is_supported();
2196         self.io_uring_supported = Some(supported);
2197         supported
2198     }
2199 
2200     fn make_virtio_block_device(
2201         &mut self,
2202         disk_cfg: &mut DiskConfig,
2203     ) -> DeviceManagerResult<MetaVirtioDevice> {
2204         let id = if let Some(id) = &disk_cfg.id {
2205             id.clone()
2206         } else {
2207             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
2208             disk_cfg.id = Some(id.clone());
2209             id
2210         };
2211 
2212         info!("Creating virtio-block device: {:?}", disk_cfg);
2213 
2214         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
2215 
2216         let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
2217             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
2218             let vu_cfg = VhostUserConfig {
2219                 socket,
2220                 num_queues: disk_cfg.num_queues,
2221                 queue_size: disk_cfg.queue_size,
2222             };
2223             let vhost_user_block = Arc::new(Mutex::new(
2224                 match virtio_devices::vhost_user::Blk::new(
2225                     id.clone(),
2226                     vu_cfg,
2227                     self.seccomp_action.clone(),
2228                     self.exit_evt
2229                         .try_clone()
2230                         .map_err(DeviceManagerError::EventFd)?,
2231                     self.force_iommu,
2232                     snapshot
2233                         .map(|s| s.to_versioned_state())
2234                         .transpose()
2235                         .map_err(DeviceManagerError::RestoreGetState)?,
2236                 ) {
2237                     Ok(vub_device) => vub_device,
2238                     Err(e) => {
2239                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2240                     }
2241                 },
2242             ));
2243 
2244             (
2245                 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2246                 vhost_user_block as Arc<Mutex<dyn Migratable>>,
2247             )
2248         } else {
2249             let mut options = OpenOptions::new();
2250             options.read(true);
2251             options.write(!disk_cfg.readonly);
2252             if disk_cfg.direct {
2253                 options.custom_flags(libc::O_DIRECT);
2254             }
2255             // Open block device path
2256             let mut file: File = options
2257                 .open(
2258                     disk_cfg
2259                         .path
2260                         .as_ref()
2261                         .ok_or(DeviceManagerError::NoDiskPath)?
2262                         .clone(),
2263                 )
2264                 .map_err(DeviceManagerError::Disk)?;
2265             let image_type =
2266                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2267 
2268             let image = match image_type {
2269                 ImageType::FixedVhd => {
2270                     // Use asynchronous backend relying on io_uring if the
2271                     // syscalls are supported.
2272                     if cfg!(feature = "io_uring")
2273                         && !disk_cfg.disable_io_uring
2274                         && self.io_uring_is_supported()
2275                     {
2276                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2277 
2278                         #[cfg(not(feature = "io_uring"))]
2279                         unreachable!("Checked in if statement above");
2280                         #[cfg(feature = "io_uring")]
2281                         {
2282                             Box::new(
2283                                 FixedVhdDiskAsync::new(file)
2284                                     .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2285                             ) as Box<dyn DiskFile>
2286                         }
2287                     } else {
2288                         info!("Using synchronous fixed VHD disk file");
2289                         Box::new(
2290                             FixedVhdDiskSync::new(file)
2291                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2292                         ) as Box<dyn DiskFile>
2293                     }
2294                 }
2295                 ImageType::Raw => {
2296                     // Use asynchronous backend relying on io_uring if the
2297                     // syscalls are supported.
2298                     if cfg!(feature = "io_uring")
2299                         && !disk_cfg.disable_io_uring
2300                         && self.io_uring_is_supported()
2301                     {
2302                         info!("Using asynchronous RAW disk file (io_uring)");
2303 
2304                         #[cfg(not(feature = "io_uring"))]
2305                         unreachable!("Checked in if statement above");
2306                         #[cfg(feature = "io_uring")]
2307                         {
2308                             Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2309                         }
2310                     } else if !disk_cfg.disable_aio && self.aio_is_supported() {
2311                         info!("Using asynchronous RAW disk file (aio)");
2312                         Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
2313                     } else {
2314                         info!("Using synchronous RAW disk file");
2315                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2316                     }
2317                 }
2318                 ImageType::Qcow2 => {
2319                     info!("Using synchronous QCOW disk file");
2320                     Box::new(
2321                         QcowDiskSync::new(file, disk_cfg.direct)
2322                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2323                     ) as Box<dyn DiskFile>
2324                 }
2325                 ImageType::Vhdx => {
2326                     info!("Using synchronous VHDX disk file");
2327                     Box::new(
2328                         VhdxDiskSync::new(file)
2329                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2330                     ) as Box<dyn DiskFile>
2331                 }
2332             };
2333 
2334             let virtio_block = Arc::new(Mutex::new(
2335                 virtio_devices::Block::new(
2336                     id.clone(),
2337                     image,
2338                     disk_cfg
2339                         .path
2340                         .as_ref()
2341                         .ok_or(DeviceManagerError::NoDiskPath)?
2342                         .clone(),
2343                     disk_cfg.readonly,
2344                     self.force_iommu | disk_cfg.iommu,
2345                     disk_cfg.num_queues,
2346                     disk_cfg.queue_size,
2347                     disk_cfg.serial.clone(),
2348                     self.seccomp_action.clone(),
2349                     disk_cfg.rate_limiter_config,
2350                     self.exit_evt
2351                         .try_clone()
2352                         .map_err(DeviceManagerError::EventFd)?,
2353                     snapshot
2354                         .map(|s| s.to_versioned_state())
2355                         .transpose()
2356                         .map_err(DeviceManagerError::RestoreGetState)?,
2357                 )
2358                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2359             ));
2360 
2361             (
2362                 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2363                 virtio_block as Arc<Mutex<dyn Migratable>>,
2364             )
2365         };
2366 
2367         // Fill the device tree with a new node. In case of restore, we
2368         // know there is nothing to do, so we can simply override the
2369         // existing entry.
2370         self.device_tree
2371             .lock()
2372             .unwrap()
2373             .insert(id.clone(), device_node!(id, migratable_device));
2374 
2375         Ok(MetaVirtioDevice {
2376             virtio_device,
2377             iommu: disk_cfg.iommu,
2378             id,
2379             pci_segment: disk_cfg.pci_segment,
2380             dma_handler: None,
2381         })
2382     }
2383 
2384     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2385         let mut devices = Vec::new();
2386 
2387         let mut block_devices = self.config.lock().unwrap().disks.clone();
2388         if let Some(disk_list_cfg) = &mut block_devices {
2389             for disk_cfg in disk_list_cfg.iter_mut() {
2390                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2391             }
2392         }
2393         self.config.lock().unwrap().disks = block_devices;
2394 
2395         Ok(devices)
2396     }
2397 
2398     fn make_virtio_net_device(
2399         &mut self,
2400         net_cfg: &mut NetConfig,
2401     ) -> DeviceManagerResult<MetaVirtioDevice> {
2402         let id = if let Some(id) = &net_cfg.id {
2403             id.clone()
2404         } else {
2405             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2406             net_cfg.id = Some(id.clone());
2407             id
2408         };
2409         info!("Creating virtio-net device: {:?}", net_cfg);
2410 
2411         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
2412 
2413         let (virtio_device, migratable_device) = if net_cfg.vhost_user {
2414             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2415             let vu_cfg = VhostUserConfig {
2416                 socket,
2417                 num_queues: net_cfg.num_queues,
2418                 queue_size: net_cfg.queue_size,
2419             };
2420             let server = match net_cfg.vhost_mode {
2421                 VhostMode::Client => false,
2422                 VhostMode::Server => true,
2423             };
2424             let vhost_user_net = Arc::new(Mutex::new(
2425                 match virtio_devices::vhost_user::Net::new(
2426                     id.clone(),
2427                     net_cfg.mac,
2428                     net_cfg.mtu,
2429                     vu_cfg,
2430                     server,
2431                     self.seccomp_action.clone(),
2432                     self.exit_evt
2433                         .try_clone()
2434                         .map_err(DeviceManagerError::EventFd)?,
2435                     self.force_iommu,
2436                     snapshot
2437                         .map(|s| s.to_versioned_state())
2438                         .transpose()
2439                         .map_err(DeviceManagerError::RestoreGetState)?,
2440                     net_cfg.offload_tso,
2441                     net_cfg.offload_ufo,
2442                     net_cfg.offload_csum,
2443                 ) {
2444                     Ok(vun_device) => vun_device,
2445                     Err(e) => {
2446                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2447                     }
2448                 },
2449             ));
2450 
2451             (
2452                 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2453                 vhost_user_net as Arc<Mutex<dyn Migratable>>,
2454             )
2455         } else {
2456             let state = snapshot
2457                 .map(|s| s.to_versioned_state())
2458                 .transpose()
2459                 .map_err(DeviceManagerError::RestoreGetState)?;
2460 
2461             let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
2462                 Arc::new(Mutex::new(
2463                     virtio_devices::Net::new(
2464                         id.clone(),
2465                         Some(tap_if_name),
2466                         None,
2467                         None,
2468                         Some(net_cfg.mac),
2469                         &mut net_cfg.host_mac,
2470                         net_cfg.mtu,
2471                         self.force_iommu | net_cfg.iommu,
2472                         net_cfg.num_queues,
2473                         net_cfg.queue_size,
2474                         self.seccomp_action.clone(),
2475                         net_cfg.rate_limiter_config,
2476                         self.exit_evt
2477                             .try_clone()
2478                             .map_err(DeviceManagerError::EventFd)?,
2479                         state,
2480                         net_cfg.offload_tso,
2481                         net_cfg.offload_ufo,
2482                         net_cfg.offload_csum,
2483                     )
2484                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2485                 ))
2486             } else if let Some(fds) = &net_cfg.fds {
2487                 let net = virtio_devices::Net::from_tap_fds(
2488                     id.clone(),
2489                     fds,
2490                     Some(net_cfg.mac),
2491                     net_cfg.mtu,
2492                     self.force_iommu | net_cfg.iommu,
2493                     net_cfg.queue_size,
2494                     self.seccomp_action.clone(),
2495                     net_cfg.rate_limiter_config,
2496                     self.exit_evt
2497                         .try_clone()
2498                         .map_err(DeviceManagerError::EventFd)?,
2499                     state,
2500                     net_cfg.offload_tso,
2501                     net_cfg.offload_ufo,
2502                     net_cfg.offload_csum,
2503                 )
2504                 .map_err(DeviceManagerError::CreateVirtioNet)?;
2505 
2506                 // SAFETY: 'fds' are valid because TAP devices are created successfully
2507                 unsafe {
2508                     self.config.lock().unwrap().add_preserved_fds(fds.clone());
2509                 }
2510 
2511                 Arc::new(Mutex::new(net))
2512             } else {
2513                 Arc::new(Mutex::new(
2514                     virtio_devices::Net::new(
2515                         id.clone(),
2516                         None,
2517                         Some(net_cfg.ip),
2518                         Some(net_cfg.mask),
2519                         Some(net_cfg.mac),
2520                         &mut net_cfg.host_mac,
2521                         net_cfg.mtu,
2522                         self.force_iommu | net_cfg.iommu,
2523                         net_cfg.num_queues,
2524                         net_cfg.queue_size,
2525                         self.seccomp_action.clone(),
2526                         net_cfg.rate_limiter_config,
2527                         self.exit_evt
2528                             .try_clone()
2529                             .map_err(DeviceManagerError::EventFd)?,
2530                         state,
2531                         net_cfg.offload_tso,
2532                         net_cfg.offload_ufo,
2533                         net_cfg.offload_csum,
2534                     )
2535                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2536                 ))
2537             };
2538 
2539             (
2540                 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2541                 virtio_net as Arc<Mutex<dyn Migratable>>,
2542             )
2543         };
2544 
2545         // Fill the device tree with a new node. In case of restore, we
2546         // know there is nothing to do, so we can simply override the
2547         // existing entry.
2548         self.device_tree
2549             .lock()
2550             .unwrap()
2551             .insert(id.clone(), device_node!(id, migratable_device));
2552 
2553         Ok(MetaVirtioDevice {
2554             virtio_device,
2555             iommu: net_cfg.iommu,
2556             id,
2557             pci_segment: net_cfg.pci_segment,
2558             dma_handler: None,
2559         })
2560     }
2561 
2562     /// Add virto-net and vhost-user-net devices
2563     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2564         let mut devices = Vec::new();
2565         let mut net_devices = self.config.lock().unwrap().net.clone();
2566         if let Some(net_list_cfg) = &mut net_devices {
2567             for net_cfg in net_list_cfg.iter_mut() {
2568                 devices.push(self.make_virtio_net_device(net_cfg)?);
2569             }
2570         }
2571         self.config.lock().unwrap().net = net_devices;
2572 
2573         Ok(devices)
2574     }
2575 
2576     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2577         let mut devices = Vec::new();
2578 
2579         // Add virtio-rng if required
2580         let rng_config = self.config.lock().unwrap().rng.clone();
2581         if let Some(rng_path) = rng_config.src.to_str() {
2582             info!("Creating virtio-rng device: {:?}", rng_config);
2583             let id = String::from(RNG_DEVICE_NAME);
2584 
2585             let virtio_rng_device = Arc::new(Mutex::new(
2586                 virtio_devices::Rng::new(
2587                     id.clone(),
2588                     rng_path,
2589                     self.force_iommu | rng_config.iommu,
2590                     self.seccomp_action.clone(),
2591                     self.exit_evt
2592                         .try_clone()
2593                         .map_err(DeviceManagerError::EventFd)?,
2594                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2595                         .map_err(DeviceManagerError::RestoreGetState)?,
2596                 )
2597                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2598             ));
2599             devices.push(MetaVirtioDevice {
2600                 virtio_device: Arc::clone(&virtio_rng_device)
2601                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2602                 iommu: rng_config.iommu,
2603                 id: id.clone(),
2604                 pci_segment: 0,
2605                 dma_handler: None,
2606             });
2607 
2608             // Fill the device tree with a new node. In case of restore, we
2609             // know there is nothing to do, so we can simply override the
2610             // existing entry.
2611             self.device_tree
2612                 .lock()
2613                 .unwrap()
2614                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2615         }
2616 
2617         Ok(devices)
2618     }
2619 
2620     fn make_virtio_fs_device(
2621         &mut self,
2622         fs_cfg: &mut FsConfig,
2623     ) -> DeviceManagerResult<MetaVirtioDevice> {
2624         let id = if let Some(id) = &fs_cfg.id {
2625             id.clone()
2626         } else {
2627             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2628             fs_cfg.id = Some(id.clone());
2629             id
2630         };
2631 
2632         info!("Creating virtio-fs device: {:?}", fs_cfg);
2633 
2634         let mut node = device_node!(id);
2635 
2636         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2637             let virtio_fs_device = Arc::new(Mutex::new(
2638                 virtio_devices::vhost_user::Fs::new(
2639                     id.clone(),
2640                     fs_socket,
2641                     &fs_cfg.tag,
2642                     fs_cfg.num_queues,
2643                     fs_cfg.queue_size,
2644                     None,
2645                     self.seccomp_action.clone(),
2646                     self.exit_evt
2647                         .try_clone()
2648                         .map_err(DeviceManagerError::EventFd)?,
2649                     self.force_iommu,
2650                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2651                         .map_err(DeviceManagerError::RestoreGetState)?,
2652                 )
2653                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2654             ));
2655 
2656             // Update the device tree with the migratable device.
2657             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2658             self.device_tree.lock().unwrap().insert(id.clone(), node);
2659 
2660             Ok(MetaVirtioDevice {
2661                 virtio_device: Arc::clone(&virtio_fs_device)
2662                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2663                 iommu: false,
2664                 id,
2665                 pci_segment: fs_cfg.pci_segment,
2666                 dma_handler: None,
2667             })
2668         } else {
2669             Err(DeviceManagerError::NoVirtioFsSock)
2670         }
2671     }
2672 
2673     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2674         let mut devices = Vec::new();
2675 
2676         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2677         if let Some(fs_list_cfg) = &mut fs_devices {
2678             for fs_cfg in fs_list_cfg.iter_mut() {
2679                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2680             }
2681         }
2682         self.config.lock().unwrap().fs = fs_devices;
2683 
2684         Ok(devices)
2685     }
2686 
2687     fn make_virtio_pmem_device(
2688         &mut self,
2689         pmem_cfg: &mut PmemConfig,
2690     ) -> DeviceManagerResult<MetaVirtioDevice> {
2691         let id = if let Some(id) = &pmem_cfg.id {
2692             id.clone()
2693         } else {
2694             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2695             pmem_cfg.id = Some(id.clone());
2696             id
2697         };
2698 
2699         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2700 
2701         let mut node = device_node!(id);
2702 
2703         // Look for the id in the device tree. If it can be found, that means
2704         // the device is being restored, otherwise it's created from scratch.
2705         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2706             info!("Restoring virtio-pmem {} resources", id);
2707 
2708             let mut region_range: Option<(u64, u64)> = None;
2709             for resource in node.resources.iter() {
2710                 match resource {
2711                     Resource::MmioAddressRange { base, size } => {
2712                         if region_range.is_some() {
2713                             return Err(DeviceManagerError::ResourceAlreadyExists);
2714                         }
2715 
2716                         region_range = Some((*base, *size));
2717                     }
2718                     _ => {
2719                         error!("Unexpected resource {:?} for {}", resource, id);
2720                     }
2721                 }
2722             }
2723 
2724             if region_range.is_none() {
2725                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2726             }
2727 
2728             region_range
2729         } else {
2730             None
2731         };
2732 
2733         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2734             if pmem_cfg.size.is_none() {
2735                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2736             }
2737             (O_TMPFILE, true)
2738         } else {
2739             (0, false)
2740         };
2741 
2742         let mut file = OpenOptions::new()
2743             .read(true)
2744             .write(!pmem_cfg.discard_writes)
2745             .custom_flags(custom_flags)
2746             .open(&pmem_cfg.file)
2747             .map_err(DeviceManagerError::PmemFileOpen)?;
2748 
2749         let size = if let Some(size) = pmem_cfg.size {
2750             if set_len {
2751                 file.set_len(size)
2752                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2753             }
2754             size
2755         } else {
2756             file.seek(SeekFrom::End(0))
2757                 .map_err(DeviceManagerError::PmemFileSetLen)?
2758         };
2759 
2760         if size % 0x20_0000 != 0 {
2761             return Err(DeviceManagerError::PmemSizeNotAligned);
2762         }
2763 
2764         let (region_base, region_size) = if let Some((base, size)) = region_range {
2765             // The memory needs to be 2MiB aligned in order to support
2766             // hugepages.
2767             self.pci_segments[pmem_cfg.pci_segment as usize]
2768                 .allocator
2769                 .lock()
2770                 .unwrap()
2771                 .allocate(
2772                     Some(GuestAddress(base)),
2773                     size as GuestUsize,
2774                     Some(0x0020_0000),
2775                 )
2776                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2777 
2778             (base, size)
2779         } else {
2780             // The memory needs to be 2MiB aligned in order to support
2781             // hugepages.
2782             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2783                 .allocator
2784                 .lock()
2785                 .unwrap()
2786                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
2787                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2788 
2789             (base.raw_value(), size)
2790         };
2791 
2792         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2793         let mmap_region = MmapRegion::build(
2794             Some(FileOffset::new(cloned_file, 0)),
2795             region_size as usize,
2796             PROT_READ | PROT_WRITE,
2797             MAP_NORESERVE
2798                 | if pmem_cfg.discard_writes {
2799                     MAP_PRIVATE
2800                 } else {
2801                     MAP_SHARED
2802                 },
2803         )
2804         .map_err(DeviceManagerError::NewMmapRegion)?;
2805         let host_addr: u64 = mmap_region.as_ptr() as u64;
2806 
2807         let mem_slot = self
2808             .memory_manager
2809             .lock()
2810             .unwrap()
2811             .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
2812             .map_err(DeviceManagerError::MemoryManager)?;
2813 
2814         let mapping = virtio_devices::UserspaceMapping {
2815             host_addr,
2816             mem_slot,
2817             addr: GuestAddress(region_base),
2818             len: region_size,
2819             mergeable: false,
2820         };
2821 
2822         let virtio_pmem_device = Arc::new(Mutex::new(
2823             virtio_devices::Pmem::new(
2824                 id.clone(),
2825                 file,
2826                 GuestAddress(region_base),
2827                 mapping,
2828                 mmap_region,
2829                 self.force_iommu | pmem_cfg.iommu,
2830                 self.seccomp_action.clone(),
2831                 self.exit_evt
2832                     .try_clone()
2833                     .map_err(DeviceManagerError::EventFd)?,
2834                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2835                     .map_err(DeviceManagerError::RestoreGetState)?,
2836             )
2837             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2838         ));
2839 
2840         // Update the device tree with correct resource information and with
2841         // the migratable device.
2842         node.resources.push(Resource::MmioAddressRange {
2843             base: region_base,
2844             size: region_size,
2845         });
2846         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2847         self.device_tree.lock().unwrap().insert(id.clone(), node);
2848 
2849         Ok(MetaVirtioDevice {
2850             virtio_device: Arc::clone(&virtio_pmem_device)
2851                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2852             iommu: pmem_cfg.iommu,
2853             id,
2854             pci_segment: pmem_cfg.pci_segment,
2855             dma_handler: None,
2856         })
2857     }
2858 
2859     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2860         let mut devices = Vec::new();
2861         // Add virtio-pmem if required
2862         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2863         if let Some(pmem_list_cfg) = &mut pmem_devices {
2864             for pmem_cfg in pmem_list_cfg.iter_mut() {
2865                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2866             }
2867         }
2868         self.config.lock().unwrap().pmem = pmem_devices;
2869 
2870         Ok(devices)
2871     }
2872 
2873     fn make_virtio_vsock_device(
2874         &mut self,
2875         vsock_cfg: &mut VsockConfig,
2876     ) -> DeviceManagerResult<MetaVirtioDevice> {
2877         let id = if let Some(id) = &vsock_cfg.id {
2878             id.clone()
2879         } else {
2880             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2881             vsock_cfg.id = Some(id.clone());
2882             id
2883         };
2884 
2885         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2886 
2887         let socket_path = vsock_cfg
2888             .socket
2889             .to_str()
2890             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2891         let backend =
2892             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2893                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2894 
2895         let vsock_device = Arc::new(Mutex::new(
2896             virtio_devices::Vsock::new(
2897                 id.clone(),
2898                 vsock_cfg.cid,
2899                 vsock_cfg.socket.clone(),
2900                 backend,
2901                 self.force_iommu | vsock_cfg.iommu,
2902                 self.seccomp_action.clone(),
2903                 self.exit_evt
2904                     .try_clone()
2905                     .map_err(DeviceManagerError::EventFd)?,
2906                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2907                     .map_err(DeviceManagerError::RestoreGetState)?,
2908             )
2909             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2910         ));
2911 
2912         // Fill the device tree with a new node. In case of restore, we
2913         // know there is nothing to do, so we can simply override the
2914         // existing entry.
2915         self.device_tree
2916             .lock()
2917             .unwrap()
2918             .insert(id.clone(), device_node!(id, vsock_device));
2919 
2920         Ok(MetaVirtioDevice {
2921             virtio_device: Arc::clone(&vsock_device)
2922                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2923             iommu: vsock_cfg.iommu,
2924             id,
2925             pci_segment: vsock_cfg.pci_segment,
2926             dma_handler: None,
2927         })
2928     }
2929 
2930     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2931         let mut devices = Vec::new();
2932 
2933         let mut vsock = self.config.lock().unwrap().vsock.clone();
2934         if let Some(ref mut vsock_cfg) = &mut vsock {
2935             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
2936         }
2937         self.config.lock().unwrap().vsock = vsock;
2938 
2939         Ok(devices)
2940     }
2941 
2942     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2943         let mut devices = Vec::new();
2944 
2945         let mm = self.memory_manager.clone();
2946         let mut mm = mm.lock().unwrap();
2947         for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
2948             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
2949                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
2950 
2951                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
2952                     .map(|i| i as u16);
2953 
2954                 let virtio_mem_device = Arc::new(Mutex::new(
2955                     virtio_devices::Mem::new(
2956                         memory_zone_id.clone(),
2957                         virtio_mem_zone.region(),
2958                         self.seccomp_action.clone(),
2959                         node_id,
2960                         virtio_mem_zone.hotplugged_size(),
2961                         virtio_mem_zone.hugepages(),
2962                         self.exit_evt
2963                             .try_clone()
2964                             .map_err(DeviceManagerError::EventFd)?,
2965                         virtio_mem_zone.blocks_state().clone(),
2966                         versioned_state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
2967                             .map_err(DeviceManagerError::RestoreGetState)?,
2968                     )
2969                     .map_err(DeviceManagerError::CreateVirtioMem)?,
2970                 ));
2971 
2972                 // Update the virtio-mem zone so that it has a handle onto the
2973                 // virtio-mem device, which will be used for triggering a resize
2974                 // if needed.
2975                 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));
2976 
2977                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
2978 
2979                 devices.push(MetaVirtioDevice {
2980                     virtio_device: Arc::clone(&virtio_mem_device)
2981                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2982                     iommu: false,
2983                     id: memory_zone_id.clone(),
2984                     pci_segment: 0,
2985                     dma_handler: None,
2986                 });
2987 
2988                 // Fill the device tree with a new node. In case of restore, we
2989                 // know there is nothing to do, so we can simply override the
2990                 // existing entry.
2991                 self.device_tree.lock().unwrap().insert(
2992                     memory_zone_id.clone(),
2993                     device_node!(memory_zone_id, virtio_mem_device),
2994                 );
2995             }
2996         }
2997 
2998         Ok(devices)
2999     }
3000 
3001     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3002         let mut devices = Vec::new();
3003 
3004         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
3005             let id = String::from(BALLOON_DEVICE_NAME);
3006             info!("Creating virtio-balloon device: id = {}", id);
3007 
3008             let virtio_balloon_device = Arc::new(Mutex::new(
3009                 virtio_devices::Balloon::new(
3010                     id.clone(),
3011                     balloon_config.size,
3012                     balloon_config.deflate_on_oom,
3013                     balloon_config.free_page_reporting,
3014                     self.seccomp_action.clone(),
3015                     self.exit_evt
3016                         .try_clone()
3017                         .map_err(DeviceManagerError::EventFd)?,
3018                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
3019                         .map_err(DeviceManagerError::RestoreGetState)?,
3020                 )
3021                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
3022             ));
3023 
3024             self.balloon = Some(virtio_balloon_device.clone());
3025 
3026             devices.push(MetaVirtioDevice {
3027                 virtio_device: Arc::clone(&virtio_balloon_device)
3028                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3029                 iommu: false,
3030                 id: id.clone(),
3031                 pci_segment: 0,
3032                 dma_handler: None,
3033             });
3034 
3035             self.device_tree
3036                 .lock()
3037                 .unwrap()
3038                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
3039         }
3040 
3041         Ok(devices)
3042     }
3043 
3044     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3045         let mut devices = Vec::new();
3046 
3047         if !self.config.lock().unwrap().watchdog {
3048             return Ok(devices);
3049         }
3050 
3051         let id = String::from(WATCHDOG_DEVICE_NAME);
3052         info!("Creating virtio-watchdog device: id = {}", id);
3053 
3054         let virtio_watchdog_device = Arc::new(Mutex::new(
3055             virtio_devices::Watchdog::new(
3056                 id.clone(),
3057                 self.reset_evt.try_clone().unwrap(),
3058                 self.seccomp_action.clone(),
3059                 self.exit_evt
3060                     .try_clone()
3061                     .map_err(DeviceManagerError::EventFd)?,
3062                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
3063                     .map_err(DeviceManagerError::RestoreGetState)?,
3064             )
3065             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
3066         ));
3067         devices.push(MetaVirtioDevice {
3068             virtio_device: Arc::clone(&virtio_watchdog_device)
3069                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3070             iommu: false,
3071             id: id.clone(),
3072             pci_segment: 0,
3073             dma_handler: None,
3074         });
3075 
3076         self.device_tree
3077             .lock()
3078             .unwrap()
3079             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
3080 
3081         Ok(devices)
3082     }
3083 
3084     fn make_vdpa_device(
3085         &mut self,
3086         vdpa_cfg: &mut VdpaConfig,
3087     ) -> DeviceManagerResult<MetaVirtioDevice> {
3088         let id = if let Some(id) = &vdpa_cfg.id {
3089             id.clone()
3090         } else {
3091             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
3092             vdpa_cfg.id = Some(id.clone());
3093             id
3094         };
3095 
3096         info!("Creating vDPA device: {:?}", vdpa_cfg);
3097 
3098         let device_path = vdpa_cfg
3099             .path
3100             .to_str()
3101             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
3102 
3103         let vdpa_device = Arc::new(Mutex::new(
3104             virtio_devices::Vdpa::new(
3105                 id.clone(),
3106                 device_path,
3107                 self.memory_manager.lock().unwrap().guest_memory(),
3108                 vdpa_cfg.num_queues as u16,
3109                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
3110                     .map_err(DeviceManagerError::RestoreGetState)?,
3111             )
3112             .map_err(DeviceManagerError::CreateVdpa)?,
3113         ));
3114 
3115         // Create the DMA handler that is required by the vDPA device
3116         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
3117             Arc::clone(&vdpa_device),
3118             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3119         ));
3120 
3121         self.device_tree
3122             .lock()
3123             .unwrap()
3124             .insert(id.clone(), device_node!(id, vdpa_device));
3125 
3126         Ok(MetaVirtioDevice {
3127             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3128             iommu: vdpa_cfg.iommu,
3129             id,
3130             pci_segment: vdpa_cfg.pci_segment,
3131             dma_handler: Some(vdpa_mapping),
3132         })
3133     }
3134 
3135     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3136         let mut devices = Vec::new();
3137         // Add vdpa if required
3138         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
3139         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
3140             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
3141                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
3142             }
3143         }
3144         self.config.lock().unwrap().vdpa = vdpa_devices;
3145 
3146         Ok(devices)
3147     }
3148 
3149     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
3150         let start_id = self.device_id_cnt;
3151         loop {
3152             // Generate the temporary name.
3153             let name = format!("{}{}", prefix, self.device_id_cnt);
3154             // Increment the counter.
3155             self.device_id_cnt += Wrapping(1);
3156             // Check if the name is already in use.
3157             if !self.boot_id_list.contains(&name)
3158                 && !self.device_tree.lock().unwrap().contains_key(&name)
3159             {
3160                 return Ok(name);
3161             }
3162 
3163             if self.device_id_cnt == start_id {
3164                 // We went through a full loop and there's nothing else we can
3165                 // do.
3166                 break;
3167             }
3168         }
3169         Err(DeviceManagerError::NoAvailableDeviceName)
3170     }
3171 
3172     fn add_passthrough_device(
3173         &mut self,
3174         device_cfg: &mut DeviceConfig,
3175     ) -> DeviceManagerResult<(PciBdf, String)> {
3176         // If the passthrough device has not been created yet, it is created
3177         // here and stored in the DeviceManager structure for future needs.
3178         if self.passthrough_device.is_none() {
3179             self.passthrough_device = Some(
3180                 self.address_manager
3181                     .vm
3182                     .create_passthrough_device()
3183                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
3184             );
3185         }
3186 
3187         self.add_vfio_device(device_cfg)
3188     }
3189 
3190     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
3191         let passthrough_device = self
3192             .passthrough_device
3193             .as_ref()
3194             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
3195 
3196         let dup = passthrough_device
3197             .try_clone()
3198             .map_err(DeviceManagerError::VfioCreate)?;
3199 
3200         Ok(Arc::new(
3201             VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
3202         ))
3203     }
3204 
3205     fn add_vfio_device(
3206         &mut self,
3207         device_cfg: &mut DeviceConfig,
3208     ) -> DeviceManagerResult<(PciBdf, String)> {
3209         let vfio_name = if let Some(id) = &device_cfg.id {
3210             id.clone()
3211         } else {
3212             let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
3213             device_cfg.id = Some(id.clone());
3214             id
3215         };
3216 
3217         let (pci_segment_id, pci_device_bdf, resources) =
3218             self.pci_resources(&vfio_name, device_cfg.pci_segment)?;
3219 
3220         let mut needs_dma_mapping = false;
3221 
3222         // Here we create a new VFIO container for two reasons. Either this is
3223         // the first VFIO device, meaning we need a new VFIO container, which
3224         // will be shared with other VFIO devices. Or the new VFIO device is
3225         // attached to a vIOMMU, meaning we must create a dedicated VFIO
3226         // container. In the vIOMMU use case, we can't let all devices under
3227         // the same VFIO container since we couldn't map/unmap memory for each
3228         // device. That's simply because the map/unmap operations happen at the
3229         // VFIO container level.
3230         let vfio_container = if device_cfg.iommu {
3231             let vfio_container = self.create_vfio_container()?;
3232 
3233             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3234                 Arc::clone(&vfio_container),
3235                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3236             ));
3237 
3238             if let Some(iommu) = &self.iommu_device {
3239                 iommu
3240                     .lock()
3241                     .unwrap()
3242                     .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
3243             } else {
3244                 return Err(DeviceManagerError::MissingVirtualIommu);
3245             }
3246 
3247             vfio_container
3248         } else if let Some(vfio_container) = &self.vfio_container {
3249             Arc::clone(vfio_container)
3250         } else {
3251             let vfio_container = self.create_vfio_container()?;
3252             needs_dma_mapping = true;
3253             self.vfio_container = Some(Arc::clone(&vfio_container));
3254 
3255             vfio_container
3256         };
3257 
3258         let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
3259             .map_err(DeviceManagerError::VfioCreate)?;
3260 
3261         if needs_dma_mapping {
3262             // Register DMA mapping in IOMMU.
3263             // Do not register virtio-mem regions, as they are handled directly by
3264             // virtio-mem device itself.
3265             for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3266                 for region in zone.regions() {
3267                     vfio_container
3268                         .vfio_dma_map(
3269                             region.start_addr().raw_value(),
3270                             region.len(),
3271                             region.as_ptr() as u64,
3272                         )
3273                         .map_err(DeviceManagerError::VfioDmaMap)?;
3274                 }
3275             }
3276 
3277             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3278                 Arc::clone(&vfio_container),
3279                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3280             ));
3281 
3282             for virtio_mem_device in self.virtio_mem_devices.iter() {
3283                 virtio_mem_device
3284                     .lock()
3285                     .unwrap()
3286                     .add_dma_mapping_handler(
3287                         VirtioMemMappingSource::Container,
3288                         vfio_mapping.clone(),
3289                     )
3290                     .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3291             }
3292         }
3293 
3294         let legacy_interrupt_group =
3295             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3296                 Some(
3297                     legacy_interrupt_manager
3298                         .create_group(LegacyIrqGroupConfig {
3299                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3300                                 [pci_device_bdf.device() as usize]
3301                                 as InterruptIndex,
3302                         })
3303                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3304                 )
3305             } else {
3306                 None
3307             };
3308 
3309         let memory_manager = self.memory_manager.clone();
3310 
3311         let vfio_pci_device = VfioPciDevice::new(
3312             vfio_name.clone(),
3313             &self.address_manager.vm,
3314             vfio_device,
3315             vfio_container,
3316             self.msi_interrupt_manager.clone(),
3317             legacy_interrupt_group,
3318             device_cfg.iommu,
3319             pci_device_bdf,
3320             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3321             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
3322         )
3323         .map_err(DeviceManagerError::VfioPciCreate)?;
3324 
3325         let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));
3326 
3327         let new_resources = self.add_pci_device(
3328             vfio_pci_device.clone(),
3329             vfio_pci_device.clone(),
3330             pci_segment_id,
3331             pci_device_bdf,
3332             resources,
3333         )?;
3334 
3335         vfio_pci_device
3336             .lock()
3337             .unwrap()
3338             .map_mmio_regions()
3339             .map_err(DeviceManagerError::VfioMapRegion)?;
3340 
3341         let mut node = device_node!(vfio_name, vfio_pci_device);
3342 
3343         // Update the device tree with correct resource information.
3344         node.resources = new_resources;
3345         node.pci_bdf = Some(pci_device_bdf);
3346         node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));
3347 
3348         self.device_tree
3349             .lock()
3350             .unwrap()
3351             .insert(vfio_name.clone(), node);
3352 
3353         Ok((pci_device_bdf, vfio_name))
3354     }
3355 
3356     fn add_pci_device(
3357         &mut self,
3358         bus_device: Arc<Mutex<dyn BusDevice>>,
3359         pci_device: Arc<Mutex<dyn PciDevice>>,
3360         segment_id: u16,
3361         bdf: PciBdf,
3362         resources: Option<Vec<Resource>>,
3363     ) -> DeviceManagerResult<Vec<Resource>> {
3364         let bars = pci_device
3365             .lock()
3366             .unwrap()
3367             .allocate_bars(
3368                 &self.address_manager.allocator,
3369                 &mut self.pci_segments[segment_id as usize]
3370                     .allocator
3371                     .lock()
3372                     .unwrap(),
3373                 resources,
3374             )
3375             .map_err(DeviceManagerError::AllocateBars)?;
3376 
3377         let mut pci_bus = self.pci_segments[segment_id as usize]
3378             .pci_bus
3379             .lock()
3380             .unwrap();
3381 
3382         pci_bus
3383             .add_device(bdf.device() as u32, pci_device)
3384             .map_err(DeviceManagerError::AddPciDevice)?;
3385 
3386         self.bus_devices.push(Arc::clone(&bus_device));
3387 
3388         pci_bus
3389             .register_mapping(
3390                 bus_device,
3391                 #[cfg(target_arch = "x86_64")]
3392                 self.address_manager.io_bus.as_ref(),
3393                 self.address_manager.mmio_bus.as_ref(),
3394                 bars.clone(),
3395             )
3396             .map_err(DeviceManagerError::AddPciDevice)?;
3397 
3398         let mut new_resources = Vec::new();
3399         for bar in bars {
3400             new_resources.push(Resource::PciBar {
3401                 index: bar.idx(),
3402                 base: bar.addr(),
3403                 size: bar.size(),
3404                 type_: bar.region_type().into(),
3405                 prefetchable: bar.prefetchable().into(),
3406             });
3407         }
3408 
3409         Ok(new_resources)
3410     }
3411 
3412     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3413         let mut iommu_attached_device_ids = Vec::new();
3414         let mut devices = self.config.lock().unwrap().devices.clone();
3415 
3416         if let Some(device_list_cfg) = &mut devices {
3417             for device_cfg in device_list_cfg.iter_mut() {
3418                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3419                 if device_cfg.iommu && self.iommu_device.is_some() {
3420                     iommu_attached_device_ids.push(device_id);
3421                 }
3422             }
3423         }
3424 
3425         // Update the list of devices
3426         self.config.lock().unwrap().devices = devices;
3427 
3428         Ok(iommu_attached_device_ids)
3429     }
3430 
3431     fn add_vfio_user_device(
3432         &mut self,
3433         device_cfg: &mut UserDeviceConfig,
3434     ) -> DeviceManagerResult<(PciBdf, String)> {
3435         let vfio_user_name = if let Some(id) = &device_cfg.id {
3436             id.clone()
3437         } else {
3438             let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
3439             device_cfg.id = Some(id.clone());
3440             id
3441         };
3442 
3443         let (pci_segment_id, pci_device_bdf, resources) =
3444             self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;
3445 
3446         let legacy_interrupt_group =
3447             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3448                 Some(
3449                     legacy_interrupt_manager
3450                         .create_group(LegacyIrqGroupConfig {
3451                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3452                                 [pci_device_bdf.device() as usize]
3453                                 as InterruptIndex,
3454                         })
3455                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3456                 )
3457             } else {
3458                 None
3459             };
3460 
3461         let client = Arc::new(Mutex::new(
3462             vfio_user::Client::new(&device_cfg.socket)
3463                 .map_err(DeviceManagerError::VfioUserCreateClient)?,
3464         ));
3465 
3466         let memory_manager = self.memory_manager.clone();
3467 
3468         let mut vfio_user_pci_device = VfioUserPciDevice::new(
3469             vfio_user_name.clone(),
3470             &self.address_manager.vm,
3471             client.clone(),
3472             self.msi_interrupt_manager.clone(),
3473             legacy_interrupt_group,
3474             pci_device_bdf,
3475             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3476             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
3477         )
3478         .map_err(DeviceManagerError::VfioUserCreate)?;
3479 
3480         let memory = self.memory_manager.lock().unwrap().guest_memory();
3481         let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
3482         for virtio_mem_device in self.virtio_mem_devices.iter() {
3483             virtio_mem_device
3484                 .lock()
3485                 .unwrap()
3486                 .add_dma_mapping_handler(
3487                     VirtioMemMappingSource::Device(pci_device_bdf.into()),
3488                     vfio_user_mapping.clone(),
3489                 )
3490                 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3491         }
3492 
3493         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3494             for region in zone.regions() {
3495                 vfio_user_pci_device
3496                     .dma_map(region)
3497                     .map_err(DeviceManagerError::VfioUserDmaMap)?;
3498             }
3499         }
3500 
3501         let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));
3502 
3503         let new_resources = self.add_pci_device(
3504             vfio_user_pci_device.clone(),
3505             vfio_user_pci_device.clone(),
3506             pci_segment_id,
3507             pci_device_bdf,
3508             resources,
3509         )?;
3510 
3511         // Note it is required to call 'add_pci_device()' in advance to have the list of
3512         // mmio regions provisioned correctly
3513         vfio_user_pci_device
3514             .lock()
3515             .unwrap()
3516             .map_mmio_regions()
3517             .map_err(DeviceManagerError::VfioUserMapRegion)?;
3518 
3519         let mut node = device_node!(vfio_user_name, vfio_user_pci_device);
3520 
3521         // Update the device tree with correct resource information.
3522         node.resources = new_resources;
3523         node.pci_bdf = Some(pci_device_bdf);
3524         node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));
3525 
3526         self.device_tree
3527             .lock()
3528             .unwrap()
3529             .insert(vfio_user_name.clone(), node);
3530 
3531         Ok((pci_device_bdf, vfio_user_name))
3532     }
3533 
3534     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3535         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3536 
3537         if let Some(device_list_cfg) = &mut user_devices {
3538             for device_cfg in device_list_cfg.iter_mut() {
3539                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3540             }
3541         }
3542 
3543         // Update the list of devices
3544         self.config.lock().unwrap().user_devices = user_devices;
3545 
3546         Ok(vec![])
3547     }
3548 
3549     fn add_virtio_pci_device(
3550         &mut self,
3551         virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3552         iommu_mapping: &Option<Arc<IommuMapping>>,
3553         virtio_device_id: String,
3554         pci_segment_id: u16,
3555         dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
3556     ) -> DeviceManagerResult<PciBdf> {
3557         let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");
3558 
3559         // Add the new virtio-pci node to the device tree.
3560         let mut node = device_node!(id);
3561         node.children = vec![virtio_device_id.clone()];
3562 
3563         let (pci_segment_id, pci_device_bdf, resources) =
3564             self.pci_resources(&id, pci_segment_id)?;
3565 
3566         // Update the existing virtio node by setting the parent.
3567         if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
3568             node.parent = Some(id.clone());
3569         } else {
3570             return Err(DeviceManagerError::MissingNode);
3571         }
3572 
3573         // Allows support for one MSI-X vector per queue. It also adds 1
3574         // as we need to take into account the dedicated vector to notify
3575         // about a virtio config change.
3576         let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;
3577 
3578         // Create the AccessPlatform trait from the implementation IommuMapping.
3579         // This will provide address translation for any virtio device sitting
3580         // behind a vIOMMU.
3581         let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
3582         {
3583             Some(Arc::new(AccessPlatformMapping::new(
3584                 pci_device_bdf.into(),
3585                 mapping.clone(),
3586             )))
3587         } else {
3588             None
3589         };
3590 
3591         let memory = self.memory_manager.lock().unwrap().guest_memory();
3592 
3593         // Map DMA ranges if a DMA handler is available and if the device is
3594         // not attached to a virtual IOMMU.
3595         if let Some(dma_handler) = &dma_handler {
3596             if iommu_mapping.is_some() {
3597                 if let Some(iommu) = &self.iommu_device {
3598                     iommu
3599                         .lock()
3600                         .unwrap()
3601                         .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
3602                 } else {
3603                     return Err(DeviceManagerError::MissingVirtualIommu);
3604                 }
3605             } else {
3606                 // Let every virtio-mem device handle the DMA map/unmap through the
3607                 // DMA handler provided.
3608                 for virtio_mem_device in self.virtio_mem_devices.iter() {
3609                     virtio_mem_device
3610                         .lock()
3611                         .unwrap()
3612                         .add_dma_mapping_handler(
3613                             VirtioMemMappingSource::Device(pci_device_bdf.into()),
3614                             dma_handler.clone(),
3615                         )
3616                         .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3617                 }
3618 
3619                 // Do not register virtio-mem regions, as they are handled directly by
3620                 // virtio-mem devices.
3621                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3622                     for region in zone.regions() {
3623                         let gpa = region.start_addr().0;
3624                         let size = region.len();
3625                         dma_handler
3626                             .map(gpa, gpa, size)
3627                             .map_err(DeviceManagerError::VirtioDmaMap)?;
3628                     }
3629                 }
3630             }
3631         }
3632 
3633         let device_type = virtio_device.lock().unwrap().device_type();
3634         let virtio_pci_device = Arc::new(Mutex::new(
3635             VirtioPciDevice::new(
3636                 id.clone(),
3637                 memory,
3638                 virtio_device,
3639                 msix_num,
3640                 access_platform,
3641                 &self.msi_interrupt_manager,
3642                 pci_device_bdf.into(),
3643                 self.activate_evt
3644                     .try_clone()
3645                     .map_err(DeviceManagerError::EventFd)?,
3646                 // All device types *except* virtio block devices should be allocated a 64-bit bar
3647                 // The block devices should be given a 32-bit BAR so that they are easily accessible
3648                 // to firmware without requiring excessive identity mapping.
3649                 // The exception being if not on the default PCI segment.
3650                 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
3651                 dma_handler,
3652                 self.pending_activations.clone(),
3653                 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
3654             )
3655             .map_err(DeviceManagerError::VirtioDevice)?,
3656         ));
3657 
3658         let new_resources = self.add_pci_device(
3659             virtio_pci_device.clone(),
3660             virtio_pci_device.clone(),
3661             pci_segment_id,
3662             pci_device_bdf,
3663             resources,
3664         )?;
3665 
3666         let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3667         for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3668             let io_addr = IoEventAddress::Mmio(addr);
3669             self.address_manager
3670                 .vm
3671                 .register_ioevent(event, &io_addr, None)
3672                 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
3673         }
3674 
3675         // Update the device tree with correct resource information.
3676         node.resources = new_resources;
3677         node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
3678         node.pci_bdf = Some(pci_device_bdf);
3679         node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
3680         self.device_tree.lock().unwrap().insert(id, node);
3681 
3682         Ok(pci_device_bdf)
3683     }
3684 
3685     fn add_pvpanic_device(
3686         &mut self,
3687     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
3688         let id = String::from(PVPANIC_DEVICE_NAME);
3689         let pci_segment_id = 0x0_u16;
3690 
3691         info!("Creating pvpanic device {}", id);
3692 
3693         let (pci_segment_id, pci_device_bdf, resources) =
3694             self.pci_resources(&id, pci_segment_id)?;
3695 
3696         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
3697 
3698         let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
3699             .map_err(DeviceManagerError::PvPanicCreate)?;
3700 
3701         let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));
3702 
3703         let new_resources = self.add_pci_device(
3704             pvpanic_device.clone(),
3705             pvpanic_device.clone(),
3706             pci_segment_id,
3707             pci_device_bdf,
3708             resources,
3709         )?;
3710 
3711         let mut node = device_node!(id, pvpanic_device);
3712 
3713         node.resources = new_resources;
3714         node.pci_bdf = Some(pci_device_bdf);
3715         node.pci_device_handle = None;
3716 
3717         self.device_tree.lock().unwrap().insert(id, node);
3718 
3719         Ok(Some(pvpanic_device))
3720     }
3721 
3722     fn pci_resources(
3723         &self,
3724         id: &str,
3725         pci_segment_id: u16,
3726     ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
3727         // Look for the id in the device tree. If it can be found, that means
3728         // the device is being restored, otherwise it's created from scratch.
3729         Ok(
3730             if let Some(node) = self.device_tree.lock().unwrap().get(id) {
3731                 info!("Restoring virtio-pci {} resources", id);
3732                 let pci_device_bdf: PciBdf = node
3733                     .pci_bdf
3734                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3735                 let pci_segment_id = pci_device_bdf.segment();
3736 
3737                 self.pci_segments[pci_segment_id as usize]
3738                     .pci_bus
3739                     .lock()
3740                     .unwrap()
3741                     .get_device_id(pci_device_bdf.device() as usize)
3742                     .map_err(DeviceManagerError::GetPciDeviceId)?;
3743 
3744                 (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
3745             } else {
3746                 let pci_device_bdf =
3747                     self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3748 
3749                 (pci_segment_id, pci_device_bdf, None)
3750             },
3751         )
3752     }
3753 
    /// Returns the I/O bus held by the address manager.
    ///
    /// Only available when building for x86_64.
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }
3758 
    /// Returns the MMIO bus held by the address manager.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }
3762 
    /// Returns the system allocator held by the address manager.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }
3766 
3767     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3768         self.interrupt_controller
3769             .as_ref()
3770             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3771     }
3772 
    /// Returns the list of PCI segments managed by this device manager.
    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }
3776 
    /// Returns the console held by this device manager.
    pub fn console(&self) -> &Arc<Console> {
        &self.console
    }
3780 
3781     #[cfg(target_arch = "aarch64")]
3782     pub fn cmdline_additions(&self) -> &[String] {
3783         self.cmdline_additions.as_slice()
3784     }
3785 
3786     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3787         for handle in self.virtio_devices.iter() {
3788             handle
3789                 .virtio_device
3790                 .lock()
3791                 .unwrap()
3792                 .add_memory_region(new_region)
3793                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3794 
3795             if let Some(dma_handler) = &handle.dma_handler {
3796                 if !handle.iommu {
3797                     let gpa = new_region.start_addr().0;
3798                     let size = new_region.len();
3799                     dma_handler
3800                         .map(gpa, gpa, size)
3801                         .map_err(DeviceManagerError::VirtioDmaMap)?;
3802                 }
3803             }
3804         }
3805 
3806         // Take care of updating the memory for VFIO PCI devices.
3807         if let Some(vfio_container) = &self.vfio_container {
3808             vfio_container
3809                 .vfio_dma_map(
3810                     new_region.start_addr().raw_value(),
3811                     new_region.len(),
3812                     new_region.as_ptr() as u64,
3813                 )
3814                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3815         }
3816 
3817         // Take care of updating the memory for vfio-user devices.
3818         {
3819             let device_tree = self.device_tree.lock().unwrap();
3820             for pci_device_node in device_tree.pci_devices() {
3821                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
3822                     .pci_device_handle
3823                     .as_ref()
3824                     .ok_or(DeviceManagerError::MissingPciDevice)?
3825                 {
3826                     vfio_user_pci_device
3827                         .lock()
3828                         .unwrap()
3829                         .dma_map(new_region)
3830                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
3831                 }
3832             }
3833         }
3834 
3835         Ok(())
3836     }
3837 
3838     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3839         for mut activator in self.pending_activations.lock().unwrap().drain(..) {
3840             activator
3841                 .activate()
3842                 .map_err(DeviceManagerError::VirtioActivate)?;
3843         }
3844         Ok(())
3845     }
3846 
3847     pub fn notify_hotplug(
3848         &self,
3849         _notification_type: AcpiNotificationFlags,
3850     ) -> DeviceManagerResult<()> {
3851         return self
3852             .ged_notification_device
3853             .as_ref()
3854             .unwrap()
3855             .lock()
3856             .unwrap()
3857             .notify(_notification_type)
3858             .map_err(DeviceManagerError::HotPlugNotification);
3859     }
3860 
3861     pub fn add_device(
3862         &mut self,
3863         device_cfg: &mut DeviceConfig,
3864     ) -> DeviceManagerResult<PciDeviceInfo> {
3865         self.validate_identifier(&device_cfg.id)?;
3866 
3867         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
3868             return Err(DeviceManagerError::InvalidIommuHotplug);
3869         }
3870 
3871         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
3872 
3873         // Update the PCIU bitmap
3874         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3875 
3876         Ok(PciDeviceInfo {
3877             id: device_name,
3878             bdf,
3879         })
3880     }
3881 
3882     pub fn add_user_device(
3883         &mut self,
3884         device_cfg: &mut UserDeviceConfig,
3885     ) -> DeviceManagerResult<PciDeviceInfo> {
3886         self.validate_identifier(&device_cfg.id)?;
3887 
3888         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
3889 
3890         // Update the PCIU bitmap
3891         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3892 
3893         Ok(PciDeviceInfo {
3894             id: device_name,
3895             bdf,
3896         })
3897     }
3898 
3899     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
3900         // The node can be directly a PCI node in case the 'id' refers to a
3901         // VFIO device or a virtio-pci one.
3902         // In case the 'id' refers to a virtio device, we must find the PCI
3903         // node by looking at the parent.
3904         let device_tree = self.device_tree.lock().unwrap();
3905         let node = device_tree
3906             .get(&id)
3907             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
3908 
3909         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
3910             node
3911         } else {
3912             let parent = node
3913                 .parent
3914                 .as_ref()
3915                 .ok_or(DeviceManagerError::MissingNode)?;
3916             device_tree
3917                 .get(parent)
3918                 .ok_or(DeviceManagerError::MissingNode)?
3919         };
3920 
3921         let pci_device_bdf: PciBdf = pci_device_node
3922             .pci_bdf
3923             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3924         let pci_segment_id = pci_device_bdf.segment();
3925 
3926         let pci_device_handle = pci_device_node
3927             .pci_device_handle
3928             .as_ref()
3929             .ok_or(DeviceManagerError::MissingPciDevice)?;
3930         #[allow(irrefutable_let_patterns)]
3931         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
3932             let device_type = VirtioDeviceType::from(
3933                 virtio_pci_device
3934                     .lock()
3935                     .unwrap()
3936                     .virtio_device()
3937                     .lock()
3938                     .unwrap()
3939                     .device_type(),
3940             );
3941             match device_type {
3942                 VirtioDeviceType::Net
3943                 | VirtioDeviceType::Block
3944                 | VirtioDeviceType::Pmem
3945                 | VirtioDeviceType::Fs
3946                 | VirtioDeviceType::Vsock => {}
3947                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
3948             }
3949         }
3950 
3951         // Update the PCID bitmap
3952         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
3953 
3954         Ok(())
3955     }
3956 
    /// Tears down the PCI device sitting in slot `device_id` of the PCI
    /// segment `pci_segment_id`.
    ///
    /// The steps run in this order:
    /// 1. the device ID is handed back to the PCI bus of the segment;
    /// 2. the PCI node and its children are removed from the device tree;
    /// 3. per device kind: ioeventfds are unregistered and DMA mappings are
    ///    removed (virtio), or guest memory regions are DMA-unmapped
    ///    (vfio-user);
    /// 4. when required, the DMA mapping handler is removed from every
    ///    virtio-mem device;
    /// 5. the BARs are freed and the device is removed from the PCI bus, the
    ///    IO bus (x86_64 only), the MMIO bus and the `bus_devices` list;
    /// 6. for virtio devices, userspace mappings are removed, the device is
    ///    shut down and dropped from the `virtio_devices` list;
    /// 7. a "device-removed" event is emitted.
    ///
    /// # Errors
    ///
    /// Returns at the first failing step. Steps that already completed are
    /// not rolled back.
    ///
    /// # Panics
    ///
    /// Panics if a lock is poisoned, if `pci_segment_id` is not a valid index
    /// into `pci_segments`, or if a virtio-pci node has more than one child.
    pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
        info!(
            "Ejecting device_id = {} on segment_id={}",
            device_id, pci_segment_id
        );

        // Convert the device ID into the corresponding b/d/f.
        let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);

        // Give the PCI device ID back to the PCI bus.
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .put_device_id(device_id as usize)
            .map_err(DeviceManagerError::PutPciDeviceId)?;

        // Remove the device from the device tree along with its children.
        let mut device_tree = self.device_tree.lock().unwrap();
        let pci_device_node = device_tree
            .remove_node_by_pci_bdf(pci_device_bdf)
            .ok_or(DeviceManagerError::MissingPciDevice)?;

        // For VFIO and vfio-user the PCI device id is the id.
        // For virtio we overwrite it later as we want the id of the
        // underlying device.
        let mut id = pci_device_node.id;
        let pci_device_handle = pci_device_node
            .pci_device_handle
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
            // The virtio-pci device has a single child
            if !pci_device_node.children.is_empty() {
                assert_eq!(pci_device_node.children.len(), 1);
                let child_id = &pci_device_node.children[0];
                id = child_id.clone();
            }
        }
        for child in pci_device_node.children.iter() {
            device_tree.remove(child);
        }

        // Check whether this b/d/f is listed among the devices attached to
        // the IOMMU.
        let mut iommu_attached = false;
        if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
            if iommu_attached_devices.contains(&pci_device_bdf) {
                iommu_attached = true;
            }
        }

        // Per device kind cleanup. Each arm yields the device as a PciDevice
        // and as a BusDevice, the underlying virtio device if any, and whether
        // the DMA mapping handler must be removed from the virtio-mem devices.
        let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
            // No need to remove any virtio-mem mapping here as the container outlives all devices
            PciDeviceHandle::Vfio(vfio_pci_device) => (
                Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                false,
            ),
            PciDeviceHandle::Virtio(virtio_pci_device) => {
                let dev = virtio_pci_device.lock().unwrap();
                // Unregister every ioeventfd exposed through the config BAR.
                let bar_addr = dev.config_bar_addr();
                for (event, addr) in dev.ioeventfds(bar_addr) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.address_manager
                        .vm
                        .unregister_ioevent(event, &io_addr)
                        .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
                }

                // A device with a DMA handler that is not attached to the
                // IOMMU gets every region of every memory zone unmapped.
                if let Some(dma_handler) = dev.dma_handler() {
                    if !iommu_attached {
                        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                            for region in zone.regions() {
                                let iova = region.start_addr().0;
                                let size = region.len();
                                dma_handler
                                    .unmap(iova, size)
                                    .map_err(DeviceManagerError::VirtioDmaUnmap)?;
                            }
                        }
                    }
                }

                (
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    Some(dev.virtio_device()),
                    dev.dma_handler().is_some() && !iommu_attached,
                )
            }
            PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
                let mut dev = vfio_user_pci_device.lock().unwrap();
                // DMA-unmap every region of every memory zone.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        dev.dma_unmap(region)
                            .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
                    }
                }

                (
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    true,
                )
            }
        };

        // Drop the handler registered for this b/d/f from every virtio-mem
        // device.
        if remove_dma_handler {
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
                        pci_device_bdf.into(),
                    ))
                    .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
            }
        }

        // Free the allocated BARs
        pci_device
            .lock()
            .unwrap()
            .free_bars(
                &mut self.address_manager.allocator.lock().unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .allocator
                    .lock()
                    .unwrap(),
            )
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the lists and
        // buses where it was stored. At the end of this function, after
        // pci_device, bus_device and virtio_device are released, the actual
        // device will be dropped.
        Ok(())
    }
4150 
4151     fn hotplug_virtio_pci_device(
4152         &mut self,
4153         handle: MetaVirtioDevice,
4154     ) -> DeviceManagerResult<PciDeviceInfo> {
4155         // Add the virtio device to the device manager list. This is important
4156         // as the list is used to notify virtio devices about memory updates
4157         // for instance.
4158         self.virtio_devices.push(handle.clone());
4159 
4160         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
4161             self.iommu_mapping.clone()
4162         } else {
4163             None
4164         };
4165 
4166         let bdf = self.add_virtio_pci_device(
4167             handle.virtio_device,
4168             &mapping,
4169             handle.id.clone(),
4170             handle.pci_segment,
4171             handle.dma_handler,
4172         )?;
4173 
4174         // Update the PCIU bitmap
4175         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4176 
4177         Ok(PciDeviceInfo { id: handle.id, bdf })
4178     }
4179 
4180     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
4181         self.config
4182             .lock()
4183             .as_ref()
4184             .unwrap()
4185             .platform
4186             .as_ref()
4187             .map(|pc| {
4188                 pc.iommu_segments
4189                     .as_ref()
4190                     .map(|v| v.contains(&pci_segment_id))
4191                     .unwrap_or_default()
4192             })
4193             .unwrap_or_default()
4194     }
4195 
4196     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
4197         self.validate_identifier(&disk_cfg.id)?;
4198 
4199         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
4200             return Err(DeviceManagerError::InvalidIommuHotplug);
4201         }
4202 
4203         let device = self.make_virtio_block_device(disk_cfg)?;
4204         self.hotplug_virtio_pci_device(device)
4205     }
4206 
4207     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
4208         self.validate_identifier(&fs_cfg.id)?;
4209 
4210         let device = self.make_virtio_fs_device(fs_cfg)?;
4211         self.hotplug_virtio_pci_device(device)
4212     }
4213 
4214     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
4215         self.validate_identifier(&pmem_cfg.id)?;
4216 
4217         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
4218             return Err(DeviceManagerError::InvalidIommuHotplug);
4219         }
4220 
4221         let device = self.make_virtio_pmem_device(pmem_cfg)?;
4222         self.hotplug_virtio_pci_device(device)
4223     }
4224 
4225     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
4226         self.validate_identifier(&net_cfg.id)?;
4227 
4228         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
4229             return Err(DeviceManagerError::InvalidIommuHotplug);
4230         }
4231 
4232         let device = self.make_virtio_net_device(net_cfg)?;
4233         self.hotplug_virtio_pci_device(device)
4234     }
4235 
4236     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
4237         self.validate_identifier(&vdpa_cfg.id)?;
4238 
4239         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
4240             return Err(DeviceManagerError::InvalidIommuHotplug);
4241         }
4242 
4243         let device = self.make_vdpa_device(vdpa_cfg)?;
4244         self.hotplug_virtio_pci_device(device)
4245     }
4246 
4247     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4248         self.validate_identifier(&vsock_cfg.id)?;
4249 
4250         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4251             return Err(DeviceManagerError::InvalidIommuHotplug);
4252         }
4253 
4254         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4255         self.hotplug_virtio_pci_device(device)
4256     }
4257 
4258     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4259         let mut counters = HashMap::new();
4260 
4261         for handle in &self.virtio_devices {
4262             let virtio_device = handle.virtio_device.lock().unwrap();
4263             if let Some(device_counters) = virtio_device.counters() {
4264                 counters.insert(handle.id.clone(), device_counters.clone());
4265             }
4266         }
4267 
4268         counters
4269     }
4270 
4271     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4272         if let Some(balloon) = &self.balloon {
4273             return balloon
4274                 .lock()
4275                 .unwrap()
4276                 .resize(size)
4277                 .map_err(DeviceManagerError::VirtioBalloonResize);
4278         }
4279 
4280         warn!("No balloon setup: Can't resize the balloon");
4281         Err(DeviceManagerError::MissingVirtioBalloon)
4282     }
4283 
4284     pub fn balloon_size(&self) -> u64 {
4285         if let Some(balloon) = &self.balloon {
4286             return balloon.lock().unwrap().get_actual();
4287         }
4288 
4289         0
4290     }
4291 
4292     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4293         self.device_tree.clone()
4294     }
4295 
4296     #[cfg(target_arch = "x86_64")]
4297     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4298         self.ged_notification_device
4299             .as_ref()
4300             .unwrap()
4301             .lock()
4302             .unwrap()
4303             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4304             .map_err(DeviceManagerError::PowerButtonNotification)
4305     }
4306 
4307     #[cfg(target_arch = "aarch64")]
4308     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4309         // There are two use cases:
4310         // 1. Users will use direct kernel boot with device tree.
4311         // 2. Users will use ACPI+UEFI boot.
4312 
4313         // Trigger a GPIO pin 3 event to satisfy use case 1.
4314         self.gpio_device
4315             .as_ref()
4316             .unwrap()
4317             .lock()
4318             .unwrap()
4319             .trigger_key(3)
4320             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4321         // Trigger a GED power button event to satisfy use case 2.
4322         return self
4323             .ged_notification_device
4324             .as_ref()
4325             .unwrap()
4326             .lock()
4327             .unwrap()
4328             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4329             .map_err(DeviceManagerError::PowerButtonNotification);
4330     }
4331 
    /// Returns a reference to the `iommu_attached_devices` field: `None`, or
    /// a pair made of one PCI b/d/f and a list of PCI b/d/f.
    // NOTE(review): the first element is presumably the b/d/f of the IOMMU
    // device itself and the list the devices attached to it - confirm.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }
4335 
4336     fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4337         if let Some(id) = id {
4338             if id.starts_with("__") {
4339                 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4340             }
4341 
4342             if self.device_tree.lock().unwrap().contains_key(id) {
4343                 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4344             }
4345         }
4346 
4347         Ok(())
4348     }
4349 
    /// Returns a reference to the ACPI platform addresses held by the device
    /// manager.
    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
4353 }
4354 
4355 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4356     for (numa_node_id, numa_node) in numa_nodes.iter() {
4357         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4358             return Some(*numa_node_id);
4359         }
4360     }
4361 
4362     None
4363 }
4364 
4365 fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
4366     for (numa_node_id, numa_node) in numa_nodes.iter() {
4367         if numa_node.pci_segments.contains(&pci_segment_id) {
4368             return *numa_node_id;
4369         }
4370     }
4371 
4372     0
4373 }
4374 
4375 struct TpmDevice {}
4376 
4377 impl Aml for TpmDevice {
4378     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4379         aml::Device::new(
4380             "TPM2".into(),
4381             vec![
4382                 &aml::Name::new("_HID".into(), &"MSFT0101"),
4383                 &aml::Name::new("_STA".into(), &(0xF_usize)),
4384                 &aml::Name::new(
4385                     "_CRS".into(),
4386                     &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
4387                         true,
4388                         layout::TPM_START.0 as u32,
4389                         layout::TPM_SIZE as u32,
4390                     )]),
4391                 ),
4392             ],
4393         )
4394         .to_aml_bytes(sink)
4395     }
4396 }
4397 
impl Aml for DeviceManager {
    /// Writes the ACPI objects owned by the device manager into `sink`, in
    /// this order:
    /// * the PCI hotplug controller (`_SB_.PHPR`);
    /// * every PCI segment;
    /// * the motherboard resources device (`_SB_.MBRD`) covering the PCI MMIO
    ///   config space of every segment;
    /// * the serial device (`_SB_.COM1`), unless the serial is turned off;
    /// * the `_S5_` package and the power button device (`_SB_.PWRB`);
    /// * the TPM device when a TPM is configured;
    /// * the GED notification device.
    ///
    /// # Panics
    ///
    /// Panics if there is no GED notification device or if a lock is
    /// poisoned. On aarch64 it also panics if the serial is enabled but no
    /// serial device info is registered.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // One call to the PCNT method of each segment; they make up the body
        // of the PSCN method defined below.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                // PCEJ takes two arguments.
                // NOTE(review): Arg0 looks like the slot number to eject and
                // Arg1 the segment id - confirm against the callers emitted
                // by the PCI segments.
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Store ONE shifted left by the first argument into
                        // the memory-mapped B0EJ field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        // One fixed 32-bit memory resource per segment, covering its PCI MMIO
        // config space.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        // The COM1 device is only emitted when the serial is not turned off.
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        // Power button device
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        // GED notification device
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}
4574 
4575 impl Pausable for DeviceManager {
4576     fn pause(&mut self) -> result::Result<(), MigratableError> {
4577         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4578             if let Some(migratable) = &device_node.migratable {
4579                 migratable.lock().unwrap().pause()?;
4580             }
4581         }
4582         // On AArch64, the pause of device manager needs to trigger
4583         // a "pause" of GIC, which will flush the GIC pending tables
4584         // and ITS tables to guest RAM.
4585         #[cfg(target_arch = "aarch64")]
4586         {
4587             self.get_interrupt_controller()
4588                 .unwrap()
4589                 .lock()
4590                 .unwrap()
4591                 .pause()?;
4592         };
4593 
4594         Ok(())
4595     }
4596 
4597     fn resume(&mut self) -> result::Result<(), MigratableError> {
4598         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4599             if let Some(migratable) = &device_node.migratable {
4600                 migratable.lock().unwrap().resume()?;
4601             }
4602         }
4603 
4604         Ok(())
4605     }
4606 }
4607 
4608 impl Snapshottable for DeviceManager {
4609     fn id(&self) -> String {
4610         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4611     }
4612 
4613     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4614         let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);
4615 
4616         // We aggregate all devices snapshots.
4617         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4618             if let Some(migratable) = &device_node.migratable {
4619                 let mut migratable = migratable.lock().unwrap();
4620                 snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
4621             }
4622         }
4623 
4624         Ok(snapshot)
4625     }
4626 }
4627 
// Empty impl: no `Transportable` item is overridden for the device manager.
impl Transportable for DeviceManager {}
4629 
4630 impl Migratable for DeviceManager {
4631     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4632         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4633             if let Some(migratable) = &device_node.migratable {
4634                 migratable.lock().unwrap().start_dirty_log()?;
4635             }
4636         }
4637         Ok(())
4638     }
4639 
4640     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4641         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4642             if let Some(migratable) = &device_node.migratable {
4643                 migratable.lock().unwrap().stop_dirty_log()?;
4644             }
4645         }
4646         Ok(())
4647     }
4648 
4649     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4650         let mut tables = Vec::new();
4651         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4652             if let Some(migratable) = &device_node.migratable {
4653                 tables.push(migratable.lock().unwrap().dirty_log()?);
4654             }
4655         }
4656         Ok(MemoryRangeTable::new_from_tables(tables))
4657     }
4658 
4659     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4660         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4661             if let Some(migratable) = &device_node.migratable {
4662                 migratable.lock().unwrap().start_migration()?;
4663             }
4664         }
4665         Ok(())
4666     }
4667 
4668     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4669         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4670             if let Some(migratable) = &device_node.migratable {
4671                 migratable.lock().unwrap().complete_migration()?;
4672             }
4673         }
4674         Ok(())
4675     }
4676 }
4677 
// Layout of the PCI hotplug register block served by the `BusDevice`
// implementation of `DeviceManager`: four consecutive 4-byte fields, matching
// the PCIU, PCID, B0EJ and PSEG entries of the "PCST" ACPI field.

// Byte offsets of the fields within the register block.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
// Sizes of the fields, in bytes.
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;
4686 
4687 impl BusDevice for DeviceManager {
4688     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4689         match offset {
4690             PCIU_FIELD_OFFSET => {
4691                 assert!(data.len() == PCIU_FIELD_SIZE);
4692                 data.copy_from_slice(
4693                     &self.pci_segments[self.selected_segment]
4694                         .pci_devices_up
4695                         .to_le_bytes(),
4696                 );
4697                 // Clear the PCIU bitmap
4698                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4699             }
4700             PCID_FIELD_OFFSET => {
4701                 assert!(data.len() == PCID_FIELD_SIZE);
4702                 data.copy_from_slice(
4703                     &self.pci_segments[self.selected_segment]
4704                         .pci_devices_down
4705                         .to_le_bytes(),
4706                 );
4707                 // Clear the PCID bitmap
4708                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4709             }
4710             B0EJ_FIELD_OFFSET => {
4711                 assert!(data.len() == B0EJ_FIELD_SIZE);
4712                 // Always return an empty bitmap since the eject is always
4713                 // taken care of right away during a write access.
4714                 data.fill(0);
4715             }
4716             PSEG_FIELD_OFFSET => {
4717                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4718                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4719             }
4720             _ => error!(
4721                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4722                 base, offset
4723             ),
4724         }
4725 
4726         debug!(
4727             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4728             base, offset, data
4729         )
4730     }
4731 
4732     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4733         match offset {
4734             B0EJ_FIELD_OFFSET => {
4735                 assert!(data.len() == B0EJ_FIELD_SIZE);
4736                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4737                 data_array.copy_from_slice(data);
4738                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4739 
4740                 while slot_bitmap > 0 {
4741                     let slot_id = slot_bitmap.trailing_zeros();
4742                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4743                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4744                     }
4745                     slot_bitmap &= !(1 << slot_id);
4746                 }
4747             }
4748             PSEG_FIELD_OFFSET => {
4749                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4750                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4751                 data_array.copy_from_slice(data);
4752                 let selected_segment = u32::from_le_bytes(data_array) as usize;
4753                 if selected_segment >= self.pci_segments.len() {
4754                     error!(
4755                         "Segment selection out of range: {} >= {}",
4756                         selected_segment,
4757                         self.pci_segments.len()
4758                     );
4759                     return None;
4760                 }
4761                 self.selected_segment = selected_segment;
4762             }
4763             _ => error!(
4764                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4765                 base, offset
4766             ),
4767         }
4768 
4769         debug!(
4770             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4771             base, offset, data
4772         );
4773 
4774         None
4775     }
4776 }
4777 
4778 impl Drop for DeviceManager {
4779     fn drop(&mut self) {
4780         for handle in self.virtio_devices.drain(..) {
4781             handle.virtio_device.lock().unwrap().shutdown();
4782         }
4783 
4784         if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
4785             // SAFETY: FFI call
4786             let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
4787         }
4788     }
4789 }
4790