xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision 07d1208dd53a207a65b649b8952780dfd0ca59d9)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use crate::config::{
13     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
14     VdpaConfig, VhostMode, VmConfig, VsockConfig,
15 };
16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE};
17 use crate::device_tree::{DeviceNode, DeviceTree};
18 use crate::interrupt::LegacyUserspaceInterruptManager;
19 use crate::interrupt::MsiInterruptManager;
20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
21 use crate::pci_segment::PciSegment;
22 use crate::seccomp_filters::{get_seccomp_filter, Thread};
23 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
24 use crate::sigwinch_listener::start_sigwinch_listener;
25 use crate::GuestRegionMmap;
26 use crate::PciDeviceInfo;
27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
28 use acpi_tables::sdt::GenericAddress;
29 use acpi_tables::{aml, Aml};
30 use anyhow::anyhow;
31 use arch::layout;
32 #[cfg(target_arch = "x86_64")]
33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
34 use arch::NumaNodes;
35 #[cfg(target_arch = "aarch64")]
36 use arch::{DeviceType, MmioDeviceInfo};
37 use block::{
38     async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
39     fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_sync::RawFileDiskSync,
40     vhdx, vhdx_sync::VhdxDiskSync, ImageType,
41 };
42 #[cfg(feature = "io_uring")]
43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk};
44 #[cfg(target_arch = "aarch64")]
45 use devices::gic;
46 #[cfg(target_arch = "x86_64")]
47 use devices::ioapic;
48 #[cfg(target_arch = "aarch64")]
49 use devices::legacy::Pl011;
50 #[cfg(target_arch = "x86_64")]
51 use devices::legacy::Serial;
52 use devices::{
53     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
54 };
55 use hypervisor::{HypervisorType, IoEventAddress};
56 use libc::{
57     cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED,
58     O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW,
59 };
60 use pci::{
61     DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping,
62     VfioUserPciDevice, VfioUserPciDeviceError,
63 };
64 use seccompiler::SeccompAction;
65 use serde::{Deserialize, Serialize};
66 use std::collections::{BTreeSet, HashMap};
67 use std::fs::{read_link, File, OpenOptions};
68 use std::io::{self, stdout, Seek, SeekFrom};
69 use std::mem::zeroed;
70 use std::num::Wrapping;
71 use std::os::unix::fs::OpenOptionsExt;
72 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
73 use std::path::PathBuf;
74 use std::result;
75 use std::sync::{Arc, Mutex};
76 use std::time::Instant;
77 use tracer::trace_scoped;
78 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd};
79 use virtio_devices::transport::VirtioTransport;
80 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator};
81 use virtio_devices::vhost_user::VhostUserConfig;
82 use virtio_devices::{
83     AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource,
84 };
85 use virtio_devices::{Endpoint, IommuMapping};
86 use vm_allocator::{AddressAllocator, SystemAllocator};
87 use vm_device::dma_mapping::vfio::VfioDmaMapping;
88 use vm_device::dma_mapping::ExternalDmaMapping;
89 use vm_device::interrupt::{
90     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
91 };
92 use vm_device::{Bus, BusDevice, Resource};
93 use vm_memory::guest_memory::FileOffset;
94 use vm_memory::GuestMemoryRegion;
95 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
96 #[cfg(target_arch = "x86_64")]
97 use vm_memory::{GuestAddressSpace, GuestMemory};
98 use vm_migration::{
99     protocol::MemoryRangeTable, snapshot_from_id, versioned_state_from_id, Migratable,
100     MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, Transportable,
101 };
102 use vm_virtio::AccessPlatform;
103 use vm_virtio::VirtioDeviceType;
104 use vmm_sys_util::eventfd::EventFd;
105 
/// Length (0x1000) used for MMIO device regions on aarch64.
// NOTE(review): the users of this constant are outside this excerpt — confirm
// it is the per-device MMIO window size.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name.
// These are complete identifiers and all start with a double underscore.
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
const PVPANIC_DEVICE_NAME: &str = "__pvpanic";
// The watchdog is a fixed name (not a prefix), so it belongs with the
// singleton devices rather than with the generated-identifier prefixes below.
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";

// Devices that the user may name and for which we generate
// identifiers if the user doesn't give one.
// These are prefixes and start with a single underscore.
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
133 
/// Errors associated with device manager.
///
/// Each variant names one failing operation; where a variant carries a
/// payload, it is the underlying error reported for that operation.
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path.
    Disk(io::Error),

    /// Cannot create vhost-user-net device.
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device.
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device.
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device.
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device.
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device.
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device.
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device.
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device.
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device.
    CreateVirtioVsock(io::Error),

    /// Cannot create TPM device.
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend.
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device.
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device.
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device.
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format.
    DetectImageType(io::Error),

    /// Cannot open qcow disk path.
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager.
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread.
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface.
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs.
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device.
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device.
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file.
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size.
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory.
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs.
    FsRangeAllocation,

    /// Error creating serial output file.
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file.
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty.
    SerialPtyOpen(io::Error),

    /// Error creating console pty.
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode.
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer.
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device.
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device.
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port.
    AllocateIoPort,

    /// Failed to allocate MMIO address.
    AllocateMmioAddress,

    /// Failed to make hotplug notification.
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation.
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file.
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend.
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend.
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected.
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device.
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory.
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified.
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not a multiple of 2MiB.
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough.
    NoDevicePassthroughSupport,

    /// Failed to resize virtio-balloon.
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device.
    MissingVirtualIommu,

    /// Failed to do power button notification.
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification.
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor.
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync.
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync.
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync.
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync.
    CreateFixedVhdxDiskSync(vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client.
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device.
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest.
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device.
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor.
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU.
    InvalidIommuHotplug,

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier.
    InvalidIdentifier(String),

    /// Error activating virtio device.
    VirtioActivate(ActivateError),

    /// Failed retrieving device state from snapshot.
    RestoreGetState(MigratableError),

    /// Cannot create a PvPanic device.
    PvPanicCreate(devices::pvpanic::PvPanicError),
}
/// Result type whose error is always a [`DeviceManagerError`].
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;
481 
// Size (0x10) reserved for the device manager's ACPI-related region.
// NOTE(review): the consumer of this constant is outside this excerpt —
// presumably a length in bytes; confirm against its use.
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

// ioctl request issued by `create_pty()` on the freshly opened pty main device
// with a zero argument (Linux `TIOCSPTLCK`: set/clear the pty lock).
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
// ioctl request issued by `create_pty()` to obtain a file descriptor for the
// other end of the pty. The value matches Linux `TIOCGPTPEER`; the constant
// name here drops the 'P'.
const TIOCGTPEER: libc::c_int = 0x5441;
486 
487 pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
488     // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
489     // This is done to try and use the devpts filesystem that
490     // could be available for use in the process's namespace first.
491     // Ideally these are all the same file though but different
492     // kernels could have things setup differently.
493     // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
494     // for further details.
495 
496     let custom_flags = libc::O_NONBLOCK;
497     let main = match OpenOptions::new()
498         .read(true)
499         .write(true)
500         .custom_flags(custom_flags)
501         .open("/dev/pts/ptmx")
502     {
503         Ok(f) => f,
504         _ => OpenOptions::new()
505             .read(true)
506             .write(true)
507             .custom_flags(custom_flags)
508             .open("/dev/ptmx")?,
509     };
510     let mut unlock: libc::c_ulong = 0;
511     // SAFETY: FFI call into libc, trivially safe
512     unsafe { libc::ioctl(main.as_raw_fd(), TIOCSPTLCK as _, &mut unlock) };
513 
514     // SAFETY: FFI call into libc, trivally safe
515     let sub_fd = unsafe {
516         libc::ioctl(
517             main.as_raw_fd(),
518             TIOCGTPEER as _,
519             libc::O_NOCTTY | libc::O_RDWR,
520         )
521     };
522     if sub_fd == -1 {
523         return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
524     }
525 
526     let proc_path = PathBuf::from(format!("/proc/self/fd/{sub_fd}"));
527     let path = read_link(proc_path)?;
528 
529     // SAFETY: sub_fd is checked to be valid before being wrapped in File
530     Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
531 }
532 
533 #[derive(Default)]
534 pub struct Console {
535     console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
536 }
537 
538 impl Console {
539     pub fn need_resize(&self) -> bool {
540         if let Some(_resizer) = self.console_resizer.as_ref() {
541             return true;
542         }
543 
544         false
545     }
546 
547     pub fn update_console_size(&self) {
548         if let Some(resizer) = self.console_resizer.as_ref() {
549             resizer.update_console_size()
550         }
551     }
552 }
553 
/// Shared state needed to relocate device address ranges (see the
/// `DeviceRelocation` implementation for this type).
pub(crate) struct AddressManager {
    // System-wide allocator; `move_bar` uses it for I/O port ranges and for
    // 32-bit MMIO hole ranges.
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    // Port I/O bus (x86_64 only); updated when an I/O BAR moves.
    #[cfg(target_arch = "x86_64")]
    pub(crate) io_bus: Arc<Bus>,
    // MMIO bus; updated when a memory BAR moves.
    pub(crate) mmio_bus: Arc<Bus>,
    // Hypervisor VM handle, used to (un)register ioevents and to create or
    // remove user memory regions.
    pub(crate) vm: Arc<dyn hypervisor::Vm>,
    // Device tree whose `PciBar` resources are kept in sync with BAR moves.
    device_tree: Arc<Mutex<DeviceTree>>,
    // Allocators searched by address range when a 64-bit memory BAR moves.
    pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}
563 
impl DeviceRelocation for AddressManager {
    /// Relocates a PCI BAR of `pci_dev` from `old_base` to `new_base`.
    ///
    /// Steps, in order:
    /// 1. Free the old range and allocate the new one in the matching
    ///    allocator, then move the range on the I/O or MMIO bus.
    /// 2. Update the matching `Resource::PciBar` entry in the device tree
    ///    (skipped when the device reports no id).
    /// 3. For virtio-pci devices, either re-register the ioeventfds or remap
    ///    the shared memory region backed by the moved BAR.
    /// 4. Forward the move to the device itself via `pci_dev.move_bar`.
    ///
    /// # Errors
    ///
    /// Returns an `io::Error` of kind `Other` when any allocation, bus
    /// update, device tree lookup or hypervisor call fails, or the error
    /// returned by `pci_dev.move_bar`. Steps already performed are not rolled
    /// back.
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                // On aarch64 this only logs; execution continues with the
                // device tree update and the device callback below.
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                // Update system allocator
                if region_type == PciBarRegionType::Memory32BitRegion {
                    // 32-bit BARs come from the system allocator's MMIO hole.
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);

                    // The third argument passes `len` so the new range is
                    // requested with an alignment equal to its size.
                    // NOTE(review): parameter meaning inferred from the
                    // allocator call shape — confirm.
                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_mmio_hole_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            Some(len),
                        )
                        .ok_or_else(|| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                "failed allocating new 32 bits MMIO range",
                            )
                        })?;
                } else {
                    // Find the specific allocator that this BAR was allocated from and use it for new one
                    // If no allocator range contains `old_base`, nothing is
                    // freed or allocated and the bus update below still runs.
                    for allocator in &self.pci_mmio_allocators {
                        let allocator_base = allocator.lock().unwrap().base();
                        let allocator_end = allocator.lock().unwrap().end();

                        if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                            allocator
                                .lock()
                                .unwrap()
                                .free(GuestAddress(old_base), len as GuestUsize);

                            allocator
                                .lock()
                                .unwrap()
                                .allocate(
                                    Some(GuestAddress(new_base)),
                                    len as GuestUsize,
                                    Some(len),
                                )
                                .ok_or_else(|| {
                                    io::Error::new(
                                        io::ErrorKind::Other,
                                        "failed allocating new 64 bits MMIO range",
                                    )
                                })?;

                            break;
                        }
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                // Only the first PciBar resource matching both the region
                // type and the old base is rewritten.
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {id} from device tree"),
                ));
            }
        }

        // Extra work is needed only when the device is a VirtioPciDevice.
        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            // NOTE(review): this comparison presumably identifies the moved
            // BAR as the virtio configuration BAR, which implies the device
            // already reports the new address here — confirm.
            if bar_addr == new_base {
                // Drop the ioevents tied to the old addresses, then register
                // them again at the addresses derived from the new base.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {e:?}"),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {e:?}"),
                            )
                        })?;
                }
            } else {
                // Otherwise the moved BAR may back the device's shared memory
                // regions; remap them only if they start at the old base.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        // Describe the existing mapping so it can be removed.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {e:?}"),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        // Same slot, length and host address; only the guest
                        // address changes.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {e:?}"),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {e:?}"),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the device record its own new BAR address.
        pci_dev.move_bar(old_base, new_base)
    }
}
772 
/// Serializable subset of the device manager's data.
// NOTE(review): presumably the payload stored in and restored from a
// snapshot — the code producing and consuming it is outside this excerpt.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    // The device tree.
    device_tree: DeviceTree,
    // Wrapping counter for device IDs.
    device_id_cnt: Wrapping<usize>,
}
778 
/// Handle on a pseudo-terminal: the open main-side file plus the filesystem
/// path recorded for it.
#[derive(Debug)]
pub struct PtyPair {
    /// Open file for the main side of the pty.
    pub main: File,
    /// Path associated with the pty.
    pub path: PathBuf,
}

impl Clone for PtyPair {
    /// Duplicates the pair: the file handle is duplicated with
    /// `File::try_clone` and the path is copied.
    ///
    /// # Panics
    ///
    /// Panics if the file handle cannot be duplicated.
    fn clone(&self) -> Self {
        let main = self.main.try_clone().unwrap();
        let path = self.path.to_path_buf();
        Self { main, path }
    }
}
793 
/// Shared handle on one of the supported kinds of PCI device.
///
/// Each variant holds an `Arc<Mutex<_>>`, so cloning a handle yields another
/// reference to the same device rather than a copy of it.
#[derive(Clone)]
pub enum PciDeviceHandle {
    /// A VFIO PCI device.
    Vfio(Arc<Mutex<VfioPciDevice>>),
    /// A virtio PCI device.
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    /// A vfio-user PCI device.
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}
800 
/// A virtio device together with the metadata the device manager keeps
/// about it.
#[derive(Clone)]
struct MetaVirtioDevice {
    // The virtio device itself, shared behind a mutex.
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // NOTE(review): presumably whether the device sits behind the virtual
    // IOMMU — confirm against where this flag is consumed.
    iommu: bool,
    // Identifier of the device.
    id: String,
    // PCI segment number associated with the device.
    pci_segment: u16,
    // Optional external DMA mapping handler for the device.
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}
809 
/// Optional ACPI generic addresses of platform registers.
///
/// Every field is `None` in the default value.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    /// Address of the PM timer, if any.
    pub pm_timer_address: Option<GenericAddress>,
    /// Address of the reset register, if any.
    pub reset_reg_address: Option<GenericAddress>,
    /// Address of the sleep control register, if any.
    pub sleep_control_reg_address: Option<GenericAddress>,
    /// Address of the sleep status register, if any.
    pub sleep_status_reg_address: Option<GenericAddress>,
}
817 
/// Owner of the VM's devices and of the resources needed to create them:
/// buses and address allocators (through the `AddressManager`), interrupt
/// managers, PCI segments and the device tree.
pub struct DeviceManager {
    // The underlying hypervisor
    hypervisor_type: HypervisorType,

    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // console PTY
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // serial PTY
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // File handle exposed through `console_resize_pipe()`.
    // NOTE(review): presumably used to signal console resizes - confirm.
    console_resize_pipe: Option<Arc<File>>,

    // To restore on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller (IOAPIC on x86_64, GIC on aarch64)
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    // PCI segments; index 0 holds the default segment.
    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager, set once the interrupt controller exists.
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU and the mapping handed to the devices placed
    // behind it.
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit and reset events; clones are handed to the devices that need them.
    exit_evt: EventFd,
    reset_evt: EventFd,

    // Information about the registered MMIO devices, keyed by device type
    // and identifier.
    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    // MMIO address at which the DeviceManager itself is registered on the
    // MMIO bus.
    acpi_address: GuestAddress,

    // NOTE(review): presumably the index of the PCI segment currently
    // selected by the guest through the ACPI interface - confirm.
    selected_segment: usize,

    // Handles to the virtio-mem devices
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    // Snapshot the device states are restored from, if any.
    snapshot: Option<Snapshot>,
}
963 
964 impl DeviceManager {
965     #[allow(clippy::too_many_arguments)]
966     pub fn new(
967         #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
968         mmio_bus: Arc<Bus>,
969         hypervisor_type: HypervisorType,
970         vm: Arc<dyn hypervisor::Vm>,
971         config: Arc<Mutex<VmConfig>>,
972         memory_manager: Arc<Mutex<MemoryManager>>,
973         cpu_manager: Arc<Mutex<CpuManager>>,
974         exit_evt: EventFd,
975         reset_evt: EventFd,
976         seccomp_action: SeccompAction,
977         numa_nodes: NumaNodes,
978         activate_evt: &EventFd,
979         force_iommu: bool,
980         boot_id_list: BTreeSet<String>,
981         timestamp: Instant,
982         snapshot: Option<Snapshot>,
983         dynamic: bool,
984     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
985         trace_scoped!("DeviceManager::new");
986 
987         let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
988             let state: DeviceManagerState = snapshot.to_state().unwrap();
989             (
990                 Arc::new(Mutex::new(state.device_tree.clone())),
991                 state.device_id_cnt,
992             )
993         } else {
994             (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
995         };
996 
997         let num_pci_segments =
998             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
999                 platform_config.num_pci_segments
1000             } else {
1001                 1
1002             };
1003 
1004         let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
1005         let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;
1006 
1007         // Start each PCI segment range on a 4GiB boundary
1008         let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
1009             / ((4 << 30) * num_pci_segments as u64)
1010             * (4 << 30);
1011 
1012         let mut pci_mmio_allocators = vec![];
1013         for i in 0..num_pci_segments as u64 {
1014             let mmio_start = start_of_device_area + i * pci_segment_size;
1015             let allocator = Arc::new(Mutex::new(
1016                 AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
1017             ));
1018             pci_mmio_allocators.push(allocator)
1019         }
1020 
1021         let address_manager = Arc::new(AddressManager {
1022             allocator: memory_manager.lock().unwrap().allocator(),
1023             #[cfg(target_arch = "x86_64")]
1024             io_bus,
1025             mmio_bus,
1026             vm: vm.clone(),
1027             device_tree: Arc::clone(&device_tree),
1028             pci_mmio_allocators,
1029         });
1030 
1031         // First we create the MSI interrupt manager, the legacy one is created
1032         // later, after the IOAPIC device creation.
1033         // The reason we create the MSI one first is because the IOAPIC needs it,
1034         // and then the legacy interrupt manager needs an IOAPIC. So we're
1035         // handling a linear dependency chain:
1036         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1037         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1038             Arc::new(MsiInterruptManager::new(
1039                 Arc::clone(&address_manager.allocator),
1040                 vm,
1041             ));
1042 
1043         let acpi_address = address_manager
1044             .allocator
1045             .lock()
1046             .unwrap()
1047             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1048             .ok_or(DeviceManagerError::AllocateIoPort)?;
1049 
1050         let mut pci_irq_slots = [0; 32];
1051         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1052             &address_manager,
1053             &mut pci_irq_slots,
1054         )?;
1055 
1056         let mut pci_segments = vec![PciSegment::new_default_segment(
1057             &address_manager,
1058             Arc::clone(&address_manager.pci_mmio_allocators[0]),
1059             &pci_irq_slots,
1060         )?];
1061 
1062         for i in 1..num_pci_segments as usize {
1063             pci_segments.push(PciSegment::new(
1064                 i as u16,
1065                 &address_manager,
1066                 Arc::clone(&address_manager.pci_mmio_allocators[i]),
1067                 &pci_irq_slots,
1068             )?);
1069         }
1070 
1071         if dynamic {
1072             let acpi_address = address_manager
1073                 .allocator
1074                 .lock()
1075                 .unwrap()
1076                 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
1077                 .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1078 
1079             address_manager
1080                 .mmio_bus
1081                 .insert(
1082                     cpu_manager.clone(),
1083                     acpi_address.0,
1084                     CPU_MANAGER_ACPI_SIZE as u64,
1085                 )
1086                 .map_err(DeviceManagerError::BusError)?;
1087 
1088             cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
1089         }
1090 
1091         let device_manager = DeviceManager {
1092             hypervisor_type,
1093             address_manager: Arc::clone(&address_manager),
1094             console: Arc::new(Console::default()),
1095             interrupt_controller: None,
1096             #[cfg(target_arch = "aarch64")]
1097             cmdline_additions: Vec::new(),
1098             ged_notification_device: None,
1099             config,
1100             memory_manager,
1101             cpu_manager,
1102             virtio_devices: Vec::new(),
1103             bus_devices: Vec::new(),
1104             device_id_cnt,
1105             msi_interrupt_manager,
1106             legacy_interrupt_manager: None,
1107             passthrough_device: None,
1108             vfio_container: None,
1109             iommu_device: None,
1110             iommu_mapping: None,
1111             iommu_attached_devices: None,
1112             pci_segments,
1113             device_tree,
1114             exit_evt,
1115             reset_evt,
1116             #[cfg(target_arch = "aarch64")]
1117             id_to_dev_info: HashMap::new(),
1118             seccomp_action,
1119             numa_nodes,
1120             balloon: None,
1121             activate_evt: activate_evt
1122                 .try_clone()
1123                 .map_err(DeviceManagerError::EventFd)?,
1124             acpi_address,
1125             selected_segment: 0,
1126             serial_pty: None,
1127             serial_manager: None,
1128             console_pty: None,
1129             console_resize_pipe: None,
1130             original_termios_opt: Arc::new(Mutex::new(None)),
1131             virtio_mem_devices: Vec::new(),
1132             #[cfg(target_arch = "aarch64")]
1133             gpio_device: None,
1134             pvpanic_device: None,
1135             force_iommu,
1136             io_uring_supported: None,
1137             boot_id_list,
1138             timestamp,
1139             pending_activations: Arc::new(Mutex::new(Vec::default())),
1140             acpi_platform_addresses: AcpiPlatformAddresses::default(),
1141             snapshot,
1142         };
1143 
1144         let device_manager = Arc::new(Mutex::new(device_manager));
1145 
1146         address_manager
1147             .mmio_bus
1148             .insert(
1149                 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
1150                 acpi_address.0,
1151                 DEVICE_MANAGER_ACPI_SIZE as u64,
1152             )
1153             .map_err(DeviceManagerError::BusError)?;
1154 
1155         Ok(device_manager)
1156     }
1157 
1158     pub fn serial_pty(&self) -> Option<PtyPair> {
1159         self.serial_pty
1160             .as_ref()
1161             .map(|pty| pty.lock().unwrap().clone())
1162     }
1163 
1164     pub fn console_pty(&self) -> Option<PtyPair> {
1165         self.console_pty
1166             .as_ref()
1167             .map(|pty| pty.lock().unwrap().clone())
1168     }
1169 
1170     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1171         self.console_resize_pipe.as_ref().map(Arc::clone)
1172     }
1173 
1174     pub fn create_devices(
1175         &mut self,
1176         serial_pty: Option<PtyPair>,
1177         console_pty: Option<PtyPair>,
1178         console_resize_pipe: Option<File>,
1179         original_termios_opt: Arc<Mutex<Option<termios>>>,
1180     ) -> DeviceManagerResult<()> {
1181         trace_scoped!("create_devices");
1182 
1183         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1184 
1185         let interrupt_controller = self.add_interrupt_controller()?;
1186 
1187         self.cpu_manager
1188             .lock()
1189             .unwrap()
1190             .set_interrupt_controller(interrupt_controller.clone());
1191 
1192         // Now we can create the legacy interrupt manager, which needs the freshly
1193         // formed IOAPIC device.
1194         let legacy_interrupt_manager: Arc<
1195             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1196         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1197             &interrupt_controller,
1198         )));
1199 
1200         {
1201             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1202                 self.address_manager
1203                     .mmio_bus
1204                     .insert(
1205                         Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
1206                         acpi_address.0,
1207                         MEMORY_MANAGER_ACPI_SIZE as u64,
1208                     )
1209                     .map_err(DeviceManagerError::BusError)?;
1210             }
1211         }
1212 
1213         #[cfg(target_arch = "x86_64")]
1214         self.add_legacy_devices(
1215             self.reset_evt
1216                 .try_clone()
1217                 .map_err(DeviceManagerError::EventFd)?,
1218         )?;
1219 
1220         #[cfg(target_arch = "aarch64")]
1221         self.add_legacy_devices(&legacy_interrupt_manager)?;
1222 
1223         {
1224             self.ged_notification_device = self.add_acpi_devices(
1225                 &legacy_interrupt_manager,
1226                 self.reset_evt
1227                     .try_clone()
1228                     .map_err(DeviceManagerError::EventFd)?,
1229                 self.exit_evt
1230                     .try_clone()
1231                     .map_err(DeviceManagerError::EventFd)?,
1232             )?;
1233         }
1234 
1235         self.original_termios_opt = original_termios_opt;
1236 
1237         self.console = self.add_console_device(
1238             &legacy_interrupt_manager,
1239             &mut virtio_devices,
1240             serial_pty,
1241             console_pty,
1242             console_resize_pipe,
1243         )?;
1244 
1245         if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
1246             let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
1247             self.bus_devices
1248                 .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>)
1249         }
1250         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1251 
1252         virtio_devices.append(&mut self.make_virtio_devices()?);
1253 
1254         self.add_pci_devices(virtio_devices.clone())?;
1255 
1256         self.virtio_devices = virtio_devices;
1257 
1258         if self.config.clone().lock().unwrap().pvpanic {
1259             self.pvpanic_device = self.add_pvpanic_device()?;
1260         }
1261 
1262         Ok(())
1263     }
1264 
1265     fn state(&self) -> DeviceManagerState {
1266         DeviceManagerState {
1267             device_tree: self.device_tree.lock().unwrap().clone(),
1268             device_id_cnt: self.device_id_cnt,
1269         }
1270     }
1271 
1272     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1273         #[cfg(target_arch = "aarch64")]
1274         {
1275             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1276             let vgic_config = gic::Gic::create_default_config(vcpus.into());
1277             (
1278                 vgic_config.msi_addr,
1279                 vgic_config.msi_addr + vgic_config.msi_size - 1,
1280             )
1281         }
1282         #[cfg(target_arch = "x86_64")]
1283         (0xfee0_0000, 0xfeef_ffff)
1284     }
1285 
    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    ///
    /// The returned map is keyed by `(device type, identifier)` and borrows
    /// from `self`, so it reflects whatever has been registered when the
    /// call is made.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }
1291 
1292     #[allow(unused_variables)]
1293     fn add_pci_devices(
1294         &mut self,
1295         virtio_devices: Vec<MetaVirtioDevice>,
1296     ) -> DeviceManagerResult<()> {
1297         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1298 
1299         let iommu_device = if self.config.lock().unwrap().iommu {
1300             let (device, mapping) = virtio_devices::Iommu::new(
1301                 iommu_id.clone(),
1302                 self.seccomp_action.clone(),
1303                 self.exit_evt
1304                     .try_clone()
1305                     .map_err(DeviceManagerError::EventFd)?,
1306                 self.get_msi_iova_space(),
1307                 versioned_state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
1308                     .map_err(DeviceManagerError::RestoreGetState)?,
1309             )
1310             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1311             let device = Arc::new(Mutex::new(device));
1312             self.iommu_device = Some(Arc::clone(&device));
1313             self.iommu_mapping = Some(mapping);
1314 
1315             // Fill the device tree with a new node. In case of restore, we
1316             // know there is nothing to do, so we can simply override the
1317             // existing entry.
1318             self.device_tree
1319                 .lock()
1320                 .unwrap()
1321                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1322 
1323             Some(device)
1324         } else {
1325             None
1326         };
1327 
1328         let mut iommu_attached_devices = Vec::new();
1329         {
1330             for handle in virtio_devices {
1331                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1332                     self.iommu_mapping.clone()
1333                 } else {
1334                     None
1335                 };
1336 
1337                 let dev_id = self.add_virtio_pci_device(
1338                     handle.virtio_device,
1339                     &mapping,
1340                     handle.id,
1341                     handle.pci_segment,
1342                     handle.dma_handler,
1343                 )?;
1344 
1345                 if handle.iommu {
1346                     iommu_attached_devices.push(dev_id);
1347                 }
1348             }
1349 
1350             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1351             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1352 
1353             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1354             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1355 
1356             // Add all devices from forced iommu segments
1357             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1358                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1359                     for segment in iommu_segments {
1360                         for device in 0..32 {
1361                             let bdf = PciBdf::new(*segment, 0, device, 0);
1362                             if !iommu_attached_devices.contains(&bdf) {
1363                                 iommu_attached_devices.push(bdf);
1364                             }
1365                         }
1366                     }
1367                 }
1368             }
1369 
1370             if let Some(iommu_device) = iommu_device {
1371                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1372                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1373             }
1374         }
1375 
1376         for segment in &self.pci_segments {
1377             #[cfg(target_arch = "x86_64")]
1378             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1379                 self.bus_devices
1380                     .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
1381             }
1382 
1383             self.bus_devices
1384                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
1385         }
1386 
1387         Ok(())
1388     }
1389 
1390     #[cfg(target_arch = "aarch64")]
1391     fn add_interrupt_controller(
1392         &mut self,
1393     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1394         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1395             gic::Gic::new(
1396                 self.config.lock().unwrap().cpus.boot_vcpus,
1397                 Arc::clone(&self.msi_interrupt_manager),
1398                 self.address_manager.vm.clone(),
1399             )
1400             .map_err(DeviceManagerError::CreateInterruptController)?,
1401         ));
1402 
1403         self.interrupt_controller = Some(interrupt_controller.clone());
1404 
1405         // Restore the vGic if this is in the process of restoration
1406         let id = String::from(gic::GIC_SNAPSHOT_ID);
1407         if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
1408             // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
1409             if self
1410                 .cpu_manager
1411                 .lock()
1412                 .unwrap()
1413                 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
1414                 .is_err()
1415             {
1416                 info!("Failed to initialize PMU");
1417             }
1418 
1419             let vgic_state = vgic_snapshot
1420                 .to_state()
1421                 .map_err(DeviceManagerError::RestoreGetState)?;
1422             let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
1423             interrupt_controller
1424                 .lock()
1425                 .unwrap()
1426                 .restore_vgic(vgic_state, &saved_vcpu_states)
1427                 .unwrap();
1428         }
1429 
1430         self.device_tree
1431             .lock()
1432             .unwrap()
1433             .insert(id.clone(), device_node!(id, interrupt_controller));
1434 
1435         Ok(interrupt_controller)
1436     }
1437 
    /// Returns a reference to the GIC handle, or `None` if the interrupt
    /// controller has not been created yet.
    #[cfg(target_arch = "aarch64")]
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }
1442 
1443     #[cfg(target_arch = "x86_64")]
1444     fn add_interrupt_controller(
1445         &mut self,
1446     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1447         let id = String::from(IOAPIC_DEVICE_NAME);
1448 
1449         // Create IOAPIC
1450         let interrupt_controller = Arc::new(Mutex::new(
1451             ioapic::Ioapic::new(
1452                 id.clone(),
1453                 APIC_START,
1454                 Arc::clone(&self.msi_interrupt_manager),
1455                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1456                     .map_err(DeviceManagerError::RestoreGetState)?,
1457             )
1458             .map_err(DeviceManagerError::CreateInterruptController)?,
1459         ));
1460 
1461         self.interrupt_controller = Some(interrupt_controller.clone());
1462 
1463         self.address_manager
1464             .mmio_bus
1465             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1466             .map_err(DeviceManagerError::BusError)?;
1467 
1468         self.bus_devices
1469             .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);
1470 
1471         // Fill the device tree with a new node. In case of restore, we
1472         // know there is nothing to do, so we can simply override the
1473         // existing entry.
1474         self.device_tree
1475             .lock()
1476             .unwrap()
1477             .insert(id.clone(), device_node!(id, interrupt_controller));
1478 
1479         Ok(interrupt_controller)
1480     }
1481 
1482     fn add_acpi_devices(
1483         &mut self,
1484         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1485         reset_evt: EventFd,
1486         exit_evt: EventFd,
1487     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1488         let vcpus_kill_signalled = self
1489             .cpu_manager
1490             .lock()
1491             .unwrap()
1492             .vcpus_kill_signalled()
1493             .clone();
1494         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1495             exit_evt,
1496             reset_evt,
1497             vcpus_kill_signalled,
1498         )));
1499 
1500         self.bus_devices
1501             .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);
1502 
1503         #[cfg(target_arch = "x86_64")]
1504         {
1505             let shutdown_pio_address: u16 = 0x600;
1506 
1507             self.address_manager
1508                 .allocator
1509                 .lock()
1510                 .unwrap()
1511                 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
1512                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1513 
1514             self.address_manager
1515                 .io_bus
1516                 .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
1517                 .map_err(DeviceManagerError::BusError)?;
1518 
1519             self.acpi_platform_addresses.sleep_control_reg_address =
1520                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1521             self.acpi_platform_addresses.sleep_status_reg_address =
1522                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1523             self.acpi_platform_addresses.reset_reg_address =
1524                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1525         }
1526 
1527         let ged_irq = self
1528             .address_manager
1529             .allocator
1530             .lock()
1531             .unwrap()
1532             .allocate_irq()
1533             .unwrap();
1534         let interrupt_group = interrupt_manager
1535             .create_group(LegacyIrqGroupConfig {
1536                 irq: ged_irq as InterruptIndex,
1537             })
1538             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1539         let ged_address = self
1540             .address_manager
1541             .allocator
1542             .lock()
1543             .unwrap()
1544             .allocate_platform_mmio_addresses(
1545                 None,
1546                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1547                 None,
1548             )
1549             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1550         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1551             interrupt_group,
1552             ged_irq,
1553             ged_address,
1554         )));
1555         self.address_manager
1556             .mmio_bus
1557             .insert(
1558                 ged_device.clone(),
1559                 ged_address.0,
1560                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1561             )
1562             .map_err(DeviceManagerError::BusError)?;
1563         self.bus_devices
1564             .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);
1565 
1566         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1567 
1568         self.bus_devices
1569             .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);
1570 
1571         #[cfg(target_arch = "x86_64")]
1572         {
1573             let pm_timer_pio_address: u16 = 0x608;
1574 
1575             self.address_manager
1576                 .allocator
1577                 .lock()
1578                 .unwrap()
1579                 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
1580                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1581 
1582             self.address_manager
1583                 .io_bus
1584                 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
1585                 .map_err(DeviceManagerError::BusError)?;
1586 
1587             self.acpi_platform_addresses.pm_timer_address =
1588                 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
1589         }
1590 
1591         Ok(Some(ged_device))
1592     }
1593 
1594     #[cfg(target_arch = "x86_64")]
1595     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1596         let vcpus_kill_signalled = self
1597             .cpu_manager
1598             .lock()
1599             .unwrap()
1600             .vcpus_kill_signalled()
1601             .clone();
1602         // Add a shutdown device (i8042)
1603         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1604             reset_evt.try_clone().unwrap(),
1605             vcpus_kill_signalled.clone(),
1606         )));
1607 
1608         self.bus_devices
1609             .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);
1610 
1611         self.address_manager
1612             .io_bus
1613             .insert(i8042, 0x61, 0x4)
1614             .map_err(DeviceManagerError::BusError)?;
1615         {
1616             // Add a CMOS emulated device
1617             let mem_size = self
1618                 .memory_manager
1619                 .lock()
1620                 .unwrap()
1621                 .guest_memory()
1622                 .memory()
1623                 .last_addr()
1624                 .0
1625                 + 1;
1626             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1627             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1628 
1629             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1630                 mem_below_4g,
1631                 mem_above_4g,
1632                 reset_evt,
1633                 Some(vcpus_kill_signalled),
1634             )));
1635 
1636             self.bus_devices
1637                 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);
1638 
1639             self.address_manager
1640                 .io_bus
1641                 .insert(cmos, 0x70, 0x2)
1642                 .map_err(DeviceManagerError::BusError)?;
1643 
1644             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1645 
1646             self.bus_devices
1647                 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);
1648 
1649             self.address_manager
1650                 .io_bus
1651                 .insert(fwdebug, 0x402, 0x1)
1652                 .map_err(DeviceManagerError::BusError)?;
1653         }
1654 
1655         // 0x80 debug port
1656         let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
1657         self.bus_devices
1658             .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
1659         self.address_manager
1660             .io_bus
1661             .insert(debug_port, 0x80, 0x1)
1662             .map_err(DeviceManagerError::BusError)?;
1663 
1664         Ok(())
1665     }
1666 
1667     #[cfg(target_arch = "aarch64")]
1668     fn add_legacy_devices(
1669         &mut self,
1670         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1671     ) -> DeviceManagerResult<()> {
1672         // Add a RTC device
1673         let rtc_irq = self
1674             .address_manager
1675             .allocator
1676             .lock()
1677             .unwrap()
1678             .allocate_irq()
1679             .unwrap();
1680 
1681         let interrupt_group = interrupt_manager
1682             .create_group(LegacyIrqGroupConfig {
1683                 irq: rtc_irq as InterruptIndex,
1684             })
1685             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1686 
1687         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1688 
1689         self.bus_devices
1690             .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);
1691 
1692         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1693 
1694         self.address_manager
1695             .mmio_bus
1696             .insert(rtc_device, addr.0, MMIO_LEN)
1697             .map_err(DeviceManagerError::BusError)?;
1698 
1699         self.id_to_dev_info.insert(
1700             (DeviceType::Rtc, "rtc".to_string()),
1701             MmioDeviceInfo {
1702                 addr: addr.0,
1703                 len: MMIO_LEN,
1704                 irq: rtc_irq,
1705             },
1706         );
1707 
1708         // Add a GPIO device
1709         let id = String::from(GPIO_DEVICE_NAME);
1710         let gpio_irq = self
1711             .address_manager
1712             .allocator
1713             .lock()
1714             .unwrap()
1715             .allocate_irq()
1716             .unwrap();
1717 
1718         let interrupt_group = interrupt_manager
1719             .create_group(LegacyIrqGroupConfig {
1720                 irq: gpio_irq as InterruptIndex,
1721             })
1722             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1723 
1724         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1725             id.clone(),
1726             interrupt_group,
1727             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1728                 .map_err(DeviceManagerError::RestoreGetState)?,
1729         )));
1730 
1731         self.bus_devices
1732             .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);
1733 
1734         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1735 
1736         self.address_manager
1737             .mmio_bus
1738             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1739             .map_err(DeviceManagerError::BusError)?;
1740 
1741         self.gpio_device = Some(gpio_device.clone());
1742 
1743         self.id_to_dev_info.insert(
1744             (DeviceType::Gpio, "gpio".to_string()),
1745             MmioDeviceInfo {
1746                 addr: addr.0,
1747                 len: MMIO_LEN,
1748                 irq: gpio_irq,
1749             },
1750         );
1751 
1752         self.device_tree
1753             .lock()
1754             .unwrap()
1755             .insert(id.clone(), device_node!(id, gpio_device));
1756 
1757         Ok(())
1758     }
1759 
1760     #[cfg(target_arch = "x86_64")]
1761     fn add_serial_device(
1762         &mut self,
1763         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1764         serial_writer: Option<Box<dyn io::Write + Send>>,
1765     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1766         // Serial is tied to IRQ #4
1767         let serial_irq = 4;
1768 
1769         let id = String::from(SERIAL_DEVICE_NAME);
1770 
1771         let interrupt_group = interrupt_manager
1772             .create_group(LegacyIrqGroupConfig {
1773                 irq: serial_irq as InterruptIndex,
1774             })
1775             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1776 
1777         let serial = Arc::new(Mutex::new(Serial::new(
1778             id.clone(),
1779             interrupt_group,
1780             serial_writer,
1781             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1782                 .map_err(DeviceManagerError::RestoreGetState)?,
1783         )));
1784 
1785         self.bus_devices
1786             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1787 
1788         self.address_manager
1789             .allocator
1790             .lock()
1791             .unwrap()
1792             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1793             .ok_or(DeviceManagerError::AllocateIoPort)?;
1794 
1795         self.address_manager
1796             .io_bus
1797             .insert(serial.clone(), 0x3f8, 0x8)
1798             .map_err(DeviceManagerError::BusError)?;
1799 
1800         // Fill the device tree with a new node. In case of restore, we
1801         // know there is nothing to do, so we can simply override the
1802         // existing entry.
1803         self.device_tree
1804             .lock()
1805             .unwrap()
1806             .insert(id.clone(), device_node!(id, serial));
1807 
1808         Ok(serial)
1809     }
1810 
1811     #[cfg(target_arch = "aarch64")]
1812     fn add_serial_device(
1813         &mut self,
1814         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1815         serial_writer: Option<Box<dyn io::Write + Send>>,
1816     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1817         let id = String::from(SERIAL_DEVICE_NAME);
1818 
1819         let serial_irq = self
1820             .address_manager
1821             .allocator
1822             .lock()
1823             .unwrap()
1824             .allocate_irq()
1825             .unwrap();
1826 
1827         let interrupt_group = interrupt_manager
1828             .create_group(LegacyIrqGroupConfig {
1829                 irq: serial_irq as InterruptIndex,
1830             })
1831             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1832 
1833         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1834             id.clone(),
1835             interrupt_group,
1836             serial_writer,
1837             self.timestamp,
1838             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1839                 .map_err(DeviceManagerError::RestoreGetState)?,
1840         )));
1841 
1842         self.bus_devices
1843             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1844 
1845         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
1846 
1847         self.address_manager
1848             .mmio_bus
1849             .insert(serial.clone(), addr.0, MMIO_LEN)
1850             .map_err(DeviceManagerError::BusError)?;
1851 
1852         self.id_to_dev_info.insert(
1853             (DeviceType::Serial, DeviceType::Serial.to_string()),
1854             MmioDeviceInfo {
1855                 addr: addr.0,
1856                 len: MMIO_LEN,
1857                 irq: serial_irq,
1858             },
1859         );
1860 
1861         self.cmdline_additions
1862             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1863 
1864         // Fill the device tree with a new node. In case of restore, we
1865         // know there is nothing to do, so we can simply override the
1866         // existing entry.
1867         self.device_tree
1868             .lock()
1869             .unwrap()
1870             .insert(id.clone(), device_node!(id, serial));
1871 
1872         Ok(serial)
1873     }
1874 
1875     fn modify_mode<F: FnOnce(&mut termios)>(
1876         &mut self,
1877         fd: RawFd,
1878         f: F,
1879     ) -> vmm_sys_util::errno::Result<()> {
1880         // SAFETY: safe because we check the return value of isatty.
1881         if unsafe { isatty(fd) } != 1 {
1882             return Ok(());
1883         }
1884 
1885         // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr
1886         // and we check the return result.
1887         let mut termios: termios = unsafe { zeroed() };
1888         // SAFETY: see above
1889         let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
1890         if ret < 0 {
1891             return vmm_sys_util::errno::errno_result();
1892         }
1893         let mut original_termios_opt = self.original_termios_opt.lock().unwrap();
1894         if original_termios_opt.is_none() {
1895             *original_termios_opt = Some(termios);
1896         }
1897         f(&mut termios);
1898         // SAFETY: Safe because the syscall will only read the extent of termios and we check
1899         // the return result.
1900         let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
1901         if ret < 0 {
1902             return vmm_sys_util::errno::errno_result();
1903         }
1904 
1905         Ok(())
1906     }
1907 
1908     fn set_raw_mode(&mut self, f: &dyn AsRawFd) -> vmm_sys_util::errno::Result<()> {
1909         // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
1910         self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
1911     }
1912 
1913     fn listen_for_sigwinch_on_tty(&mut self, pty_sub: File) -> std::io::Result<()> {
1914         let seccomp_filter = get_seccomp_filter(
1915             &self.seccomp_action,
1916             Thread::PtyForeground,
1917             self.hypervisor_type,
1918         )
1919         .unwrap();
1920 
1921         self.console_resize_pipe =
1922             Some(Arc::new(start_sigwinch_listener(seccomp_filter, pty_sub)?));
1923 
1924         Ok(())
1925     }
1926 
1927     fn add_virtio_console_device(
1928         &mut self,
1929         virtio_devices: &mut Vec<MetaVirtioDevice>,
1930         console_pty: Option<PtyPair>,
1931         resize_pipe: Option<File>,
1932     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
1933         let console_config = self.config.lock().unwrap().console.clone();
1934         let endpoint = match console_config.mode {
1935             ConsoleOutputMode::File => {
1936                 let file = File::create(console_config.file.as_ref().unwrap())
1937                     .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
1938                 Endpoint::File(file)
1939             }
1940             ConsoleOutputMode::Pty => {
1941                 if let Some(pty) = console_pty {
1942                     self.config.lock().unwrap().console.file = Some(pty.path.clone());
1943                     let file = pty.main.try_clone().unwrap();
1944                     self.console_pty = Some(Arc::new(Mutex::new(pty)));
1945                     self.console_resize_pipe = resize_pipe.map(Arc::new);
1946                     Endpoint::PtyPair(file.try_clone().unwrap(), file)
1947                 } else {
1948                     let (main, sub, path) =
1949                         create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?;
1950                     self.set_raw_mode(&sub)
1951                         .map_err(DeviceManagerError::SetPtyRaw)?;
1952                     self.config.lock().unwrap().console.file = Some(path.clone());
1953                     let file = main.try_clone().unwrap();
1954                     assert!(resize_pipe.is_none());
1955                     self.listen_for_sigwinch_on_tty(sub).unwrap();
1956                     self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
1957                     Endpoint::PtyPair(file.try_clone().unwrap(), file)
1958                 }
1959             }
1960             ConsoleOutputMode::Tty => {
1961                 // Duplicating the file descriptors like this is needed as otherwise
1962                 // they will be closed on a reboot and the numbers reused
1963 
1964                 // SAFETY: FFI call to dup. Trivially safe.
1965                 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
1966                 if stdout == -1 {
1967                     return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
1968                 }
1969                 // SAFETY: stdout is valid and owned solely by us.
1970                 let stdout = unsafe { File::from_raw_fd(stdout) };
1971 
1972                 // Make sure stdout is in raw mode, if it's a terminal.
1973                 let _ = self.set_raw_mode(&stdout);
1974 
1975                 // SAFETY: FFI call. Trivially safe.
1976                 if unsafe { libc::isatty(libc::STDOUT_FILENO) } == 1 {
1977                     self.listen_for_sigwinch_on_tty(stdout.try_clone().unwrap())
1978                         .unwrap();
1979                 }
1980 
1981                 // If an interactive TTY then we can accept input
1982                 // SAFETY: FFI call. Trivially safe.
1983                 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
1984                     // SAFETY: FFI call to dup. Trivially safe.
1985                     let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
1986                     if stdin == -1 {
1987                         return vmm_sys_util::errno::errno_result()
1988                             .map_err(DeviceManagerError::DupFd);
1989                     }
1990                     // SAFETY: stdin is valid and owned solely by us.
1991                     let stdin = unsafe { File::from_raw_fd(stdin) };
1992 
1993                     Endpoint::FilePair(stdout, stdin)
1994                 } else {
1995                     Endpoint::File(stdout)
1996                 }
1997             }
1998             ConsoleOutputMode::Null => Endpoint::Null,
1999             ConsoleOutputMode::Off => return Ok(None),
2000         };
2001         let id = String::from(CONSOLE_DEVICE_NAME);
2002 
2003         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
2004             id.clone(),
2005             endpoint,
2006             self.console_resize_pipe
2007                 .as_ref()
2008                 .map(|p| p.try_clone().unwrap()),
2009             self.force_iommu | console_config.iommu,
2010             self.seccomp_action.clone(),
2011             self.exit_evt
2012                 .try_clone()
2013                 .map_err(DeviceManagerError::EventFd)?,
2014             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2015                 .map_err(DeviceManagerError::RestoreGetState)?,
2016         )
2017         .map_err(DeviceManagerError::CreateVirtioConsole)?;
2018         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
2019         virtio_devices.push(MetaVirtioDevice {
2020             virtio_device: Arc::clone(&virtio_console_device)
2021                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2022             iommu: console_config.iommu,
2023             id: id.clone(),
2024             pci_segment: 0,
2025             dma_handler: None,
2026         });
2027 
2028         // Fill the device tree with a new node. In case of restore, we
2029         // know there is nothing to do, so we can simply override the
2030         // existing entry.
2031         self.device_tree
2032             .lock()
2033             .unwrap()
2034             .insert(id.clone(), device_node!(id, virtio_console_device));
2035 
2036         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
2037         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
2038             Some(console_resizer)
2039         } else {
2040             None
2041         })
2042     }
2043 
2044     fn add_console_device(
2045         &mut self,
2046         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
2047         virtio_devices: &mut Vec<MetaVirtioDevice>,
2048         serial_pty: Option<PtyPair>,
2049         console_pty: Option<PtyPair>,
2050         console_resize_pipe: Option<File>,
2051     ) -> DeviceManagerResult<Arc<Console>> {
2052         let serial_config = self.config.lock().unwrap().serial.clone();
2053         let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
2054             ConsoleOutputMode::File => Some(Box::new(
2055                 File::create(serial_config.file.as_ref().unwrap())
2056                     .map_err(DeviceManagerError::SerialOutputFileOpen)?,
2057             )),
2058             ConsoleOutputMode::Pty => {
2059                 if let Some(pty) = serial_pty {
2060                     self.config.lock().unwrap().serial.file = Some(pty.path.clone());
2061                     self.serial_pty = Some(Arc::new(Mutex::new(pty)));
2062                 } else {
2063                     let (main, sub, path) =
2064                         create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
2065                     self.set_raw_mode(&sub)
2066                         .map_err(DeviceManagerError::SetPtyRaw)?;
2067                     self.config.lock().unwrap().serial.file = Some(path.clone());
2068                     self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
2069                 }
2070                 None
2071             }
2072             ConsoleOutputMode::Tty => {
2073                 let out = stdout();
2074                 let _ = self.set_raw_mode(&out);
2075                 Some(Box::new(out))
2076             }
2077             ConsoleOutputMode::Off | ConsoleOutputMode::Null => None,
2078         };
2079         if serial_config.mode != ConsoleOutputMode::Off {
2080             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
2081             self.serial_manager = match serial_config.mode {
2082                 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty => {
2083                     let serial_manager =
2084                         SerialManager::new(serial, self.serial_pty.clone(), serial_config.mode)
2085                             .map_err(DeviceManagerError::CreateSerialManager)?;
2086                     if let Some(mut serial_manager) = serial_manager {
2087                         serial_manager
2088                             .start_thread(
2089                                 self.exit_evt
2090                                     .try_clone()
2091                                     .map_err(DeviceManagerError::EventFd)?,
2092                             )
2093                             .map_err(DeviceManagerError::SpawnSerialManager)?;
2094                         Some(Arc::new(serial_manager))
2095                     } else {
2096                         None
2097                     }
2098                 }
2099                 _ => None,
2100             };
2101         }
2102 
2103         let console_resizer =
2104             self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;
2105 
2106         Ok(Arc::new(Console { console_resizer }))
2107     }
2108 
2109     fn add_tpm_device(
2110         &mut self,
2111         tpm_path: PathBuf,
2112     ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
2113         // Create TPM Device
2114         let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
2115             DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
2116         })?;
2117         let tpm = Arc::new(Mutex::new(tpm));
2118 
2119         // Add TPM Device to mmio
2120         self.address_manager
2121             .mmio_bus
2122             .insert(
2123                 tpm.clone(),
2124                 arch::layout::TPM_START.0,
2125                 arch::layout::TPM_SIZE,
2126             )
2127             .map_err(DeviceManagerError::BusError)?;
2128 
2129         Ok(tpm)
2130     }
2131 
2132     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2133         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
2134 
2135         // Create "standard" virtio devices (net/block/rng)
2136         devices.append(&mut self.make_virtio_block_devices()?);
2137         devices.append(&mut self.make_virtio_net_devices()?);
2138         devices.append(&mut self.make_virtio_rng_devices()?);
2139 
2140         // Add virtio-fs if required
2141         devices.append(&mut self.make_virtio_fs_devices()?);
2142 
2143         // Add virtio-pmem if required
2144         devices.append(&mut self.make_virtio_pmem_devices()?);
2145 
2146         // Add virtio-vsock if required
2147         devices.append(&mut self.make_virtio_vsock_devices()?);
2148 
2149         devices.append(&mut self.make_virtio_mem_devices()?);
2150 
2151         // Add virtio-balloon if required
2152         devices.append(&mut self.make_virtio_balloon_devices()?);
2153 
2154         // Add virtio-watchdog device
2155         devices.append(&mut self.make_virtio_watchdog_devices()?);
2156 
2157         // Add vDPA devices if required
2158         devices.append(&mut self.make_vdpa_devices()?);
2159 
2160         Ok(devices)
2161     }
2162 
2163     // Cache whether io_uring is supported to avoid probing for very block device
2164     fn io_uring_is_supported(&mut self) -> bool {
2165         if let Some(supported) = self.io_uring_supported {
2166             return supported;
2167         }
2168 
2169         let supported = block_io_uring_is_supported();
2170         self.io_uring_supported = Some(supported);
2171         supported
2172     }
2173 
2174     fn make_virtio_block_device(
2175         &mut self,
2176         disk_cfg: &mut DiskConfig,
2177     ) -> DeviceManagerResult<MetaVirtioDevice> {
2178         let id = if let Some(id) = &disk_cfg.id {
2179             id.clone()
2180         } else {
2181             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
2182             disk_cfg.id = Some(id.clone());
2183             id
2184         };
2185 
2186         info!("Creating virtio-block device: {:?}", disk_cfg);
2187 
2188         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
2189 
2190         let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
2191             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
2192             let vu_cfg = VhostUserConfig {
2193                 socket,
2194                 num_queues: disk_cfg.num_queues,
2195                 queue_size: disk_cfg.queue_size,
2196             };
2197             let vhost_user_block = Arc::new(Mutex::new(
2198                 match virtio_devices::vhost_user::Blk::new(
2199                     id.clone(),
2200                     vu_cfg,
2201                     self.seccomp_action.clone(),
2202                     self.exit_evt
2203                         .try_clone()
2204                         .map_err(DeviceManagerError::EventFd)?,
2205                     self.force_iommu,
2206                     snapshot
2207                         .map(|s| s.to_versioned_state())
2208                         .transpose()
2209                         .map_err(DeviceManagerError::RestoreGetState)?,
2210                 ) {
2211                     Ok(vub_device) => vub_device,
2212                     Err(e) => {
2213                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2214                     }
2215                 },
2216             ));
2217 
2218             (
2219                 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2220                 vhost_user_block as Arc<Mutex<dyn Migratable>>,
2221             )
2222         } else {
2223             let mut options = OpenOptions::new();
2224             options.read(true);
2225             options.write(!disk_cfg.readonly);
2226             if disk_cfg.direct {
2227                 options.custom_flags(libc::O_DIRECT);
2228             }
2229             // Open block device path
2230             let mut file: File = options
2231                 .open(
2232                     disk_cfg
2233                         .path
2234                         .as_ref()
2235                         .ok_or(DeviceManagerError::NoDiskPath)?
2236                         .clone(),
2237                 )
2238                 .map_err(DeviceManagerError::Disk)?;
2239             let image_type =
2240                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2241 
2242             let image = match image_type {
2243                 ImageType::FixedVhd => {
2244                     // Use asynchronous backend relying on io_uring if the
2245                     // syscalls are supported.
2246                     if cfg!(feature = "io_uring")
2247                         && !disk_cfg.disable_io_uring
2248                         && self.io_uring_is_supported()
2249                     {
2250                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2251 
2252                         #[cfg(not(feature = "io_uring"))]
2253                         unreachable!("Checked in if statement above");
2254                         #[cfg(feature = "io_uring")]
2255                         {
2256                             Box::new(
2257                                 FixedVhdDiskAsync::new(file)
2258                                     .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2259                             ) as Box<dyn DiskFile>
2260                         }
2261                     } else {
2262                         info!("Using synchronous fixed VHD disk file");
2263                         Box::new(
2264                             FixedVhdDiskSync::new(file)
2265                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2266                         ) as Box<dyn DiskFile>
2267                     }
2268                 }
2269                 ImageType::Raw => {
2270                     // Use asynchronous backend relying on io_uring if the
2271                     // syscalls are supported.
2272                     if cfg!(feature = "io_uring")
2273                         && !disk_cfg.disable_io_uring
2274                         && self.io_uring_is_supported()
2275                     {
2276                         info!("Using asynchronous RAW disk file (io_uring)");
2277 
2278                         #[cfg(not(feature = "io_uring"))]
2279                         unreachable!("Checked in if statement above");
2280                         #[cfg(feature = "io_uring")]
2281                         {
2282                             Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2283                         }
2284                     } else {
2285                         info!("Using synchronous RAW disk file");
2286                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2287                     }
2288                 }
2289                 ImageType::Qcow2 => {
2290                     info!("Using synchronous QCOW disk file");
2291                     Box::new(
2292                         QcowDiskSync::new(file, disk_cfg.direct)
2293                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2294                     ) as Box<dyn DiskFile>
2295                 }
2296                 ImageType::Vhdx => {
2297                     info!("Using synchronous VHDX disk file");
2298                     Box::new(
2299                         VhdxDiskSync::new(file)
2300                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2301                     ) as Box<dyn DiskFile>
2302                 }
2303             };
2304 
2305             let virtio_block = Arc::new(Mutex::new(
2306                 virtio_devices::Block::new(
2307                     id.clone(),
2308                     image,
2309                     disk_cfg
2310                         .path
2311                         .as_ref()
2312                         .ok_or(DeviceManagerError::NoDiskPath)?
2313                         .clone(),
2314                     disk_cfg.readonly,
2315                     self.force_iommu | disk_cfg.iommu,
2316                     disk_cfg.num_queues,
2317                     disk_cfg.queue_size,
2318                     self.seccomp_action.clone(),
2319                     disk_cfg.rate_limiter_config,
2320                     self.exit_evt
2321                         .try_clone()
2322                         .map_err(DeviceManagerError::EventFd)?,
2323                     snapshot
2324                         .map(|s| s.to_versioned_state())
2325                         .transpose()
2326                         .map_err(DeviceManagerError::RestoreGetState)?,
2327                 )
2328                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2329             ));
2330 
2331             (
2332                 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2333                 virtio_block as Arc<Mutex<dyn Migratable>>,
2334             )
2335         };
2336 
2337         // Fill the device tree with a new node. In case of restore, we
2338         // know there is nothing to do, so we can simply override the
2339         // existing entry.
2340         self.device_tree
2341             .lock()
2342             .unwrap()
2343             .insert(id.clone(), device_node!(id, migratable_device));
2344 
2345         Ok(MetaVirtioDevice {
2346             virtio_device,
2347             iommu: disk_cfg.iommu,
2348             id,
2349             pci_segment: disk_cfg.pci_segment,
2350             dma_handler: None,
2351         })
2352     }
2353 
2354     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2355         let mut devices = Vec::new();
2356 
2357         let mut block_devices = self.config.lock().unwrap().disks.clone();
2358         if let Some(disk_list_cfg) = &mut block_devices {
2359             for disk_cfg in disk_list_cfg.iter_mut() {
2360                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2361             }
2362         }
2363         self.config.lock().unwrap().disks = block_devices;
2364 
2365         Ok(devices)
2366     }
2367 
2368     fn make_virtio_net_device(
2369         &mut self,
2370         net_cfg: &mut NetConfig,
2371     ) -> DeviceManagerResult<MetaVirtioDevice> {
2372         let id = if let Some(id) = &net_cfg.id {
2373             id.clone()
2374         } else {
2375             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2376             net_cfg.id = Some(id.clone());
2377             id
2378         };
2379         info!("Creating virtio-net device: {:?}", net_cfg);
2380 
2381         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
2382 
2383         let (virtio_device, migratable_device) = if net_cfg.vhost_user {
2384             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2385             let vu_cfg = VhostUserConfig {
2386                 socket,
2387                 num_queues: net_cfg.num_queues,
2388                 queue_size: net_cfg.queue_size,
2389             };
2390             let server = match net_cfg.vhost_mode {
2391                 VhostMode::Client => false,
2392                 VhostMode::Server => true,
2393             };
2394             let vhost_user_net = Arc::new(Mutex::new(
2395                 match virtio_devices::vhost_user::Net::new(
2396                     id.clone(),
2397                     net_cfg.mac,
2398                     net_cfg.mtu,
2399                     vu_cfg,
2400                     server,
2401                     self.seccomp_action.clone(),
2402                     self.exit_evt
2403                         .try_clone()
2404                         .map_err(DeviceManagerError::EventFd)?,
2405                     self.force_iommu,
2406                     snapshot
2407                         .map(|s| s.to_versioned_state())
2408                         .transpose()
2409                         .map_err(DeviceManagerError::RestoreGetState)?,
2410                     net_cfg.offload_tso,
2411                     net_cfg.offload_ufo,
2412                     net_cfg.offload_csum,
2413                 ) {
2414                     Ok(vun_device) => vun_device,
2415                     Err(e) => {
2416                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2417                     }
2418                 },
2419             ));
2420 
2421             (
2422                 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2423                 vhost_user_net as Arc<Mutex<dyn Migratable>>,
2424             )
2425         } else {
2426             let state = snapshot
2427                 .map(|s| s.to_versioned_state())
2428                 .transpose()
2429                 .map_err(DeviceManagerError::RestoreGetState)?;
2430 
2431             let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
2432                 Arc::new(Mutex::new(
2433                     virtio_devices::Net::new(
2434                         id.clone(),
2435                         Some(tap_if_name),
2436                         None,
2437                         None,
2438                         Some(net_cfg.mac),
2439                         &mut net_cfg.host_mac,
2440                         net_cfg.mtu,
2441                         self.force_iommu | net_cfg.iommu,
2442                         net_cfg.num_queues,
2443                         net_cfg.queue_size,
2444                         self.seccomp_action.clone(),
2445                         net_cfg.rate_limiter_config,
2446                         self.exit_evt
2447                             .try_clone()
2448                             .map_err(DeviceManagerError::EventFd)?,
2449                         state,
2450                         net_cfg.offload_tso,
2451                         net_cfg.offload_ufo,
2452                         net_cfg.offload_csum,
2453                     )
2454                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2455                 ))
2456             } else if let Some(fds) = &net_cfg.fds {
2457                 let net = virtio_devices::Net::from_tap_fds(
2458                     id.clone(),
2459                     fds,
2460                     Some(net_cfg.mac),
2461                     net_cfg.mtu,
2462                     self.force_iommu | net_cfg.iommu,
2463                     net_cfg.queue_size,
2464                     self.seccomp_action.clone(),
2465                     net_cfg.rate_limiter_config,
2466                     self.exit_evt
2467                         .try_clone()
2468                         .map_err(DeviceManagerError::EventFd)?,
2469                     state,
2470                     net_cfg.offload_tso,
2471                     net_cfg.offload_ufo,
2472                     net_cfg.offload_csum,
2473                 )
2474                 .map_err(DeviceManagerError::CreateVirtioNet)?;
2475 
2476                 // SAFETY: 'fds' are valid because TAP devices are created successfully
2477                 unsafe {
2478                     self.config.lock().unwrap().add_preserved_fds(fds.clone());
2479                 }
2480 
2481                 Arc::new(Mutex::new(net))
2482             } else {
2483                 Arc::new(Mutex::new(
2484                     virtio_devices::Net::new(
2485                         id.clone(),
2486                         None,
2487                         Some(net_cfg.ip),
2488                         Some(net_cfg.mask),
2489                         Some(net_cfg.mac),
2490                         &mut net_cfg.host_mac,
2491                         net_cfg.mtu,
2492                         self.force_iommu | net_cfg.iommu,
2493                         net_cfg.num_queues,
2494                         net_cfg.queue_size,
2495                         self.seccomp_action.clone(),
2496                         net_cfg.rate_limiter_config,
2497                         self.exit_evt
2498                             .try_clone()
2499                             .map_err(DeviceManagerError::EventFd)?,
2500                         state,
2501                         net_cfg.offload_tso,
2502                         net_cfg.offload_ufo,
2503                         net_cfg.offload_csum,
2504                     )
2505                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2506                 ))
2507             };
2508 
2509             (
2510                 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2511                 virtio_net as Arc<Mutex<dyn Migratable>>,
2512             )
2513         };
2514 
2515         // Fill the device tree with a new node. In case of restore, we
2516         // know there is nothing to do, so we can simply override the
2517         // existing entry.
2518         self.device_tree
2519             .lock()
2520             .unwrap()
2521             .insert(id.clone(), device_node!(id, migratable_device));
2522 
2523         Ok(MetaVirtioDevice {
2524             virtio_device,
2525             iommu: net_cfg.iommu,
2526             id,
2527             pci_segment: net_cfg.pci_segment,
2528             dma_handler: None,
2529         })
2530     }
2531 
2532     /// Add virto-net and vhost-user-net devices
2533     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2534         let mut devices = Vec::new();
2535         let mut net_devices = self.config.lock().unwrap().net.clone();
2536         if let Some(net_list_cfg) = &mut net_devices {
2537             for net_cfg in net_list_cfg.iter_mut() {
2538                 devices.push(self.make_virtio_net_device(net_cfg)?);
2539             }
2540         }
2541         self.config.lock().unwrap().net = net_devices;
2542 
2543         Ok(devices)
2544     }
2545 
2546     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2547         let mut devices = Vec::new();
2548 
2549         // Add virtio-rng if required
2550         let rng_config = self.config.lock().unwrap().rng.clone();
2551         if let Some(rng_path) = rng_config.src.to_str() {
2552             info!("Creating virtio-rng device: {:?}", rng_config);
2553             let id = String::from(RNG_DEVICE_NAME);
2554 
2555             let virtio_rng_device = Arc::new(Mutex::new(
2556                 virtio_devices::Rng::new(
2557                     id.clone(),
2558                     rng_path,
2559                     self.force_iommu | rng_config.iommu,
2560                     self.seccomp_action.clone(),
2561                     self.exit_evt
2562                         .try_clone()
2563                         .map_err(DeviceManagerError::EventFd)?,
2564                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2565                         .map_err(DeviceManagerError::RestoreGetState)?,
2566                 )
2567                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2568             ));
2569             devices.push(MetaVirtioDevice {
2570                 virtio_device: Arc::clone(&virtio_rng_device)
2571                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2572                 iommu: rng_config.iommu,
2573                 id: id.clone(),
2574                 pci_segment: 0,
2575                 dma_handler: None,
2576             });
2577 
2578             // Fill the device tree with a new node. In case of restore, we
2579             // know there is nothing to do, so we can simply override the
2580             // existing entry.
2581             self.device_tree
2582                 .lock()
2583                 .unwrap()
2584                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2585         }
2586 
2587         Ok(devices)
2588     }
2589 
2590     fn make_virtio_fs_device(
2591         &mut self,
2592         fs_cfg: &mut FsConfig,
2593     ) -> DeviceManagerResult<MetaVirtioDevice> {
2594         let id = if let Some(id) = &fs_cfg.id {
2595             id.clone()
2596         } else {
2597             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2598             fs_cfg.id = Some(id.clone());
2599             id
2600         };
2601 
2602         info!("Creating virtio-fs device: {:?}", fs_cfg);
2603 
2604         let mut node = device_node!(id);
2605 
2606         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2607             let virtio_fs_device = Arc::new(Mutex::new(
2608                 virtio_devices::vhost_user::Fs::new(
2609                     id.clone(),
2610                     fs_socket,
2611                     &fs_cfg.tag,
2612                     fs_cfg.num_queues,
2613                     fs_cfg.queue_size,
2614                     None,
2615                     self.seccomp_action.clone(),
2616                     self.exit_evt
2617                         .try_clone()
2618                         .map_err(DeviceManagerError::EventFd)?,
2619                     self.force_iommu,
2620                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2621                         .map_err(DeviceManagerError::RestoreGetState)?,
2622                 )
2623                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2624             ));
2625 
2626             // Update the device tree with the migratable device.
2627             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2628             self.device_tree.lock().unwrap().insert(id.clone(), node);
2629 
2630             Ok(MetaVirtioDevice {
2631                 virtio_device: Arc::clone(&virtio_fs_device)
2632                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2633                 iommu: false,
2634                 id,
2635                 pci_segment: fs_cfg.pci_segment,
2636                 dma_handler: None,
2637             })
2638         } else {
2639             Err(DeviceManagerError::NoVirtioFsSock)
2640         }
2641     }
2642 
2643     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2644         let mut devices = Vec::new();
2645 
2646         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2647         if let Some(fs_list_cfg) = &mut fs_devices {
2648             for fs_cfg in fs_list_cfg.iter_mut() {
2649                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2650             }
2651         }
2652         self.config.lock().unwrap().fs = fs_devices;
2653 
2654         Ok(devices)
2655     }
2656 
2657     fn make_virtio_pmem_device(
2658         &mut self,
2659         pmem_cfg: &mut PmemConfig,
2660     ) -> DeviceManagerResult<MetaVirtioDevice> {
2661         let id = if let Some(id) = &pmem_cfg.id {
2662             id.clone()
2663         } else {
2664             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2665             pmem_cfg.id = Some(id.clone());
2666             id
2667         };
2668 
2669         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2670 
2671         let mut node = device_node!(id);
2672 
2673         // Look for the id in the device tree. If it can be found, that means
2674         // the device is being restored, otherwise it's created from scratch.
2675         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2676             info!("Restoring virtio-pmem {} resources", id);
2677 
2678             let mut region_range: Option<(u64, u64)> = None;
2679             for resource in node.resources.iter() {
2680                 match resource {
2681                     Resource::MmioAddressRange { base, size } => {
2682                         if region_range.is_some() {
2683                             return Err(DeviceManagerError::ResourceAlreadyExists);
2684                         }
2685 
2686                         region_range = Some((*base, *size));
2687                     }
2688                     _ => {
2689                         error!("Unexpected resource {:?} for {}", resource, id);
2690                     }
2691                 }
2692             }
2693 
2694             if region_range.is_none() {
2695                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2696             }
2697 
2698             region_range
2699         } else {
2700             None
2701         };
2702 
2703         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2704             if pmem_cfg.size.is_none() {
2705                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2706             }
2707             (O_TMPFILE, true)
2708         } else {
2709             (0, false)
2710         };
2711 
2712         let mut file = OpenOptions::new()
2713             .read(true)
2714             .write(!pmem_cfg.discard_writes)
2715             .custom_flags(custom_flags)
2716             .open(&pmem_cfg.file)
2717             .map_err(DeviceManagerError::PmemFileOpen)?;
2718 
2719         let size = if let Some(size) = pmem_cfg.size {
2720             if set_len {
2721                 file.set_len(size)
2722                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2723             }
2724             size
2725         } else {
2726             file.seek(SeekFrom::End(0))
2727                 .map_err(DeviceManagerError::PmemFileSetLen)?
2728         };
2729 
2730         if size % 0x20_0000 != 0 {
2731             return Err(DeviceManagerError::PmemSizeNotAligned);
2732         }
2733 
2734         let (region_base, region_size) = if let Some((base, size)) = region_range {
2735             // The memory needs to be 2MiB aligned in order to support
2736             // hugepages.
2737             self.pci_segments[pmem_cfg.pci_segment as usize]
2738                 .allocator
2739                 .lock()
2740                 .unwrap()
2741                 .allocate(
2742                     Some(GuestAddress(base)),
2743                     size as GuestUsize,
2744                     Some(0x0020_0000),
2745                 )
2746                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2747 
2748             (base, size)
2749         } else {
2750             // The memory needs to be 2MiB aligned in order to support
2751             // hugepages.
2752             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2753                 .allocator
2754                 .lock()
2755                 .unwrap()
2756                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
2757                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2758 
2759             (base.raw_value(), size)
2760         };
2761 
2762         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2763         let mmap_region = MmapRegion::build(
2764             Some(FileOffset::new(cloned_file, 0)),
2765             region_size as usize,
2766             PROT_READ | PROT_WRITE,
2767             MAP_NORESERVE
2768                 | if pmem_cfg.discard_writes {
2769                     MAP_PRIVATE
2770                 } else {
2771                     MAP_SHARED
2772                 },
2773         )
2774         .map_err(DeviceManagerError::NewMmapRegion)?;
2775         let host_addr: u64 = mmap_region.as_ptr() as u64;
2776 
2777         let mem_slot = self
2778             .memory_manager
2779             .lock()
2780             .unwrap()
2781             .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
2782             .map_err(DeviceManagerError::MemoryManager)?;
2783 
2784         let mapping = virtio_devices::UserspaceMapping {
2785             host_addr,
2786             mem_slot,
2787             addr: GuestAddress(region_base),
2788             len: region_size,
2789             mergeable: false,
2790         };
2791 
2792         let virtio_pmem_device = Arc::new(Mutex::new(
2793             virtio_devices::Pmem::new(
2794                 id.clone(),
2795                 file,
2796                 GuestAddress(region_base),
2797                 mapping,
2798                 mmap_region,
2799                 self.force_iommu | pmem_cfg.iommu,
2800                 self.seccomp_action.clone(),
2801                 self.exit_evt
2802                     .try_clone()
2803                     .map_err(DeviceManagerError::EventFd)?,
2804                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2805                     .map_err(DeviceManagerError::RestoreGetState)?,
2806             )
2807             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2808         ));
2809 
2810         // Update the device tree with correct resource information and with
2811         // the migratable device.
2812         node.resources.push(Resource::MmioAddressRange {
2813             base: region_base,
2814             size: region_size,
2815         });
2816         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2817         self.device_tree.lock().unwrap().insert(id.clone(), node);
2818 
2819         Ok(MetaVirtioDevice {
2820             virtio_device: Arc::clone(&virtio_pmem_device)
2821                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2822             iommu: pmem_cfg.iommu,
2823             id,
2824             pci_segment: pmem_cfg.pci_segment,
2825             dma_handler: None,
2826         })
2827     }
2828 
2829     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2830         let mut devices = Vec::new();
2831         // Add virtio-pmem if required
2832         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2833         if let Some(pmem_list_cfg) = &mut pmem_devices {
2834             for pmem_cfg in pmem_list_cfg.iter_mut() {
2835                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2836             }
2837         }
2838         self.config.lock().unwrap().pmem = pmem_devices;
2839 
2840         Ok(devices)
2841     }
2842 
2843     fn make_virtio_vsock_device(
2844         &mut self,
2845         vsock_cfg: &mut VsockConfig,
2846     ) -> DeviceManagerResult<MetaVirtioDevice> {
2847         let id = if let Some(id) = &vsock_cfg.id {
2848             id.clone()
2849         } else {
2850             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2851             vsock_cfg.id = Some(id.clone());
2852             id
2853         };
2854 
2855         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2856 
2857         let socket_path = vsock_cfg
2858             .socket
2859             .to_str()
2860             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2861         let backend =
2862             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2863                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2864 
2865         let vsock_device = Arc::new(Mutex::new(
2866             virtio_devices::Vsock::new(
2867                 id.clone(),
2868                 vsock_cfg.cid,
2869                 vsock_cfg.socket.clone(),
2870                 backend,
2871                 self.force_iommu | vsock_cfg.iommu,
2872                 self.seccomp_action.clone(),
2873                 self.exit_evt
2874                     .try_clone()
2875                     .map_err(DeviceManagerError::EventFd)?,
2876                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2877                     .map_err(DeviceManagerError::RestoreGetState)?,
2878             )
2879             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2880         ));
2881 
2882         // Fill the device tree with a new node. In case of restore, we
2883         // know there is nothing to do, so we can simply override the
2884         // existing entry.
2885         self.device_tree
2886             .lock()
2887             .unwrap()
2888             .insert(id.clone(), device_node!(id, vsock_device));
2889 
2890         Ok(MetaVirtioDevice {
2891             virtio_device: Arc::clone(&vsock_device)
2892                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2893             iommu: vsock_cfg.iommu,
2894             id,
2895             pci_segment: vsock_cfg.pci_segment,
2896             dma_handler: None,
2897         })
2898     }
2899 
2900     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2901         let mut devices = Vec::new();
2902 
2903         let mut vsock = self.config.lock().unwrap().vsock.clone();
2904         if let Some(ref mut vsock_cfg) = &mut vsock {
2905             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
2906         }
2907         self.config.lock().unwrap().vsock = vsock;
2908 
2909         Ok(devices)
2910     }
2911 
2912     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2913         let mut devices = Vec::new();
2914 
2915         let mm = self.memory_manager.clone();
2916         let mut mm = mm.lock().unwrap();
2917         for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
2918             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
2919                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
2920 
2921                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
2922                     .map(|i| i as u16);
2923 
2924                 let virtio_mem_device = Arc::new(Mutex::new(
2925                     virtio_devices::Mem::new(
2926                         memory_zone_id.clone(),
2927                         virtio_mem_zone.region(),
2928                         self.seccomp_action.clone(),
2929                         node_id,
2930                         virtio_mem_zone.hotplugged_size(),
2931                         virtio_mem_zone.hugepages(),
2932                         self.exit_evt
2933                             .try_clone()
2934                             .map_err(DeviceManagerError::EventFd)?,
2935                         virtio_mem_zone.blocks_state().clone(),
2936                         versioned_state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
2937                             .map_err(DeviceManagerError::RestoreGetState)?,
2938                     )
2939                     .map_err(DeviceManagerError::CreateVirtioMem)?,
2940                 ));
2941 
2942                 // Update the virtio-mem zone so that it has a handle onto the
2943                 // virtio-mem device, which will be used for triggering a resize
2944                 // if needed.
2945                 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));
2946 
2947                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
2948 
2949                 devices.push(MetaVirtioDevice {
2950                     virtio_device: Arc::clone(&virtio_mem_device)
2951                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2952                     iommu: false,
2953                     id: memory_zone_id.clone(),
2954                     pci_segment: 0,
2955                     dma_handler: None,
2956                 });
2957 
2958                 // Fill the device tree with a new node. In case of restore, we
2959                 // know there is nothing to do, so we can simply override the
2960                 // existing entry.
2961                 self.device_tree.lock().unwrap().insert(
2962                     memory_zone_id.clone(),
2963                     device_node!(memory_zone_id, virtio_mem_device),
2964                 );
2965             }
2966         }
2967 
2968         Ok(devices)
2969     }
2970 
2971     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2972         let mut devices = Vec::new();
2973 
2974         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
2975             let id = String::from(BALLOON_DEVICE_NAME);
2976             info!("Creating virtio-balloon device: id = {}", id);
2977 
2978             let virtio_balloon_device = Arc::new(Mutex::new(
2979                 virtio_devices::Balloon::new(
2980                     id.clone(),
2981                     balloon_config.size,
2982                     balloon_config.deflate_on_oom,
2983                     balloon_config.free_page_reporting,
2984                     self.seccomp_action.clone(),
2985                     self.exit_evt
2986                         .try_clone()
2987                         .map_err(DeviceManagerError::EventFd)?,
2988                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2989                         .map_err(DeviceManagerError::RestoreGetState)?,
2990                 )
2991                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
2992             ));
2993 
2994             self.balloon = Some(virtio_balloon_device.clone());
2995 
2996             devices.push(MetaVirtioDevice {
2997                 virtio_device: Arc::clone(&virtio_balloon_device)
2998                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2999                 iommu: false,
3000                 id: id.clone(),
3001                 pci_segment: 0,
3002                 dma_handler: None,
3003             });
3004 
3005             self.device_tree
3006                 .lock()
3007                 .unwrap()
3008                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
3009         }
3010 
3011         Ok(devices)
3012     }
3013 
3014     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3015         let mut devices = Vec::new();
3016 
3017         if !self.config.lock().unwrap().watchdog {
3018             return Ok(devices);
3019         }
3020 
3021         let id = String::from(WATCHDOG_DEVICE_NAME);
3022         info!("Creating virtio-watchdog device: id = {}", id);
3023 
3024         let virtio_watchdog_device = Arc::new(Mutex::new(
3025             virtio_devices::Watchdog::new(
3026                 id.clone(),
3027                 self.reset_evt.try_clone().unwrap(),
3028                 self.seccomp_action.clone(),
3029                 self.exit_evt
3030                     .try_clone()
3031                     .map_err(DeviceManagerError::EventFd)?,
3032                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
3033                     .map_err(DeviceManagerError::RestoreGetState)?,
3034             )
3035             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
3036         ));
3037         devices.push(MetaVirtioDevice {
3038             virtio_device: Arc::clone(&virtio_watchdog_device)
3039                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3040             iommu: false,
3041             id: id.clone(),
3042             pci_segment: 0,
3043             dma_handler: None,
3044         });
3045 
3046         self.device_tree
3047             .lock()
3048             .unwrap()
3049             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
3050 
3051         Ok(devices)
3052     }
3053 
3054     fn make_vdpa_device(
3055         &mut self,
3056         vdpa_cfg: &mut VdpaConfig,
3057     ) -> DeviceManagerResult<MetaVirtioDevice> {
3058         let id = if let Some(id) = &vdpa_cfg.id {
3059             id.clone()
3060         } else {
3061             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
3062             vdpa_cfg.id = Some(id.clone());
3063             id
3064         };
3065 
3066         info!("Creating vDPA device: {:?}", vdpa_cfg);
3067 
3068         let device_path = vdpa_cfg
3069             .path
3070             .to_str()
3071             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
3072 
3073         let vdpa_device = Arc::new(Mutex::new(
3074             virtio_devices::Vdpa::new(
3075                 id.clone(),
3076                 device_path,
3077                 self.memory_manager.lock().unwrap().guest_memory(),
3078                 vdpa_cfg.num_queues as u16,
3079                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
3080                     .map_err(DeviceManagerError::RestoreGetState)?,
3081             )
3082             .map_err(DeviceManagerError::CreateVdpa)?,
3083         ));
3084 
3085         // Create the DMA handler that is required by the vDPA device
3086         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
3087             Arc::clone(&vdpa_device),
3088             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3089         ));
3090 
3091         self.device_tree
3092             .lock()
3093             .unwrap()
3094             .insert(id.clone(), device_node!(id, vdpa_device));
3095 
3096         Ok(MetaVirtioDevice {
3097             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3098             iommu: vdpa_cfg.iommu,
3099             id,
3100             pci_segment: vdpa_cfg.pci_segment,
3101             dma_handler: Some(vdpa_mapping),
3102         })
3103     }
3104 
3105     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3106         let mut devices = Vec::new();
3107         // Add vdpa if required
3108         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
3109         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
3110             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
3111                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
3112             }
3113         }
3114         self.config.lock().unwrap().vdpa = vdpa_devices;
3115 
3116         Ok(devices)
3117     }
3118 
3119     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
3120         let start_id = self.device_id_cnt;
3121         loop {
3122             // Generate the temporary name.
3123             let name = format!("{}{}", prefix, self.device_id_cnt);
3124             // Increment the counter.
3125             self.device_id_cnt += Wrapping(1);
3126             // Check if the name is already in use.
3127             if !self.boot_id_list.contains(&name)
3128                 && !self.device_tree.lock().unwrap().contains_key(&name)
3129             {
3130                 return Ok(name);
3131             }
3132 
3133             if self.device_id_cnt == start_id {
3134                 // We went through a full loop and there's nothing else we can
3135                 // do.
3136                 break;
3137             }
3138         }
3139         Err(DeviceManagerError::NoAvailableDeviceName)
3140     }
3141 
3142     fn add_passthrough_device(
3143         &mut self,
3144         device_cfg: &mut DeviceConfig,
3145     ) -> DeviceManagerResult<(PciBdf, String)> {
3146         // If the passthrough device has not been created yet, it is created
3147         // here and stored in the DeviceManager structure for future needs.
3148         if self.passthrough_device.is_none() {
3149             self.passthrough_device = Some(
3150                 self.address_manager
3151                     .vm
3152                     .create_passthrough_device()
3153                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
3154             );
3155         }
3156 
3157         self.add_vfio_device(device_cfg)
3158     }
3159 
3160     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
3161         let passthrough_device = self
3162             .passthrough_device
3163             .as_ref()
3164             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
3165 
3166         let dup = passthrough_device
3167             .try_clone()
3168             .map_err(DeviceManagerError::VfioCreate)?;
3169 
3170         Ok(Arc::new(
3171             VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
3172         ))
3173     }
3174 
3175     fn add_vfio_device(
3176         &mut self,
3177         device_cfg: &mut DeviceConfig,
3178     ) -> DeviceManagerResult<(PciBdf, String)> {
3179         let vfio_name = if let Some(id) = &device_cfg.id {
3180             id.clone()
3181         } else {
3182             let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
3183             device_cfg.id = Some(id.clone());
3184             id
3185         };
3186 
3187         let (pci_segment_id, pci_device_bdf, resources) =
3188             self.pci_resources(&vfio_name, device_cfg.pci_segment)?;
3189 
3190         let mut needs_dma_mapping = false;
3191 
3192         // Here we create a new VFIO container for two reasons. Either this is
3193         // the first VFIO device, meaning we need a new VFIO container, which
3194         // will be shared with other VFIO devices. Or the new VFIO device is
3195         // attached to a vIOMMU, meaning we must create a dedicated VFIO
3196         // container. In the vIOMMU use case, we can't let all devices under
3197         // the same VFIO container since we couldn't map/unmap memory for each
3198         // device. That's simply because the map/unmap operations happen at the
3199         // VFIO container level.
3200         let vfio_container = if device_cfg.iommu {
3201             let vfio_container = self.create_vfio_container()?;
3202 
3203             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3204                 Arc::clone(&vfio_container),
3205                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3206             ));
3207 
3208             if let Some(iommu) = &self.iommu_device {
3209                 iommu
3210                     .lock()
3211                     .unwrap()
3212                     .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
3213             } else {
3214                 return Err(DeviceManagerError::MissingVirtualIommu);
3215             }
3216 
3217             vfio_container
3218         } else if let Some(vfio_container) = &self.vfio_container {
3219             Arc::clone(vfio_container)
3220         } else {
3221             let vfio_container = self.create_vfio_container()?;
3222             needs_dma_mapping = true;
3223             self.vfio_container = Some(Arc::clone(&vfio_container));
3224 
3225             vfio_container
3226         };
3227 
3228         let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
3229             .map_err(DeviceManagerError::VfioCreate)?;
3230 
3231         if needs_dma_mapping {
3232             // Register DMA mapping in IOMMU.
3233             // Do not register virtio-mem regions, as they are handled directly by
3234             // virtio-mem device itself.
3235             for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3236                 for region in zone.regions() {
3237                     vfio_container
3238                         .vfio_dma_map(
3239                             region.start_addr().raw_value(),
3240                             region.len(),
3241                             region.as_ptr() as u64,
3242                         )
3243                         .map_err(DeviceManagerError::VfioDmaMap)?;
3244                 }
3245             }
3246 
3247             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3248                 Arc::clone(&vfio_container),
3249                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3250             ));
3251 
3252             for virtio_mem_device in self.virtio_mem_devices.iter() {
3253                 virtio_mem_device
3254                     .lock()
3255                     .unwrap()
3256                     .add_dma_mapping_handler(
3257                         VirtioMemMappingSource::Container,
3258                         vfio_mapping.clone(),
3259                     )
3260                     .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3261             }
3262         }
3263 
3264         let legacy_interrupt_group =
3265             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3266                 Some(
3267                     legacy_interrupt_manager
3268                         .create_group(LegacyIrqGroupConfig {
3269                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3270                                 [pci_device_bdf.device() as usize]
3271                                 as InterruptIndex,
3272                         })
3273                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3274                 )
3275             } else {
3276                 None
3277             };
3278 
3279         let memory_manager = self.memory_manager.clone();
3280 
3281         let vfio_pci_device = VfioPciDevice::new(
3282             vfio_name.clone(),
3283             &self.address_manager.vm,
3284             vfio_device,
3285             vfio_container,
3286             self.msi_interrupt_manager.clone(),
3287             legacy_interrupt_group,
3288             device_cfg.iommu,
3289             pci_device_bdf,
3290             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3291             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
3292         )
3293         .map_err(DeviceManagerError::VfioPciCreate)?;
3294 
3295         let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));
3296 
3297         let new_resources = self.add_pci_device(
3298             vfio_pci_device.clone(),
3299             vfio_pci_device.clone(),
3300             pci_segment_id,
3301             pci_device_bdf,
3302             resources,
3303         )?;
3304 
3305         vfio_pci_device
3306             .lock()
3307             .unwrap()
3308             .map_mmio_regions()
3309             .map_err(DeviceManagerError::VfioMapRegion)?;
3310 
3311         let mut node = device_node!(vfio_name, vfio_pci_device);
3312 
3313         // Update the device tree with correct resource information.
3314         node.resources = new_resources;
3315         node.pci_bdf = Some(pci_device_bdf);
3316         node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));
3317 
3318         self.device_tree
3319             .lock()
3320             .unwrap()
3321             .insert(vfio_name.clone(), node);
3322 
3323         Ok((pci_device_bdf, vfio_name))
3324     }
3325 
3326     fn add_pci_device(
3327         &mut self,
3328         bus_device: Arc<Mutex<dyn BusDevice>>,
3329         pci_device: Arc<Mutex<dyn PciDevice>>,
3330         segment_id: u16,
3331         bdf: PciBdf,
3332         resources: Option<Vec<Resource>>,
3333     ) -> DeviceManagerResult<Vec<Resource>> {
3334         let bars = pci_device
3335             .lock()
3336             .unwrap()
3337             .allocate_bars(
3338                 &self.address_manager.allocator,
3339                 &mut self.pci_segments[segment_id as usize]
3340                     .allocator
3341                     .lock()
3342                     .unwrap(),
3343                 resources,
3344             )
3345             .map_err(DeviceManagerError::AllocateBars)?;
3346 
3347         let mut pci_bus = self.pci_segments[segment_id as usize]
3348             .pci_bus
3349             .lock()
3350             .unwrap();
3351 
3352         pci_bus
3353             .add_device(bdf.device() as u32, pci_device)
3354             .map_err(DeviceManagerError::AddPciDevice)?;
3355 
3356         self.bus_devices.push(Arc::clone(&bus_device));
3357 
3358         pci_bus
3359             .register_mapping(
3360                 bus_device,
3361                 #[cfg(target_arch = "x86_64")]
3362                 self.address_manager.io_bus.as_ref(),
3363                 self.address_manager.mmio_bus.as_ref(),
3364                 bars.clone(),
3365             )
3366             .map_err(DeviceManagerError::AddPciDevice)?;
3367 
3368         let mut new_resources = Vec::new();
3369         for bar in bars {
3370             new_resources.push(Resource::PciBar {
3371                 index: bar.idx(),
3372                 base: bar.addr(),
3373                 size: bar.size(),
3374                 type_: bar.region_type().into(),
3375                 prefetchable: bar.prefetchable().into(),
3376             });
3377         }
3378 
3379         Ok(new_resources)
3380     }
3381 
3382     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3383         let mut iommu_attached_device_ids = Vec::new();
3384         let mut devices = self.config.lock().unwrap().devices.clone();
3385 
3386         if let Some(device_list_cfg) = &mut devices {
3387             for device_cfg in device_list_cfg.iter_mut() {
3388                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3389                 if device_cfg.iommu && self.iommu_device.is_some() {
3390                     iommu_attached_device_ids.push(device_id);
3391                 }
3392             }
3393         }
3394 
3395         // Update the list of devices
3396         self.config.lock().unwrap().devices = devices;
3397 
3398         Ok(iommu_attached_device_ids)
3399     }
3400 
3401     fn add_vfio_user_device(
3402         &mut self,
3403         device_cfg: &mut UserDeviceConfig,
3404     ) -> DeviceManagerResult<(PciBdf, String)> {
3405         let vfio_user_name = if let Some(id) = &device_cfg.id {
3406             id.clone()
3407         } else {
3408             let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
3409             device_cfg.id = Some(id.clone());
3410             id
3411         };
3412 
3413         let (pci_segment_id, pci_device_bdf, resources) =
3414             self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;
3415 
3416         let legacy_interrupt_group =
3417             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3418                 Some(
3419                     legacy_interrupt_manager
3420                         .create_group(LegacyIrqGroupConfig {
3421                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3422                                 [pci_device_bdf.device() as usize]
3423                                 as InterruptIndex,
3424                         })
3425                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3426                 )
3427             } else {
3428                 None
3429             };
3430 
3431         let client = Arc::new(Mutex::new(
3432             vfio_user::Client::new(&device_cfg.socket)
3433                 .map_err(DeviceManagerError::VfioUserCreateClient)?,
3434         ));
3435 
3436         let memory_manager = self.memory_manager.clone();
3437 
3438         let mut vfio_user_pci_device = VfioUserPciDevice::new(
3439             vfio_user_name.clone(),
3440             &self.address_manager.vm,
3441             client.clone(),
3442             self.msi_interrupt_manager.clone(),
3443             legacy_interrupt_group,
3444             pci_device_bdf,
3445             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3446             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
3447         )
3448         .map_err(DeviceManagerError::VfioUserCreate)?;
3449 
3450         let memory = self.memory_manager.lock().unwrap().guest_memory();
3451         let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
3452         for virtio_mem_device in self.virtio_mem_devices.iter() {
3453             virtio_mem_device
3454                 .lock()
3455                 .unwrap()
3456                 .add_dma_mapping_handler(
3457                     VirtioMemMappingSource::Device(pci_device_bdf.into()),
3458                     vfio_user_mapping.clone(),
3459                 )
3460                 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3461         }
3462 
3463         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3464             for region in zone.regions() {
3465                 vfio_user_pci_device
3466                     .dma_map(region)
3467                     .map_err(DeviceManagerError::VfioUserDmaMap)?;
3468             }
3469         }
3470 
3471         let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));
3472 
3473         let new_resources = self.add_pci_device(
3474             vfio_user_pci_device.clone(),
3475             vfio_user_pci_device.clone(),
3476             pci_segment_id,
3477             pci_device_bdf,
3478             resources,
3479         )?;
3480 
3481         // Note it is required to call 'add_pci_device()' in advance to have the list of
3482         // mmio regions provisioned correctly
3483         vfio_user_pci_device
3484             .lock()
3485             .unwrap()
3486             .map_mmio_regions()
3487             .map_err(DeviceManagerError::VfioUserMapRegion)?;
3488 
3489         let mut node = device_node!(vfio_user_name, vfio_user_pci_device);
3490 
3491         // Update the device tree with correct resource information.
3492         node.resources = new_resources;
3493         node.pci_bdf = Some(pci_device_bdf);
3494         node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));
3495 
3496         self.device_tree
3497             .lock()
3498             .unwrap()
3499             .insert(vfio_user_name.clone(), node);
3500 
3501         Ok((pci_device_bdf, vfio_user_name))
3502     }
3503 
3504     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3505         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3506 
3507         if let Some(device_list_cfg) = &mut user_devices {
3508             for device_cfg in device_list_cfg.iter_mut() {
3509                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3510             }
3511         }
3512 
3513         // Update the list of devices
3514         self.config.lock().unwrap().user_devices = user_devices;
3515 
3516         Ok(vec![])
3517     }
3518 
3519     fn add_virtio_pci_device(
3520         &mut self,
3521         virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3522         iommu_mapping: &Option<Arc<IommuMapping>>,
3523         virtio_device_id: String,
3524         pci_segment_id: u16,
3525         dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
3526     ) -> DeviceManagerResult<PciBdf> {
3527         let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");
3528 
3529         // Add the new virtio-pci node to the device tree.
3530         let mut node = device_node!(id);
3531         node.children = vec![virtio_device_id.clone()];
3532 
3533         let (pci_segment_id, pci_device_bdf, resources) =
3534             self.pci_resources(&id, pci_segment_id)?;
3535 
3536         // Update the existing virtio node by setting the parent.
3537         if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
3538             node.parent = Some(id.clone());
3539         } else {
3540             return Err(DeviceManagerError::MissingNode);
3541         }
3542 
3543         // Allows support for one MSI-X vector per queue. It also adds 1
3544         // as we need to take into account the dedicated vector to notify
3545         // about a virtio config change.
3546         let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;
3547 
3548         // Create the AccessPlatform trait from the implementation IommuMapping.
3549         // This will provide address translation for any virtio device sitting
3550         // behind a vIOMMU.
3551         let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
3552         {
3553             Some(Arc::new(AccessPlatformMapping::new(
3554                 pci_device_bdf.into(),
3555                 mapping.clone(),
3556             )))
3557         } else {
3558             None
3559         };
3560 
3561         let memory = self.memory_manager.lock().unwrap().guest_memory();
3562 
3563         // Map DMA ranges if a DMA handler is available and if the device is
3564         // not attached to a virtual IOMMU.
3565         if let Some(dma_handler) = &dma_handler {
3566             if iommu_mapping.is_some() {
3567                 if let Some(iommu) = &self.iommu_device {
3568                     iommu
3569                         .lock()
3570                         .unwrap()
3571                         .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
3572                 } else {
3573                     return Err(DeviceManagerError::MissingVirtualIommu);
3574                 }
3575             } else {
3576                 // Let every virtio-mem device handle the DMA map/unmap through the
3577                 // DMA handler provided.
3578                 for virtio_mem_device in self.virtio_mem_devices.iter() {
3579                     virtio_mem_device
3580                         .lock()
3581                         .unwrap()
3582                         .add_dma_mapping_handler(
3583                             VirtioMemMappingSource::Device(pci_device_bdf.into()),
3584                             dma_handler.clone(),
3585                         )
3586                         .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3587                 }
3588 
3589                 // Do not register virtio-mem regions, as they are handled directly by
3590                 // virtio-mem devices.
3591                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3592                     for region in zone.regions() {
3593                         let gpa = region.start_addr().0;
3594                         let size = region.len();
3595                         dma_handler
3596                             .map(gpa, gpa, size)
3597                             .map_err(DeviceManagerError::VirtioDmaMap)?;
3598                     }
3599                 }
3600             }
3601         }
3602 
3603         let device_type = virtio_device.lock().unwrap().device_type();
3604         let virtio_pci_device = Arc::new(Mutex::new(
3605             VirtioPciDevice::new(
3606                 id.clone(),
3607                 memory,
3608                 virtio_device,
3609                 msix_num,
3610                 access_platform,
3611                 &self.msi_interrupt_manager,
3612                 pci_device_bdf.into(),
3613                 self.activate_evt
3614                     .try_clone()
3615                     .map_err(DeviceManagerError::EventFd)?,
3616                 // All device types *except* virtio block devices should be allocated a 64-bit bar
3617                 // The block devices should be given a 32-bit BAR so that they are easily accessible
3618                 // to firmware without requiring excessive identity mapping.
3619                 // The exception being if not on the default PCI segment.
3620                 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
3621                 dma_handler,
3622                 self.pending_activations.clone(),
3623                 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
3624             )
3625             .map_err(DeviceManagerError::VirtioDevice)?,
3626         ));
3627 
3628         let new_resources = self.add_pci_device(
3629             virtio_pci_device.clone(),
3630             virtio_pci_device.clone(),
3631             pci_segment_id,
3632             pci_device_bdf,
3633             resources,
3634         )?;
3635 
3636         let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3637         for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3638             let io_addr = IoEventAddress::Mmio(addr);
3639             self.address_manager
3640                 .vm
3641                 .register_ioevent(event, &io_addr, None)
3642                 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
3643         }
3644 
3645         // Update the device tree with correct resource information.
3646         node.resources = new_resources;
3647         node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
3648         node.pci_bdf = Some(pci_device_bdf);
3649         node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
3650         self.device_tree.lock().unwrap().insert(id, node);
3651 
3652         Ok(pci_device_bdf)
3653     }
3654 
3655     fn add_pvpanic_device(
3656         &mut self,
3657     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
3658         let id = String::from(PVPANIC_DEVICE_NAME);
3659         let pci_segment_id = 0x0_u16;
3660 
3661         info!("Creating pvpanic device {}", id);
3662 
3663         let (pci_segment_id, pci_device_bdf, resources) =
3664             self.pci_resources(&id, pci_segment_id)?;
3665 
3666         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
3667 
3668         let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
3669             .map_err(DeviceManagerError::PvPanicCreate)?;
3670 
3671         let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));
3672 
3673         let new_resources = self.add_pci_device(
3674             pvpanic_device.clone(),
3675             pvpanic_device.clone(),
3676             pci_segment_id,
3677             pci_device_bdf,
3678             resources,
3679         )?;
3680 
3681         let mut node = device_node!(id, pvpanic_device);
3682 
3683         node.resources = new_resources;
3684         node.pci_bdf = Some(pci_device_bdf);
3685         node.pci_device_handle = None;
3686 
3687         self.device_tree.lock().unwrap().insert(id, node);
3688 
3689         Ok(Some(pvpanic_device))
3690     }
3691 
3692     fn pci_resources(
3693         &self,
3694         id: &str,
3695         pci_segment_id: u16,
3696     ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
3697         // Look for the id in the device tree. If it can be found, that means
3698         // the device is being restored, otherwise it's created from scratch.
3699         Ok(
3700             if let Some(node) = self.device_tree.lock().unwrap().get(id) {
3701                 info!("Restoring virtio-pci {} resources", id);
3702                 let pci_device_bdf: PciBdf = node
3703                     .pci_bdf
3704                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3705                 let pci_segment_id = pci_device_bdf.segment();
3706 
3707                 self.pci_segments[pci_segment_id as usize]
3708                     .pci_bus
3709                     .lock()
3710                     .unwrap()
3711                     .get_device_id(pci_device_bdf.device() as usize)
3712                     .map_err(DeviceManagerError::GetPciDeviceId)?;
3713 
3714                 (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
3715             } else {
3716                 let pci_device_bdf =
3717                     self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3718 
3719                 (pci_segment_id, pci_device_bdf, None)
3720             },
3721         )
3722     }
3723 
    /// Returns a reference to the I/O bus held by the address manager.
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }
3728 
    /// Returns a reference to the MMIO bus held by the address manager.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }
3732 
    /// Returns a reference to the system allocator held by the address
    /// manager.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }
3736 
    /// Returns a new handle to the interrupt controller as an
    /// `InterruptController` trait object, or `None` when no interrupt
    /// controller is set.
    pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
        self.interrupt_controller
            .as_ref()
            // Clone the Arc and coerce the concrete type to the trait object.
            .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
    }
3742 
    /// Returns a reference to the list of PCI segments.
    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }
3746 
    /// Returns a reference to the console.
    pub fn console(&self) -> &Arc<Console> {
        &self.console
    }
3750 
    /// Returns the stored command line additions as a slice.
    #[cfg(target_arch = "aarch64")]
    pub fn cmdline_additions(&self) -> &[String] {
        self.cmdline_additions.as_slice()
    }
3755 
3756     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3757         for handle in self.virtio_devices.iter() {
3758             handle
3759                 .virtio_device
3760                 .lock()
3761                 .unwrap()
3762                 .add_memory_region(new_region)
3763                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3764 
3765             if let Some(dma_handler) = &handle.dma_handler {
3766                 if !handle.iommu {
3767                     let gpa = new_region.start_addr().0;
3768                     let size = new_region.len();
3769                     dma_handler
3770                         .map(gpa, gpa, size)
3771                         .map_err(DeviceManagerError::VirtioDmaMap)?;
3772                 }
3773             }
3774         }
3775 
3776         // Take care of updating the memory for VFIO PCI devices.
3777         if let Some(vfio_container) = &self.vfio_container {
3778             vfio_container
3779                 .vfio_dma_map(
3780                     new_region.start_addr().raw_value(),
3781                     new_region.len(),
3782                     new_region.as_ptr() as u64,
3783                 )
3784                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3785         }
3786 
3787         // Take care of updating the memory for vfio-user devices.
3788         {
3789             let device_tree = self.device_tree.lock().unwrap();
3790             for pci_device_node in device_tree.pci_devices() {
3791                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
3792                     .pci_device_handle
3793                     .as_ref()
3794                     .ok_or(DeviceManagerError::MissingPciDevice)?
3795                 {
3796                     vfio_user_pci_device
3797                         .lock()
3798                         .unwrap()
3799                         .dma_map(new_region)
3800                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
3801                 }
3802             }
3803         }
3804 
3805         Ok(())
3806     }
3807 
3808     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3809         for mut activator in self.pending_activations.lock().unwrap().drain(..) {
3810             activator
3811                 .activate()
3812                 .map_err(DeviceManagerError::VirtioActivate)?;
3813         }
3814         Ok(())
3815     }
3816 
3817     pub fn notify_hotplug(
3818         &self,
3819         _notification_type: AcpiNotificationFlags,
3820     ) -> DeviceManagerResult<()> {
3821         return self
3822             .ged_notification_device
3823             .as_ref()
3824             .unwrap()
3825             .lock()
3826             .unwrap()
3827             .notify(_notification_type)
3828             .map_err(DeviceManagerError::HotPlugNotification);
3829     }
3830 
3831     pub fn add_device(
3832         &mut self,
3833         device_cfg: &mut DeviceConfig,
3834     ) -> DeviceManagerResult<PciDeviceInfo> {
3835         self.validate_identifier(&device_cfg.id)?;
3836 
3837         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
3838             return Err(DeviceManagerError::InvalidIommuHotplug);
3839         }
3840 
3841         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
3842 
3843         // Update the PCIU bitmap
3844         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3845 
3846         Ok(PciDeviceInfo {
3847             id: device_name,
3848             bdf,
3849         })
3850     }
3851 
3852     pub fn add_user_device(
3853         &mut self,
3854         device_cfg: &mut UserDeviceConfig,
3855     ) -> DeviceManagerResult<PciDeviceInfo> {
3856         self.validate_identifier(&device_cfg.id)?;
3857 
3858         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
3859 
3860         // Update the PCIU bitmap
3861         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3862 
3863         Ok(PciDeviceInfo {
3864             id: device_name,
3865             bdf,
3866         })
3867     }
3868 
3869     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
3870         // The node can be directly a PCI node in case the 'id' refers to a
3871         // VFIO device or a virtio-pci one.
3872         // In case the 'id' refers to a virtio device, we must find the PCI
3873         // node by looking at the parent.
3874         let device_tree = self.device_tree.lock().unwrap();
3875         let node = device_tree
3876             .get(&id)
3877             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
3878 
3879         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
3880             node
3881         } else {
3882             let parent = node
3883                 .parent
3884                 .as_ref()
3885                 .ok_or(DeviceManagerError::MissingNode)?;
3886             device_tree
3887                 .get(parent)
3888                 .ok_or(DeviceManagerError::MissingNode)?
3889         };
3890 
3891         let pci_device_bdf: PciBdf = pci_device_node
3892             .pci_bdf
3893             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3894         let pci_segment_id = pci_device_bdf.segment();
3895 
3896         let pci_device_handle = pci_device_node
3897             .pci_device_handle
3898             .as_ref()
3899             .ok_or(DeviceManagerError::MissingPciDevice)?;
3900         #[allow(irrefutable_let_patterns)]
3901         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
3902             let device_type = VirtioDeviceType::from(
3903                 virtio_pci_device
3904                     .lock()
3905                     .unwrap()
3906                     .virtio_device()
3907                     .lock()
3908                     .unwrap()
3909                     .device_type(),
3910             );
3911             match device_type {
3912                 VirtioDeviceType::Net
3913                 | VirtioDeviceType::Block
3914                 | VirtioDeviceType::Pmem
3915                 | VirtioDeviceType::Fs
3916                 | VirtioDeviceType::Vsock => {}
3917                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
3918             }
3919         }
3920 
3921         // Update the PCID bitmap
3922         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
3923 
3924         Ok(())
3925     }
3926 
    /// Tears down the PCI device sitting in slot `device_id` of PCI segment
    /// `pci_segment_id`.
    ///
    /// This is the second half of a device removal: it is invoked from the
    /// `B0EJ` register write handler for every slot bit set by the guest.
    /// The function:
    ///
    /// 1. returns the device ID to the segment's PCI bus,
    /// 2. removes the device node (and its children) from the device tree,
    /// 3. undoes ioeventfd registrations and DMA mappings for the device,
    /// 4. frees the BARs and removes the device from the PCI, IO (x86_64
    ///    only) and MMIO buses as well as from `self.bus_devices`,
    /// 5. for virtio devices, removes userspace mappings, shuts the device
    ///    down and drops it from `self.virtio_devices`,
    /// 6. emits a `device-removed` event.
    ///
    /// # Errors
    ///
    /// Every fallible step returns early with the matching
    /// `DeviceManagerError` variant. NOTE(review): there is no rollback, so
    /// an error part-way through leaves the earlier steps applied (e.g. the
    /// device ID has already been given back to the bus).
    ///
    /// # Panics
    ///
    /// Panics if `pci_segment_id` is not a valid index into
    /// `self.pci_segments`, if any of the mutexes is poisoned, or if a
    /// virtio-pci node has more than one child.
    pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
        info!(
            "Ejecting device_id = {} on segment_id={}",
            device_id, pci_segment_id
        );

        // Convert the device ID into the corresponding b/d/f.
        // Bus and function are both 0 here.
        let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);

        // Give the PCI device ID back to the PCI bus.
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .put_device_id(device_id as usize)
            .map_err(DeviceManagerError::PutPciDeviceId)?;

        // Remove the device from the device tree along with its children.
        // The device tree lock is held until the end of the function.
        let mut device_tree = self.device_tree.lock().unwrap();
        let pci_device_node = device_tree
            .remove_node_by_pci_bdf(pci_device_bdf)
            .ok_or(DeviceManagerError::MissingPciDevice)?;

        // For VFIO and vfio-user the PCI device id is the id.
        // For virtio we overwrite it later as we want the id of the
        // underlying device.
        let mut id = pci_device_node.id;
        let pci_device_handle = pci_device_node
            .pci_device_handle
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
            // The virtio-pci device has a single child
            if !pci_device_node.children.is_empty() {
                assert_eq!(pci_device_node.children.len(), 1);
                let child_id = &pci_device_node.children[0];
                id = child_id.clone();
            }
        }
        for child in pci_device_node.children.iter() {
            device_tree.remove(child);
        }

        // Find out whether this device sits behind the virtual IOMMU; in
        // that case the DMA unmapping below is skipped for virtio devices.
        let mut iommu_attached = false;
        if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
            if iommu_attached_devices.contains(&pci_device_bdf) {
                iommu_attached = true;
            }
        }

        // Per device flavour, undo the device specific setup and collect:
        // - the device as a `PciDevice` (BAR freeing, PCI bus removal),
        // - the device as a `BusDevice` (IO/MMIO bus removal),
        // - the underlying virtio device, if any,
        // - whether the virtio-mem DMA mapping handlers registered for this
        //   device must be removed.
        let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
            // No need to remove any virtio-mem mapping here as the container outlives all devices
            PciDeviceHandle::Vfio(vfio_pci_device) => (
                Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                false,
            ),
            PciDeviceHandle::Virtio(virtio_pci_device) => {
                let dev = virtio_pci_device.lock().unwrap();
                // Unregister every ioeventfd that was registered against the
                // configuration BAR of the virtio-pci device.
                let bar_addr = dev.config_bar_addr();
                for (event, addr) in dev.ioeventfds(bar_addr) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.address_manager
                        .vm
                        .unregister_ioevent(event, &io_addr)
                        .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
                }

                // A device with its own DMA handler that is not behind the
                // IOMMU gets every region of every memory zone unmapped.
                if let Some(dma_handler) = dev.dma_handler() {
                    if !iommu_attached {
                        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                            for region in zone.regions() {
                                let iova = region.start_addr().0;
                                let size = region.len();
                                dma_handler
                                    .unmap(iova, size)
                                    .map_err(DeviceManagerError::VirtioDmaUnmap)?;
                            }
                        }
                    }
                }

                (
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    Some(dev.virtio_device()),
                    dev.dma_handler().is_some() && !iommu_attached,
                )
            }
            PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
                let mut dev = vfio_user_pci_device.lock().unwrap();
                // Unmap every region of every memory zone from the
                // vfio-user device.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        dev.dma_unmap(region)
                            .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
                    }
                }

                (
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    true,
                )
            }
        };

        // Drop the DMA mapping handler keyed by this device's BDF from every
        // virtio-mem device.
        if remove_dma_handler {
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
                        pci_device_bdf.into(),
                    ))
                    .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
            }
        }

        // Free the allocated BARs
        pci_device
            .lock()
            .unwrap()
            .free_bars(
                &mut self.address_manager.allocator.lock().unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .allocator
                    .lock()
                    .unwrap(),
            )
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            // Userspace mappings are removed before the device is shut down.
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the lists and
        // buses where it was stored. At the end of this function, after
        // virtio_device, bus_device and pci_device are released, the actual
        // device will be dropped.
        Ok(())
    }
4120 
4121     fn hotplug_virtio_pci_device(
4122         &mut self,
4123         handle: MetaVirtioDevice,
4124     ) -> DeviceManagerResult<PciDeviceInfo> {
4125         // Add the virtio device to the device manager list. This is important
4126         // as the list is used to notify virtio devices about memory updates
4127         // for instance.
4128         self.virtio_devices.push(handle.clone());
4129 
4130         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
4131             self.iommu_mapping.clone()
4132         } else {
4133             None
4134         };
4135 
4136         let bdf = self.add_virtio_pci_device(
4137             handle.virtio_device,
4138             &mapping,
4139             handle.id.clone(),
4140             handle.pci_segment,
4141             handle.dma_handler,
4142         )?;
4143 
4144         // Update the PCIU bitmap
4145         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4146 
4147         Ok(PciDeviceInfo { id: handle.id, bdf })
4148     }
4149 
4150     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
4151         self.config
4152             .lock()
4153             .as_ref()
4154             .unwrap()
4155             .platform
4156             .as_ref()
4157             .map(|pc| {
4158                 pc.iommu_segments
4159                     .as_ref()
4160                     .map(|v| v.contains(&pci_segment_id))
4161                     .unwrap_or_default()
4162             })
4163             .unwrap_or_default()
4164     }
4165 
4166     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
4167         self.validate_identifier(&disk_cfg.id)?;
4168 
4169         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
4170             return Err(DeviceManagerError::InvalidIommuHotplug);
4171         }
4172 
4173         let device = self.make_virtio_block_device(disk_cfg)?;
4174         self.hotplug_virtio_pci_device(device)
4175     }
4176 
4177     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
4178         self.validate_identifier(&fs_cfg.id)?;
4179 
4180         let device = self.make_virtio_fs_device(fs_cfg)?;
4181         self.hotplug_virtio_pci_device(device)
4182     }
4183 
4184     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
4185         self.validate_identifier(&pmem_cfg.id)?;
4186 
4187         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
4188             return Err(DeviceManagerError::InvalidIommuHotplug);
4189         }
4190 
4191         let device = self.make_virtio_pmem_device(pmem_cfg)?;
4192         self.hotplug_virtio_pci_device(device)
4193     }
4194 
4195     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
4196         self.validate_identifier(&net_cfg.id)?;
4197 
4198         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
4199             return Err(DeviceManagerError::InvalidIommuHotplug);
4200         }
4201 
4202         let device = self.make_virtio_net_device(net_cfg)?;
4203         self.hotplug_virtio_pci_device(device)
4204     }
4205 
4206     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
4207         self.validate_identifier(&vdpa_cfg.id)?;
4208 
4209         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
4210             return Err(DeviceManagerError::InvalidIommuHotplug);
4211         }
4212 
4213         let device = self.make_vdpa_device(vdpa_cfg)?;
4214         self.hotplug_virtio_pci_device(device)
4215     }
4216 
4217     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4218         self.validate_identifier(&vsock_cfg.id)?;
4219 
4220         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4221             return Err(DeviceManagerError::InvalidIommuHotplug);
4222         }
4223 
4224         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4225         self.hotplug_virtio_pci_device(device)
4226     }
4227 
4228     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4229         let mut counters = HashMap::new();
4230 
4231         for handle in &self.virtio_devices {
4232             let virtio_device = handle.virtio_device.lock().unwrap();
4233             if let Some(device_counters) = virtio_device.counters() {
4234                 counters.insert(handle.id.clone(), device_counters.clone());
4235             }
4236         }
4237 
4238         counters
4239     }
4240 
4241     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4242         if let Some(balloon) = &self.balloon {
4243             return balloon
4244                 .lock()
4245                 .unwrap()
4246                 .resize(size)
4247                 .map_err(DeviceManagerError::VirtioBalloonResize);
4248         }
4249 
4250         warn!("No balloon setup: Can't resize the balloon");
4251         Err(DeviceManagerError::MissingVirtioBalloon)
4252     }
4253 
4254     pub fn balloon_size(&self) -> u64 {
4255         if let Some(balloon) = &self.balloon {
4256             return balloon.lock().unwrap().get_actual();
4257         }
4258 
4259         0
4260     }
4261 
4262     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4263         self.device_tree.clone()
4264     }
4265 
4266     #[cfg(target_arch = "x86_64")]
4267     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4268         self.ged_notification_device
4269             .as_ref()
4270             .unwrap()
4271             .lock()
4272             .unwrap()
4273             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4274             .map_err(DeviceManagerError::PowerButtonNotification)
4275     }
4276 
4277     #[cfg(target_arch = "aarch64")]
4278     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4279         // There are two use cases:
4280         // 1. Users will use direct kernel boot with device tree.
4281         // 2. Users will use ACPI+UEFI boot.
4282 
4283         // Trigger a GPIO pin 3 event to satisify use case 1.
4284         self.gpio_device
4285             .as_ref()
4286             .unwrap()
4287             .lock()
4288             .unwrap()
4289             .trigger_key(3)
4290             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4291         // Trigger a GED power button event to satisify use case 2.
4292         return self
4293             .ged_notification_device
4294             .as_ref()
4295             .unwrap()
4296             .lock()
4297             .unwrap()
4298             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4299             .map_err(DeviceManagerError::PowerButtonNotification);
4300     }
4301 
4302     pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
4303         &self.iommu_attached_devices
4304     }
4305 
4306     fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4307         if let Some(id) = id {
4308             if id.starts_with("__") {
4309                 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4310             }
4311 
4312             if self.device_tree.lock().unwrap().contains_key(id) {
4313                 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4314             }
4315         }
4316 
4317         Ok(())
4318     }
4319 
4320     pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
4321         &self.acpi_platform_addresses
4322     }
4323 }
4324 
4325 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4326     for (numa_node_id, numa_node) in numa_nodes.iter() {
4327         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4328             return Some(*numa_node_id);
4329         }
4330     }
4331 
4332     None
4333 }
4334 
4335 struct TpmDevice {}
4336 
4337 impl Aml for TpmDevice {
4338     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4339         aml::Device::new(
4340             "TPM2".into(),
4341             vec![
4342                 &aml::Name::new("_HID".into(), &"MSFT0101"),
4343                 &aml::Name::new("_STA".into(), &(0xF_usize)),
4344                 &aml::Name::new(
4345                     "_CRS".into(),
4346                     &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
4347                         true,
4348                         layout::TPM_START.0 as u32,
4349                         layout::TPM_SIZE as u32,
4350                     )]),
4351                 ),
4352             ],
4353         )
4354         .to_aml_bytes(sink)
4355     }
4356 }
4357 
/// ACPI (AML) description of everything the device manager owns.
impl Aml for DeviceManager {
    /// Appends to `sink`, in this order:
    /// - the PCI hotplug controller `_SB_.PHPR`, including the `PCST`
    ///   register block and the `PCEJ`/`PSCN` methods,
    /// - every PCI segment,
    /// - the `_SB_.MBRD` device listing the PCI MMIO config ranges,
    /// - the `_SB_.COM1` serial device, unless the serial mode is `Off`,
    /// - the `_S5_` package and the `_SB_.PWRB` device,
    /// - the TPM device when one is configured,
    /// - the GED notification device.
    ///
    /// # Panics
    ///
    /// Panics if the GED notification device has not been created or if
    /// one of the mutexes is poisoned.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // One call to each segment's PCNT method; together they form the
        // body of the PSCN method below.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                // Four consecutive 32-bit registers; accesses to them are
                // served by the `BusDevice` implementation of the device
                // manager.
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                // PCEJ(slot, segment): ejects a device.
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write a bitmap with only the bit of the slot given
                        // as first argument set (presumably `1 << Arg0`) to
                        // the memory mapped B0EJ field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                // PSCN(): calls the PCNT method of every PCI segment.
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        // One fixed 32-bit memory descriptor per segment, covering its PCI
        // MMIO config range.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        // The resource template takes `&dyn Aml` references, so a second
        // vector borrowing from the first one is needed.
        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        // The COM1 device itself is only described when the serial port is
        // enabled.
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        // Interrupt plus an IO port range on x86_64 or a
                        // fixed MMIO range on aarch64.
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        // `_S5_` package holding the single value 5.
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        // PWRB device
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        // GED notification device; it is expected to exist at this point.
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}
4534 
4535 impl Pausable for DeviceManager {
4536     fn pause(&mut self) -> result::Result<(), MigratableError> {
4537         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4538             if let Some(migratable) = &device_node.migratable {
4539                 migratable.lock().unwrap().pause()?;
4540             }
4541         }
4542         // On AArch64, the pause of device manager needs to trigger
4543         // a "pause" of GIC, which will flush the GIC pending tables
4544         // and ITS tables to guest RAM.
4545         #[cfg(target_arch = "aarch64")]
4546         {
4547             self.get_interrupt_controller()
4548                 .unwrap()
4549                 .lock()
4550                 .unwrap()
4551                 .pause()?;
4552         };
4553 
4554         Ok(())
4555     }
4556 
4557     fn resume(&mut self) -> result::Result<(), MigratableError> {
4558         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4559             if let Some(migratable) = &device_node.migratable {
4560                 migratable.lock().unwrap().resume()?;
4561             }
4562         }
4563 
4564         Ok(())
4565     }
4566 }
4567 
4568 impl Snapshottable for DeviceManager {
4569     fn id(&self) -> String {
4570         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4571     }
4572 
4573     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4574         let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);
4575 
4576         // We aggregate all devices snapshots.
4577         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4578             if let Some(migratable) = &device_node.migratable {
4579                 let mut migratable = migratable.lock().unwrap();
4580                 snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
4581             }
4582         }
4583 
4584         Ok(snapshot)
4585     }
4586 }
4587 
// No `Transportable` item is overridden: the device manager relies
// entirely on the trait's provided defaults.
impl Transportable for DeviceManager {}
4589 
4590 impl Migratable for DeviceManager {
4591     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4592         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4593             if let Some(migratable) = &device_node.migratable {
4594                 migratable.lock().unwrap().start_dirty_log()?;
4595             }
4596         }
4597         Ok(())
4598     }
4599 
4600     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4601         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4602             if let Some(migratable) = &device_node.migratable {
4603                 migratable.lock().unwrap().stop_dirty_log()?;
4604             }
4605         }
4606         Ok(())
4607     }
4608 
4609     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4610         let mut tables = Vec::new();
4611         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4612             if let Some(migratable) = &device_node.migratable {
4613                 tables.push(migratable.lock().unwrap().dirty_log()?);
4614             }
4615         }
4616         Ok(MemoryRangeTable::new_from_tables(tables))
4617     }
4618 
4619     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4620         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4621             if let Some(migratable) = &device_node.migratable {
4622                 migratable.lock().unwrap().start_migration()?;
4623             }
4624         }
4625         Ok(())
4626     }
4627 
4628     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4629         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4630             if let Some(migratable) = &device_node.migratable {
4631                 migratable.lock().unwrap().complete_migration()?;
4632             }
4633         }
4634         Ok(())
4635     }
4636 }
4637 
// Layout of the PCI hotplug register block served by the `BusDevice`
// implementation of `DeviceManager`: four consecutive 4-byte registers,
// in the same order as the fields of the `PCST` ACPI field.

// Byte offset of each register from the start of the block.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
// Size in bytes of each register; accesses are asserted to be exactly this
// wide.
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;
4646 
4647 impl BusDevice for DeviceManager {
4648     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4649         match offset {
4650             PCIU_FIELD_OFFSET => {
4651                 assert!(data.len() == PCIU_FIELD_SIZE);
4652                 data.copy_from_slice(
4653                     &self.pci_segments[self.selected_segment]
4654                         .pci_devices_up
4655                         .to_le_bytes(),
4656                 );
4657                 // Clear the PCIU bitmap
4658                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4659             }
4660             PCID_FIELD_OFFSET => {
4661                 assert!(data.len() == PCID_FIELD_SIZE);
4662                 data.copy_from_slice(
4663                     &self.pci_segments[self.selected_segment]
4664                         .pci_devices_down
4665                         .to_le_bytes(),
4666                 );
4667                 // Clear the PCID bitmap
4668                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4669             }
4670             B0EJ_FIELD_OFFSET => {
4671                 assert!(data.len() == B0EJ_FIELD_SIZE);
4672                 // Always return an empty bitmap since the eject is always
4673                 // taken care of right away during a write access.
4674                 data.fill(0);
4675             }
4676             PSEG_FIELD_OFFSET => {
4677                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4678                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4679             }
4680             _ => error!(
4681                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4682                 base, offset
4683             ),
4684         }
4685 
4686         debug!(
4687             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4688             base, offset, data
4689         )
4690     }
4691 
4692     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4693         match offset {
4694             B0EJ_FIELD_OFFSET => {
4695                 assert!(data.len() == B0EJ_FIELD_SIZE);
4696                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4697                 data_array.copy_from_slice(data);
4698                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4699 
4700                 while slot_bitmap > 0 {
4701                     let slot_id = slot_bitmap.trailing_zeros();
4702                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4703                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4704                     }
4705                     slot_bitmap &= !(1 << slot_id);
4706                 }
4707             }
4708             PSEG_FIELD_OFFSET => {
4709                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4710                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4711                 data_array.copy_from_slice(data);
4712                 let selected_segment = u32::from_le_bytes(data_array) as usize;
4713                 if selected_segment >= self.pci_segments.len() {
4714                     error!(
4715                         "Segment selection out of range: {} >= {}",
4716                         selected_segment,
4717                         self.pci_segments.len()
4718                     );
4719                     return None;
4720                 }
4721                 self.selected_segment = selected_segment;
4722             }
4723             _ => error!(
4724                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4725                 base, offset
4726             ),
4727         }
4728 
4729         debug!(
4730             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4731             base, offset, data
4732         );
4733 
4734         None
4735     }
4736 }
4737 
4738 impl Drop for DeviceManager {
4739     fn drop(&mut self) {
4740         for handle in self.virtio_devices.drain(..) {
4741             handle.virtio_device.lock().unwrap().shutdown();
4742         }
4743 
4744         if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
4745             // SAFETY: FFI call
4746             let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
4747         }
4748     }
4749 }
4750