xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision 5e52729453cb62edbe4fb3a4aa24f8cca31e667e)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use crate::config::{
13     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
14     VdpaConfig, VhostMode, VmConfig, VsockConfig,
15 };
16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE};
17 use crate::device_tree::{DeviceNode, DeviceTree};
18 use crate::interrupt::LegacyUserspaceInterruptManager;
19 use crate::interrupt::MsiInterruptManager;
20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
21 use crate::pci_segment::PciSegment;
22 use crate::seccomp_filters::{get_seccomp_filter, Thread};
23 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
24 use crate::sigwinch_listener::start_sigwinch_listener;
25 use crate::GuestRegionMmap;
26 use crate::PciDeviceInfo;
27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
28 use acpi_tables::sdt::GenericAddress;
29 use acpi_tables::{aml, aml::Aml};
30 use anyhow::anyhow;
31 use arch::layout;
32 #[cfg(target_arch = "x86_64")]
33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
34 use arch::NumaNodes;
35 #[cfg(target_arch = "aarch64")]
36 use arch::{DeviceType, MmioDeviceInfo};
37 use block_util::{
38     async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
39     fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync,
40     raw_async::RawFileDisk, raw_sync::RawFileDiskSync, vhdx_sync::VhdxDiskSync, ImageType,
41 };
42 #[cfg(target_arch = "aarch64")]
43 use devices::gic;
44 #[cfg(target_arch = "x86_64")]
45 use devices::ioapic;
46 #[cfg(target_arch = "aarch64")]
47 use devices::legacy::Pl011;
48 #[cfg(target_arch = "x86_64")]
49 use devices::legacy::Serial;
50 use devices::{
51     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
52 };
53 use hypervisor::{HypervisorType, IoEventAddress};
54 use libc::{
55     cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED,
56     O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW,
57 };
58 use pci::{
59     DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping,
60     VfioUserPciDevice, VfioUserPciDeviceError,
61 };
62 use seccompiler::SeccompAction;
63 use serde::{Deserialize, Serialize};
64 use std::collections::{BTreeSet, HashMap};
65 use std::fs::{read_link, File, OpenOptions};
66 use std::io::{self, stdout, Seek, SeekFrom};
67 use std::mem::zeroed;
68 use std::num::Wrapping;
69 use std::os::unix::fs::OpenOptionsExt;
70 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
71 use std::path::PathBuf;
72 use std::result;
73 use std::sync::{Arc, Mutex};
74 use std::time::Instant;
75 use tracer::trace_scoped;
76 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd};
77 use virtio_devices::transport::VirtioTransport;
78 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator};
79 use virtio_devices::vhost_user::VhostUserConfig;
80 use virtio_devices::{
81     AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource,
82 };
83 use virtio_devices::{Endpoint, IommuMapping};
84 use vm_allocator::{AddressAllocator, SystemAllocator};
85 use vm_device::dma_mapping::vfio::VfioDmaMapping;
86 use vm_device::dma_mapping::ExternalDmaMapping;
87 use vm_device::interrupt::{
88     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
89 };
90 use vm_device::{Bus, BusDevice, Resource};
91 use vm_memory::guest_memory::FileOffset;
92 use vm_memory::GuestMemoryRegion;
93 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
94 #[cfg(target_arch = "x86_64")]
95 use vm_memory::{GuestAddressSpace, GuestMemory};
96 use vm_migration::{
97     protocol::MemoryRangeTable, snapshot_from_id, versioned_state_from_id, Migratable,
98     MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, Transportable,
99 };
100 use vm_virtio::AccessPlatform;
101 use vm_virtio::VirtioDeviceType;
102 use vmm_sys_util::eventfd::EventFd;
103 
// NOTE(review): presumably the size in bytes (4 KiB) of the MMIO window
// reserved per device on aarch64 — its users are not visible here, confirm.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;
106 
// Identifiers of singleton devices / devices the user cannot name.
// They all carry a double-underscore prefix.
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const IOMMU_DEVICE_NAME: &str = "__iommu";
const RNG_DEVICE_NAME: &str = "__rng";
const SERIAL_DEVICE_NAME: &str = "__serial";
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";

// Identifier prefixes for devices that the user may name. When no identifier
// is provided, one is generated starting with the matching prefix.
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
130 
/// Errors associated with device manager
///
/// Each variant names one failing operation; variants that carry a payload
/// wrap the underlying error reported by the failing component.
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Cannot create tpm device
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create VhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier
    InvalidIdentifier(String),

    /// Error activating virtio device
    VirtioActivate(ActivateError),

    /// Failed retrieving device state from snapshot
    RestoreGetState(MigratableError),
}
/// Result type whose error is always a [`DeviceManagerError`].
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// NOTE(review): presumably the size in bytes of the device manager's ACPI
// MMIO region — its users are not visible here, confirm.
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

// ioctl request numbers passed to `libc::ioctl` by `create_pty()`:
// TIOCSPTLCK is issued with a zeroed "unlock" argument on the pty main end,
// TIOCGTPEER is issued with open flags and yields the fd of the sub end.
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
const TIOCGTPEER: libc::c_int = 0x5441;
480 
481 pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
482     // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
483     // This is done to try and use the devpts filesystem that
484     // could be available for use in the process's namespace first.
485     // Ideally these are all the same file though but different
486     // kernels could have things setup differently.
487     // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
488     // for further details.
489 
490     let custom_flags = libc::O_NONBLOCK;
491     let main = match OpenOptions::new()
492         .read(true)
493         .write(true)
494         .custom_flags(custom_flags)
495         .open("/dev/pts/ptmx")
496     {
497         Ok(f) => f,
498         _ => OpenOptions::new()
499             .read(true)
500             .write(true)
501             .custom_flags(custom_flags)
502             .open("/dev/ptmx")?,
503     };
504     let mut unlock: libc::c_ulong = 0;
505     // SAFETY: FFI call into libc, trivially safe
506     unsafe { libc::ioctl(main.as_raw_fd(), TIOCSPTLCK as _, &mut unlock) };
507 
508     // SAFETY: FFI call into libc, trivally safe
509     let sub_fd = unsafe {
510         libc::ioctl(
511             main.as_raw_fd(),
512             TIOCGTPEER as _,
513             libc::O_NOCTTY | libc::O_RDWR,
514         )
515     };
516     if sub_fd == -1 {
517         return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
518     }
519 
520     let proc_path = PathBuf::from(format!("/proc/self/fd/{sub_fd}"));
521     let path = read_link(proc_path)?;
522 
523     // SAFETY: sub_fd is checked to be valid before being wrapped in File
524     Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
525 }
526 
527 #[derive(Default)]
528 pub struct Console {
529     console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
530 }
531 
532 impl Console {
533     pub fn update_console_size(&self) {
534         if let Some(resizer) = self.console_resizer.as_ref() {
535             resizer.update_console_size()
536         }
537     }
538 }
539 
/// Shared state needed to keep address allocations, buses, the hypervisor and
/// the device tree consistent when a PCI BAR is relocated (see the
/// `DeviceRelocation` implementation).
pub(crate) struct AddressManager {
    // System-wide allocator; used for IO ports and 32-bit MMIO hole ranges.
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    // Port IO bus (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub(crate) io_bus: Arc<Bus>,
    // MMIO bus.
    pub(crate) mmio_bus: Arc<Bus>,
    // Hypervisor VM handle; used to (un)register ioevents and user memory regions.
    pub(crate) vm: Arc<dyn hypervisor::Vm>,
    // Device tree whose PciBar resources are updated when a BAR moves.
    device_tree: Arc<Mutex<DeviceTree>>,
    // Allocators for 64-bit MMIO BARs; the one whose range contains the BAR is used.
    // NOTE(review): presumably one allocator per PCI segment — confirm.
    pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}
549 
impl DeviceRelocation for AddressManager {
    /// Relocates a PCI BAR of `len` bytes from `old_base` to `new_base`.
    ///
    /// The steps are, in order:
    /// 1. free the old range and reserve the new one in the matching allocator,
    ///    then move the range on the IO or MMIO bus;
    /// 2. update the matching `Resource::PciBar` entry in the device tree, if
    ///    the device has an id;
    /// 3. for a `VirtioPciDevice`, either re-register its ioeventfds (when the
    ///    moved BAR is the config BAR) or remap its shared memory region;
    /// 4. let the device itself record the move through `pci_dev.move_bar()`.
    ///
    /// # Errors
    ///
    /// Returns an `io::Error` of kind `Other` as soon as one step fails; the
    /// steps already performed are not rolled back.
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    // NOTE(review): the old range is released before the new
                    // one is reserved; if the reservation below fails the old
                    // range stays freed.
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                // Only logged on aarch64: execution continues with the device
                // tree update below.
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                // Update system allocator
                if region_type == PciBarRegionType::Memory32BitRegion {
                    // 32-bit BARs come from the MMIO hole of the system allocator.
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_mmio_hole_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            Some(len),
                        )
                        .ok_or_else(|| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                "failed allocating new 32 bits MMIO range",
                            )
                        })?;
                } else {
                    // Find the specific allocator that this BAR was allocated from and use it for new one
                    // NOTE(review): if no allocator contains `old_base`, nothing
                    // is freed or reserved and no error is reported.
                    for allocator in &self.pci_mmio_allocators {
                        let allocator_base = allocator.lock().unwrap().base();
                        let allocator_end = allocator.lock().unwrap().end();

                        if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                            allocator
                                .lock()
                                .unwrap()
                                .free(GuestAddress(old_base), len as GuestUsize);

                            allocator
                                .lock()
                                .unwrap()
                                .allocate(
                                    Some(GuestAddress(new_base)),
                                    len as GuestUsize,
                                    Some(len),
                                )
                                .ok_or_else(|| {
                                    io::Error::new(
                                        io::ErrorKind::Other,
                                        "failed allocating new 64 bits MMIO range",
                                    )
                                })?;

                            break;
                        }
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        // (skipped entirely for devices that report no id).
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        // Only the first BAR matching both type and old base is rewritten.
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {id} from device tree"),
                ));
            }
        }

        // Extra work is only needed for virtio-pci devices.
        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The moved BAR is the config BAR: drop the ioevents registered
                // for the old base, then register them again for the new base.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {e:?}"),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {e:?}"),
                            )
                        })?;
                }
            } else {
                // Otherwise the BAR may back the device's shared memory region,
                // which must then be remapped at the new guest address.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        // Remove the mapping located at the old guest address.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {e:?}"),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {e:?}"),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {e:?}"),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the device itself record its new BAR address.
        pci_dev.move_bar(old_base, new_base)
    }
}
758 
// Serializable state of the device manager: the device tree and the device
// id counter.
// NOTE(review): presumably the payload saved and restored by
// snapshot/restore — confirm against the Snapshottable implementation.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    // Tree of devices.
    device_tree: DeviceTree,
    // Wrapping counter of device ids.
    device_id_cnt: Wrapping<usize>,
}
764 
/// An open pty file handle paired with a filesystem path.
///
/// NOTE(review): presumably `main` is the main end returned by `create_pty()`
/// and `path` the path of the matching sub end — confirm against callers.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    /// Returns a pair whose `main` is a duplicated handle to the same
    /// underlying file and whose `path` is a copy of the original path.
    ///
    /// # Panics
    ///
    /// Panics if the file handle cannot be duplicated.
    fn clone(&self) -> Self {
        let duplicated = self.main.try_clone().unwrap();
        let path_copy = self.path.to_path_buf();

        Self {
            main: duplicated,
            path: path_copy,
        }
    }
}
779 
/// Shared handle to a PCI device, tagged by the kind of device it refers to.
///
/// Cloning only clones the inner `Arc`, so every clone refers to the same
/// device.
#[derive(Clone)]
pub enum PciDeviceHandle {
    /// A VFIO PCI device.
    Vfio(Arc<Mutex<VfioPciDevice>>),
    /// A virtio PCI device.
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    /// A vfio-user PCI device.
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}
786 
// A virtio device together with the metadata kept alongside it.
#[derive(Clone)]
struct MetaVirtioDevice {
    // The virtio device itself.
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // NOTE(review): presumably whether the device sits behind the virtual
    // IOMMU — confirm against the code creating these entries.
    iommu: bool,
    // Identifier of the device.
    id: String,
    // NOTE(review): presumably the PCI segment the device is attached to — confirm.
    pci_segment: u16,
    // Optional external DMA mapping handler for the device.
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}
795 
/// Optional ACPI generic addresses of platform registers.
///
/// Every field is `None` in the `Default` value.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    /// Address of the PM timer, if any.
    pub pm_timer_address: Option<GenericAddress>,
    /// Address of the reset register, if any.
    pub reset_reg_address: Option<GenericAddress>,
    /// Address of the sleep control register, if any.
    pub sleep_control_reg_address: Option<GenericAddress>,
    /// Address of the sleep status register, if any.
    pub sleep_status_reg_address: Option<GenericAddress>,
}
803 
/// Owns the devices of a VM together with the bookkeeping needed to create
/// them: address allocation, interrupt managers, the PCI segments and the
/// device tree.
pub struct DeviceManager {
    // The underlying hypervisor
    hypervisor_type: HypervisorType,

    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // PTY pair backing the console, if any (exposed through `console_pty()`)
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // PTY pair backing the serial port, if any (exposed through `serial_pty()`)
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // File handle exposed through `console_resize_pipe()`.
    // NOTE(review): presumably the pipe used to forward console resize
    // notifications — confirm against the console setup code.
    console_resize_pipe: Option<Arc<File>>,

    // Interrupt controller (IOAPIC on x86_64, GIC on aarch64). `None` until
    // `add_interrupt_controller()` has run.
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device, set by `create_devices()`
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs. It is part of the
    // saved `DeviceManagerState` and is reloaded from a snapshot on restore.
    device_id_cnt: Wrapping<usize>,

    // PCI segments; index 0 is the default segment created in `new()`.
    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager. `None` until `create_devices()` has built it
    // on top of the interrupt controller.
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU and its mapping; both are set together when the
    // virtio-iommu device is created.
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit and reset events. Clones of these are handed to the devices that
    // can request a VM shutdown or reset.
    exit_evt: EventFd,
    reset_evt: EventFd,

    // MMIO device information (address, length, IRQ) keyed by device type and
    // identifier, exposed through `get_device_info()`.
    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    // MMIO address at which the DeviceManager itself is registered on the
    // MMIO bus (region of DEVICE_MANAGER_ACPI_SIZE bytes).
    acpi_address: GuestAddress,

    // Currently selected PCI segment; initialised to 0.
    // NOTE(review): presumably written by the guest through the ACPI region
    // above — confirm against the BusDevice implementation.
    selected_segment: usize,

    // Handles to the virtio-mem devices, if any
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected; `None` means not probed yet
    io_uring_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    // Snapshot to restore device state from, if the VM is being restored
    snapshot: Option<Snapshot>,
}
943 
944 impl DeviceManager {
945     #[allow(clippy::too_many_arguments)]
946     pub fn new(
947         #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
948         mmio_bus: Arc<Bus>,
949         hypervisor_type: HypervisorType,
950         vm: Arc<dyn hypervisor::Vm>,
951         config: Arc<Mutex<VmConfig>>,
952         memory_manager: Arc<Mutex<MemoryManager>>,
953         cpu_manager: Arc<Mutex<CpuManager>>,
954         exit_evt: EventFd,
955         reset_evt: EventFd,
956         seccomp_action: SeccompAction,
957         numa_nodes: NumaNodes,
958         activate_evt: &EventFd,
959         force_iommu: bool,
960         boot_id_list: BTreeSet<String>,
961         timestamp: Instant,
962         snapshot: Option<Snapshot>,
963         dynamic: bool,
964     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
965         trace_scoped!("DeviceManager::new");
966 
967         let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
968             let state: DeviceManagerState = snapshot.to_state().unwrap();
969             (
970                 Arc::new(Mutex::new(state.device_tree.clone())),
971                 state.device_id_cnt,
972             )
973         } else {
974             (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
975         };
976 
977         let num_pci_segments =
978             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
979                 platform_config.num_pci_segments
980             } else {
981                 1
982             };
983 
984         let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
985         let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;
986 
987         // Start each PCI segment range on a 4GiB boundary
988         let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
989             / ((4 << 30) * num_pci_segments as u64)
990             * (4 << 30);
991 
992         let mut pci_mmio_allocators = vec![];
993         for i in 0..num_pci_segments as u64 {
994             let mmio_start = start_of_device_area + i * pci_segment_size;
995             let allocator = Arc::new(Mutex::new(
996                 AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
997             ));
998             pci_mmio_allocators.push(allocator)
999         }
1000 
1001         let address_manager = Arc::new(AddressManager {
1002             allocator: memory_manager.lock().unwrap().allocator(),
1003             #[cfg(target_arch = "x86_64")]
1004             io_bus,
1005             mmio_bus,
1006             vm: vm.clone(),
1007             device_tree: Arc::clone(&device_tree),
1008             pci_mmio_allocators,
1009         });
1010 
1011         // First we create the MSI interrupt manager, the legacy one is created
1012         // later, after the IOAPIC device creation.
1013         // The reason we create the MSI one first is because the IOAPIC needs it,
1014         // and then the legacy interrupt manager needs an IOAPIC. So we're
1015         // handling a linear dependency chain:
1016         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1017         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1018             Arc::new(MsiInterruptManager::new(
1019                 Arc::clone(&address_manager.allocator),
1020                 vm,
1021             ));
1022 
1023         let acpi_address = address_manager
1024             .allocator
1025             .lock()
1026             .unwrap()
1027             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1028             .ok_or(DeviceManagerError::AllocateIoPort)?;
1029 
1030         let mut pci_irq_slots = [0; 32];
1031         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1032             &address_manager,
1033             &mut pci_irq_slots,
1034         )?;
1035 
1036         let mut pci_segments = vec![PciSegment::new_default_segment(
1037             &address_manager,
1038             Arc::clone(&address_manager.pci_mmio_allocators[0]),
1039             &pci_irq_slots,
1040         )?];
1041 
1042         for i in 1..num_pci_segments as usize {
1043             pci_segments.push(PciSegment::new(
1044                 i as u16,
1045                 &address_manager,
1046                 Arc::clone(&address_manager.pci_mmio_allocators[i]),
1047                 &pci_irq_slots,
1048             )?);
1049         }
1050 
1051         if dynamic {
1052             let acpi_address = address_manager
1053                 .allocator
1054                 .lock()
1055                 .unwrap()
1056                 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
1057                 .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1058 
1059             address_manager
1060                 .mmio_bus
1061                 .insert(
1062                     cpu_manager.clone(),
1063                     acpi_address.0,
1064                     CPU_MANAGER_ACPI_SIZE as u64,
1065                 )
1066                 .map_err(DeviceManagerError::BusError)?;
1067 
1068             cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
1069         }
1070 
1071         let device_manager = DeviceManager {
1072             hypervisor_type,
1073             address_manager: Arc::clone(&address_manager),
1074             console: Arc::new(Console::default()),
1075             interrupt_controller: None,
1076             #[cfg(target_arch = "aarch64")]
1077             cmdline_additions: Vec::new(),
1078             ged_notification_device: None,
1079             config,
1080             memory_manager,
1081             cpu_manager,
1082             virtio_devices: Vec::new(),
1083             bus_devices: Vec::new(),
1084             device_id_cnt,
1085             msi_interrupt_manager,
1086             legacy_interrupt_manager: None,
1087             passthrough_device: None,
1088             vfio_container: None,
1089             iommu_device: None,
1090             iommu_mapping: None,
1091             iommu_attached_devices: None,
1092             pci_segments,
1093             device_tree,
1094             exit_evt,
1095             reset_evt,
1096             #[cfg(target_arch = "aarch64")]
1097             id_to_dev_info: HashMap::new(),
1098             seccomp_action,
1099             numa_nodes,
1100             balloon: None,
1101             activate_evt: activate_evt
1102                 .try_clone()
1103                 .map_err(DeviceManagerError::EventFd)?,
1104             acpi_address,
1105             selected_segment: 0,
1106             serial_pty: None,
1107             serial_manager: None,
1108             console_pty: None,
1109             console_resize_pipe: None,
1110             virtio_mem_devices: Vec::new(),
1111             #[cfg(target_arch = "aarch64")]
1112             gpio_device: None,
1113             force_iommu,
1114             io_uring_supported: None,
1115             boot_id_list,
1116             timestamp,
1117             pending_activations: Arc::new(Mutex::new(Vec::default())),
1118             acpi_platform_addresses: AcpiPlatformAddresses::default(),
1119             snapshot,
1120         };
1121 
1122         let device_manager = Arc::new(Mutex::new(device_manager));
1123 
1124         address_manager
1125             .mmio_bus
1126             .insert(
1127                 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
1128                 acpi_address.0,
1129                 DEVICE_MANAGER_ACPI_SIZE as u64,
1130             )
1131             .map_err(DeviceManagerError::BusError)?;
1132 
1133         Ok(device_manager)
1134     }
1135 
1136     pub fn serial_pty(&self) -> Option<PtyPair> {
1137         self.serial_pty
1138             .as_ref()
1139             .map(|pty| pty.lock().unwrap().clone())
1140     }
1141 
1142     pub fn console_pty(&self) -> Option<PtyPair> {
1143         self.console_pty
1144             .as_ref()
1145             .map(|pty| pty.lock().unwrap().clone())
1146     }
1147 
1148     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1149         self.console_resize_pipe.as_ref().map(Arc::clone)
1150     }
1151 
1152     pub fn create_devices(
1153         &mut self,
1154         serial_pty: Option<PtyPair>,
1155         console_pty: Option<PtyPair>,
1156         console_resize_pipe: Option<File>,
1157     ) -> DeviceManagerResult<()> {
1158         trace_scoped!("create_devices");
1159 
1160         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1161 
1162         let interrupt_controller = self.add_interrupt_controller()?;
1163 
1164         self.cpu_manager
1165             .lock()
1166             .unwrap()
1167             .set_interrupt_controller(interrupt_controller.clone());
1168 
1169         // Now we can create the legacy interrupt manager, which needs the freshly
1170         // formed IOAPIC device.
1171         let legacy_interrupt_manager: Arc<
1172             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1173         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1174             &interrupt_controller,
1175         )));
1176 
1177         {
1178             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1179                 self.address_manager
1180                     .mmio_bus
1181                     .insert(
1182                         Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
1183                         acpi_address.0,
1184                         MEMORY_MANAGER_ACPI_SIZE as u64,
1185                     )
1186                     .map_err(DeviceManagerError::BusError)?;
1187             }
1188         }
1189 
1190         #[cfg(target_arch = "x86_64")]
1191         self.add_legacy_devices(
1192             self.reset_evt
1193                 .try_clone()
1194                 .map_err(DeviceManagerError::EventFd)?,
1195         )?;
1196 
1197         #[cfg(target_arch = "aarch64")]
1198         self.add_legacy_devices(&legacy_interrupt_manager)?;
1199 
1200         {
1201             self.ged_notification_device = self.add_acpi_devices(
1202                 &legacy_interrupt_manager,
1203                 self.reset_evt
1204                     .try_clone()
1205                     .map_err(DeviceManagerError::EventFd)?,
1206                 self.exit_evt
1207                     .try_clone()
1208                     .map_err(DeviceManagerError::EventFd)?,
1209             )?;
1210         }
1211 
1212         self.console = self.add_console_device(
1213             &legacy_interrupt_manager,
1214             &mut virtio_devices,
1215             serial_pty,
1216             console_pty,
1217             console_resize_pipe,
1218         )?;
1219 
1220         if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
1221             let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
1222             self.bus_devices
1223                 .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>)
1224         }
1225         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1226 
1227         virtio_devices.append(&mut self.make_virtio_devices()?);
1228 
1229         self.add_pci_devices(virtio_devices.clone())?;
1230 
1231         self.virtio_devices = virtio_devices;
1232 
1233         Ok(())
1234     }
1235 
1236     fn state(&self) -> DeviceManagerState {
1237         DeviceManagerState {
1238             device_tree: self.device_tree.lock().unwrap().clone(),
1239             device_id_cnt: self.device_id_cnt,
1240         }
1241     }
1242 
1243     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1244         #[cfg(target_arch = "aarch64")]
1245         {
1246             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1247             let vgic_config = gic::Gic::create_default_config(vcpus.into());
1248             (
1249                 vgic_config.msi_addr,
1250                 vgic_config.msi_addr + vgic_config.msi_size - 1,
1251             )
1252         }
1253         #[cfg(target_arch = "x86_64")]
1254         (0xfee0_0000, 0xfeef_ffff)
1255     }
1256 
1257     #[cfg(target_arch = "aarch64")]
1258     /// Gets the information of the devices registered up to some point in time.
1259     pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
1260         &self.id_to_dev_info
1261     }
1262 
1263     #[allow(unused_variables)]
1264     fn add_pci_devices(
1265         &mut self,
1266         virtio_devices: Vec<MetaVirtioDevice>,
1267     ) -> DeviceManagerResult<()> {
1268         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1269 
1270         let iommu_device = if self.config.lock().unwrap().iommu {
1271             let (device, mapping) = virtio_devices::Iommu::new(
1272                 iommu_id.clone(),
1273                 self.seccomp_action.clone(),
1274                 self.exit_evt
1275                     .try_clone()
1276                     .map_err(DeviceManagerError::EventFd)?,
1277                 self.get_msi_iova_space(),
1278                 versioned_state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
1279                     .map_err(DeviceManagerError::RestoreGetState)?,
1280             )
1281             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1282             let device = Arc::new(Mutex::new(device));
1283             self.iommu_device = Some(Arc::clone(&device));
1284             self.iommu_mapping = Some(mapping);
1285 
1286             // Fill the device tree with a new node. In case of restore, we
1287             // know there is nothing to do, so we can simply override the
1288             // existing entry.
1289             self.device_tree
1290                 .lock()
1291                 .unwrap()
1292                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1293 
1294             Some(device)
1295         } else {
1296             None
1297         };
1298 
1299         let mut iommu_attached_devices = Vec::new();
1300         {
1301             for handle in virtio_devices {
1302                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1303                     self.iommu_mapping.clone()
1304                 } else {
1305                     None
1306                 };
1307 
1308                 let dev_id = self.add_virtio_pci_device(
1309                     handle.virtio_device,
1310                     &mapping,
1311                     handle.id,
1312                     handle.pci_segment,
1313                     handle.dma_handler,
1314                 )?;
1315 
1316                 if handle.iommu {
1317                     iommu_attached_devices.push(dev_id);
1318                 }
1319             }
1320 
1321             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1322             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1323 
1324             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1325             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1326 
1327             // Add all devices from forced iommu segments
1328             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1329                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1330                     for segment in iommu_segments {
1331                         for device in 0..32 {
1332                             let bdf = PciBdf::new(*segment, 0, device, 0);
1333                             if !iommu_attached_devices.contains(&bdf) {
1334                                 iommu_attached_devices.push(bdf);
1335                             }
1336                         }
1337                     }
1338                 }
1339             }
1340 
1341             if let Some(iommu_device) = iommu_device {
1342                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1343                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1344             }
1345         }
1346 
1347         for segment in &self.pci_segments {
1348             #[cfg(target_arch = "x86_64")]
1349             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1350                 self.bus_devices
1351                     .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
1352             }
1353 
1354             self.bus_devices
1355                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
1356         }
1357 
1358         Ok(())
1359     }
1360 
1361     #[cfg(target_arch = "aarch64")]
1362     fn add_interrupt_controller(
1363         &mut self,
1364     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1365         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1366             gic::Gic::new(
1367                 self.config.lock().unwrap().cpus.boot_vcpus,
1368                 Arc::clone(&self.msi_interrupt_manager),
1369                 self.address_manager.vm.clone(),
1370             )
1371             .map_err(DeviceManagerError::CreateInterruptController)?,
1372         ));
1373 
1374         self.interrupt_controller = Some(interrupt_controller.clone());
1375 
1376         // Restore the vGic if this is in the process of restoration
1377         let id = String::from(gic::GIC_SNAPSHOT_ID);
1378         if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
1379             // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
1380             if self
1381                 .cpu_manager
1382                 .lock()
1383                 .unwrap()
1384                 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
1385                 .is_err()
1386             {
1387                 info!("Failed to initialize PMU");
1388             }
1389 
1390             let vgic_state = vgic_snapshot
1391                 .to_state()
1392                 .map_err(DeviceManagerError::RestoreGetState)?;
1393             let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
1394             interrupt_controller
1395                 .lock()
1396                 .unwrap()
1397                 .restore_vgic(vgic_state, &saved_vcpu_states)
1398                 .unwrap();
1399         }
1400 
1401         self.device_tree
1402             .lock()
1403             .unwrap()
1404             .insert(id.clone(), device_node!(id, interrupt_controller));
1405 
1406         Ok(interrupt_controller)
1407     }
1408 
1409     #[cfg(target_arch = "aarch64")]
1410     pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
1411         self.interrupt_controller.as_ref()
1412     }
1413 
1414     #[cfg(target_arch = "x86_64")]
1415     fn add_interrupt_controller(
1416         &mut self,
1417     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1418         let id = String::from(IOAPIC_DEVICE_NAME);
1419 
1420         // Create IOAPIC
1421         let interrupt_controller = Arc::new(Mutex::new(
1422             ioapic::Ioapic::new(
1423                 id.clone(),
1424                 APIC_START,
1425                 Arc::clone(&self.msi_interrupt_manager),
1426                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1427                     .map_err(DeviceManagerError::RestoreGetState)?,
1428             )
1429             .map_err(DeviceManagerError::CreateInterruptController)?,
1430         ));
1431 
1432         self.interrupt_controller = Some(interrupt_controller.clone());
1433 
1434         self.address_manager
1435             .mmio_bus
1436             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1437             .map_err(DeviceManagerError::BusError)?;
1438 
1439         self.bus_devices
1440             .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);
1441 
1442         // Fill the device tree with a new node. In case of restore, we
1443         // know there is nothing to do, so we can simply override the
1444         // existing entry.
1445         self.device_tree
1446             .lock()
1447             .unwrap()
1448             .insert(id.clone(), device_node!(id, interrupt_controller));
1449 
1450         Ok(interrupt_controller)
1451     }
1452 
1453     fn add_acpi_devices(
1454         &mut self,
1455         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1456         reset_evt: EventFd,
1457         exit_evt: EventFd,
1458     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1459         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1460             exit_evt, reset_evt,
1461         )));
1462 
1463         self.bus_devices
1464             .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);
1465 
1466         #[cfg(target_arch = "x86_64")]
1467         {
1468             let shutdown_pio_address: u16 = 0x600;
1469 
1470             self.address_manager
1471                 .allocator
1472                 .lock()
1473                 .unwrap()
1474                 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
1475                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1476 
1477             self.address_manager
1478                 .io_bus
1479                 .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
1480                 .map_err(DeviceManagerError::BusError)?;
1481 
1482             self.acpi_platform_addresses.sleep_control_reg_address =
1483                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1484             self.acpi_platform_addresses.sleep_status_reg_address =
1485                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1486             self.acpi_platform_addresses.reset_reg_address =
1487                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1488         }
1489 
1490         let ged_irq = self
1491             .address_manager
1492             .allocator
1493             .lock()
1494             .unwrap()
1495             .allocate_irq()
1496             .unwrap();
1497         let interrupt_group = interrupt_manager
1498             .create_group(LegacyIrqGroupConfig {
1499                 irq: ged_irq as InterruptIndex,
1500             })
1501             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1502         let ged_address = self
1503             .address_manager
1504             .allocator
1505             .lock()
1506             .unwrap()
1507             .allocate_platform_mmio_addresses(
1508                 None,
1509                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1510                 None,
1511             )
1512             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1513         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1514             interrupt_group,
1515             ged_irq,
1516             ged_address,
1517         )));
1518         self.address_manager
1519             .mmio_bus
1520             .insert(
1521                 ged_device.clone(),
1522                 ged_address.0,
1523                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1524             )
1525             .map_err(DeviceManagerError::BusError)?;
1526         self.bus_devices
1527             .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);
1528 
1529         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1530 
1531         self.bus_devices
1532             .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);
1533 
1534         #[cfg(target_arch = "x86_64")]
1535         {
1536             let pm_timer_pio_address: u16 = 0x608;
1537 
1538             self.address_manager
1539                 .allocator
1540                 .lock()
1541                 .unwrap()
1542                 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
1543                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1544 
1545             self.address_manager
1546                 .io_bus
1547                 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
1548                 .map_err(DeviceManagerError::BusError)?;
1549 
1550             self.acpi_platform_addresses.pm_timer_address =
1551                 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
1552         }
1553 
1554         Ok(Some(ged_device))
1555     }
1556 
1557     #[cfg(target_arch = "x86_64")]
1558     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1559         // Add a shutdown device (i8042)
1560         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1561             reset_evt.try_clone().unwrap(),
1562         )));
1563 
1564         self.bus_devices
1565             .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);
1566 
1567         self.address_manager
1568             .io_bus
1569             .insert(i8042, 0x61, 0x4)
1570             .map_err(DeviceManagerError::BusError)?;
1571         {
1572             // Add a CMOS emulated device
1573             let mem_size = self
1574                 .memory_manager
1575                 .lock()
1576                 .unwrap()
1577                 .guest_memory()
1578                 .memory()
1579                 .last_addr()
1580                 .0
1581                 + 1;
1582             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1583             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1584 
1585             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1586                 mem_below_4g,
1587                 mem_above_4g,
1588                 reset_evt,
1589             )));
1590 
1591             self.bus_devices
1592                 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);
1593 
1594             self.address_manager
1595                 .io_bus
1596                 .insert(cmos, 0x70, 0x2)
1597                 .map_err(DeviceManagerError::BusError)?;
1598 
1599             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1600 
1601             self.bus_devices
1602                 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);
1603 
1604             self.address_manager
1605                 .io_bus
1606                 .insert(fwdebug, 0x402, 0x1)
1607                 .map_err(DeviceManagerError::BusError)?;
1608         }
1609 
1610         // 0x80 debug port
1611         let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
1612         self.bus_devices
1613             .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
1614         self.address_manager
1615             .io_bus
1616             .insert(debug_port, 0x80, 0x1)
1617             .map_err(DeviceManagerError::BusError)?;
1618 
1619         Ok(())
1620     }
1621 
1622     #[cfg(target_arch = "aarch64")]
1623     fn add_legacy_devices(
1624         &mut self,
1625         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1626     ) -> DeviceManagerResult<()> {
1627         // Add a RTC device
1628         let rtc_irq = self
1629             .address_manager
1630             .allocator
1631             .lock()
1632             .unwrap()
1633             .allocate_irq()
1634             .unwrap();
1635 
1636         let interrupt_group = interrupt_manager
1637             .create_group(LegacyIrqGroupConfig {
1638                 irq: rtc_irq as InterruptIndex,
1639             })
1640             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1641 
1642         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1643 
1644         self.bus_devices
1645             .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);
1646 
1647         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1648 
1649         self.address_manager
1650             .mmio_bus
1651             .insert(rtc_device, addr.0, MMIO_LEN)
1652             .map_err(DeviceManagerError::BusError)?;
1653 
1654         self.id_to_dev_info.insert(
1655             (DeviceType::Rtc, "rtc".to_string()),
1656             MmioDeviceInfo {
1657                 addr: addr.0,
1658                 len: MMIO_LEN,
1659                 irq: rtc_irq,
1660             },
1661         );
1662 
1663         // Add a GPIO device
1664         let id = String::from(GPIO_DEVICE_NAME);
1665         let gpio_irq = self
1666             .address_manager
1667             .allocator
1668             .lock()
1669             .unwrap()
1670             .allocate_irq()
1671             .unwrap();
1672 
1673         let interrupt_group = interrupt_manager
1674             .create_group(LegacyIrqGroupConfig {
1675                 irq: gpio_irq as InterruptIndex,
1676             })
1677             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1678 
1679         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1680             id.clone(),
1681             interrupt_group,
1682             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1683                 .map_err(DeviceManagerError::RestoreGetState)?,
1684         )));
1685 
1686         self.bus_devices
1687             .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);
1688 
1689         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1690 
1691         self.address_manager
1692             .mmio_bus
1693             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1694             .map_err(DeviceManagerError::BusError)?;
1695 
1696         self.gpio_device = Some(gpio_device.clone());
1697 
1698         self.id_to_dev_info.insert(
1699             (DeviceType::Gpio, "gpio".to_string()),
1700             MmioDeviceInfo {
1701                 addr: addr.0,
1702                 len: MMIO_LEN,
1703                 irq: gpio_irq,
1704             },
1705         );
1706 
1707         self.device_tree
1708             .lock()
1709             .unwrap()
1710             .insert(id.clone(), device_node!(id, gpio_device));
1711 
1712         Ok(())
1713     }
1714 
1715     #[cfg(target_arch = "x86_64")]
1716     fn add_serial_device(
1717         &mut self,
1718         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1719         serial_writer: Option<Box<dyn io::Write + Send>>,
1720     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1721         // Serial is tied to IRQ #4
1722         let serial_irq = 4;
1723 
1724         let id = String::from(SERIAL_DEVICE_NAME);
1725 
1726         let interrupt_group = interrupt_manager
1727             .create_group(LegacyIrqGroupConfig {
1728                 irq: serial_irq as InterruptIndex,
1729             })
1730             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1731 
1732         let serial = Arc::new(Mutex::new(Serial::new(
1733             id.clone(),
1734             interrupt_group,
1735             serial_writer,
1736             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1737                 .map_err(DeviceManagerError::RestoreGetState)?,
1738         )));
1739 
1740         self.bus_devices
1741             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1742 
1743         self.address_manager
1744             .allocator
1745             .lock()
1746             .unwrap()
1747             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1748             .ok_or(DeviceManagerError::AllocateIoPort)?;
1749 
1750         self.address_manager
1751             .io_bus
1752             .insert(serial.clone(), 0x3f8, 0x8)
1753             .map_err(DeviceManagerError::BusError)?;
1754 
1755         // Fill the device tree with a new node. In case of restore, we
1756         // know there is nothing to do, so we can simply override the
1757         // existing entry.
1758         self.device_tree
1759             .lock()
1760             .unwrap()
1761             .insert(id.clone(), device_node!(id, serial));
1762 
1763         Ok(serial)
1764     }
1765 
1766     #[cfg(target_arch = "aarch64")]
1767     fn add_serial_device(
1768         &mut self,
1769         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1770         serial_writer: Option<Box<dyn io::Write + Send>>,
1771     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1772         let id = String::from(SERIAL_DEVICE_NAME);
1773 
1774         let serial_irq = self
1775             .address_manager
1776             .allocator
1777             .lock()
1778             .unwrap()
1779             .allocate_irq()
1780             .unwrap();
1781 
1782         let interrupt_group = interrupt_manager
1783             .create_group(LegacyIrqGroupConfig {
1784                 irq: serial_irq as InterruptIndex,
1785             })
1786             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1787 
1788         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1789             id.clone(),
1790             interrupt_group,
1791             serial_writer,
1792             self.timestamp,
1793             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1794                 .map_err(DeviceManagerError::RestoreGetState)?,
1795         )));
1796 
1797         self.bus_devices
1798             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1799 
1800         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
1801 
1802         self.address_manager
1803             .mmio_bus
1804             .insert(serial.clone(), addr.0, MMIO_LEN)
1805             .map_err(DeviceManagerError::BusError)?;
1806 
1807         self.id_to_dev_info.insert(
1808             (DeviceType::Serial, DeviceType::Serial.to_string()),
1809             MmioDeviceInfo {
1810                 addr: addr.0,
1811                 len: MMIO_LEN,
1812                 irq: serial_irq,
1813             },
1814         );
1815 
1816         self.cmdline_additions
1817             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1818 
1819         // Fill the device tree with a new node. In case of restore, we
1820         // know there is nothing to do, so we can simply override the
1821         // existing entry.
1822         self.device_tree
1823             .lock()
1824             .unwrap()
1825             .insert(id.clone(), device_node!(id, serial));
1826 
1827         Ok(serial)
1828     }
1829 
1830     fn modify_mode<F: FnOnce(&mut termios)>(
1831         &self,
1832         fd: RawFd,
1833         f: F,
1834     ) -> vmm_sys_util::errno::Result<()> {
1835         // SAFETY: safe because we check the return value of isatty.
1836         if unsafe { isatty(fd) } != 1 {
1837             return Ok(());
1838         }
1839 
1840         // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr
1841         // and we check the return result.
1842         let mut termios: termios = unsafe { zeroed() };
1843         // SAFETY: see above
1844         let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
1845         if ret < 0 {
1846             return vmm_sys_util::errno::errno_result();
1847         }
1848         f(&mut termios);
1849         // SAFETY: Safe because the syscall will only read the extent of termios and we check
1850         // the return result.
1851         let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
1852         if ret < 0 {
1853             return vmm_sys_util::errno::errno_result();
1854         }
1855 
1856         Ok(())
1857     }
1858 
1859     fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> {
1860         // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
1861         self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
1862     }
1863 
1864     fn listen_for_sigwinch_on_tty(&mut self, pty_main: File, pty_sub: File) -> std::io::Result<()> {
1865         let seccomp_filter = get_seccomp_filter(
1866             &self.seccomp_action,
1867             Thread::PtyForeground,
1868             self.hypervisor_type,
1869         )
1870         .unwrap();
1871 
1872         match start_sigwinch_listener(seccomp_filter, pty_main, pty_sub) {
1873             Ok(pipe) => {
1874                 self.console_resize_pipe = Some(Arc::new(pipe));
1875             }
1876             Err(e) => {
1877                 warn!("Ignoring error from setting up SIGWINCH listener: {}", e)
1878             }
1879         }
1880 
1881         Ok(())
1882     }
1883 
1884     fn add_virtio_console_device(
1885         &mut self,
1886         virtio_devices: &mut Vec<MetaVirtioDevice>,
1887         console_pty: Option<PtyPair>,
1888         resize_pipe: Option<File>,
1889     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
1890         let console_config = self.config.lock().unwrap().console.clone();
1891         let endpoint = match console_config.mode {
1892             ConsoleOutputMode::File => {
1893                 let file = File::create(console_config.file.as_ref().unwrap())
1894                     .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
1895                 Endpoint::File(file)
1896             }
1897             ConsoleOutputMode::Pty => {
1898                 if let Some(pty) = console_pty {
1899                     self.config.lock().unwrap().console.file = Some(pty.path.clone());
1900                     let file = pty.main.try_clone().unwrap();
1901                     self.console_pty = Some(Arc::new(Mutex::new(pty)));
1902                     self.console_resize_pipe = resize_pipe.map(Arc::new);
1903                     Endpoint::PtyPair(file.try_clone().unwrap(), file)
1904                 } else {
1905                     let (main, mut sub, path) =
1906                         create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?;
1907                     self.set_raw_mode(&mut sub)
1908                         .map_err(DeviceManagerError::SetPtyRaw)?;
1909                     self.config.lock().unwrap().console.file = Some(path.clone());
1910                     let file = main.try_clone().unwrap();
1911                     assert!(resize_pipe.is_none());
1912                     self.listen_for_sigwinch_on_tty(main.try_clone().unwrap(), sub)
1913                         .unwrap();
1914                     self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
1915                     Endpoint::PtyPair(file.try_clone().unwrap(), file)
1916                 }
1917             }
1918             ConsoleOutputMode::Tty => {
1919                 // Duplicating the file descriptors like this is needed as otherwise
1920                 // they will be closed on a reboot and the numbers reused
1921 
1922                 // SAFETY: FFI call to dup. Trivially safe.
1923                 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
1924                 if stdout == -1 {
1925                     return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
1926                 }
1927                 // SAFETY: stdout is valid and owned solely by us.
1928                 let stdout = unsafe { File::from_raw_fd(stdout) };
1929 
1930                 // If an interactive TTY then we can accept input
1931                 // SAFETY: FFI call. Trivially safe.
1932                 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
1933                     // SAFETY: FFI call to dup. Trivially safe.
1934                     let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
1935                     if stdin == -1 {
1936                         return vmm_sys_util::errno::errno_result()
1937                             .map_err(DeviceManagerError::DupFd);
1938                     }
1939                     // SAFETY: stdin is valid and owned solely by us.
1940                     let stdin = unsafe { File::from_raw_fd(stdin) };
1941 
1942                     Endpoint::FilePair(stdout, stdin)
1943                 } else {
1944                     Endpoint::File(stdout)
1945                 }
1946             }
1947             ConsoleOutputMode::Null => Endpoint::Null,
1948             ConsoleOutputMode::Off => return Ok(None),
1949         };
1950         let id = String::from(CONSOLE_DEVICE_NAME);
1951 
1952         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
1953             id.clone(),
1954             endpoint,
1955             self.console_resize_pipe
1956                 .as_ref()
1957                 .map(|p| p.try_clone().unwrap()),
1958             self.force_iommu | console_config.iommu,
1959             self.seccomp_action.clone(),
1960             self.exit_evt
1961                 .try_clone()
1962                 .map_err(DeviceManagerError::EventFd)?,
1963             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1964                 .map_err(DeviceManagerError::RestoreGetState)?,
1965         )
1966         .map_err(DeviceManagerError::CreateVirtioConsole)?;
1967         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
1968         virtio_devices.push(MetaVirtioDevice {
1969             virtio_device: Arc::clone(&virtio_console_device)
1970                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
1971             iommu: console_config.iommu,
1972             id: id.clone(),
1973             pci_segment: 0,
1974             dma_handler: None,
1975         });
1976 
1977         // Fill the device tree with a new node. In case of restore, we
1978         // know there is nothing to do, so we can simply override the
1979         // existing entry.
1980         self.device_tree
1981             .lock()
1982             .unwrap()
1983             .insert(id.clone(), device_node!(id, virtio_console_device));
1984 
1985         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
1986         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
1987             Some(console_resizer)
1988         } else {
1989             None
1990         })
1991     }
1992 
1993     fn add_console_device(
1994         &mut self,
1995         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1996         virtio_devices: &mut Vec<MetaVirtioDevice>,
1997         serial_pty: Option<PtyPair>,
1998         console_pty: Option<PtyPair>,
1999         console_resize_pipe: Option<File>,
2000     ) -> DeviceManagerResult<Arc<Console>> {
2001         let serial_config = self.config.lock().unwrap().serial.clone();
2002         let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
2003             ConsoleOutputMode::File => Some(Box::new(
2004                 File::create(serial_config.file.as_ref().unwrap())
2005                     .map_err(DeviceManagerError::SerialOutputFileOpen)?,
2006             )),
2007             ConsoleOutputMode::Pty => {
2008                 if let Some(pty) = serial_pty {
2009                     self.config.lock().unwrap().serial.file = Some(pty.path.clone());
2010                     self.serial_pty = Some(Arc::new(Mutex::new(pty)));
2011                 } else {
2012                     let (main, mut sub, path) =
2013                         create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
2014                     self.set_raw_mode(&mut sub)
2015                         .map_err(DeviceManagerError::SetPtyRaw)?;
2016                     self.config.lock().unwrap().serial.file = Some(path.clone());
2017                     self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
2018                 }
2019                 None
2020             }
2021             ConsoleOutputMode::Tty => Some(Box::new(stdout())),
2022             ConsoleOutputMode::Off | ConsoleOutputMode::Null => None,
2023         };
2024         if serial_config.mode != ConsoleOutputMode::Off {
2025             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
2026             self.serial_manager = match serial_config.mode {
2027                 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty => {
2028                     let serial_manager =
2029                         SerialManager::new(serial, self.serial_pty.clone(), serial_config.mode)
2030                             .map_err(DeviceManagerError::CreateSerialManager)?;
2031                     if let Some(mut serial_manager) = serial_manager {
2032                         serial_manager
2033                             .start_thread(
2034                                 self.exit_evt
2035                                     .try_clone()
2036                                     .map_err(DeviceManagerError::EventFd)?,
2037                             )
2038                             .map_err(DeviceManagerError::SpawnSerialManager)?;
2039                         Some(Arc::new(serial_manager))
2040                     } else {
2041                         None
2042                     }
2043                 }
2044                 _ => None,
2045             };
2046         }
2047 
2048         let console_resizer =
2049             self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;
2050 
2051         Ok(Arc::new(Console { console_resizer }))
2052     }
2053 
2054     fn add_tpm_device(
2055         &mut self,
2056         tpm_path: PathBuf,
2057     ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
2058         // Create TPM Device
2059         let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
2060             DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
2061         })?;
2062         let tpm = Arc::new(Mutex::new(tpm));
2063 
2064         // Add TPM Device to mmio
2065         self.address_manager
2066             .mmio_bus
2067             .insert(
2068                 tpm.clone(),
2069                 arch::layout::TPM_START.0,
2070                 arch::layout::TPM_SIZE,
2071             )
2072             .map_err(DeviceManagerError::BusError)?;
2073 
2074         Ok(tpm)
2075     }
2076 
2077     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2078         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
2079 
2080         // Create "standard" virtio devices (net/block/rng)
2081         devices.append(&mut self.make_virtio_block_devices()?);
2082         devices.append(&mut self.make_virtio_net_devices()?);
2083         devices.append(&mut self.make_virtio_rng_devices()?);
2084 
2085         // Add virtio-fs if required
2086         devices.append(&mut self.make_virtio_fs_devices()?);
2087 
2088         // Add virtio-pmem if required
2089         devices.append(&mut self.make_virtio_pmem_devices()?);
2090 
2091         // Add virtio-vsock if required
2092         devices.append(&mut self.make_virtio_vsock_devices()?);
2093 
2094         devices.append(&mut self.make_virtio_mem_devices()?);
2095 
2096         // Add virtio-balloon if required
2097         devices.append(&mut self.make_virtio_balloon_devices()?);
2098 
2099         // Add virtio-watchdog device
2100         devices.append(&mut self.make_virtio_watchdog_devices()?);
2101 
2102         // Add vDPA devices if required
2103         devices.append(&mut self.make_vdpa_devices()?);
2104 
2105         Ok(devices)
2106     }
2107 
2108     // Cache whether io_uring is supported to avoid probing for very block device
2109     fn io_uring_is_supported(&mut self) -> bool {
2110         if let Some(supported) = self.io_uring_supported {
2111             return supported;
2112         }
2113 
2114         let supported = block_io_uring_is_supported();
2115         self.io_uring_supported = Some(supported);
2116         supported
2117     }
2118 
2119     fn make_virtio_block_device(
2120         &mut self,
2121         disk_cfg: &mut DiskConfig,
2122     ) -> DeviceManagerResult<MetaVirtioDevice> {
2123         let id = if let Some(id) = &disk_cfg.id {
2124             id.clone()
2125         } else {
2126             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
2127             disk_cfg.id = Some(id.clone());
2128             id
2129         };
2130 
2131         info!("Creating virtio-block device: {:?}", disk_cfg);
2132 
2133         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
2134 
2135         let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
2136             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
2137             let vu_cfg = VhostUserConfig {
2138                 socket,
2139                 num_queues: disk_cfg.num_queues,
2140                 queue_size: disk_cfg.queue_size,
2141             };
2142             let vhost_user_block = Arc::new(Mutex::new(
2143                 match virtio_devices::vhost_user::Blk::new(
2144                     id.clone(),
2145                     vu_cfg,
2146                     self.seccomp_action.clone(),
2147                     self.exit_evt
2148                         .try_clone()
2149                         .map_err(DeviceManagerError::EventFd)?,
2150                     self.force_iommu,
2151                     snapshot
2152                         .map(|s| s.to_versioned_state())
2153                         .transpose()
2154                         .map_err(DeviceManagerError::RestoreGetState)?,
2155                 ) {
2156                     Ok(vub_device) => vub_device,
2157                     Err(e) => {
2158                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2159                     }
2160                 },
2161             ));
2162 
2163             (
2164                 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2165                 vhost_user_block as Arc<Mutex<dyn Migratable>>,
2166             )
2167         } else {
2168             let mut options = OpenOptions::new();
2169             options.read(true);
2170             options.write(!disk_cfg.readonly);
2171             if disk_cfg.direct {
2172                 options.custom_flags(libc::O_DIRECT);
2173             }
2174             // Open block device path
2175             let mut file: File = options
2176                 .open(
2177                     disk_cfg
2178                         .path
2179                         .as_ref()
2180                         .ok_or(DeviceManagerError::NoDiskPath)?
2181                         .clone(),
2182                 )
2183                 .map_err(DeviceManagerError::Disk)?;
2184             let image_type =
2185                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2186 
2187             let image = match image_type {
2188                 ImageType::FixedVhd => {
2189                     // Use asynchronous backend relying on io_uring if the
2190                     // syscalls are supported.
2191                     if !disk_cfg.disable_io_uring && self.io_uring_is_supported() {
2192                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2193                         Box::new(
2194                             FixedVhdDiskAsync::new(file)
2195                                 .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2196                         ) as Box<dyn DiskFile>
2197                     } else {
2198                         info!("Using synchronous fixed VHD disk file");
2199                         Box::new(
2200                             FixedVhdDiskSync::new(file)
2201                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2202                         ) as Box<dyn DiskFile>
2203                     }
2204                 }
2205                 ImageType::Raw => {
2206                     // Use asynchronous backend relying on io_uring if the
2207                     // syscalls are supported.
2208                     if !disk_cfg.disable_io_uring && self.io_uring_is_supported() {
2209                         info!("Using asynchronous RAW disk file (io_uring)");
2210                         Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2211                     } else {
2212                         info!("Using synchronous RAW disk file");
2213                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2214                     }
2215                 }
2216                 ImageType::Qcow2 => {
2217                     info!("Using synchronous QCOW disk file");
2218                     Box::new(
2219                         QcowDiskSync::new(file, disk_cfg.direct)
2220                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2221                     ) as Box<dyn DiskFile>
2222                 }
2223                 ImageType::Vhdx => {
2224                     info!("Using synchronous VHDX disk file");
2225                     Box::new(
2226                         VhdxDiskSync::new(file)
2227                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2228                     ) as Box<dyn DiskFile>
2229                 }
2230             };
2231 
2232             let virtio_block = Arc::new(Mutex::new(
2233                 virtio_devices::Block::new(
2234                     id.clone(),
2235                     image,
2236                     disk_cfg
2237                         .path
2238                         .as_ref()
2239                         .ok_or(DeviceManagerError::NoDiskPath)?
2240                         .clone(),
2241                     disk_cfg.readonly,
2242                     self.force_iommu | disk_cfg.iommu,
2243                     disk_cfg.num_queues,
2244                     disk_cfg.queue_size,
2245                     self.seccomp_action.clone(),
2246                     disk_cfg.rate_limiter_config,
2247                     self.exit_evt
2248                         .try_clone()
2249                         .map_err(DeviceManagerError::EventFd)?,
2250                     snapshot
2251                         .map(|s| s.to_versioned_state())
2252                         .transpose()
2253                         .map_err(DeviceManagerError::RestoreGetState)?,
2254                 )
2255                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2256             ));
2257 
2258             (
2259                 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2260                 virtio_block as Arc<Mutex<dyn Migratable>>,
2261             )
2262         };
2263 
2264         // Fill the device tree with a new node. In case of restore, we
2265         // know there is nothing to do, so we can simply override the
2266         // existing entry.
2267         self.device_tree
2268             .lock()
2269             .unwrap()
2270             .insert(id.clone(), device_node!(id, migratable_device));
2271 
2272         Ok(MetaVirtioDevice {
2273             virtio_device,
2274             iommu: disk_cfg.iommu,
2275             id,
2276             pci_segment: disk_cfg.pci_segment,
2277             dma_handler: None,
2278         })
2279     }
2280 
2281     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2282         let mut devices = Vec::new();
2283 
2284         let mut block_devices = self.config.lock().unwrap().disks.clone();
2285         if let Some(disk_list_cfg) = &mut block_devices {
2286             for disk_cfg in disk_list_cfg.iter_mut() {
2287                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2288             }
2289         }
2290         self.config.lock().unwrap().disks = block_devices;
2291 
2292         Ok(devices)
2293     }
2294 
2295     fn make_virtio_net_device(
2296         &mut self,
2297         net_cfg: &mut NetConfig,
2298     ) -> DeviceManagerResult<MetaVirtioDevice> {
2299         let id = if let Some(id) = &net_cfg.id {
2300             id.clone()
2301         } else {
2302             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2303             net_cfg.id = Some(id.clone());
2304             id
2305         };
2306         info!("Creating virtio-net device: {:?}", net_cfg);
2307 
2308         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
2309 
2310         let (virtio_device, migratable_device) = if net_cfg.vhost_user {
2311             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2312             let vu_cfg = VhostUserConfig {
2313                 socket,
2314                 num_queues: net_cfg.num_queues,
2315                 queue_size: net_cfg.queue_size,
2316             };
2317             let server = match net_cfg.vhost_mode {
2318                 VhostMode::Client => false,
2319                 VhostMode::Server => true,
2320             };
2321             let vhost_user_net = Arc::new(Mutex::new(
2322                 match virtio_devices::vhost_user::Net::new(
2323                     id.clone(),
2324                     net_cfg.mac,
2325                     net_cfg.mtu,
2326                     vu_cfg,
2327                     server,
2328                     self.seccomp_action.clone(),
2329                     self.exit_evt
2330                         .try_clone()
2331                         .map_err(DeviceManagerError::EventFd)?,
2332                     self.force_iommu,
2333                     snapshot
2334                         .map(|s| s.to_versioned_state())
2335                         .transpose()
2336                         .map_err(DeviceManagerError::RestoreGetState)?,
2337                 ) {
2338                     Ok(vun_device) => vun_device,
2339                     Err(e) => {
2340                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2341                     }
2342                 },
2343             ));
2344 
2345             (
2346                 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2347                 vhost_user_net as Arc<Mutex<dyn Migratable>>,
2348             )
2349         } else {
2350             let state = snapshot
2351                 .map(|s| s.to_versioned_state())
2352                 .transpose()
2353                 .map_err(DeviceManagerError::RestoreGetState)?;
2354 
2355             let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
2356                 Arc::new(Mutex::new(
2357                     virtio_devices::Net::new(
2358                         id.clone(),
2359                         Some(tap_if_name),
2360                         None,
2361                         None,
2362                         Some(net_cfg.mac),
2363                         &mut net_cfg.host_mac,
2364                         net_cfg.mtu,
2365                         self.force_iommu | net_cfg.iommu,
2366                         net_cfg.num_queues,
2367                         net_cfg.queue_size,
2368                         self.seccomp_action.clone(),
2369                         net_cfg.rate_limiter_config,
2370                         self.exit_evt
2371                             .try_clone()
2372                             .map_err(DeviceManagerError::EventFd)?,
2373                         state,
2374                     )
2375                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2376                 ))
2377             } else if let Some(fds) = &net_cfg.fds {
2378                 Arc::new(Mutex::new(
2379                     virtio_devices::Net::from_tap_fds(
2380                         id.clone(),
2381                         fds,
2382                         Some(net_cfg.mac),
2383                         net_cfg.mtu,
2384                         self.force_iommu | net_cfg.iommu,
2385                         net_cfg.queue_size,
2386                         self.seccomp_action.clone(),
2387                         net_cfg.rate_limiter_config,
2388                         self.exit_evt
2389                             .try_clone()
2390                             .map_err(DeviceManagerError::EventFd)?,
2391                         state,
2392                     )
2393                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2394                 ))
2395             } else {
2396                 Arc::new(Mutex::new(
2397                     virtio_devices::Net::new(
2398                         id.clone(),
2399                         None,
2400                         Some(net_cfg.ip),
2401                         Some(net_cfg.mask),
2402                         Some(net_cfg.mac),
2403                         &mut net_cfg.host_mac,
2404                         net_cfg.mtu,
2405                         self.force_iommu | net_cfg.iommu,
2406                         net_cfg.num_queues,
2407                         net_cfg.queue_size,
2408                         self.seccomp_action.clone(),
2409                         net_cfg.rate_limiter_config,
2410                         self.exit_evt
2411                             .try_clone()
2412                             .map_err(DeviceManagerError::EventFd)?,
2413                         state,
2414                     )
2415                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2416                 ))
2417             };
2418 
2419             (
2420                 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2421                 virtio_net as Arc<Mutex<dyn Migratable>>,
2422             )
2423         };
2424 
2425         // Fill the device tree with a new node. In case of restore, we
2426         // know there is nothing to do, so we can simply override the
2427         // existing entry.
2428         self.device_tree
2429             .lock()
2430             .unwrap()
2431             .insert(id.clone(), device_node!(id, migratable_device));
2432 
2433         Ok(MetaVirtioDevice {
2434             virtio_device,
2435             iommu: net_cfg.iommu,
2436             id,
2437             pci_segment: net_cfg.pci_segment,
2438             dma_handler: None,
2439         })
2440     }
2441 
2442     /// Add virto-net and vhost-user-net devices
2443     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2444         let mut devices = Vec::new();
2445         let mut net_devices = self.config.lock().unwrap().net.clone();
2446         if let Some(net_list_cfg) = &mut net_devices {
2447             for net_cfg in net_list_cfg.iter_mut() {
2448                 devices.push(self.make_virtio_net_device(net_cfg)?);
2449             }
2450         }
2451         self.config.lock().unwrap().net = net_devices;
2452 
2453         Ok(devices)
2454     }
2455 
2456     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2457         let mut devices = Vec::new();
2458 
2459         // Add virtio-rng if required
2460         let rng_config = self.config.lock().unwrap().rng.clone();
2461         if let Some(rng_path) = rng_config.src.to_str() {
2462             info!("Creating virtio-rng device: {:?}", rng_config);
2463             let id = String::from(RNG_DEVICE_NAME);
2464 
2465             let virtio_rng_device = Arc::new(Mutex::new(
2466                 virtio_devices::Rng::new(
2467                     id.clone(),
2468                     rng_path,
2469                     self.force_iommu | rng_config.iommu,
2470                     self.seccomp_action.clone(),
2471                     self.exit_evt
2472                         .try_clone()
2473                         .map_err(DeviceManagerError::EventFd)?,
2474                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2475                         .map_err(DeviceManagerError::RestoreGetState)?,
2476                 )
2477                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2478             ));
2479             devices.push(MetaVirtioDevice {
2480                 virtio_device: Arc::clone(&virtio_rng_device)
2481                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2482                 iommu: rng_config.iommu,
2483                 id: id.clone(),
2484                 pci_segment: 0,
2485                 dma_handler: None,
2486             });
2487 
2488             // Fill the device tree with a new node. In case of restore, we
2489             // know there is nothing to do, so we can simply override the
2490             // existing entry.
2491             self.device_tree
2492                 .lock()
2493                 .unwrap()
2494                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2495         }
2496 
2497         Ok(devices)
2498     }
2499 
2500     fn make_virtio_fs_device(
2501         &mut self,
2502         fs_cfg: &mut FsConfig,
2503     ) -> DeviceManagerResult<MetaVirtioDevice> {
2504         let id = if let Some(id) = &fs_cfg.id {
2505             id.clone()
2506         } else {
2507             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2508             fs_cfg.id = Some(id.clone());
2509             id
2510         };
2511 
2512         info!("Creating virtio-fs device: {:?}", fs_cfg);
2513 
2514         let mut node = device_node!(id);
2515 
2516         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2517             let virtio_fs_device = Arc::new(Mutex::new(
2518                 virtio_devices::vhost_user::Fs::new(
2519                     id.clone(),
2520                     fs_socket,
2521                     &fs_cfg.tag,
2522                     fs_cfg.num_queues,
2523                     fs_cfg.queue_size,
2524                     None,
2525                     self.seccomp_action.clone(),
2526                     self.exit_evt
2527                         .try_clone()
2528                         .map_err(DeviceManagerError::EventFd)?,
2529                     self.force_iommu,
2530                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2531                         .map_err(DeviceManagerError::RestoreGetState)?,
2532                 )
2533                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2534             ));
2535 
2536             // Update the device tree with the migratable device.
2537             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2538             self.device_tree.lock().unwrap().insert(id.clone(), node);
2539 
2540             Ok(MetaVirtioDevice {
2541                 virtio_device: Arc::clone(&virtio_fs_device)
2542                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2543                 iommu: false,
2544                 id,
2545                 pci_segment: fs_cfg.pci_segment,
2546                 dma_handler: None,
2547             })
2548         } else {
2549             Err(DeviceManagerError::NoVirtioFsSock)
2550         }
2551     }
2552 
2553     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2554         let mut devices = Vec::new();
2555 
2556         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2557         if let Some(fs_list_cfg) = &mut fs_devices {
2558             for fs_cfg in fs_list_cfg.iter_mut() {
2559                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2560             }
2561         }
2562         self.config.lock().unwrap().fs = fs_devices;
2563 
2564         Ok(devices)
2565     }
2566 
2567     fn make_virtio_pmem_device(
2568         &mut self,
2569         pmem_cfg: &mut PmemConfig,
2570     ) -> DeviceManagerResult<MetaVirtioDevice> {
2571         let id = if let Some(id) = &pmem_cfg.id {
2572             id.clone()
2573         } else {
2574             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2575             pmem_cfg.id = Some(id.clone());
2576             id
2577         };
2578 
2579         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2580 
2581         let mut node = device_node!(id);
2582 
2583         // Look for the id in the device tree. If it can be found, that means
2584         // the device is being restored, otherwise it's created from scratch.
2585         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2586             info!("Restoring virtio-pmem {} resources", id);
2587 
2588             let mut region_range: Option<(u64, u64)> = None;
2589             for resource in node.resources.iter() {
2590                 match resource {
2591                     Resource::MmioAddressRange { base, size } => {
2592                         if region_range.is_some() {
2593                             return Err(DeviceManagerError::ResourceAlreadyExists);
2594                         }
2595 
2596                         region_range = Some((*base, *size));
2597                     }
2598                     _ => {
2599                         error!("Unexpected resource {:?} for {}", resource, id);
2600                     }
2601                 }
2602             }
2603 
2604             if region_range.is_none() {
2605                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2606             }
2607 
2608             region_range
2609         } else {
2610             None
2611         };
2612 
2613         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2614             if pmem_cfg.size.is_none() {
2615                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2616             }
2617             (O_TMPFILE, true)
2618         } else {
2619             (0, false)
2620         };
2621 
2622         let mut file = OpenOptions::new()
2623             .read(true)
2624             .write(!pmem_cfg.discard_writes)
2625             .custom_flags(custom_flags)
2626             .open(&pmem_cfg.file)
2627             .map_err(DeviceManagerError::PmemFileOpen)?;
2628 
2629         let size = if let Some(size) = pmem_cfg.size {
2630             if set_len {
2631                 file.set_len(size)
2632                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2633             }
2634             size
2635         } else {
2636             file.seek(SeekFrom::End(0))
2637                 .map_err(DeviceManagerError::PmemFileSetLen)?
2638         };
2639 
2640         if size % 0x20_0000 != 0 {
2641             return Err(DeviceManagerError::PmemSizeNotAligned);
2642         }
2643 
2644         let (region_base, region_size) = if let Some((base, size)) = region_range {
2645             // The memory needs to be 2MiB aligned in order to support
2646             // hugepages.
2647             self.pci_segments[pmem_cfg.pci_segment as usize]
2648                 .allocator
2649                 .lock()
2650                 .unwrap()
2651                 .allocate(
2652                     Some(GuestAddress(base)),
2653                     size as GuestUsize,
2654                     Some(0x0020_0000),
2655                 )
2656                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2657 
2658             (base, size)
2659         } else {
2660             // The memory needs to be 2MiB aligned in order to support
2661             // hugepages.
2662             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2663                 .allocator
2664                 .lock()
2665                 .unwrap()
2666                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
2667                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2668 
2669             (base.raw_value(), size)
2670         };
2671 
2672         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2673         let mmap_region = MmapRegion::build(
2674             Some(FileOffset::new(cloned_file, 0)),
2675             region_size as usize,
2676             PROT_READ | PROT_WRITE,
2677             MAP_NORESERVE
2678                 | if pmem_cfg.discard_writes {
2679                     MAP_PRIVATE
2680                 } else {
2681                     MAP_SHARED
2682                 },
2683         )
2684         .map_err(DeviceManagerError::NewMmapRegion)?;
2685         let host_addr: u64 = mmap_region.as_ptr() as u64;
2686 
2687         let mem_slot = self
2688             .memory_manager
2689             .lock()
2690             .unwrap()
2691             .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
2692             .map_err(DeviceManagerError::MemoryManager)?;
2693 
2694         let mapping = virtio_devices::UserspaceMapping {
2695             host_addr,
2696             mem_slot,
2697             addr: GuestAddress(region_base),
2698             len: region_size,
2699             mergeable: false,
2700         };
2701 
2702         let virtio_pmem_device = Arc::new(Mutex::new(
2703             virtio_devices::Pmem::new(
2704                 id.clone(),
2705                 file,
2706                 GuestAddress(region_base),
2707                 mapping,
2708                 mmap_region,
2709                 self.force_iommu | pmem_cfg.iommu,
2710                 self.seccomp_action.clone(),
2711                 self.exit_evt
2712                     .try_clone()
2713                     .map_err(DeviceManagerError::EventFd)?,
2714                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2715                     .map_err(DeviceManagerError::RestoreGetState)?,
2716             )
2717             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2718         ));
2719 
2720         // Update the device tree with correct resource information and with
2721         // the migratable device.
2722         node.resources.push(Resource::MmioAddressRange {
2723             base: region_base,
2724             size: region_size,
2725         });
2726         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2727         self.device_tree.lock().unwrap().insert(id.clone(), node);
2728 
2729         Ok(MetaVirtioDevice {
2730             virtio_device: Arc::clone(&virtio_pmem_device)
2731                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2732             iommu: pmem_cfg.iommu,
2733             id,
2734             pci_segment: pmem_cfg.pci_segment,
2735             dma_handler: None,
2736         })
2737     }
2738 
2739     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2740         let mut devices = Vec::new();
2741         // Add virtio-pmem if required
2742         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2743         if let Some(pmem_list_cfg) = &mut pmem_devices {
2744             for pmem_cfg in pmem_list_cfg.iter_mut() {
2745                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2746             }
2747         }
2748         self.config.lock().unwrap().pmem = pmem_devices;
2749 
2750         Ok(devices)
2751     }
2752 
2753     fn make_virtio_vsock_device(
2754         &mut self,
2755         vsock_cfg: &mut VsockConfig,
2756     ) -> DeviceManagerResult<MetaVirtioDevice> {
2757         let id = if let Some(id) = &vsock_cfg.id {
2758             id.clone()
2759         } else {
2760             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2761             vsock_cfg.id = Some(id.clone());
2762             id
2763         };
2764 
2765         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2766 
2767         let socket_path = vsock_cfg
2768             .socket
2769             .to_str()
2770             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2771         let backend =
2772             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2773                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2774 
2775         let vsock_device = Arc::new(Mutex::new(
2776             virtio_devices::Vsock::new(
2777                 id.clone(),
2778                 vsock_cfg.cid,
2779                 vsock_cfg.socket.clone(),
2780                 backend,
2781                 self.force_iommu | vsock_cfg.iommu,
2782                 self.seccomp_action.clone(),
2783                 self.exit_evt
2784                     .try_clone()
2785                     .map_err(DeviceManagerError::EventFd)?,
2786                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2787                     .map_err(DeviceManagerError::RestoreGetState)?,
2788             )
2789             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2790         ));
2791 
2792         // Fill the device tree with a new node. In case of restore, we
2793         // know there is nothing to do, so we can simply override the
2794         // existing entry.
2795         self.device_tree
2796             .lock()
2797             .unwrap()
2798             .insert(id.clone(), device_node!(id, vsock_device));
2799 
2800         Ok(MetaVirtioDevice {
2801             virtio_device: Arc::clone(&vsock_device)
2802                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2803             iommu: vsock_cfg.iommu,
2804             id,
2805             pci_segment: vsock_cfg.pci_segment,
2806             dma_handler: None,
2807         })
2808     }
2809 
2810     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2811         let mut devices = Vec::new();
2812 
2813         let mut vsock = self.config.lock().unwrap().vsock.clone();
2814         if let Some(ref mut vsock_cfg) = &mut vsock {
2815             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
2816         }
2817         self.config.lock().unwrap().vsock = vsock;
2818 
2819         Ok(devices)
2820     }
2821 
2822     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2823         let mut devices = Vec::new();
2824 
2825         let mm = self.memory_manager.clone();
2826         let mut mm = mm.lock().unwrap();
2827         for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
2828             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
2829                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
2830 
2831                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
2832                     .map(|i| i as u16);
2833 
2834                 let virtio_mem_device = Arc::new(Mutex::new(
2835                     virtio_devices::Mem::new(
2836                         memory_zone_id.clone(),
2837                         virtio_mem_zone.region(),
2838                         self.seccomp_action.clone(),
2839                         node_id,
2840                         virtio_mem_zone.hotplugged_size(),
2841                         virtio_mem_zone.hugepages(),
2842                         self.exit_evt
2843                             .try_clone()
2844                             .map_err(DeviceManagerError::EventFd)?,
2845                         virtio_mem_zone.blocks_state().clone(),
2846                         versioned_state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
2847                             .map_err(DeviceManagerError::RestoreGetState)?,
2848                     )
2849                     .map_err(DeviceManagerError::CreateVirtioMem)?,
2850                 ));
2851 
2852                 // Update the virtio-mem zone so that it has a handle onto the
2853                 // virtio-mem device, which will be used for triggering a resize
2854                 // if needed.
2855                 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));
2856 
2857                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
2858 
2859                 devices.push(MetaVirtioDevice {
2860                     virtio_device: Arc::clone(&virtio_mem_device)
2861                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2862                     iommu: false,
2863                     id: memory_zone_id.clone(),
2864                     pci_segment: 0,
2865                     dma_handler: None,
2866                 });
2867 
2868                 // Fill the device tree with a new node. In case of restore, we
2869                 // know there is nothing to do, so we can simply override the
2870                 // existing entry.
2871                 self.device_tree.lock().unwrap().insert(
2872                     memory_zone_id.clone(),
2873                     device_node!(memory_zone_id, virtio_mem_device),
2874                 );
2875             }
2876         }
2877 
2878         Ok(devices)
2879     }
2880 
2881     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2882         let mut devices = Vec::new();
2883 
2884         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
2885             let id = String::from(BALLOON_DEVICE_NAME);
2886             info!("Creating virtio-balloon device: id = {}", id);
2887 
2888             let virtio_balloon_device = Arc::new(Mutex::new(
2889                 virtio_devices::Balloon::new(
2890                     id.clone(),
2891                     balloon_config.size,
2892                     balloon_config.deflate_on_oom,
2893                     balloon_config.free_page_reporting,
2894                     self.seccomp_action.clone(),
2895                     self.exit_evt
2896                         .try_clone()
2897                         .map_err(DeviceManagerError::EventFd)?,
2898                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2899                         .map_err(DeviceManagerError::RestoreGetState)?,
2900                 )
2901                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
2902             ));
2903 
2904             self.balloon = Some(virtio_balloon_device.clone());
2905 
2906             devices.push(MetaVirtioDevice {
2907                 virtio_device: Arc::clone(&virtio_balloon_device)
2908                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2909                 iommu: false,
2910                 id: id.clone(),
2911                 pci_segment: 0,
2912                 dma_handler: None,
2913             });
2914 
2915             self.device_tree
2916                 .lock()
2917                 .unwrap()
2918                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
2919         }
2920 
2921         Ok(devices)
2922     }
2923 
2924     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2925         let mut devices = Vec::new();
2926 
2927         if !self.config.lock().unwrap().watchdog {
2928             return Ok(devices);
2929         }
2930 
2931         let id = String::from(WATCHDOG_DEVICE_NAME);
2932         info!("Creating virtio-watchdog device: id = {}", id);
2933 
2934         let virtio_watchdog_device = Arc::new(Mutex::new(
2935             virtio_devices::Watchdog::new(
2936                 id.clone(),
2937                 self.reset_evt.try_clone().unwrap(),
2938                 self.seccomp_action.clone(),
2939                 self.exit_evt
2940                     .try_clone()
2941                     .map_err(DeviceManagerError::EventFd)?,
2942                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2943                     .map_err(DeviceManagerError::RestoreGetState)?,
2944             )
2945             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
2946         ));
2947         devices.push(MetaVirtioDevice {
2948             virtio_device: Arc::clone(&virtio_watchdog_device)
2949                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2950             iommu: false,
2951             id: id.clone(),
2952             pci_segment: 0,
2953             dma_handler: None,
2954         });
2955 
2956         self.device_tree
2957             .lock()
2958             .unwrap()
2959             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
2960 
2961         Ok(devices)
2962     }
2963 
2964     fn make_vdpa_device(
2965         &mut self,
2966         vdpa_cfg: &mut VdpaConfig,
2967     ) -> DeviceManagerResult<MetaVirtioDevice> {
2968         let id = if let Some(id) = &vdpa_cfg.id {
2969             id.clone()
2970         } else {
2971             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
2972             vdpa_cfg.id = Some(id.clone());
2973             id
2974         };
2975 
2976         info!("Creating vDPA device: {:?}", vdpa_cfg);
2977 
2978         let device_path = vdpa_cfg
2979             .path
2980             .to_str()
2981             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
2982 
2983         let vdpa_device = Arc::new(Mutex::new(
2984             virtio_devices::Vdpa::new(
2985                 id.clone(),
2986                 device_path,
2987                 self.memory_manager.lock().unwrap().guest_memory(),
2988                 vdpa_cfg.num_queues as u16,
2989                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2990                     .map_err(DeviceManagerError::RestoreGetState)?,
2991             )
2992             .map_err(DeviceManagerError::CreateVdpa)?,
2993         ));
2994 
2995         // Create the DMA handler that is required by the vDPA device
2996         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
2997             Arc::clone(&vdpa_device),
2998             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
2999         ));
3000 
3001         self.device_tree
3002             .lock()
3003             .unwrap()
3004             .insert(id.clone(), device_node!(id, vdpa_device));
3005 
3006         Ok(MetaVirtioDevice {
3007             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3008             iommu: vdpa_cfg.iommu,
3009             id,
3010             pci_segment: vdpa_cfg.pci_segment,
3011             dma_handler: Some(vdpa_mapping),
3012         })
3013     }
3014 
3015     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3016         let mut devices = Vec::new();
3017         // Add vdpa if required
3018         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
3019         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
3020             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
3021                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
3022             }
3023         }
3024         self.config.lock().unwrap().vdpa = vdpa_devices;
3025 
3026         Ok(devices)
3027     }
3028 
3029     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
3030         let start_id = self.device_id_cnt;
3031         loop {
3032             // Generate the temporary name.
3033             let name = format!("{}{}", prefix, self.device_id_cnt);
3034             // Increment the counter.
3035             self.device_id_cnt += Wrapping(1);
3036             // Check if the name is already in use.
3037             if !self.boot_id_list.contains(&name)
3038                 && !self.device_tree.lock().unwrap().contains_key(&name)
3039             {
3040                 return Ok(name);
3041             }
3042 
3043             if self.device_id_cnt == start_id {
3044                 // We went through a full loop and there's nothing else we can
3045                 // do.
3046                 break;
3047             }
3048         }
3049         Err(DeviceManagerError::NoAvailableDeviceName)
3050     }
3051 
3052     fn add_passthrough_device(
3053         &mut self,
3054         device_cfg: &mut DeviceConfig,
3055     ) -> DeviceManagerResult<(PciBdf, String)> {
3056         // If the passthrough device has not been created yet, it is created
3057         // here and stored in the DeviceManager structure for future needs.
3058         if self.passthrough_device.is_none() {
3059             self.passthrough_device = Some(
3060                 self.address_manager
3061                     .vm
3062                     .create_passthrough_device()
3063                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
3064             );
3065         }
3066 
3067         self.add_vfio_device(device_cfg)
3068     }
3069 
3070     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
3071         let passthrough_device = self
3072             .passthrough_device
3073             .as_ref()
3074             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
3075 
3076         let dup = passthrough_device
3077             .try_clone()
3078             .map_err(DeviceManagerError::VfioCreate)?;
3079 
3080         Ok(Arc::new(
3081             VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
3082         ))
3083     }
3084 
3085     fn add_vfio_device(
3086         &mut self,
3087         device_cfg: &mut DeviceConfig,
3088     ) -> DeviceManagerResult<(PciBdf, String)> {
3089         let vfio_name = if let Some(id) = &device_cfg.id {
3090             id.clone()
3091         } else {
3092             let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
3093             device_cfg.id = Some(id.clone());
3094             id
3095         };
3096 
3097         let (pci_segment_id, pci_device_bdf, resources) =
3098             self.pci_resources(&vfio_name, device_cfg.pci_segment)?;
3099 
3100         let mut needs_dma_mapping = false;
3101 
3102         // Here we create a new VFIO container for two reasons. Either this is
3103         // the first VFIO device, meaning we need a new VFIO container, which
3104         // will be shared with other VFIO devices. Or the new VFIO device is
3105         // attached to a vIOMMU, meaning we must create a dedicated VFIO
3106         // container. In the vIOMMU use case, we can't let all devices under
3107         // the same VFIO container since we couldn't map/unmap memory for each
3108         // device. That's simply because the map/unmap operations happen at the
3109         // VFIO container level.
3110         let vfio_container = if device_cfg.iommu {
3111             let vfio_container = self.create_vfio_container()?;
3112 
3113             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3114                 Arc::clone(&vfio_container),
3115                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3116             ));
3117 
3118             if let Some(iommu) = &self.iommu_device {
3119                 iommu
3120                     .lock()
3121                     .unwrap()
3122                     .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
3123             } else {
3124                 return Err(DeviceManagerError::MissingVirtualIommu);
3125             }
3126 
3127             vfio_container
3128         } else if let Some(vfio_container) = &self.vfio_container {
3129             Arc::clone(vfio_container)
3130         } else {
3131             let vfio_container = self.create_vfio_container()?;
3132             needs_dma_mapping = true;
3133             self.vfio_container = Some(Arc::clone(&vfio_container));
3134 
3135             vfio_container
3136         };
3137 
3138         let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
3139             .map_err(DeviceManagerError::VfioCreate)?;
3140 
3141         if needs_dma_mapping {
3142             // Register DMA mapping in IOMMU.
3143             // Do not register virtio-mem regions, as they are handled directly by
3144             // virtio-mem device itself.
3145             for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3146                 for region in zone.regions() {
3147                     vfio_container
3148                         .vfio_dma_map(
3149                             region.start_addr().raw_value(),
3150                             region.len(),
3151                             region.as_ptr() as u64,
3152                         )
3153                         .map_err(DeviceManagerError::VfioDmaMap)?;
3154                 }
3155             }
3156 
3157             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3158                 Arc::clone(&vfio_container),
3159                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3160             ));
3161 
3162             for virtio_mem_device in self.virtio_mem_devices.iter() {
3163                 virtio_mem_device
3164                     .lock()
3165                     .unwrap()
3166                     .add_dma_mapping_handler(
3167                         VirtioMemMappingSource::Container,
3168                         vfio_mapping.clone(),
3169                     )
3170                     .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3171             }
3172         }
3173 
3174         let legacy_interrupt_group =
3175             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3176                 Some(
3177                     legacy_interrupt_manager
3178                         .create_group(LegacyIrqGroupConfig {
3179                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3180                                 [pci_device_bdf.device() as usize]
3181                                 as InterruptIndex,
3182                         })
3183                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3184                 )
3185             } else {
3186                 None
3187             };
3188 
3189         let memory_manager = self.memory_manager.clone();
3190 
3191         let vfio_pci_device = VfioPciDevice::new(
3192             vfio_name.clone(),
3193             &self.address_manager.vm,
3194             vfio_device,
3195             vfio_container,
3196             self.msi_interrupt_manager.clone(),
3197             legacy_interrupt_group,
3198             device_cfg.iommu,
3199             pci_device_bdf,
3200             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3201             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
3202         )
3203         .map_err(DeviceManagerError::VfioPciCreate)?;
3204 
3205         let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));
3206 
3207         let new_resources = self.add_pci_device(
3208             vfio_pci_device.clone(),
3209             vfio_pci_device.clone(),
3210             pci_segment_id,
3211             pci_device_bdf,
3212             resources,
3213         )?;
3214 
3215         vfio_pci_device
3216             .lock()
3217             .unwrap()
3218             .map_mmio_regions()
3219             .map_err(DeviceManagerError::VfioMapRegion)?;
3220 
3221         let mut node = device_node!(vfio_name, vfio_pci_device);
3222 
3223         // Update the device tree with correct resource information.
3224         node.resources = new_resources;
3225         node.pci_bdf = Some(pci_device_bdf);
3226         node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));
3227 
3228         self.device_tree
3229             .lock()
3230             .unwrap()
3231             .insert(vfio_name.clone(), node);
3232 
3233         Ok((pci_device_bdf, vfio_name))
3234     }
3235 
3236     fn add_pci_device(
3237         &mut self,
3238         bus_device: Arc<Mutex<dyn BusDevice>>,
3239         pci_device: Arc<Mutex<dyn PciDevice>>,
3240         segment_id: u16,
3241         bdf: PciBdf,
3242         resources: Option<Vec<Resource>>,
3243     ) -> DeviceManagerResult<Vec<Resource>> {
3244         let bars = pci_device
3245             .lock()
3246             .unwrap()
3247             .allocate_bars(
3248                 &self.address_manager.allocator,
3249                 &mut self.pci_segments[segment_id as usize]
3250                     .allocator
3251                     .lock()
3252                     .unwrap(),
3253                 resources,
3254             )
3255             .map_err(DeviceManagerError::AllocateBars)?;
3256 
3257         let mut pci_bus = self.pci_segments[segment_id as usize]
3258             .pci_bus
3259             .lock()
3260             .unwrap();
3261 
3262         pci_bus
3263             .add_device(bdf.device() as u32, pci_device)
3264             .map_err(DeviceManagerError::AddPciDevice)?;
3265 
3266         self.bus_devices.push(Arc::clone(&bus_device));
3267 
3268         pci_bus
3269             .register_mapping(
3270                 bus_device,
3271                 #[cfg(target_arch = "x86_64")]
3272                 self.address_manager.io_bus.as_ref(),
3273                 self.address_manager.mmio_bus.as_ref(),
3274                 bars.clone(),
3275             )
3276             .map_err(DeviceManagerError::AddPciDevice)?;
3277 
3278         let mut new_resources = Vec::new();
3279         for bar in bars {
3280             new_resources.push(Resource::PciBar {
3281                 index: bar.idx(),
3282                 base: bar.addr(),
3283                 size: bar.size(),
3284                 type_: bar.region_type().into(),
3285                 prefetchable: bar.prefetchable().into(),
3286             });
3287         }
3288 
3289         Ok(new_resources)
3290     }
3291 
3292     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3293         let mut iommu_attached_device_ids = Vec::new();
3294         let mut devices = self.config.lock().unwrap().devices.clone();
3295 
3296         if let Some(device_list_cfg) = &mut devices {
3297             for device_cfg in device_list_cfg.iter_mut() {
3298                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3299                 if device_cfg.iommu && self.iommu_device.is_some() {
3300                     iommu_attached_device_ids.push(device_id);
3301                 }
3302             }
3303         }
3304 
3305         // Update the list of devices
3306         self.config.lock().unwrap().devices = devices;
3307 
3308         Ok(iommu_attached_device_ids)
3309     }
3310 
3311     fn add_vfio_user_device(
3312         &mut self,
3313         device_cfg: &mut UserDeviceConfig,
3314     ) -> DeviceManagerResult<(PciBdf, String)> {
3315         let vfio_user_name = if let Some(id) = &device_cfg.id {
3316             id.clone()
3317         } else {
3318             let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
3319             device_cfg.id = Some(id.clone());
3320             id
3321         };
3322 
3323         let (pci_segment_id, pci_device_bdf, resources) =
3324             self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;
3325 
3326         let legacy_interrupt_group =
3327             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3328                 Some(
3329                     legacy_interrupt_manager
3330                         .create_group(LegacyIrqGroupConfig {
3331                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3332                                 [pci_device_bdf.device() as usize]
3333                                 as InterruptIndex,
3334                         })
3335                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3336                 )
3337             } else {
3338                 None
3339             };
3340 
3341         let client = Arc::new(Mutex::new(
3342             vfio_user::Client::new(&device_cfg.socket)
3343                 .map_err(DeviceManagerError::VfioUserCreateClient)?,
3344         ));
3345 
3346         let memory_manager = self.memory_manager.clone();
3347 
3348         let mut vfio_user_pci_device = VfioUserPciDevice::new(
3349             vfio_user_name.clone(),
3350             &self.address_manager.vm,
3351             client.clone(),
3352             self.msi_interrupt_manager.clone(),
3353             legacy_interrupt_group,
3354             pci_device_bdf,
3355             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3356             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
3357         )
3358         .map_err(DeviceManagerError::VfioUserCreate)?;
3359 
3360         let memory = self.memory_manager.lock().unwrap().guest_memory();
3361         let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
3362         for virtio_mem_device in self.virtio_mem_devices.iter() {
3363             virtio_mem_device
3364                 .lock()
3365                 .unwrap()
3366                 .add_dma_mapping_handler(
3367                     VirtioMemMappingSource::Device(pci_device_bdf.into()),
3368                     vfio_user_mapping.clone(),
3369                 )
3370                 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3371         }
3372 
3373         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3374             for region in zone.regions() {
3375                 vfio_user_pci_device
3376                     .dma_map(region)
3377                     .map_err(DeviceManagerError::VfioUserDmaMap)?;
3378             }
3379         }
3380 
3381         let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));
3382 
3383         let new_resources = self.add_pci_device(
3384             vfio_user_pci_device.clone(),
3385             vfio_user_pci_device.clone(),
3386             pci_segment_id,
3387             pci_device_bdf,
3388             resources,
3389         )?;
3390 
3391         // Note it is required to call 'add_pci_device()' in advance to have the list of
3392         // mmio regions provisioned correctly
3393         vfio_user_pci_device
3394             .lock()
3395             .unwrap()
3396             .map_mmio_regions()
3397             .map_err(DeviceManagerError::VfioUserMapRegion)?;
3398 
3399         let mut node = device_node!(vfio_user_name, vfio_user_pci_device);
3400 
3401         // Update the device tree with correct resource information.
3402         node.resources = new_resources;
3403         node.pci_bdf = Some(pci_device_bdf);
3404         node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));
3405 
3406         self.device_tree
3407             .lock()
3408             .unwrap()
3409             .insert(vfio_user_name.clone(), node);
3410 
3411         Ok((pci_device_bdf, vfio_user_name))
3412     }
3413 
3414     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3415         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3416 
3417         if let Some(device_list_cfg) = &mut user_devices {
3418             for device_cfg in device_list_cfg.iter_mut() {
3419                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3420             }
3421         }
3422 
3423         // Update the list of devices
3424         self.config.lock().unwrap().user_devices = user_devices;
3425 
3426         Ok(vec![])
3427     }
3428 
3429     fn add_virtio_pci_device(
3430         &mut self,
3431         virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3432         iommu_mapping: &Option<Arc<IommuMapping>>,
3433         virtio_device_id: String,
3434         pci_segment_id: u16,
3435         dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
3436     ) -> DeviceManagerResult<PciBdf> {
3437         let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");
3438 
3439         // Add the new virtio-pci node to the device tree.
3440         let mut node = device_node!(id);
3441         node.children = vec![virtio_device_id.clone()];
3442 
3443         let (pci_segment_id, pci_device_bdf, resources) =
3444             self.pci_resources(&id, pci_segment_id)?;
3445 
3446         // Update the existing virtio node by setting the parent.
3447         if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
3448             node.parent = Some(id.clone());
3449         } else {
3450             return Err(DeviceManagerError::MissingNode);
3451         }
3452 
3453         // Allows support for one MSI-X vector per queue. It also adds 1
3454         // as we need to take into account the dedicated vector to notify
3455         // about a virtio config change.
3456         let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;
3457 
3458         // Create the AccessPlatform trait from the implementation IommuMapping.
3459         // This will provide address translation for any virtio device sitting
3460         // behind a vIOMMU.
3461         let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
3462         {
3463             Some(Arc::new(AccessPlatformMapping::new(
3464                 pci_device_bdf.into(),
3465                 mapping.clone(),
3466             )))
3467         } else {
3468             None
3469         };
3470 
3471         let memory = self.memory_manager.lock().unwrap().guest_memory();
3472 
3473         // Map DMA ranges if a DMA handler is available and if the device is
3474         // not attached to a virtual IOMMU.
3475         if let Some(dma_handler) = &dma_handler {
3476             if iommu_mapping.is_some() {
3477                 if let Some(iommu) = &self.iommu_device {
3478                     iommu
3479                         .lock()
3480                         .unwrap()
3481                         .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
3482                 } else {
3483                     return Err(DeviceManagerError::MissingVirtualIommu);
3484                 }
3485             } else {
3486                 // Let every virtio-mem device handle the DMA map/unmap through the
3487                 // DMA handler provided.
3488                 for virtio_mem_device in self.virtio_mem_devices.iter() {
3489                     virtio_mem_device
3490                         .lock()
3491                         .unwrap()
3492                         .add_dma_mapping_handler(
3493                             VirtioMemMappingSource::Device(pci_device_bdf.into()),
3494                             dma_handler.clone(),
3495                         )
3496                         .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3497                 }
3498 
3499                 // Do not register virtio-mem regions, as they are handled directly by
3500                 // virtio-mem devices.
3501                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3502                     for region in zone.regions() {
3503                         let gpa = region.start_addr().0;
3504                         let size = region.len();
3505                         dma_handler
3506                             .map(gpa, gpa, size)
3507                             .map_err(DeviceManagerError::VirtioDmaMap)?;
3508                     }
3509                 }
3510             }
3511         }
3512 
3513         let device_type = virtio_device.lock().unwrap().device_type();
3514         let virtio_pci_device = Arc::new(Mutex::new(
3515             VirtioPciDevice::new(
3516                 id.clone(),
3517                 memory,
3518                 virtio_device,
3519                 msix_num,
3520                 access_platform,
3521                 &self.msi_interrupt_manager,
3522                 pci_device_bdf.into(),
3523                 self.activate_evt
3524                     .try_clone()
3525                     .map_err(DeviceManagerError::EventFd)?,
3526                 // All device types *except* virtio block devices should be allocated a 64-bit bar
3527                 // The block devices should be given a 32-bit BAR so that they are easily accessible
3528                 // to firmware without requiring excessive identity mapping.
3529                 // The exception being if not on the default PCI segment.
3530                 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
3531                 dma_handler,
3532                 self.pending_activations.clone(),
3533                 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
3534             )
3535             .map_err(DeviceManagerError::VirtioDevice)?,
3536         ));
3537 
3538         let new_resources = self.add_pci_device(
3539             virtio_pci_device.clone(),
3540             virtio_pci_device.clone(),
3541             pci_segment_id,
3542             pci_device_bdf,
3543             resources,
3544         )?;
3545 
3546         let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3547         for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3548             let io_addr = IoEventAddress::Mmio(addr);
3549             self.address_manager
3550                 .vm
3551                 .register_ioevent(event, &io_addr, None)
3552                 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
3553         }
3554 
3555         // Update the device tree with correct resource information.
3556         node.resources = new_resources;
3557         node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
3558         node.pci_bdf = Some(pci_device_bdf);
3559         node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
3560         self.device_tree.lock().unwrap().insert(id, node);
3561 
3562         Ok(pci_device_bdf)
3563     }
3564 
3565     fn pci_resources(
3566         &self,
3567         id: &str,
3568         pci_segment_id: u16,
3569     ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
3570         // Look for the id in the device tree. If it can be found, that means
3571         // the device is being restored, otherwise it's created from scratch.
3572         Ok(
3573             if let Some(node) = self.device_tree.lock().unwrap().get(id) {
3574                 info!("Restoring virtio-pci {} resources", id);
3575                 let pci_device_bdf: PciBdf = node
3576                     .pci_bdf
3577                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3578                 let pci_segment_id = pci_device_bdf.segment();
3579 
3580                 self.pci_segments[pci_segment_id as usize]
3581                     .pci_bus
3582                     .lock()
3583                     .unwrap()
3584                     .get_device_id(pci_device_bdf.device() as usize)
3585                     .map_err(DeviceManagerError::GetPciDeviceId)?;
3586 
3587                 (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
3588             } else {
3589                 let pci_device_bdf =
3590                     self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3591 
3592                 (pci_segment_id, pci_device_bdf, None)
3593             },
3594         )
3595     }
3596 
3597     #[cfg(target_arch = "x86_64")]
3598     pub fn io_bus(&self) -> &Arc<Bus> {
3599         &self.address_manager.io_bus
3600     }
3601 
3602     pub fn mmio_bus(&self) -> &Arc<Bus> {
3603         &self.address_manager.mmio_bus
3604     }
3605 
3606     pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
3607         &self.address_manager.allocator
3608     }
3609 
3610     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3611         self.interrupt_controller
3612             .as_ref()
3613             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3614     }
3615 
3616     pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
3617         &self.pci_segments
3618     }
3619 
3620     pub fn console(&self) -> &Arc<Console> {
3621         &self.console
3622     }
3623 
3624     #[cfg(target_arch = "aarch64")]
3625     pub fn cmdline_additions(&self) -> &[String] {
3626         self.cmdline_additions.as_slice()
3627     }
3628 
3629     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3630         for handle in self.virtio_devices.iter() {
3631             handle
3632                 .virtio_device
3633                 .lock()
3634                 .unwrap()
3635                 .add_memory_region(new_region)
3636                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3637 
3638             if let Some(dma_handler) = &handle.dma_handler {
3639                 if !handle.iommu {
3640                     let gpa = new_region.start_addr().0;
3641                     let size = new_region.len();
3642                     dma_handler
3643                         .map(gpa, gpa, size)
3644                         .map_err(DeviceManagerError::VirtioDmaMap)?;
3645                 }
3646             }
3647         }
3648 
3649         // Take care of updating the memory for VFIO PCI devices.
3650         if let Some(vfio_container) = &self.vfio_container {
3651             vfio_container
3652                 .vfio_dma_map(
3653                     new_region.start_addr().raw_value(),
3654                     new_region.len(),
3655                     new_region.as_ptr() as u64,
3656                 )
3657                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3658         }
3659 
3660         // Take care of updating the memory for vfio-user devices.
3661         {
3662             let device_tree = self.device_tree.lock().unwrap();
3663             for pci_device_node in device_tree.pci_devices() {
3664                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
3665                     .pci_device_handle
3666                     .as_ref()
3667                     .ok_or(DeviceManagerError::MissingPciDevice)?
3668                 {
3669                     vfio_user_pci_device
3670                         .lock()
3671                         .unwrap()
3672                         .dma_map(new_region)
3673                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
3674                 }
3675             }
3676         }
3677 
3678         Ok(())
3679     }
3680 
3681     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3682         for mut activator in self.pending_activations.lock().unwrap().drain(..) {
3683             activator
3684                 .activate()
3685                 .map_err(DeviceManagerError::VirtioActivate)?;
3686         }
3687         Ok(())
3688     }
3689 
3690     pub fn notify_hotplug(
3691         &self,
3692         _notification_type: AcpiNotificationFlags,
3693     ) -> DeviceManagerResult<()> {
3694         return self
3695             .ged_notification_device
3696             .as_ref()
3697             .unwrap()
3698             .lock()
3699             .unwrap()
3700             .notify(_notification_type)
3701             .map_err(DeviceManagerError::HotPlugNotification);
3702     }
3703 
3704     pub fn add_device(
3705         &mut self,
3706         device_cfg: &mut DeviceConfig,
3707     ) -> DeviceManagerResult<PciDeviceInfo> {
3708         self.validate_identifier(&device_cfg.id)?;
3709 
3710         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
3711             return Err(DeviceManagerError::InvalidIommuHotplug);
3712         }
3713 
3714         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
3715 
3716         // Update the PCIU bitmap
3717         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3718 
3719         Ok(PciDeviceInfo {
3720             id: device_name,
3721             bdf,
3722         })
3723     }
3724 
3725     pub fn add_user_device(
3726         &mut self,
3727         device_cfg: &mut UserDeviceConfig,
3728     ) -> DeviceManagerResult<PciDeviceInfo> {
3729         self.validate_identifier(&device_cfg.id)?;
3730 
3731         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
3732 
3733         // Update the PCIU bitmap
3734         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3735 
3736         Ok(PciDeviceInfo {
3737             id: device_name,
3738             bdf,
3739         })
3740     }
3741 
3742     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
3743         // The node can be directly a PCI node in case the 'id' refers to a
3744         // VFIO device or a virtio-pci one.
3745         // In case the 'id' refers to a virtio device, we must find the PCI
3746         // node by looking at the parent.
3747         let device_tree = self.device_tree.lock().unwrap();
3748         let node = device_tree
3749             .get(&id)
3750             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
3751 
3752         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
3753             node
3754         } else {
3755             let parent = node
3756                 .parent
3757                 .as_ref()
3758                 .ok_or(DeviceManagerError::MissingNode)?;
3759             device_tree
3760                 .get(parent)
3761                 .ok_or(DeviceManagerError::MissingNode)?
3762         };
3763 
3764         let pci_device_bdf: PciBdf = pci_device_node
3765             .pci_bdf
3766             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3767         let pci_segment_id = pci_device_bdf.segment();
3768 
3769         let pci_device_handle = pci_device_node
3770             .pci_device_handle
3771             .as_ref()
3772             .ok_or(DeviceManagerError::MissingPciDevice)?;
3773         #[allow(irrefutable_let_patterns)]
3774         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
3775             let device_type = VirtioDeviceType::from(
3776                 virtio_pci_device
3777                     .lock()
3778                     .unwrap()
3779                     .virtio_device()
3780                     .lock()
3781                     .unwrap()
3782                     .device_type(),
3783             );
3784             match device_type {
3785                 VirtioDeviceType::Net
3786                 | VirtioDeviceType::Block
3787                 | VirtioDeviceType::Pmem
3788                 | VirtioDeviceType::Fs
3789                 | VirtioDeviceType::Vsock => {}
3790                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
3791             }
3792         }
3793 
3794         // Update the PCID bitmap
3795         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
3796 
3797         Ok(())
3798     }
3799 
3800     pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
3801         info!(
3802             "Ejecting device_id = {} on segment_id={}",
3803             device_id, pci_segment_id
3804         );
3805 
3806         // Convert the device ID into the corresponding b/d/f.
3807         let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);
3808 
3809         // Give the PCI device ID back to the PCI bus.
3810         self.pci_segments[pci_segment_id as usize]
3811             .pci_bus
3812             .lock()
3813             .unwrap()
3814             .put_device_id(device_id as usize)
3815             .map_err(DeviceManagerError::PutPciDeviceId)?;
3816 
3817         // Remove the device from the device tree along with its children.
3818         let mut device_tree = self.device_tree.lock().unwrap();
3819         let pci_device_node = device_tree
3820             .remove_node_by_pci_bdf(pci_device_bdf)
3821             .ok_or(DeviceManagerError::MissingPciDevice)?;
3822 
3823         // For VFIO and vfio-user the PCI device id is the id.
3824         // For virtio we overwrite it later as we want the id of the
3825         // underlying device.
3826         let mut id = pci_device_node.id;
3827         let pci_device_handle = pci_device_node
3828             .pci_device_handle
3829             .ok_or(DeviceManagerError::MissingPciDevice)?;
3830         if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
3831             // The virtio-pci device has a single child
3832             if !pci_device_node.children.is_empty() {
3833                 assert_eq!(pci_device_node.children.len(), 1);
3834                 let child_id = &pci_device_node.children[0];
3835                 id = child_id.clone();
3836             }
3837         }
3838         for child in pci_device_node.children.iter() {
3839             device_tree.remove(child);
3840         }
3841 
3842         let mut iommu_attached = false;
3843         if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
3844             if iommu_attached_devices.contains(&pci_device_bdf) {
3845                 iommu_attached = true;
3846             }
3847         }
3848 
3849         let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
3850             // No need to remove any virtio-mem mapping here as the container outlives all devices
3851             PciDeviceHandle::Vfio(vfio_pci_device) => (
3852                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
3853                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
3854                 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
3855                 false,
3856             ),
3857             PciDeviceHandle::Virtio(virtio_pci_device) => {
3858                 let dev = virtio_pci_device.lock().unwrap();
3859                 let bar_addr = dev.config_bar_addr();
3860                 for (event, addr) in dev.ioeventfds(bar_addr) {
3861                     let io_addr = IoEventAddress::Mmio(addr);
3862                     self.address_manager
3863                         .vm
3864                         .unregister_ioevent(event, &io_addr)
3865                         .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
3866                 }
3867 
3868                 if let Some(dma_handler) = dev.dma_handler() {
3869                     if !iommu_attached {
3870                         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3871                             for region in zone.regions() {
3872                                 let iova = region.start_addr().0;
3873                                 let size = region.len();
3874                                 dma_handler
3875                                     .unmap(iova, size)
3876                                     .map_err(DeviceManagerError::VirtioDmaUnmap)?;
3877                             }
3878                         }
3879                     }
3880                 }
3881 
3882                 (
3883                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
3884                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
3885                     Some(dev.virtio_device()),
3886                     dev.dma_handler().is_some() && !iommu_attached,
3887                 )
3888             }
3889             PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
3890                 let mut dev = vfio_user_pci_device.lock().unwrap();
3891                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3892                     for region in zone.regions() {
3893                         dev.dma_unmap(region)
3894                             .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
3895                     }
3896                 }
3897 
3898                 (
3899                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
3900                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
3901                     None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
3902                     true,
3903                 )
3904             }
3905         };
3906 
3907         if remove_dma_handler {
3908             for virtio_mem_device in self.virtio_mem_devices.iter() {
3909                 virtio_mem_device
3910                     .lock()
3911                     .unwrap()
3912                     .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
3913                         pci_device_bdf.into(),
3914                     ))
3915                     .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
3916             }
3917         }
3918 
3919         // Free the allocated BARs
3920         pci_device
3921             .lock()
3922             .unwrap()
3923             .free_bars(
3924                 &mut self.address_manager.allocator.lock().unwrap(),
3925                 &mut self.pci_segments[pci_segment_id as usize]
3926                     .allocator
3927                     .lock()
3928                     .unwrap(),
3929             )
3930             .map_err(DeviceManagerError::FreePciBars)?;
3931 
3932         // Remove the device from the PCI bus
3933         self.pci_segments[pci_segment_id as usize]
3934             .pci_bus
3935             .lock()
3936             .unwrap()
3937             .remove_by_device(&pci_device)
3938             .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;
3939 
3940         #[cfg(target_arch = "x86_64")]
3941         // Remove the device from the IO bus
3942         self.io_bus()
3943             .remove_by_device(&bus_device)
3944             .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;
3945 
3946         // Remove the device from the MMIO bus
3947         self.mmio_bus()
3948             .remove_by_device(&bus_device)
3949             .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;
3950 
3951         // Remove the device from the list of BusDevice held by the
3952         // DeviceManager.
3953         self.bus_devices
3954             .retain(|dev| !Arc::ptr_eq(dev, &bus_device));
3955 
3956         // Shutdown and remove the underlying virtio-device if present
3957         if let Some(virtio_device) = virtio_device {
3958             for mapping in virtio_device.lock().unwrap().userspace_mappings() {
3959                 self.memory_manager
3960                     .lock()
3961                     .unwrap()
3962                     .remove_userspace_mapping(
3963                         mapping.addr.raw_value(),
3964                         mapping.len,
3965                         mapping.host_addr,
3966                         mapping.mergeable,
3967                         mapping.mem_slot,
3968                     )
3969                     .map_err(DeviceManagerError::MemoryManager)?;
3970             }
3971 
3972             virtio_device.lock().unwrap().shutdown();
3973 
3974             self.virtio_devices
3975                 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
3976         }
3977 
3978         event!(
3979             "vm",
3980             "device-removed",
3981             "id",
3982             &id,
3983             "bdf",
3984             pci_device_bdf.to_string()
3985         );
3986 
3987         // At this point, the device has been removed from all the list and
3988         // buses where it was stored. At the end of this function, after
3989         // any_device, bus_device and pci_device are released, the actual
3990         // device will be dropped.
3991         Ok(())
3992     }
3993 
3994     fn hotplug_virtio_pci_device(
3995         &mut self,
3996         handle: MetaVirtioDevice,
3997     ) -> DeviceManagerResult<PciDeviceInfo> {
3998         // Add the virtio device to the device manager list. This is important
3999         // as the list is used to notify virtio devices about memory updates
4000         // for instance.
4001         self.virtio_devices.push(handle.clone());
4002 
4003         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
4004             self.iommu_mapping.clone()
4005         } else {
4006             None
4007         };
4008 
4009         let bdf = self.add_virtio_pci_device(
4010             handle.virtio_device,
4011             &mapping,
4012             handle.id.clone(),
4013             handle.pci_segment,
4014             handle.dma_handler,
4015         )?;
4016 
4017         // Update the PCIU bitmap
4018         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4019 
4020         Ok(PciDeviceInfo { id: handle.id, bdf })
4021     }
4022 
4023     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
4024         self.config
4025             .lock()
4026             .as_ref()
4027             .unwrap()
4028             .platform
4029             .as_ref()
4030             .map(|pc| {
4031                 pc.iommu_segments
4032                     .as_ref()
4033                     .map(|v| v.contains(&pci_segment_id))
4034                     .unwrap_or_default()
4035             })
4036             .unwrap_or_default()
4037     }
4038 
4039     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
4040         self.validate_identifier(&disk_cfg.id)?;
4041 
4042         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
4043             return Err(DeviceManagerError::InvalidIommuHotplug);
4044         }
4045 
4046         let device = self.make_virtio_block_device(disk_cfg)?;
4047         self.hotplug_virtio_pci_device(device)
4048     }
4049 
4050     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
4051         self.validate_identifier(&fs_cfg.id)?;
4052 
4053         let device = self.make_virtio_fs_device(fs_cfg)?;
4054         self.hotplug_virtio_pci_device(device)
4055     }
4056 
4057     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
4058         self.validate_identifier(&pmem_cfg.id)?;
4059 
4060         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
4061             return Err(DeviceManagerError::InvalidIommuHotplug);
4062         }
4063 
4064         let device = self.make_virtio_pmem_device(pmem_cfg)?;
4065         self.hotplug_virtio_pci_device(device)
4066     }
4067 
4068     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
4069         self.validate_identifier(&net_cfg.id)?;
4070 
4071         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
4072             return Err(DeviceManagerError::InvalidIommuHotplug);
4073         }
4074 
4075         let device = self.make_virtio_net_device(net_cfg)?;
4076         self.hotplug_virtio_pci_device(device)
4077     }
4078 
4079     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
4080         self.validate_identifier(&vdpa_cfg.id)?;
4081 
4082         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
4083             return Err(DeviceManagerError::InvalidIommuHotplug);
4084         }
4085 
4086         let device = self.make_vdpa_device(vdpa_cfg)?;
4087         self.hotplug_virtio_pci_device(device)
4088     }
4089 
4090     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4091         self.validate_identifier(&vsock_cfg.id)?;
4092 
4093         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4094             return Err(DeviceManagerError::InvalidIommuHotplug);
4095         }
4096 
4097         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4098         self.hotplug_virtio_pci_device(device)
4099     }
4100 
4101     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4102         let mut counters = HashMap::new();
4103 
4104         for handle in &self.virtio_devices {
4105             let virtio_device = handle.virtio_device.lock().unwrap();
4106             if let Some(device_counters) = virtio_device.counters() {
4107                 counters.insert(handle.id.clone(), device_counters.clone());
4108             }
4109         }
4110 
4111         counters
4112     }
4113 
4114     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4115         if let Some(balloon) = &self.balloon {
4116             return balloon
4117                 .lock()
4118                 .unwrap()
4119                 .resize(size)
4120                 .map_err(DeviceManagerError::VirtioBalloonResize);
4121         }
4122 
4123         warn!("No balloon setup: Can't resize the balloon");
4124         Err(DeviceManagerError::MissingVirtioBalloon)
4125     }
4126 
4127     pub fn balloon_size(&self) -> u64 {
4128         if let Some(balloon) = &self.balloon {
4129             return balloon.lock().unwrap().get_actual();
4130         }
4131 
4132         0
4133     }
4134 
4135     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4136         self.device_tree.clone()
4137     }
4138 
4139     #[cfg(target_arch = "x86_64")]
4140     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4141         self.ged_notification_device
4142             .as_ref()
4143             .unwrap()
4144             .lock()
4145             .unwrap()
4146             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4147             .map_err(DeviceManagerError::PowerButtonNotification)
4148     }
4149 
4150     #[cfg(target_arch = "aarch64")]
4151     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4152         // There are two use cases:
4153         // 1. Users will use direct kernel boot with device tree.
4154         // 2. Users will use ACPI+UEFI boot.
4155 
4156         // Trigger a GPIO pin 3 event to satisify use case 1.
4157         self.gpio_device
4158             .as_ref()
4159             .unwrap()
4160             .lock()
4161             .unwrap()
4162             .trigger_key(3)
4163             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4164         // Trigger a GED power button event to satisify use case 2.
4165         return self
4166             .ged_notification_device
4167             .as_ref()
4168             .unwrap()
4169             .lock()
4170             .unwrap()
4171             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4172             .map_err(DeviceManagerError::PowerButtonNotification);
4173     }
4174 
    /// Returns a reference to the `iommu_attached_devices` field: an optional
    /// pair made of one `PciBdf` and a list of `PciBdf`s.
    ///
    /// NOTE(review): presumably the first element is the BDF of the IOMMU
    /// device itself and the list holds the devices attached to it; confirm
    /// against the code that fills the field.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }
4178 
4179     fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4180         if let Some(id) = id {
4181             if id.starts_with("__") {
4182                 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4183             }
4184 
4185             if self.device_tree.lock().unwrap().contains_key(id) {
4186                 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4187             }
4188         }
4189 
4190         Ok(())
4191     }
4192 
    /// Returns a reference to the `AcpiPlatformAddresses` held by the device
    /// manager.
    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
4196 }
4197 
4198 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4199     for (numa_node_id, numa_node) in numa_nodes.iter() {
4200         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4201             return Some(*numa_node_id);
4202         }
4203     }
4204 
4205     None
4206 }
4207 
4208 struct TpmDevice {}
4209 
4210 impl Aml for TpmDevice {
4211     fn to_aml_bytes(&self) -> Vec<u8> {
4212         aml::Device::new(
4213             "TPM2".into(),
4214             vec![
4215                 &aml::Name::new("_HID".into(), &"MSFT0101"),
4216                 &aml::Name::new("_STA".into(), &(0xF_usize)),
4217                 &aml::Name::new(
4218                     "_CRS".into(),
4219                     &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
4220                         true,
4221                         layout::TPM_START.0 as u32,
4222                         layout::TPM_SIZE as u32,
4223                     )]),
4224                 ),
4225             ],
4226         )
4227         .to_aml_bytes()
4228     }
4229 }
4230 
// AML generation for the devices owned by the device manager.
impl Aml for DeviceManager {
    /// Appends the device manager's AML objects to `bytes`.
    ///
    /// In order, this emits: the `_SB_.PHPR` PCI hotplug controller, every PCI
    /// segment, the `_SB_.MBRD` device holding the per-segment PCI MMIO
    /// config ranges, the `_SB_.COM1` serial device (only when the serial
    /// console is not `Off`), the `_S5_` package, the `_SB_.PWRB` device, the
    /// TPM device (only when a TPM is configured) and finally the GED
    /// notification device.
    ///
    /// Panics if the GED notification device has not been created.
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // One call to `\_SB_.PCI<n>.PCNT` per PCI segment; together they form
        // the body of the `PSCN` method declared below.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PCI{i:X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                // Memory range covering the hotplug registers, starting at
                // `acpi_address` and `DEVICE_MANAGER_ACPI_SIZE` bytes long.
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCachable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    self.acpi_address.0 as usize,
                    DEVICE_MANAGER_ACPI_SIZE,
                ),
                // Four consecutive 32-bit fields; their order defines the
                // register offsets handled by the `BusDevice` implementation.
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                // PCEJ(Arg0, Arg1): eject method taking two arguments.
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Store ONE shifted left by the first argument into
                        // the memory-mapped B0EJ field, i.e. a bitmap with
                        // only the bit of the slot to eject set (argument
                        // order of `ShiftLeft::new` assumed to be target,
                        // value, shift count; TODO confirm).
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .append_aml_bytes(bytes);

        // Each PCI segment appends its own AML.
        for segment in &self.pci_segments {
            segment.append_aml_bytes(bytes);
        }

        // One fixed 32-bit memory range per segment, covering that segment's
        // PCI MMIO config space.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        // `ResourceTemplate::new` takes trait object references, so build a
        // second vector borrowing from the first one.
        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        // MBRD device exposing the ranges collected above as its resources.
        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .append_aml_bytes(bytes);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // Placeholder IRQ used when serial is turned off.
                // NOTE(review): the COM1 device below is only emitted when
                // serial is not `Off`, so this value looks unused.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EisaName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    // Resources: the serial interrupt plus the I/O port range
                    // on x86_64 or the MMIO range on aarch64.
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::Io::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .append_aml_bytes(bytes);
        }

        // `_S5_` object: a package holding the single value 5.
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).append_aml_bytes(bytes);

        // PWRB device, declared with only a hardware id and a unique id.
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .append_aml_bytes(bytes);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            let tpm_acpi = TpmDevice {};
            let tpm_dsdt_data = tpm_acpi.to_aml_bytes();
            bytes.extend_from_slice(tpm_dsdt_data.as_slice());
        }

        // The GED notification device appends its own AML last.
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .append_aml_bytes(bytes);
    }
}
4407 
4408 impl Pausable for DeviceManager {
4409     fn pause(&mut self) -> result::Result<(), MigratableError> {
4410         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4411             if let Some(migratable) = &device_node.migratable {
4412                 migratable.lock().unwrap().pause()?;
4413             }
4414         }
4415         // On AArch64, the pause of device manager needs to trigger
4416         // a "pause" of GIC, which will flush the GIC pending tables
4417         // and ITS tables to guest RAM.
4418         #[cfg(target_arch = "aarch64")]
4419         {
4420             self.get_interrupt_controller()
4421                 .unwrap()
4422                 .lock()
4423                 .unwrap()
4424                 .pause()?;
4425         };
4426 
4427         Ok(())
4428     }
4429 
4430     fn resume(&mut self) -> result::Result<(), MigratableError> {
4431         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4432             if let Some(migratable) = &device_node.migratable {
4433                 migratable.lock().unwrap().resume()?;
4434             }
4435         }
4436 
4437         Ok(())
4438     }
4439 }
4440 
4441 impl Snapshottable for DeviceManager {
4442     fn id(&self) -> String {
4443         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4444     }
4445 
4446     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4447         let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);
4448 
4449         // We aggregate all devices snapshots.
4450         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4451             if let Some(migratable) = &device_node.migratable {
4452                 let mut migratable = migratable.lock().unwrap();
4453                 snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
4454             }
4455         }
4456 
4457         Ok(snapshot)
4458     }
4459 }
4460 
// The body is empty: no `Transportable` item is overridden for the device
// manager, so whatever the trait provides by default applies.
impl Transportable for DeviceManager {}
4462 
4463 impl Migratable for DeviceManager {
4464     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4465         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4466             if let Some(migratable) = &device_node.migratable {
4467                 migratable.lock().unwrap().start_dirty_log()?;
4468             }
4469         }
4470         Ok(())
4471     }
4472 
4473     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4474         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4475             if let Some(migratable) = &device_node.migratable {
4476                 migratable.lock().unwrap().stop_dirty_log()?;
4477             }
4478         }
4479         Ok(())
4480     }
4481 
4482     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4483         let mut tables = Vec::new();
4484         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4485             if let Some(migratable) = &device_node.migratable {
4486                 tables.push(migratable.lock().unwrap().dirty_log()?);
4487             }
4488         }
4489         Ok(MemoryRangeTable::new_from_tables(tables))
4490     }
4491 
4492     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4493         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4494             if let Some(migratable) = &device_node.migratable {
4495                 migratable.lock().unwrap().start_migration()?;
4496             }
4497         }
4498         Ok(())
4499     }
4500 
4501     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4502         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4503             if let Some(migratable) = &device_node.migratable {
4504                 migratable.lock().unwrap().complete_migration()?;
4505             }
4506         }
4507         Ok(())
4508     }
4509 }
4510 
// Layout of the PCI hotplug registers served by the `BusDevice`
// implementation of `DeviceManager`. It mirrors the four consecutive 32-bit
// fields (PCIU, PCID, B0EJ, PSEG) declared on the `PCST` operation region in
// the AML of the `PHPR` device.

// Byte offset of each register from the start of the region.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;
const PSEG_FIELD_OFFSET: u64 = 12;
// Width of each register in bytes. The read and write handlers assert that
// the access length equals this width.
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;
4519 
4520 impl BusDevice for DeviceManager {
4521     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4522         match offset {
4523             PCIU_FIELD_OFFSET => {
4524                 assert!(data.len() == PCIU_FIELD_SIZE);
4525                 data.copy_from_slice(
4526                     &self.pci_segments[self.selected_segment]
4527                         .pci_devices_up
4528                         .to_le_bytes(),
4529                 );
4530                 // Clear the PCIU bitmap
4531                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4532             }
4533             PCID_FIELD_OFFSET => {
4534                 assert!(data.len() == PCID_FIELD_SIZE);
4535                 data.copy_from_slice(
4536                     &self.pci_segments[self.selected_segment]
4537                         .pci_devices_down
4538                         .to_le_bytes(),
4539                 );
4540                 // Clear the PCID bitmap
4541                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4542             }
4543             B0EJ_FIELD_OFFSET => {
4544                 assert!(data.len() == B0EJ_FIELD_SIZE);
4545                 // Always return an empty bitmap since the eject is always
4546                 // taken care of right away during a write access.
4547                 data.fill(0);
4548             }
4549             PSEG_FIELD_OFFSET => {
4550                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4551                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4552             }
4553             _ => error!(
4554                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4555                 base, offset
4556             ),
4557         }
4558 
4559         debug!(
4560             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4561             base, offset, data
4562         )
4563     }
4564 
4565     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4566         match offset {
4567             B0EJ_FIELD_OFFSET => {
4568                 assert!(data.len() == B0EJ_FIELD_SIZE);
4569                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4570                 data_array.copy_from_slice(data);
4571                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4572 
4573                 while slot_bitmap > 0 {
4574                     let slot_id = slot_bitmap.trailing_zeros();
4575                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4576                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4577                     }
4578                     slot_bitmap &= !(1 << slot_id);
4579                 }
4580             }
4581             PSEG_FIELD_OFFSET => {
4582                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4583                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4584                 data_array.copy_from_slice(data);
4585                 let selected_segment = u32::from_le_bytes(data_array) as usize;
4586                 if selected_segment >= self.pci_segments.len() {
4587                     error!(
4588                         "Segment selection out of range: {} >= {}",
4589                         selected_segment,
4590                         self.pci_segments.len()
4591                     );
4592                     return None;
4593                 }
4594                 self.selected_segment = selected_segment;
4595             }
4596             _ => error!(
4597                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4598                 base, offset
4599             ),
4600         }
4601 
4602         debug!(
4603             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4604             base, offset, data
4605         );
4606 
4607         None
4608     }
4609 }
4610 
4611 impl Drop for DeviceManager {
4612     fn drop(&mut self) {
4613         for handle in self.virtio_devices.drain(..) {
4614             handle.virtio_device.lock().unwrap().shutdown();
4615         }
4616     }
4617 }
4618