xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision 7d7bfb2034001d4cb15df2ddc56d2d350c8da30f)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use crate::config::{
13     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
14     VdpaConfig, VhostMode, VmConfig, VsockConfig,
15 };
16 use crate::device_tree::{DeviceNode, DeviceTree};
17 #[cfg(feature = "kvm")]
18 use crate::interrupt::kvm::KvmMsiInterruptManager as MsiInterruptManager;
19 #[cfg(feature = "mshv")]
20 use crate::interrupt::mshv::MshvMsiInterruptManager as MsiInterruptManager;
21 use crate::interrupt::LegacyUserspaceInterruptManager;
22 use crate::memory_manager::MEMORY_MANAGER_ACPI_SIZE;
23 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager};
24 use crate::pci_segment::PciSegment;
25 use crate::seccomp_filters::{get_seccomp_filter, Thread};
26 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
27 use crate::sigwinch_listener::start_sigwinch_listener;
28 use crate::GuestRegionMmap;
29 use crate::PciDeviceInfo;
30 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
31 use acpi_tables::{aml, aml::Aml};
32 use anyhow::anyhow;
33 #[cfg(target_arch = "aarch64")]
34 use arch::aarch64::gic::gicv3_its::kvm::KvmGicV3Its;
35 use arch::layout;
36 #[cfg(target_arch = "x86_64")]
37 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
38 use arch::NumaNodes;
39 #[cfg(target_arch = "aarch64")]
40 use arch::{DeviceType, MmioDeviceInfo};
41 use block_util::{
42     async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
43     fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync,
44     raw_async::RawFileDisk, raw_sync::RawFileDiskSync, vhdx_sync::VhdxDiskSync, ImageType,
45 };
46 #[cfg(target_arch = "aarch64")]
47 use devices::gic;
48 #[cfg(target_arch = "x86_64")]
49 use devices::ioapic;
50 #[cfg(target_arch = "aarch64")]
51 use devices::legacy::Pl011;
52 #[cfg(target_arch = "x86_64")]
53 use devices::legacy::Serial;
54 use devices::{
55     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
56 };
57 #[cfg(feature = "kvm")]
58 use hypervisor::kvm_ioctls::*;
59 use hypervisor::DeviceFd;
60 #[cfg(feature = "mshv")]
61 use hypervisor::IoEventAddress;
62 use libc::{
63     cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED,
64     O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW,
65 };
66 #[cfg(target_arch = "x86_64")]
67 use pci::PciConfigIo;
68 use pci::{
69     DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping,
70     VfioUserPciDevice, VfioUserPciDeviceError,
71 };
72 use seccompiler::SeccompAction;
73 use std::collections::HashMap;
74 use std::convert::TryInto;
75 use std::fs::{read_link, File, OpenOptions};
76 use std::io::{self, stdout, Seek, SeekFrom};
77 use std::mem::zeroed;
78 use std::num::Wrapping;
79 use std::os::unix::fs::OpenOptionsExt;
80 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
81 use std::path::PathBuf;
82 use std::result;
83 use std::sync::{Arc, Mutex};
84 use vfio_ioctls::{VfioContainer, VfioDevice};
85 use virtio_devices::transport::VirtioPciDevice;
86 use virtio_devices::transport::VirtioTransport;
87 use virtio_devices::vhost_user::VhostUserConfig;
88 use virtio_devices::{AccessPlatformMapping, VdpaDmaMapping, VirtioMemMappingSource};
89 use virtio_devices::{Endpoint, IommuMapping};
90 use virtio_devices::{VirtioSharedMemory, VirtioSharedMemoryList};
91 use vm_allocator::{AddressAllocator, SystemAllocator};
92 use vm_device::dma_mapping::vfio::VfioDmaMapping;
93 use vm_device::dma_mapping::ExternalDmaMapping;
94 use vm_device::interrupt::{
95     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
96 };
97 use vm_device::{Bus, BusDevice, Resource};
98 use vm_memory::guest_memory::FileOffset;
99 use vm_memory::GuestMemoryRegion;
100 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
101 #[cfg(target_arch = "x86_64")]
102 use vm_memory::{GuestAddressSpace, GuestMemory};
103 use vm_migration::{
104     protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
105     SnapshotDataSection, Snapshottable, Transportable,
106 };
107 use vm_virtio::AccessPlatform;
108 use vm_virtio::VirtioDeviceType;
109 use vmm_sys_util::eventfd::EventFd;
110 
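// Size of the MMIO window (4 KiB) used below when mapping the AArch64 legacy
// devices (e.g. RTC and GPIO) onto the MMIO bus.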
111 #[cfg(target_arch = "aarch64")]
112 const MMIO_LEN: u64 = 0x1000;
113 
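// Device name prefixes and fixed names used to build identifiers in the
// device tree (typically the prefix plus the `device_id_cnt` counter,
// e.g. "_net0").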
114 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
115 
116 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
117 
118 #[cfg(target_arch = "x86_64")]
119 const IOAPIC_DEVICE_NAME: &str = "_ioapic";
120 
121 const SERIAL_DEVICE_NAME_PREFIX: &str = "_serial";
122 #[cfg(target_arch = "aarch64")]
123 const GPIO_DEVICE_NAME_PREFIX: &str = "_gpio";
124 
125 const CONSOLE_DEVICE_NAME: &str = "_console";
126 const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
127 const FS_DEVICE_NAME_PREFIX: &str = "_fs";
128 const BALLOON_DEVICE_NAME: &str = "_balloon";
129 const NET_DEVICE_NAME_PREFIX: &str = "_net";
130 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
131 const RNG_DEVICE_NAME: &str = "_rng";
132 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
133 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
134 const WATCHDOG_DEVICE_NAME: &str = "_watchdog";
135 
136 const IOMMU_DEVICE_NAME: &str = "_iommu";
137 
138 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
139 
140 /// Errors associated with the device manager
141 #[derive(Debug)]
142 pub enum DeviceManagerError {
143     /// Cannot create EventFd.
144     EventFd(io::Error),
145 
146     /// Cannot open disk path
147     Disk(io::Error),
148 
149     /// Cannot create vhost-user-net device
150     CreateVhostUserNet(virtio_devices::vhost_user::Error),
151 
152     /// Cannot create virtio-blk device
153     CreateVirtioBlock(io::Error),
154 
155     /// Cannot create virtio-net device
156     CreateVirtioNet(virtio_devices::net::Error),
157 
158     /// Cannot create virtio-console device
159     CreateVirtioConsole(io::Error),
160 
161     /// Cannot create virtio-rng device
162     CreateVirtioRng(io::Error),
163 
164     /// Cannot create virtio-fs device
165     CreateVirtioFs(virtio_devices::vhost_user::Error),
166 
167     /// Virtio-fs device was created without a socket.
168     NoVirtioFsSock,
169 
170     /// Cannot create vhost-user-blk device
171     CreateVhostUserBlk(virtio_devices::vhost_user::Error),
172 
173     /// Cannot create virtio-pmem device
174     CreateVirtioPmem(io::Error),
175 
176     /// Cannot create vDPA device
177     CreateVdpa(virtio_devices::vdpa::Error),
178 
179     /// Cannot create virtio-vsock device
180     CreateVirtioVsock(io::Error),
181 
182     /// Failed to convert Path to &str for the vDPA device.
183     CreateVdpaConvertPath,
184 
185     /// Failed to convert Path to &str for the virtio-vsock device.
186     CreateVsockConvertPath,
187 
188     /// Cannot create virtio-vsock backend
189     CreateVsockBackend(virtio_devices::vsock::VsockUnixError),
190 
191     /// Cannot create virtio-iommu device
192     CreateVirtioIommu(io::Error),
193 
194     /// Cannot create virtio-balloon device
195     CreateVirtioBalloon(io::Error),
196 
197     /// Cannot create virtio-watchdog device
198     CreateVirtioWatchdog(io::Error),
199 
200     /// Failed to parse disk image format
201     DetectImageType(io::Error),
202 
203     /// Cannot open qcow disk path
204     QcowDeviceCreate(qcow::Error),
205 
206     /// Cannot create serial manager
207     CreateSerialManager(SerialManagerError),
208 
209     /// Cannot spawn the serial manager thread
210     SpawnSerialManager(SerialManagerError),
211 
212     /// Cannot open tap interface
213     OpenTap(net_util::TapError),
214 
215     /// Cannot allocate IRQ.
216     AllocateIrq,
217 
218     /// Cannot configure the IRQ.
219     Irq(vmm_sys_util::errno::Error),
220 
221     /// Cannot allocate PCI BARs
222     AllocateBars(pci::PciDeviceError),
223 
224     /// Could not free the BARs associated with a PCI device.
225     FreePciBars(pci::PciDeviceError),
226 
227     /// Cannot register ioevent.
228     RegisterIoevent(anyhow::Error),
229 
230     /// Cannot unregister ioevent.
231     UnRegisterIoevent(anyhow::Error),
232 
233     /// Cannot create virtio device
234     VirtioDevice(vmm_sys_util::errno::Error),
235 
236     /// Cannot add PCI device
237     AddPciDevice(pci::PciRootError),
238 
239     /// Cannot open persistent memory file
240     PmemFileOpen(io::Error),
241 
242     /// Cannot set persistent memory file size
243     PmemFileSetLen(io::Error),
244 
245     /// Cannot find a memory range for persistent memory
246     PmemRangeAllocation,
247 
248     /// Cannot find a memory range for virtio-fs
249     FsRangeAllocation,
250 
251     /// Error creating serial output file
252     SerialOutputFileOpen(io::Error),
253 
254     /// Error creating console output file
255     ConsoleOutputFileOpen(io::Error),
256 
257     /// Error creating serial pty
258     SerialPtyOpen(io::Error),
259 
260     /// Error creating console pty
261     ConsolePtyOpen(io::Error),
262 
263     /// Error setting pty raw mode
264     SetPtyRaw(vmm_sys_util::errno::Error),
265 
266     /// Error getting pty peer
267     GetPtyPeer(vmm_sys_util::errno::Error),
268 
269     /// Cannot create a VFIO device
270     VfioCreate(vfio_ioctls::VfioError),
271 
272     /// Cannot create a VFIO PCI device
273     VfioPciCreate(pci::VfioPciError),
274 
275     /// Failed to map VFIO MMIO region.
276     VfioMapRegion(pci::VfioPciError),
277 
278     /// Failed to DMA map VFIO device.
279     VfioDmaMap(vfio_ioctls::VfioError),
280 
281     /// Failed to DMA unmap VFIO device.
282     VfioDmaUnmap(pci::VfioPciError),
283 
284     /// Failed to create the passthrough device.
285     CreatePassthroughDevice(anyhow::Error),
286 
287     /// Failed to memory map.
288     Mmap(io::Error),
289 
290     /// Cannot add legacy device to Bus.
291     BusError(vm_device::BusError),
292 
293     /// Failed to allocate IO port
294     AllocateIoPort,
295 
296     /// Failed to allocate MMIO address
297     AllocateMmioAddress,
298 
299     /// Failed to make hotplug notification
300     HotPlugNotification(io::Error),
301 
302     /// Error from a memory manager operation
303     MemoryManager(MemoryManagerError),
304 
305     /// Failed to create new interrupt source group.
306     CreateInterruptGroup(io::Error),
307 
308     /// Failed to update interrupt source group.
309     UpdateInterruptGroup(io::Error),
310 
311     /// Failed to create interrupt controller.
312     CreateInterruptController(interrupt_controller::Error),
313 
314     /// Failed to create a new MmapRegion instance.
315     NewMmapRegion(vm_memory::mmap::MmapRegionError),
316 
317     /// Failed to clone a File.
318     CloneFile(io::Error),
319 
320     /// Failed to create socket file
321     CreateSocketFile(io::Error),
322 
323     /// Failed to spawn the network backend
324     SpawnNetBackend(io::Error),
325 
326     /// Failed to spawn the block backend
327     SpawnBlockBackend(io::Error),
328 
329     /// Missing PCI bus.
330     NoPciBus,
331 
332     /// Could not find an available device name.
333     NoAvailableDeviceName,
334 
335     /// Missing PCI device.
336     MissingPciDevice,
337 
338     /// Failed to remove a PCI device from the PCI bus.
339     RemoveDeviceFromPciBus(pci::PciRootError),
340 
341     /// Failed to remove a bus device from the IO bus.
342     RemoveDeviceFromIoBus(vm_device::BusError),
343 
344     /// Failed to remove a bus device from the MMIO bus.
345     RemoveDeviceFromMmioBus(vm_device::BusError),
346 
347     /// Failed to find the device corresponding to a specific PCI b/d/f.
348     UnknownPciBdf(u32),
349 
350     /// Not allowed to remove this type of device from the VM.
351     RemovalNotAllowed(vm_virtio::VirtioDeviceType),
352 
353     /// Failed to find device corresponding to the given identifier.
354     UnknownDeviceId(String),
355 
356     /// Failed to find an available PCI device ID.
357     NextPciDeviceId(pci::PciRootError),
358 
359     /// Could not reserve the PCI device ID.
360     GetPciDeviceId(pci::PciRootError),
361 
362     /// Could not give the PCI device ID back.
363     PutPciDeviceId(pci::PciRootError),
364 
365     /// Incorrect device ID as it is already used by another device.
366     DeviceIdAlreadyInUse,
367 
368     /// No disk path was specified when one was expected
369     NoDiskPath,
370 
371     /// Failed to update guest memory for virtio device.
372     UpdateMemoryForVirtioDevice(virtio_devices::Error),
373 
374     /// Cannot create virtio-mem device
375     CreateVirtioMem(io::Error),
376 
377     /// Cannot generate a ResizeSender from the Resize object.
378     CreateResizeSender(virtio_devices::mem::Error),
379 
380     /// Cannot find a memory range for virtio-mem memory
381     VirtioMemRangeAllocation,
382 
383     /// Failed to update guest memory for VFIO PCI device.
384     UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),
385 
386     /// Trying to use a directory for pmem but no size specified
387     PmemWithDirectorySizeMissing,
388 
389     /// Trying to use a size that is not a multiple of 2 MiB
390     PmemSizeNotAligned,
391 
392     /// Could not find the node in the device tree.
393     MissingNode,
394 
395     /// Resource was already found.
396     ResourceAlreadyExists,
397 
398     /// Expected resources for virtio-pci could not be found.
399     MissingVirtioPciResources,
400 
401     /// Expected resources for virtio-pmem could not be found.
402     MissingVirtioPmemResources,
403 
404     /// Missing PCI b/d/f from the DeviceNode.
405     MissingDeviceNodePciBdf,
406 
407     /// No support for device passthrough
408     NoDevicePassthroughSupport,
409 
410     /// Failed to resize virtio-balloon
411     VirtioBalloonResize(virtio_devices::balloon::Error),
412 
413     /// Missing virtio-balloon, can't proceed as expected.
414     MissingVirtioBalloon,
415 
416     /// Missing virtual IOMMU device
417     MissingVirtualIommu,
418 
419     /// Failed to do power button notification
420     PowerButtonNotification(io::Error),
421 
422     /// Failed to do AArch64 GPIO power button notification
423     #[cfg(target_arch = "aarch64")]
424     AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),
425 
426     /// Failed to set O_DIRECT flag to file descriptor
427     SetDirectIo,
428 
429     /// Failed to create FixedVhdDiskAsync
430     CreateFixedVhdDiskAsync(io::Error),
431 
432     /// Failed to create FixedVhdDiskSync
433     CreateFixedVhdDiskSync(io::Error),
434 
435     /// Failed to create QcowDiskSync
436     CreateQcowDiskSync(qcow::Error),
437 
438     /// Failed to create FixedVhdxDiskSync
439     CreateFixedVhdxDiskSync(vhdx::vhdx::VhdxError),
440 
441     /// Failed to add DMA mapping handler to virtio-mem device.
442     AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
443 
444     /// Failed to remove DMA mapping handler from virtio-mem device.
445     RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
446 
447     /// Failed to create vfio-user client
448     VfioUserCreateClient(vfio_user::Error),
449 
450     /// Failed to create VFIO user device
451     VfioUserCreate(VfioUserPciDeviceError),
452 
453     /// Failed to map region from VFIO user device into guest
454     VfioUserMapRegion(VfioUserPciDeviceError),
455 
456     /// Failed to DMA map VFIO user device.
457     VfioUserDmaMap(VfioUserPciDeviceError),
458 
459     /// Failed to DMA unmap VFIO user device.
460     VfioUserDmaUnmap(VfioUserPciDeviceError),
461 
462     /// Failed to update memory mappings for VFIO user device
463     UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),
464 
465     /// Cannot duplicate file descriptor
466     DupFd(vmm_sys_util::errno::Error),
467 
468     /// Failed to DMA map virtio device.
469     VirtioDmaMap(std::io::Error),
470 
471     /// Failed to DMA unmap virtio device.
472     VirtioDmaUnmap(std::io::Error),
473 
474     /// Cannot hotplug device behind vIOMMU
475     InvalidIommuHotplug,
476 }
477 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;
478 
479 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;
480 
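// ioctl numbers used by create_pty() below: TIOCSPTLCK unlocks the pty pair,
// and TIOCGTPEER opens a file descriptor for the peer (sub) end of the pty.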
481 const TIOCSPTLCK: libc::c_int = 0x4004_5431;
482 const TIOCGTPEER: libc::c_int = 0x5441;
483 
484 pub fn create_pty(non_blocking: bool) -> io::Result<(File, File, PathBuf)> {
485     // Try to use /dev/pts/ptmx first, then fall back to /dev/ptmx.
486     // This is done to prefer the devpts filesystem that may be
487     // mounted in the process's namespace.
488     // Ideally these are all the same file, but different kernels
489     // could have things set up differently.
490     // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
491     // for further details.
492 
493     let custom_flags = libc::O_NOCTTY | if non_blocking { libc::O_NONBLOCK } else { 0 };
494     let main = match OpenOptions::new()
495         .read(true)
496         .write(true)
497         .custom_flags(custom_flags)
498         .open("/dev/pts/ptmx")
499     {
500         Ok(f) => f,
501         _ => OpenOptions::new()
502             .read(true)
503             .write(true)
504             .custom_flags(custom_flags)
505             .open("/dev/ptmx")?,
506     };
507     let mut unlock: libc::c_ulong = 0;
508     // SAFETY: FFI call into libc, trivially safe
509     unsafe {
510         libc::ioctl(
511             main.as_raw_fd(),
512             TIOCSPTLCK.try_into().unwrap(),
513             &mut unlock,
514         )
515     };
516 
517     // SAFETY: FFI call into libc, trivially safe
518     let sub_fd = unsafe {
519         libc::ioctl(
520             main.as_raw_fd(),
521             TIOCGTPEER.try_into().unwrap(),
522             libc::O_NOCTTY | libc::O_RDWR,
523         )
524     };
525     if sub_fd == -1 {
526         return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
527     }
528 
529     let proc_path = PathBuf::from(format!("/proc/self/fd/{}", sub_fd));
530     let path = read_link(proc_path)?;
531 
532     // SAFETY: sub_fd is checked to be valid before being wrapped in File
533     Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
534 }
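// Illustrative note: the `main` end is typically driven by the VMM (serial or
// console manager), while `path` (e.g. /dev/pts/N) is reported back so an
// external terminal can attach to the sub end.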
535 
536 #[derive(Default)]
537 pub struct Console {
538     console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
539 }
540 
541 impl Console {
542     pub fn update_console_size(&self) {
543         if let Some(resizer) = self.console_resizer.as_ref() {
544             resizer.update_console_size()
545         }
546     }
547 }
548 
549 pub(crate) struct AddressManager {
550     pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
551     #[cfg(target_arch = "x86_64")]
552     pub(crate) io_bus: Arc<Bus>,
553     pub(crate) mmio_bus: Arc<Bus>,
554     vm: Arc<dyn hypervisor::Vm>,
555     device_tree: Arc<Mutex<DeviceTree>>,
556     pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
557 }
558 
559 impl DeviceRelocation for AddressManager {
560     fn move_bar(
561         &self,
562         old_base: u64,
563         new_base: u64,
564         len: u64,
565         pci_dev: &mut dyn PciDevice,
566         region_type: PciBarRegionType,
567     ) -> std::result::Result<(), std::io::Error> {
568         match region_type {
569             PciBarRegionType::IoRegion => {
570                 #[cfg(target_arch = "x86_64")]
571                 {
572                     // Update system allocator
573                     self.allocator
574                         .lock()
575                         .unwrap()
576                         .free_io_addresses(GuestAddress(old_base), len as GuestUsize);
577 
578                     self.allocator
579                         .lock()
580                         .unwrap()
581                         .allocate_io_addresses(
582                             Some(GuestAddress(new_base)),
583                             len as GuestUsize,
584                             None,
585                         )
586                         .ok_or_else(|| {
587                             io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
588                         })?;
589 
590                     // Update PIO bus
591                     self.io_bus
592                         .update_range(old_base, len, new_base, len)
593                         .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
594                 }
595                 #[cfg(target_arch = "aarch64")]
596                 error!("I/O region is not supported");
597             }
598             PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
599                 // Update system allocator
600                 if region_type == PciBarRegionType::Memory32BitRegion {
601                     self.allocator
602                         .lock()
603                         .unwrap()
604                         .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);
605 
606                     self.allocator
607                         .lock()
608                         .unwrap()
609                         .allocate_mmio_hole_addresses(
610                             Some(GuestAddress(new_base)),
611                             len as GuestUsize,
612                             Some(len),
613                         )
614                         .ok_or_else(|| {
615                             io::Error::new(
616                                 io::ErrorKind::Other,
617                                 "failed allocating new 32 bits MMIO range",
618                             )
619                         })?;
620                 } else {
621                     // Find the specific allocator this BAR was allocated from and use it for the new one
622                     for allocator in &self.pci_mmio_allocators {
623                         let allocator_base = allocator.lock().unwrap().base();
624                         let allocator_end = allocator.lock().unwrap().end();
625 
626                         if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
627                             allocator
628                                 .lock()
629                                 .unwrap()
630                                 .free(GuestAddress(old_base), len as GuestUsize);
631 
632                             allocator
633                                 .lock()
634                                 .unwrap()
635                                 .allocate(
636                                     Some(GuestAddress(new_base)),
637                                     len as GuestUsize,
638                                     Some(len),
639                                 )
640                                 .ok_or_else(|| {
641                                     io::Error::new(
642                                         io::ErrorKind::Other,
643                                         "failed allocating new 64 bits MMIO range",
644                                     )
645                                 })?;
646 
647                             break;
648                         }
649                     }
650                 }
651 
652                 // Update MMIO bus
653                 self.mmio_bus
654                     .update_range(old_base, len, new_base, len)
655                     .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
656             }
657         }
658 
659         let any_dev = pci_dev.as_any();
660         if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
661             // Update the device_tree resources associated with the device
662             if let Some(node) = self
663                 .device_tree
664                 .lock()
665                 .unwrap()
666                 .get_mut(&virtio_pci_dev.id())
667             {
668                 let mut resource_updated = false;
669                 for resource in node.resources.iter_mut() {
670                     if let Resource::MmioAddressRange { base, .. } = resource {
671                         if *base == old_base {
672                             *base = new_base;
673                             resource_updated = true;
674                             break;
675                         }
676                     }
677                 }
678 
679                 if !resource_updated {
680                     return Err(io::Error::new(
681                         io::ErrorKind::Other,
682                         format!(
683                             "Couldn't find a resource with base 0x{:x} for device {}",
684                             old_base,
685                             virtio_pci_dev.id()
686                         ),
687                     ));
688                 }
689             } else {
690                 return Err(io::Error::new(
691                     io::ErrorKind::Other,
692                     format!(
693                         "Couldn't find device {} from device tree",
694                         virtio_pci_dev.id()
695                     ),
696                 ));
697             }
698 
699             let bar_addr = virtio_pci_dev.config_bar_addr();
700             if bar_addr == new_base {
701                 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
702                     let io_addr = IoEventAddress::Mmio(addr);
703                     self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
704                         io::Error::new(
705                             io::ErrorKind::Other,
706                             format!("failed to unregister ioevent: {:?}", e),
707                         )
708                     })?;
709                 }
710                 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
711                     let io_addr = IoEventAddress::Mmio(addr);
712                     self.vm
713                         .register_ioevent(event, &io_addr, None)
714                         .map_err(|e| {
715                             io::Error::new(
716                                 io::ErrorKind::Other,
717                                 format!("failed to register ioevent: {:?}", e),
718                             )
719                         })?;
720                 }
721             } else {
722                 let virtio_dev = virtio_pci_dev.virtio_device();
723                 let mut virtio_dev = virtio_dev.lock().unwrap();
724                 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
725                     if shm_regions.addr.raw_value() == old_base {
726                         let mem_region = self.vm.make_user_memory_region(
727                             shm_regions.mem_slot,
728                             old_base,
729                             shm_regions.len,
730                             shm_regions.host_addr,
731                             false,
732                             false,
733                         );
734 
735                         self.vm.remove_user_memory_region(mem_region).map_err(|e| {
736                             io::Error::new(
737                                 io::ErrorKind::Other,
738                                 format!("failed to remove user memory region: {:?}", e),
739                             )
740                         })?;
741                         // Create the new mapping by inserting the new region into KVM.
742                         // Create new mapping by inserting new region to KVM.
743                         let mem_region = self.vm.make_user_memory_region(
744                             shm_regions.mem_slot,
745                             new_base,
746                             shm_regions.len,
747                             shm_regions.host_addr,
748                             false,
749                             false,
750                         );
751 
752                         self.vm.create_user_memory_region(mem_region).map_err(|e| {
753                             io::Error::new(
754                                 io::ErrorKind::Other,
755                                 format!("failed to create user memory regions: {:?}", e),
756                             )
757                         })?;
758 
759                         // Update shared memory regions to reflect the new mapping.
760                         shm_regions.addr = GuestAddress(new_base);
761                         virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
762                             io::Error::new(
763                                 io::ErrorKind::Other,
764                                 format!("failed to update shared memory regions: {:?}", e),
765                             )
766                         })?;
767                     }
768                 }
769             }
770         }
771 
772         pci_dev.move_bar(old_base, new_base)
773     }
774 }
775 
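// Serializable state captured for snapshot/restore: the device tree plus the
// device ID counter (see state()/set_state() below).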
776 #[derive(Serialize, Deserialize)]
777 struct DeviceManagerState {
778     device_tree: DeviceTree,
779     device_id_cnt: Wrapping<usize>,
780 }
781 
782 #[derive(Debug)]
783 pub struct PtyPair {
784     pub main: File,
785     pub sub: File,
786     pub path: PathBuf,
787 }
788 
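// File is not Clone, so cloning a PtyPair duplicates the underlying file
// descriptors with try_clone() (and panics if duplication fails).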
789 impl Clone for PtyPair {
790     fn clone(&self) -> Self {
791         PtyPair {
792             main: self.main.try_clone().unwrap(),
793             sub: self.sub.try_clone().unwrap(),
794             path: self.path.clone(),
795         }
796     }
797 }
798 
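// Handle onto a PCI device managed by the DeviceManager: VFIO, virtio-pci or
// vfio-user.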
799 #[derive(Clone)]
800 pub enum PciDeviceHandle {
801     Vfio(Arc<Mutex<VfioPciDevice>>),
802     Virtio(Arc<Mutex<VirtioPciDevice>>),
803     VfioUser(Arc<Mutex<VfioUserPciDevice>>),
804 }
805 
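// A virtio device bundled with its placement information: target PCI segment,
// whether it sits behind the virtual IOMMU, and an optional external DMA
// mapping handler.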
806 #[derive(Clone)]
807 struct MetaVirtioDevice {
808     virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
809     iommu: bool,
810     id: String,
811     pci_segment: u16,
812     dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
813 }
814 
815 pub struct DeviceManager {
816     // Manage address space related to devices
817     address_manager: Arc<AddressManager>,
818 
819     // Console abstraction
820     console: Arc<Console>,
821 
822     // console PTY
823     console_pty: Option<Arc<Mutex<PtyPair>>>,
824 
825     // serial PTY
826     serial_pty: Option<Arc<Mutex<PtyPair>>>,
827 
828     // Serial Manager
829     serial_manager: Option<Arc<SerialManager>>,
830 
831     // Pipe used for console resize notifications (PTY foreground status)
832     console_resize_pipe: Option<Arc<File>>,
833 
834     // Interrupt controller
835     #[cfg(target_arch = "x86_64")]
836     interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
837     #[cfg(target_arch = "aarch64")]
838     interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,
839 
840     // Things to be added to the command line (e.g. for virtio-mmio)
841     cmdline_additions: Vec<String>,
842 
843     // ACPI GED notification device
844     ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,
845 
846     // VM configuration
847     config: Arc<Mutex<VmConfig>>,
848 
849     // Memory Manager
850     memory_manager: Arc<Mutex<MemoryManager>>,
851 
852     // The virtio devices on the system
853     virtio_devices: Vec<MetaVirtioDevice>,
854 
855     // List of bus devices
856     // Let the DeviceManager keep strong references to the BusDevice devices.
857     // This allows the IO and MMIO buses to be provided with Weak references,
858     // which prevents cyclic dependencies.
859     bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,
860 
861     // Counter to keep track of the consumed device IDs.
862     device_id_cnt: Wrapping<usize>,
863 
864     pci_segments: Vec<PciSegment>,
865 
866     #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
867     // MSI Interrupt Manager
868     msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
869 
870     #[cfg_attr(feature = "mshv", allow(dead_code))]
871     // Legacy Interrupt Manager
872     legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,
873 
874     // Passthrough device handle
875     passthrough_device: Option<Arc<dyn hypervisor::Device>>,
876 
877     // VFIO container
878     // Only one container can be created, therefore it is stored as part of the
879     // DeviceManager to be reused.
880     vfio_container: Option<Arc<VfioContainer>>,
881 
882     // Paravirtualized IOMMU
883     iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
884     iommu_mapping: Option<Arc<IommuMapping>>,
885 
886     // PCI information about devices attached to the paravirtualized IOMMU
887     // It contains the virtual IOMMU PCI BDF along with the list of PCI BDFs
888     // for the devices attached to the virtual IOMMU. This information is
889     // used to fill the ACPI VIOT table.
890     iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,
891 
892     // Tree of devices, representing the dependencies between devices.
893     // Useful for introspection, snapshot and restore.
894     device_tree: Arc<Mutex<DeviceTree>>,
895 
896     // Exit event
897     exit_evt: EventFd,
898     reset_evt: EventFd,
899 
900     #[cfg(target_arch = "aarch64")]
901     id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,
902 
903     // seccomp action
904     seccomp_action: SeccompAction,
905 
906     // List of guest NUMA nodes.
907     numa_nodes: NumaNodes,
908 
909     // Possible handle to the virtio-balloon device
910     balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,
911 
912     // Virtio Device activation EventFd to allow the VMM thread to trigger device
913     // activation and thus start the threads from the VMM thread
914     activate_evt: EventFd,
915 
916     acpi_address: GuestAddress,
917 
918     selected_segment: usize,
919 
920     // Possible handles to the virtio-mem devices
921     virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,
922 
923     #[cfg(target_arch = "aarch64")]
924     // GPIO device for AArch64
925     gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,
926 
927     // Flag to force setting the iommu on virtio devices
928     force_iommu: bool,
929 
930     // Helps identify if the VM is currently being restored
931     restoring: bool,
932 
933     // io_uring availability, cached once it has been detected
934     io_uring_supported: Option<bool>,
935 }
936 
937 impl DeviceManager {
938     #[allow(clippy::too_many_arguments)]
939     pub fn new(
940         vm: Arc<dyn hypervisor::Vm>,
941         config: Arc<Mutex<VmConfig>>,
942         memory_manager: Arc<Mutex<MemoryManager>>,
943         exit_evt: &EventFd,
944         reset_evt: &EventFd,
945         seccomp_action: SeccompAction,
946         numa_nodes: NumaNodes,
947         activate_evt: &EventFd,
948         force_iommu: bool,
949         restoring: bool,
950     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
951         let device_tree = Arc::new(Mutex::new(DeviceTree::new()));
952 
953         let num_pci_segments =
954             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
955                 platform_config.num_pci_segments
956             } else {
957                 1
958             };
959 
960         let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
961         let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;
962 
963         // Start each PCI segment range on a 4GiB boundary
964         let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
965             / ((4 << 30) * num_pci_segments as u64)
966             * (4 << 30);
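        // For example (illustrative figures): with a 96 GiB device area and
        // num_pci_segments = 2, each segment gets (96 GiB / (4 GiB * 2)) * 4 GiB
        // = 48 GiB, i.e. the per-segment size is rounded down to a multiple of 4 GiB.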
967 
968         let mut pci_mmio_allocators = vec![];
969         for i in 0..num_pci_segments as u64 {
970             let mmio_start = start_of_device_area + i * pci_segment_size;
971             let allocator = Arc::new(Mutex::new(
972                 AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
973             ));
974             pci_mmio_allocators.push(allocator)
975         }
976 
977         let address_manager = Arc::new(AddressManager {
978             allocator: memory_manager.lock().unwrap().allocator(),
979             #[cfg(target_arch = "x86_64")]
980             io_bus: Arc::new(Bus::new()),
981             mmio_bus: Arc::new(Bus::new()),
982             vm: vm.clone(),
983             device_tree: Arc::clone(&device_tree),
984             pci_mmio_allocators,
985         });
986 
987         // First we create the MSI interrupt manager; the legacy one is created
988         // later, after the IOAPIC device creation.
989         // We create the MSI one first because the IOAPIC needs it, and the
990         // legacy interrupt manager in turn needs the IOAPIC. So we're
991         // handling a linear dependency chain:
992         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
993         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
994             Arc::new(MsiInterruptManager::new(
995                 Arc::clone(&address_manager.allocator),
996                 vm,
997             ));
998 
999         let acpi_address = address_manager
1000             .allocator
1001             .lock()
1002             .unwrap()
1003             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1004             .ok_or(DeviceManagerError::AllocateIoPort)?;
1005 
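        // One legacy IRQ slot per PCI device slot (a PCI bus has up to 32 devices).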
1006         let mut pci_irq_slots = [0; 32];
1007         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1008             &address_manager,
1009             &mut pci_irq_slots,
1010         )?;
1011 
1012         let mut pci_segments = vec![PciSegment::new_default_segment(
1013             &address_manager,
1014             Arc::clone(&address_manager.pci_mmio_allocators[0]),
1015             &pci_irq_slots,
1016         )?];
1017 
1018         for i in 1..num_pci_segments as usize {
1019             pci_segments.push(PciSegment::new(
1020                 i as u16,
1021                 &address_manager,
1022                 Arc::clone(&address_manager.pci_mmio_allocators[i]),
1023                 &pci_irq_slots,
1024             )?);
1025         }
1026 
1027         let device_manager = DeviceManager {
1028             address_manager: Arc::clone(&address_manager),
1029             console: Arc::new(Console::default()),
1030             interrupt_controller: None,
1031             cmdline_additions: Vec::new(),
1032 
1033             ged_notification_device: None,
1034             config,
1035             memory_manager,
1036             virtio_devices: Vec::new(),
1037             bus_devices: Vec::new(),
1038             device_id_cnt: Wrapping(0),
1039             msi_interrupt_manager,
1040             legacy_interrupt_manager: None,
1041             passthrough_device: None,
1042             vfio_container: None,
1043             iommu_device: None,
1044             iommu_mapping: None,
1045             iommu_attached_devices: None,
1046             pci_segments,
1047             device_tree,
1048             exit_evt: exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
1049             reset_evt: reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
1050             #[cfg(target_arch = "aarch64")]
1051             id_to_dev_info: HashMap::new(),
1052             seccomp_action,
1053 
1054             numa_nodes,
1055             balloon: None,
1056             activate_evt: activate_evt
1057                 .try_clone()
1058                 .map_err(DeviceManagerError::EventFd)?,
1059 
1060             acpi_address,
1061 
1062             selected_segment: 0,
1063             serial_pty: None,
1064             serial_manager: None,
1065             console_pty: None,
1066             console_resize_pipe: None,
1067             virtio_mem_devices: Vec::new(),
1068             #[cfg(target_arch = "aarch64")]
1069             gpio_device: None,
1070             force_iommu,
1071             restoring,
1072             io_uring_supported: None,
1073         };
1074 
1075         let device_manager = Arc::new(Mutex::new(device_manager));
1076 
1077         address_manager
1078             .mmio_bus
1079             .insert(
1080                 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
1081                 acpi_address.0,
1082                 DEVICE_MANAGER_ACPI_SIZE as u64,
1083             )
1084             .map_err(DeviceManagerError::BusError)?;
1085 
1086         Ok(device_manager)
1087     }
1088 
1089     pub fn serial_pty(&self) -> Option<PtyPair> {
1090         self.serial_pty
1091             .as_ref()
1092             .map(|pty| pty.lock().unwrap().clone())
1093     }
1094 
1095     pub fn console_pty(&self) -> Option<PtyPair> {
1096         self.console_pty
1097             .as_ref()
1098             .map(|pty| pty.lock().unwrap().clone())
1099     }
1100 
1101     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1102         self.console_resize_pipe.as_ref().map(Arc::clone)
1103     }
1104 
1105     pub fn create_devices(
1106         &mut self,
1107         serial_pty: Option<PtyPair>,
1108         console_pty: Option<PtyPair>,
1109         console_resize_pipe: Option<File>,
1110     ) -> DeviceManagerResult<()> {
1111         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1112 
1113         let interrupt_controller = self.add_interrupt_controller()?;
1114 
1115         // Now we can create the legacy interrupt manager, which needs the freshly
1116         // formed IOAPIC device.
1117         let legacy_interrupt_manager: Arc<
1118             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1119         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1120             &interrupt_controller,
1121         )));
1122 
1123         {
1124             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1125                 self.address_manager
1126                     .mmio_bus
1127                     .insert(
1128                         Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
1129                         acpi_address.0,
1130                         MEMORY_MANAGER_ACPI_SIZE as u64,
1131                     )
1132                     .map_err(DeviceManagerError::BusError)?;
1133             }
1134         }
1135 
1136         #[cfg(target_arch = "x86_64")]
1137         self.add_legacy_devices(
1138             self.reset_evt
1139                 .try_clone()
1140                 .map_err(DeviceManagerError::EventFd)?,
1141         )?;
1142 
1143         #[cfg(target_arch = "aarch64")]
1144         self.add_legacy_devices(&legacy_interrupt_manager)?;
1145 
1146         {
1147             self.ged_notification_device = self.add_acpi_devices(
1148                 &legacy_interrupt_manager,
1149                 self.reset_evt
1150                     .try_clone()
1151                     .map_err(DeviceManagerError::EventFd)?,
1152                 self.exit_evt
1153                     .try_clone()
1154                     .map_err(DeviceManagerError::EventFd)?,
1155             )?;
1156         }
1157 
1158         self.console = self.add_console_device(
1159             &legacy_interrupt_manager,
1160             &mut virtio_devices,
1161             serial_pty,
1162             console_pty,
1163             console_resize_pipe,
1164         )?;
1165 
1166         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1167 
1168         virtio_devices.append(&mut self.make_virtio_devices()?);
1169 
1170         self.add_pci_devices(virtio_devices.clone())?;
1171 
1172         self.virtio_devices = virtio_devices;
1173 
1174         Ok(())
1175     }
1176 
1177     fn state(&self) -> DeviceManagerState {
1178         DeviceManagerState {
1179             device_tree: self.device_tree.lock().unwrap().clone(),
1180             device_id_cnt: self.device_id_cnt,
1181         }
1182     }
1183 
1184     fn set_state(&mut self, state: &DeviceManagerState) {
1185         *self.device_tree.lock().unwrap() = state.device_tree.clone();
1186         self.device_id_cnt = state.device_id_cnt;
1187     }
1188 
1189     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1190         #[cfg(target_arch = "aarch64")]
1191         {
1192             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1193             let msi_start = arch::layout::GIC_V3_DIST_START.raw_value()
1194                 - arch::layout::GIC_V3_REDIST_SIZE * (vcpus as u64)
1195                 - arch::layout::GIC_V3_ITS_SIZE;
1196             let msi_end = msi_start + arch::layout::GIC_V3_ITS_SIZE - 1;
1197             (msi_start, msi_end)
1198         }
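        // On x86_64 the MSI IOVA space is the fixed APIC doorbell window.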
1199         #[cfg(target_arch = "x86_64")]
1200         (0xfee0_0000, 0xfeef_ffff)
1201     }
1202 
1203     #[cfg(target_arch = "aarch64")]
1204     /// Gets information about the devices registered so far.
1205     pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
1206         &self.id_to_dev_info
1207     }
1208 
1209     #[allow(unused_variables)]
1210     fn add_pci_devices(
1211         &mut self,
1212         virtio_devices: Vec<MetaVirtioDevice>,
1213     ) -> DeviceManagerResult<()> {
1214         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1215 
1216         let iommu_device = if self.config.lock().unwrap().iommu {
1217             let (device, mapping) = virtio_devices::Iommu::new(
1218                 iommu_id.clone(),
1219                 self.seccomp_action.clone(),
1220                 self.exit_evt
1221                     .try_clone()
1222                     .map_err(DeviceManagerError::EventFd)?,
1223                 self.get_msi_iova_space(),
1224             )
1225             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1226             let device = Arc::new(Mutex::new(device));
1227             self.iommu_device = Some(Arc::clone(&device));
1228             self.iommu_mapping = Some(mapping);
1229 
1230             // Fill the device tree with a new node. In case of restore, we
1231             // know there is nothing to do, so we can simply override the
1232             // existing entry.
1233             self.device_tree
1234                 .lock()
1235                 .unwrap()
1236                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1237 
1238             Some(device)
1239         } else {
1240             None
1241         };
1242 
1243         let mut iommu_attached_devices = Vec::new();
1244         {
1245             for handle in virtio_devices {
1246                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1247                     self.iommu_mapping.clone()
1248                 } else {
1249                     None
1250                 };
1251 
1252                 let dev_id = self.add_virtio_pci_device(
1253                     handle.virtio_device,
1254                     &mapping,
1255                     handle.id,
1256                     handle.pci_segment,
1257                     handle.dma_handler,
1258                 )?;
1259 
1260                 if handle.iommu {
1261                     iommu_attached_devices.push(dev_id);
1262                 }
1263             }
1264 
1265             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1266             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1267 
1268             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1269             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1270 
1271             // Add all devices from forced iommu segments
1272             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1273                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1274                     for segment in iommu_segments {
1275                         for device in 0..32 {
1276                             let bdf = PciBdf::new(*segment, 0, device, 0);
1277                             if !iommu_attached_devices.contains(&bdf) {
1278                                 iommu_attached_devices.push(bdf);
1279                             }
1280                         }
1281                     }
1282                 }
1283             }
1284 
1285             if let Some(iommu_device) = iommu_device {
1286                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1287                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1288             }
1289         }
1290 
1291         for segment in &self.pci_segments {
1292             #[cfg(target_arch = "x86_64")]
1293             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1294                 self.bus_devices
1295                     .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
1296             }
1297 
1298             self.bus_devices
1299                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
1300         }
1301 
1302         Ok(())
1303     }
1304 
1305     #[cfg(target_arch = "aarch64")]
1306     fn add_interrupt_controller(
1307         &mut self,
1308     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1309         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1310             gic::Gic::new(
1311                 self.config.lock().unwrap().cpus.boot_vcpus,
1312                 Arc::clone(&self.msi_interrupt_manager),
1313             )
1314             .map_err(DeviceManagerError::CreateInterruptController)?,
1315         ));
1316 
1317         self.interrupt_controller = Some(interrupt_controller.clone());
1318 
1319         // Unlike x86_64, the "interrupt_controller" here for AArch64 is only
1320         // a `Gic` object that implements the `InterruptController` trait to
1321         // provide the interrupt delivery service. It is not the real GIC
1322         // device, so we do not need to insert it into the device tree.
1323 
1324         Ok(interrupt_controller)
1325     }
1326 
1327     #[cfg(target_arch = "aarch64")]
1328     pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
1329         self.interrupt_controller.as_ref()
1330     }
1331 
1332     #[cfg(target_arch = "x86_64")]
1333     fn add_interrupt_controller(
1334         &mut self,
1335     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1336         let id = String::from(IOAPIC_DEVICE_NAME);
1337 
1338         // Create IOAPIC
1339         let interrupt_controller = Arc::new(Mutex::new(
1340             ioapic::Ioapic::new(
1341                 id.clone(),
1342                 APIC_START,
1343                 Arc::clone(&self.msi_interrupt_manager),
1344             )
1345             .map_err(DeviceManagerError::CreateInterruptController)?,
1346         ));
1347 
1348         self.interrupt_controller = Some(interrupt_controller.clone());
1349 
1350         self.address_manager
1351             .mmio_bus
1352             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1353             .map_err(DeviceManagerError::BusError)?;
1354 
1355         self.bus_devices
1356             .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);
1357 
1358         // Fill the device tree with a new node. In case of restore, we
1359         // know there is nothing to do, so we can simply override the
1360         // existing entry.
1361         self.device_tree
1362             .lock()
1363             .unwrap()
1364             .insert(id.clone(), device_node!(id, interrupt_controller));
1365 
1366         Ok(interrupt_controller)
1367     }
1368 
1369     fn add_acpi_devices(
1370         &mut self,
1371         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1372         reset_evt: EventFd,
1373         exit_evt: EventFd,
1374     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1375         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1376             exit_evt, reset_evt,
1377         )));
1378 
1379         self.bus_devices
1380             .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);
1381 
1382         #[cfg(target_arch = "x86_64")]
1383         {
1384             self.address_manager
1385                 .allocator
1386                 .lock()
1387                 .unwrap()
1388                 .allocate_io_addresses(Some(GuestAddress(0x3c0)), 0x8, None)
1389                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1390 
1391             self.address_manager
1392                 .io_bus
1393                 .insert(shutdown_device, 0x3c0, 0x4)
1394                 .map_err(DeviceManagerError::BusError)?;
1395         }
1396 
1397         let ged_irq = self
1398             .address_manager
1399             .allocator
1400             .lock()
1401             .unwrap()
1402             .allocate_irq()
1403             .unwrap();
1404         let interrupt_group = interrupt_manager
1405             .create_group(LegacyIrqGroupConfig {
1406                 irq: ged_irq as InterruptIndex,
1407             })
1408             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1409         let ged_address = self
1410             .address_manager
1411             .allocator
1412             .lock()
1413             .unwrap()
1414             .allocate_platform_mmio_addresses(
1415                 None,
1416                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1417                 None,
1418             )
1419             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1420         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1421             interrupt_group,
1422             ged_irq,
1423             ged_address,
1424         )));
1425         self.address_manager
1426             .mmio_bus
1427             .insert(
1428                 ged_device.clone(),
1429                 ged_address.0,
1430                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1431             )
1432             .map_err(DeviceManagerError::BusError)?;
1433         self.bus_devices
1434             .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);
1435 
1436         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1437 
1438         self.bus_devices
1439             .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);
1440 
1441         #[cfg(target_arch = "x86_64")]
1442         {
1443             self.address_manager
1444                 .allocator
1445                 .lock()
1446                 .unwrap()
1447                 .allocate_io_addresses(Some(GuestAddress(0xb008)), 0x4, None)
1448                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1449 
1450             self.address_manager
1451                 .io_bus
1452                 .insert(pm_timer_device, 0xb008, 0x4)
1453                 .map_err(DeviceManagerError::BusError)?;
1454         }
1455 
1456         Ok(Some(ged_device))
1457     }
1458 
1459     #[cfg(target_arch = "x86_64")]
1460     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1461         // Add a shutdown device (i8042)
1462         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1463             reset_evt.try_clone().unwrap(),
1464         )));
1465 
1466         self.bus_devices
1467             .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);
1468 
1469         self.address_manager
1470             .io_bus
1471             .insert(i8042, 0x61, 0x4)
1472             .map_err(DeviceManagerError::BusError)?;
1473         {
1474             // Add an emulated CMOS device
1475             let mem_size = self
1476                 .memory_manager
1477                 .lock()
1478                 .unwrap()
1479                 .guest_memory()
1480                 .memory()
1481                 .last_addr()
1482                 .0
1483                 + 1;
1484             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1485             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1486 
1487             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1488                 mem_below_4g,
1489                 mem_above_4g,
1490                 reset_evt,
1491             )));
1492 
1493             self.bus_devices
1494                 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);
1495 
1496             self.address_manager
1497                 .io_bus
1498                 .insert(cmos, 0x70, 0x2)
1499                 .map_err(DeviceManagerError::BusError)?;
1500         }
1501         #[cfg(feature = "fwdebug")]
1502         {
1503             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1504 
1505             self.bus_devices
1506                 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);
1507 
1508             self.address_manager
1509                 .io_bus
1510                 .insert(fwdebug, 0x402, 0x1)
1511                 .map_err(DeviceManagerError::BusError)?;
1512         }
1513 
1514         Ok(())
1515     }
1516 
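    // On aarch64 the legacy devices live on the MMIO bus instead of port I/O:
    // an RTC and a GPIO controller, each with a dynamically allocated IRQ and
    // an MMIO_LEN window at the fixed layout addresses used below. Both are
    // also recorded in id_to_dev_info, presumably so the architecture code can
    // describe them to the guest (e.g. in the generated device tree).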
1517     #[cfg(target_arch = "aarch64")]
1518     fn add_legacy_devices(
1519         &mut self,
1520         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1521     ) -> DeviceManagerResult<()> {
1522         // Add an RTC device
1523         let rtc_irq = self
1524             .address_manager
1525             .allocator
1526             .lock()
1527             .unwrap()
1528             .allocate_irq()
1529             .unwrap();
1530 
1531         let interrupt_group = interrupt_manager
1532             .create_group(LegacyIrqGroupConfig {
1533                 irq: rtc_irq as InterruptIndex,
1534             })
1535             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1536 
1537         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1538 
1539         self.bus_devices
1540             .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);
1541 
1542         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1543 
1544         self.address_manager
1545             .mmio_bus
1546             .insert(rtc_device, addr.0, MMIO_LEN)
1547             .map_err(DeviceManagerError::BusError)?;
1548 
1549         self.id_to_dev_info.insert(
1550             (DeviceType::Rtc, "rtc".to_string()),
1551             MmioDeviceInfo {
1552                 addr: addr.0,
1553                 len: MMIO_LEN,
1554                 irq: rtc_irq,
1555             },
1556         );
1557 
1558         // Add a GPIO device
1559         let id = String::from(GPIO_DEVICE_NAME_PREFIX);
1560         let gpio_irq = self
1561             .address_manager
1562             .allocator
1563             .lock()
1564             .unwrap()
1565             .allocate_irq()
1566             .unwrap();
1567 
1568         let interrupt_group = interrupt_manager
1569             .create_group(LegacyIrqGroupConfig {
1570                 irq: gpio_irq as InterruptIndex,
1571             })
1572             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1573 
1574         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1575             id.clone(),
1576             interrupt_group,
1577         )));
1578 
1579         self.bus_devices
1580             .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);
1581 
1582         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1583 
1584         self.address_manager
1585             .mmio_bus
1586             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1587             .map_err(DeviceManagerError::BusError)?;
1588 
1589         self.gpio_device = Some(gpio_device.clone());
1590 
1591         self.id_to_dev_info.insert(
1592             (DeviceType::Gpio, "gpio".to_string()),
1593             MmioDeviceInfo {
1594                 addr: addr.0,
1595                 len: MMIO_LEN,
1596                 irq: gpio_irq,
1597             },
1598         );
1599 
1600         self.device_tree
1601             .lock()
1602             .unwrap()
1603             .insert(id.clone(), device_node!(id, gpio_device));
1604 
1605         Ok(())
1606     }
1607 
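    // The x86_64 serial device sits on the conventional COM1 resources:
    // I/O ports 0x3f8..0x3ff and IRQ 4, as allocated below. Illustrative ways
    // to drive it from the command line (example values, not exhaustive):
    //
    //   --serial tty                   # tie the guest UART to the host TTY
    //   --serial file=/tmp/serial.log  # log output to a file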
1608     #[cfg(target_arch = "x86_64")]
1609     fn add_serial_device(
1610         &mut self,
1611         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1612         serial_writer: Option<Box<dyn io::Write + Send>>,
1613     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1614         // Serial is tied to IRQ #4
1615         let serial_irq = 4;
1616 
1617         let id = String::from(SERIAL_DEVICE_NAME_PREFIX);
1618 
1619         let interrupt_group = interrupt_manager
1620             .create_group(LegacyIrqGroupConfig {
1621                 irq: serial_irq as InterruptIndex,
1622             })
1623             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1624 
1625         let serial = Arc::new(Mutex::new(Serial::new(
1626             id.clone(),
1627             interrupt_group,
1628             serial_writer,
1629         )));
1630 
1631         self.bus_devices
1632             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1633 
1634         self.address_manager
1635             .allocator
1636             .lock()
1637             .unwrap()
1638             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1639             .ok_or(DeviceManagerError::AllocateIoPort)?;
1640 
1641         self.address_manager
1642             .io_bus
1643             .insert(serial.clone(), 0x3f8, 0x8)
1644             .map_err(DeviceManagerError::BusError)?;
1645 
1646         // Fill the device tree with a new node. In case of restore, we
1647         // know there is nothing to do, so we can simply override the
1648         // existing entry.
1649         self.device_tree
1650             .lock()
1651             .unwrap()
1652             .insert(id.clone(), device_node!(id, serial));
1653 
1654         Ok(serial)
1655     }
1656 
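    // The aarch64 serial device is an ARM PL011 UART on the MMIO bus: its IRQ
    // is allocated dynamically, its base address comes from the fixed layout
    // (LEGACY_SERIAL_MAPPED_IO_START), and a matching
    // "earlycon=pl011,mmio,<addr>" hint is appended to the kernel command line.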
1657     #[cfg(target_arch = "aarch64")]
1658     fn add_serial_device(
1659         &mut self,
1660         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1661         serial_writer: Option<Box<dyn io::Write + Send>>,
1662     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1663         let id = String::from(SERIAL_DEVICE_NAME_PREFIX);
1664 
1665         let serial_irq = self
1666             .address_manager
1667             .allocator
1668             .lock()
1669             .unwrap()
1670             .allocate_irq()
1671             .unwrap();
1672 
1673         let interrupt_group = interrupt_manager
1674             .create_group(LegacyIrqGroupConfig {
1675                 irq: serial_irq as InterruptIndex,
1676             })
1677             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1678 
1679         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1680             id.clone(),
1681             interrupt_group,
1682             serial_writer,
1683         )));
1684 
1685         self.bus_devices
1686             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1687 
1688         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
1689 
1690         self.address_manager
1691             .mmio_bus
1692             .insert(serial.clone(), addr.0, MMIO_LEN)
1693             .map_err(DeviceManagerError::BusError)?;
1694 
1695         self.id_to_dev_info.insert(
1696             (DeviceType::Serial, DeviceType::Serial.to_string()),
1697             MmioDeviceInfo {
1698                 addr: addr.0,
1699                 len: MMIO_LEN,
1700                 irq: serial_irq,
1701             },
1702         );
1703 
1704         self.cmdline_additions
1705             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1706 
1707         // Fill the device tree with a new node. In case of restore, we
1708         // know there is nothing to do, so we can simply override the
1709         // existing entry.
1710         self.device_tree
1711             .lock()
1712             .unwrap()
1713             .insert(id.clone(), device_node!(id, serial));
1714 
1715         Ok(serial)
1716     }
1717 
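    // Helper for adjusting terminal settings on a host-side console fd. It is
    // a no-op for non-TTY fds; otherwise it reads the current termios with
    // tcgetattr, lets the closure mutate it (set_raw_mode() below passes
    // cfmakeraw), and writes it back with tcsetattr(TCSANOW).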
1718     fn modify_mode<F: FnOnce(&mut termios)>(
1719         &self,
1720         fd: RawFd,
1721         f: F,
1722     ) -> vmm_sys_util::errno::Result<()> {
1723         // SAFETY: safe because we check the return value of isatty.
1724         if unsafe { isatty(fd) } != 1 {
1725             return Ok(());
1726         }
1727 
1728         // SAFETY: The following pair of calls is safe because the termios struct gets
1729         // fully overwritten by tcgetattr and we check the return value.
1730         let mut termios: termios = unsafe { zeroed() };
1731         let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
1732         if ret < 0 {
1733             return vmm_sys_util::errno::errno_result();
1734         }
1735         f(&mut termios);
1736         // SAFETY: Safe because the syscall will only read the extent of termios and we check
1737         // the return result.
1738         let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
1739         if ret < 0 {
1740             return vmm_sys_util::errno::errno_result();
1741         }
1742 
1743         Ok(())
1744     }
1745 
1746     fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> {
1747         // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
1748         self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
1749     }
1750 
1751     fn listen_for_sigwinch_on_tty(&mut self, pty: &File) -> std::io::Result<()> {
1752         let seccomp_filter =
1753             get_seccomp_filter(&self.seccomp_action, Thread::PtyForeground).unwrap();
1754 
1755         match start_sigwinch_listener(seccomp_filter, pty) {
1756             Ok(pipe) => {
1757                 self.console_resize_pipe = Some(Arc::new(pipe));
1758             }
1759             Err(e) => {
1760                 warn!("Ignoring error from setting up SIGWINCH listener: {}", e)
1761             }
1762         }
1763 
1764         Ok(())
1765     }
1766 
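    // Builds the virtio-console device. The backend endpoint follows the
    // configured console mode: an output file, a PTY (either one handed back
    // in on restore or a freshly created pair switched to raw mode), the host
    // TTY (stdout/stdin are dup()'d so a reboot does not invalidate them), a
    // null sink, or nothing at all when the console is off. Illustrative
    // settings (example values): --console tty|pty|null|off|file=/tmp/console.log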
1767     fn add_virtio_console_device(
1768         &mut self,
1769         virtio_devices: &mut Vec<MetaVirtioDevice>,
1770         console_pty: Option<PtyPair>,
1771         resize_pipe: Option<File>,
1772     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
1773         let console_config = self.config.lock().unwrap().console.clone();
1774         let endpoint = match console_config.mode {
1775             ConsoleOutputMode::File => {
1776                 let file = File::create(console_config.file.as_ref().unwrap())
1777                     .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
1778                 Endpoint::File(file)
1779             }
1780             ConsoleOutputMode::Pty => {
1781                 if let Some(pty) = console_pty {
1782                     self.config.lock().unwrap().console.file = Some(pty.path.clone());
1783                     let file = pty.main.try_clone().unwrap();
1784                     self.console_pty = Some(Arc::new(Mutex::new(pty)));
1785                     self.console_resize_pipe = resize_pipe.map(Arc::new);
1786                     Endpoint::FilePair(file.try_clone().unwrap(), file)
1787                 } else {
1788                     let (main, mut sub, path) =
1789                         create_pty(false).map_err(DeviceManagerError::ConsolePtyOpen)?;
1790                     self.set_raw_mode(&mut sub)
1791                         .map_err(DeviceManagerError::SetPtyRaw)?;
1792                     self.config.lock().unwrap().console.file = Some(path.clone());
1793                     let file = main.try_clone().unwrap();
1794                     assert!(resize_pipe.is_none());
1795                     self.listen_for_sigwinch_on_tty(&sub).unwrap();
1796                     self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
1797                     Endpoint::FilePair(file.try_clone().unwrap(), file)
1798                 }
1799             }
1800             ConsoleOutputMode::Tty => {
1801                 // Duplicating the file descriptors like this is needed as otherwise
1802                 // they will be closed on a reboot and the numbers reused
1803 
1804                 // SAFETY: FFI call to dup. Trivially safe.
1805                 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
1806                 if stdout == -1 {
1807                     return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
1808                 }
1809                 // SAFETY: stdout is valid and owned solely by us.
1810                 let stdout = unsafe { File::from_raw_fd(stdout) };
1811 
1812                 // If stdin is an interactive TTY then we can accept input
1813                 // SAFETY: FFI call. Trivially safe.
1814                 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
1815                     // SAFETY: FFI call to dup. Trivially safe.
1816                     let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
1817                     if stdin == -1 {
1818                         return vmm_sys_util::errno::errno_result()
1819                             .map_err(DeviceManagerError::DupFd);
1820                     }
1821                     // SAFETY: stdin is valid and owned solely by us.
1822                     let stdin = unsafe { File::from_raw_fd(stdin) };
1823 
1824                     Endpoint::FilePair(stdout, stdin)
1825                 } else {
1826                     Endpoint::File(stdout)
1827                 }
1828             }
1829             ConsoleOutputMode::Null => Endpoint::Null,
1830             ConsoleOutputMode::Off => return Ok(None),
1831         };
1832         let id = String::from(CONSOLE_DEVICE_NAME);
1833 
1834         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
1835             id.clone(),
1836             endpoint,
1837             self.console_resize_pipe
1838                 .as_ref()
1839                 .map(|p| p.try_clone().unwrap()),
1840             self.force_iommu | console_config.iommu,
1841             self.seccomp_action.clone(),
1842             self.exit_evt
1843                 .try_clone()
1844                 .map_err(DeviceManagerError::EventFd)?,
1845         )
1846         .map_err(DeviceManagerError::CreateVirtioConsole)?;
1847         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
1848         virtio_devices.push(MetaVirtioDevice {
1849             virtio_device: Arc::clone(&virtio_console_device)
1850                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
1851             iommu: console_config.iommu,
1852             id: id.clone(),
1853             pci_segment: 0,
1854             dma_handler: None,
1855         });
1856 
1857         // Fill the device tree with a new node. In case of restore, we
1858         // know there is nothing to do, so we can simply override the
1859         // existing entry.
1860         self.device_tree
1861             .lock()
1862             .unwrap()
1863             .insert(id.clone(), device_node!(id, virtio_console_device));
1864 
1865         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
1866         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
1867             Some(console_resizer)
1868         } else {
1869             None
1870         })
1871     }
1872 
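    // Top-level console setup: creates the legacy serial device with a writer
    // matching the configured serial mode, spawns a SerialManager thread to
    // pump input for the pty/tty cases, then adds the virtio-console device.
    // The returned Console only carries the optional resizer used for
    // SIGWINCH propagation.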
1873     fn add_console_device(
1874         &mut self,
1875         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1876         virtio_devices: &mut Vec<MetaVirtioDevice>,
1877         serial_pty: Option<PtyPair>,
1878         console_pty: Option<PtyPair>,
1879         console_resize_pipe: Option<File>,
1880     ) -> DeviceManagerResult<Arc<Console>> {
1881         let serial_config = self.config.lock().unwrap().serial.clone();
1882         let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
1883             ConsoleOutputMode::File => Some(Box::new(
1884                 File::create(serial_config.file.as_ref().unwrap())
1885                     .map_err(DeviceManagerError::SerialOutputFileOpen)?,
1886             )),
1887             ConsoleOutputMode::Pty => {
1888                 if let Some(pty) = serial_pty {
1889                     self.config.lock().unwrap().serial.file = Some(pty.path.clone());
1890                     self.serial_pty = Some(Arc::new(Mutex::new(pty)));
1891                 } else {
1892                     let (main, mut sub, path) =
1893                         create_pty(true).map_err(DeviceManagerError::SerialPtyOpen)?;
1894                     self.set_raw_mode(&mut sub)
1895                         .map_err(DeviceManagerError::SetPtyRaw)?;
1896                     self.config.lock().unwrap().serial.file = Some(path.clone());
1897                     self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
1898                 }
1899                 None
1900             }
1901             ConsoleOutputMode::Tty => Some(Box::new(stdout())),
1902             ConsoleOutputMode::Off | ConsoleOutputMode::Null => None,
1903         };
1904         if serial_config.mode != ConsoleOutputMode::Off {
1905             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
1906             self.serial_manager = match serial_config.mode {
1907                 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty => {
1908                     let serial_manager =
1909                         SerialManager::new(serial, self.serial_pty.clone(), serial_config.mode)
1910                             .map_err(DeviceManagerError::CreateSerialManager)?;
1911                     if let Some(mut serial_manager) = serial_manager {
1912                         serial_manager
1913                             .start_thread(
1914                                 self.exit_evt
1915                                     .try_clone()
1916                                     .map_err(DeviceManagerError::EventFd)?,
1917                             )
1918                             .map_err(DeviceManagerError::SpawnSerialManager)?;
1919                         Some(Arc::new(serial_manager))
1920                     } else {
1921                         None
1922                     }
1923                 }
1924                 _ => None,
1925             };
1926         }
1927 
1928         let console_resizer =
1929             self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;
1930 
1931         Ok(Arc::new(Console { console_resizer }))
1932     }
1933 
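    // Aggregation point for the paravirtualized devices: each make_virtio_*
    // helper below returns zero or more MetaVirtioDevice entries (virtio
    // device, iommu flag, id, PCI segment, optional DMA handler) which the
    // caller is expected to attach to the guest; the pci_segment field hints
    // at a PCI transport.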
1934     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
1935         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
1936 
1937         // Create "standard" virtio devices (net/block/rng)
1938         devices.append(&mut self.make_virtio_block_devices()?);
1939         devices.append(&mut self.make_virtio_net_devices()?);
1940         devices.append(&mut self.make_virtio_rng_devices()?);
1941 
1942         // Add virtio-fs if required
1943         devices.append(&mut self.make_virtio_fs_devices()?);
1944 
1945         // Add virtio-pmem if required
1946         devices.append(&mut self.make_virtio_pmem_devices()?);
1947 
1948         // Add virtio-vsock if required
1949         devices.append(&mut self.make_virtio_vsock_devices()?);
1950 
1951         devices.append(&mut self.make_virtio_mem_devices()?);
1952 
1953         // Add virtio-balloon if required
1954         devices.append(&mut self.make_virtio_balloon_devices()?);
1955 
1956         // Add virtio-watchdog device
1957         devices.append(&mut self.make_virtio_watchdog_devices()?);
1958 
1959         // Add vDPA devices if required
1960         devices.append(&mut self.make_vdpa_devices()?);
1961 
1962         Ok(devices)
1963     }
1964 
1965     // Cache whether io_uring is supported to avoid probing for every block device
1966     fn io_uring_is_supported(&mut self) -> bool {
1967         if let Some(supported) = self.io_uring_supported {
1968             return supported;
1969         }
1970 
1971         let supported = block_io_uring_is_supported();
1972         self.io_uring_supported = Some(supported);
1973         supported
1974     }
1975 
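    // Creates a single block device. Two paths exist: a vhost-user-blk device
    // backed by an external daemon on a UNIX socket, or the built-in
    // virtio-block device, which opens the image (optionally with O_DIRECT),
    // probes its format and picks a backend accordingly:
    //
    //   FixedVhd / Raw  -> io_uring async backend when supported, else sync
    //   Qcow2 / Vhdx    -> synchronous backend only
    //
    // Illustrative config (example values): --disk path=/var/img/disk.raw,num_queues=4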
1976     fn make_virtio_block_device(
1977         &mut self,
1978         disk_cfg: &mut DiskConfig,
1979     ) -> DeviceManagerResult<MetaVirtioDevice> {
1980         let id = if let Some(id) = &disk_cfg.id {
1981             id.clone()
1982         } else {
1983             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
1984             disk_cfg.id = Some(id.clone());
1985             id
1986         };
1987 
1988         info!("Creating virtio-block device: {:?}", disk_cfg);
1989 
1990         if disk_cfg.vhost_user {
1991             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
1992             let vu_cfg = VhostUserConfig {
1993                 socket,
1994                 num_queues: disk_cfg.num_queues,
1995                 queue_size: disk_cfg.queue_size,
1996             };
1997             let vhost_user_block_device = Arc::new(Mutex::new(
1998                 match virtio_devices::vhost_user::Blk::new(
1999                     id.clone(),
2000                     vu_cfg,
2001                     self.restoring,
2002                     self.seccomp_action.clone(),
2003                     self.exit_evt
2004                         .try_clone()
2005                         .map_err(DeviceManagerError::EventFd)?,
2006                     self.force_iommu,
2007                 ) {
2008                     Ok(vub_device) => vub_device,
2009                     Err(e) => {
2010                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2011                     }
2012                 },
2013             ));
2014 
2015             // Fill the device tree with a new node. In case of restore, we
2016             // know there is nothing to do, so we can simply override the
2017             // existing entry.
2018             self.device_tree
2019                 .lock()
2020                 .unwrap()
2021                 .insert(id.clone(), device_node!(id, vhost_user_block_device));
2022 
2023             Ok(MetaVirtioDevice {
2024                 virtio_device: Arc::clone(&vhost_user_block_device)
2025                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2026                 iommu: false,
2027                 id,
2028                 pci_segment: disk_cfg.pci_segment,
2029                 dma_handler: None,
2030             })
2031         } else {
2032             let mut options = OpenOptions::new();
2033             options.read(true);
2034             options.write(!disk_cfg.readonly);
2035             if disk_cfg.direct {
2036                 options.custom_flags(libc::O_DIRECT);
2037             }
2038             // Open block device path
2039             let mut file: File = options
2040                 .open(
2041                     disk_cfg
2042                         .path
2043                         .as_ref()
2044                         .ok_or(DeviceManagerError::NoDiskPath)?
2045                         .clone(),
2046                 )
2047                 .map_err(DeviceManagerError::Disk)?;
2048             let image_type =
2049                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2050 
2051             let image = match image_type {
2052                 ImageType::FixedVhd => {
2053                     // Use asynchronous backend relying on io_uring if the
2054                     // syscalls are supported.
2055                     if self.io_uring_is_supported() && !disk_cfg.disable_io_uring {
2056                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2057                         Box::new(
2058                             FixedVhdDiskAsync::new(file)
2059                                 .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2060                         ) as Box<dyn DiskFile>
2061                     } else {
2062                         info!("Using synchronous fixed VHD disk file");
2063                         Box::new(
2064                             FixedVhdDiskSync::new(file)
2065                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2066                         ) as Box<dyn DiskFile>
2067                     }
2068                 }
2069                 ImageType::Raw => {
2070                     // Use asynchronous backend relying on io_uring if the
2071                     // syscalls are supported.
2072                     if self.io_uring_is_supported() && !disk_cfg.disable_io_uring {
2073                         info!("Using asynchronous RAW disk file (io_uring)");
2074                         Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2075                     } else {
2076                         info!("Using synchronous RAW disk file");
2077                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2078                     }
2079                 }
2080                 ImageType::Qcow2 => {
2081                     info!("Using synchronous QCOW disk file");
2082                     Box::new(
2083                         QcowDiskSync::new(file, disk_cfg.direct)
2084                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2085                     ) as Box<dyn DiskFile>
2086                 }
2087                 ImageType::Vhdx => {
2088                     info!("Using synchronous VHDX disk file");
2089                     Box::new(
2090                         VhdxDiskSync::new(file)
2091                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2092                     ) as Box<dyn DiskFile>
2093                 }
2094             };
2095 
2096             let dev = Arc::new(Mutex::new(
2097                 virtio_devices::Block::new(
2098                     id.clone(),
2099                     image,
2100                     disk_cfg
2101                         .path
2102                         .as_ref()
2103                         .ok_or(DeviceManagerError::NoDiskPath)?
2104                         .clone(),
2105                     disk_cfg.readonly,
2106                     self.force_iommu | disk_cfg.iommu,
2107                     disk_cfg.num_queues,
2108                     disk_cfg.queue_size,
2109                     self.seccomp_action.clone(),
2110                     disk_cfg.rate_limiter_config,
2111                     self.exit_evt
2112                         .try_clone()
2113                         .map_err(DeviceManagerError::EventFd)?,
2114                 )
2115                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2116             ));
2117 
2118             let virtio_device = Arc::clone(&dev) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>;
2119             let migratable_device = dev as Arc<Mutex<dyn Migratable>>;
2120 
2121             // Fill the device tree with a new node. In case of restore, we
2122             // know there is nothing to do, so we can simply override the
2123             // existing entry.
2124             self.device_tree
2125                 .lock()
2126                 .unwrap()
2127                 .insert(id.clone(), device_node!(id, migratable_device));
2128 
2129             Ok(MetaVirtioDevice {
2130                 virtio_device,
2131                 iommu: disk_cfg.iommu,
2132                 id,
2133                 pci_segment: disk_cfg.pci_segment,
2134                 dma_handler: None,
2135             })
2136         }
2137     }
2138 
2139     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2140         let mut devices = Vec::new();
2141 
2142         let mut block_devices = self.config.lock().unwrap().disks.clone();
2143         if let Some(disk_list_cfg) = &mut block_devices {
2144             for disk_cfg in disk_list_cfg.iter_mut() {
2145                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2146             }
2147         }
2148         self.config.lock().unwrap().disks = block_devices;
2149 
2150         Ok(devices)
2151     }
2152 
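    // Creates a single network device. As with block there is a vhost-user
    // path (external backend over a UNIX socket, in client or server mode) and
    // a built-in virtio-net path, backed either by a named tap device, by
    // pre-opened tap fds, or by a tap configured from ip/mask. Illustrative
    // config (example values): --net tap=vmtap0,mac=12:34:56:78:90:ab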
2153     fn make_virtio_net_device(
2154         &mut self,
2155         net_cfg: &mut NetConfig,
2156     ) -> DeviceManagerResult<MetaVirtioDevice> {
2157         let id = if let Some(id) = &net_cfg.id {
2158             id.clone()
2159         } else {
2160             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2161             net_cfg.id = Some(id.clone());
2162             id
2163         };
2164         info!("Creating virtio-net device: {:?}", net_cfg);
2165 
2166         if net_cfg.vhost_user {
2167             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2168             let vu_cfg = VhostUserConfig {
2169                 socket,
2170                 num_queues: net_cfg.num_queues,
2171                 queue_size: net_cfg.queue_size,
2172             };
2173             let server = match net_cfg.vhost_mode {
2174                 VhostMode::Client => false,
2175                 VhostMode::Server => true,
2176             };
2177             let vhost_user_net_device = Arc::new(Mutex::new(
2178                 match virtio_devices::vhost_user::Net::new(
2179                     id.clone(),
2180                     net_cfg.mac,
2181                     vu_cfg,
2182                     server,
2183                     self.seccomp_action.clone(),
2184                     self.restoring,
2185                     self.exit_evt
2186                         .try_clone()
2187                         .map_err(DeviceManagerError::EventFd)?,
2188                     self.force_iommu,
2189                 ) {
2190                     Ok(vun_device) => vun_device,
2191                     Err(e) => {
2192                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2193                     }
2194                 },
2195             ));
2196 
2197             // Fill the device tree with a new node. In case of restore, we
2198             // know there is nothing to do, so we can simply override the
2199             // existing entry.
2200             self.device_tree
2201                 .lock()
2202                 .unwrap()
2203                 .insert(id.clone(), device_node!(id, vhost_user_net_device));
2204 
2205             Ok(MetaVirtioDevice {
2206                 virtio_device: Arc::clone(&vhost_user_net_device)
2207                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2208                 iommu: net_cfg.iommu,
2209                 id,
2210                 pci_segment: net_cfg.pci_segment,
2211                 dma_handler: None,
2212             })
2213         } else {
2214             let virtio_net_device = if let Some(ref tap_if_name) = net_cfg.tap {
2215                 Arc::new(Mutex::new(
2216                     virtio_devices::Net::new(
2217                         id.clone(),
2218                         Some(tap_if_name),
2219                         None,
2220                         None,
2221                         Some(net_cfg.mac),
2222                         &mut net_cfg.host_mac,
2223                         self.force_iommu | net_cfg.iommu,
2224                         net_cfg.num_queues,
2225                         net_cfg.queue_size,
2226                         self.seccomp_action.clone(),
2227                         net_cfg.rate_limiter_config,
2228                         self.exit_evt
2229                             .try_clone()
2230                             .map_err(DeviceManagerError::EventFd)?,
2231                     )
2232                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2233                 ))
2234             } else if let Some(fds) = &net_cfg.fds {
2235                 Arc::new(Mutex::new(
2236                     virtio_devices::Net::from_tap_fds(
2237                         id.clone(),
2238                         fds,
2239                         Some(net_cfg.mac),
2240                         self.force_iommu | net_cfg.iommu,
2241                         net_cfg.queue_size,
2242                         self.seccomp_action.clone(),
2243                         net_cfg.rate_limiter_config,
2244                         self.exit_evt
2245                             .try_clone()
2246                             .map_err(DeviceManagerError::EventFd)?,
2247                     )
2248                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2249                 ))
2250             } else {
2251                 Arc::new(Mutex::new(
2252                     virtio_devices::Net::new(
2253                         id.clone(),
2254                         None,
2255                         Some(net_cfg.ip),
2256                         Some(net_cfg.mask),
2257                         Some(net_cfg.mac),
2258                         &mut net_cfg.host_mac,
2259                         self.force_iommu | net_cfg.iommu,
2260                         net_cfg.num_queues,
2261                         net_cfg.queue_size,
2262                         self.seccomp_action.clone(),
2263                         net_cfg.rate_limiter_config,
2264                         self.exit_evt
2265                             .try_clone()
2266                             .map_err(DeviceManagerError::EventFd)?,
2267                     )
2268                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2269                 ))
2270             };
2271 
2272             // Fill the device tree with a new node. In case of restore, we
2273             // know there is nothing to do, so we can simply override the
2274             // existing entry.
2275             self.device_tree
2276                 .lock()
2277                 .unwrap()
2278                 .insert(id.clone(), device_node!(id, virtio_net_device));
2279 
2280             Ok(MetaVirtioDevice {
2281                 virtio_device: Arc::clone(&virtio_net_device)
2282                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2283                 iommu: net_cfg.iommu,
2284                 id,
2285                 pci_segment: net_cfg.pci_segment,
2286                 dma_handler: None,
2287             })
2288         }
2289     }
2290 
2291     /// Add virtio-net and vhost-user-net devices
2292     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2293         let mut devices = Vec::new();
2294         let mut net_devices = self.config.lock().unwrap().net.clone();
2295         if let Some(net_list_cfg) = &mut net_devices {
2296             for net_cfg in net_list_cfg.iter_mut() {
2297                 devices.push(self.make_virtio_net_device(net_cfg)?);
2298             }
2299         }
2300         self.config.lock().unwrap().net = net_devices;
2301 
2302         Ok(devices)
2303     }
2304 
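    // virtio-rng forwards entropy from the configured host source
    // (rng_config.src, commonly /dev/urandom) to the guest.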
2305     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2306         let mut devices = Vec::new();
2307 
2308         // Add virtio-rng if required
2309         let rng_config = self.config.lock().unwrap().rng.clone();
2310         if let Some(rng_path) = rng_config.src.to_str() {
2311             info!("Creating virtio-rng device: {:?}", rng_config);
2312             let id = String::from(RNG_DEVICE_NAME);
2313 
2314             let virtio_rng_device = Arc::new(Mutex::new(
2315                 virtio_devices::Rng::new(
2316                     id.clone(),
2317                     rng_path,
2318                     self.force_iommu | rng_config.iommu,
2319                     self.seccomp_action.clone(),
2320                     self.exit_evt
2321                         .try_clone()
2322                         .map_err(DeviceManagerError::EventFd)?,
2323                 )
2324                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2325             ));
2326             devices.push(MetaVirtioDevice {
2327                 virtio_device: Arc::clone(&virtio_rng_device)
2328                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2329                 iommu: rng_config.iommu,
2330                 id: id.clone(),
2331                 pci_segment: 0,
2332                 dma_handler: None,
2333             });
2334 
2335             // Fill the device tree with a new node. In case of restore, we
2336             // know there is nothing to do, so we can simply override the
2337             // existing entry.
2338             self.device_tree
2339                 .lock()
2340                 .unwrap()
2341                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2342         }
2343 
2344         Ok(devices)
2345     }
2346 
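    // virtio-fs is always delegated to an external vhost-user-fs daemon (e.g.
    // virtiofsd) reachable over fs_cfg.socket. Note that the DAX cache branch
    // below is effectively dead code: fs_cfg.dax is forced to false a few
    // lines further down, so no shared-memory cache window gets mapped.
    // Illustrative config (example values): --fs tag=myfs,socket=/tmp/virtiofs.sock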
2347     fn make_virtio_fs_device(
2348         &mut self,
2349         fs_cfg: &mut FsConfig,
2350     ) -> DeviceManagerResult<MetaVirtioDevice> {
2351         let id = if let Some(id) = &fs_cfg.id {
2352             id.clone()
2353         } else {
2354             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2355             fs_cfg.id = Some(id.clone());
2356             id
2357         };
2358 
2359         info!("Creating virtio-fs device: {:?}", fs_cfg);
2360 
2361         let mut node = device_node!(id);
2362 
2363         // Look for the id in the device tree. If it can be found, that means
2364         // the device is being restored, otherwise it's created from scratch.
2365         let cache_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2366             info!("Restoring virtio-fs {} resources", id);
2367 
2368             let mut cache_range: Option<(u64, u64)> = None;
2369             for resource in node.resources.iter() {
2370                 match resource {
2371                     Resource::MmioAddressRange { base, size } => {
2372                         if cache_range.is_some() {
2373                             return Err(DeviceManagerError::ResourceAlreadyExists);
2374                         }
2375 
2376                         cache_range = Some((*base, *size));
2377                     }
2378                     _ => {
2379                         error!("Unexpected resource {:?} for {}", resource, id);
2380                     }
2381                 }
2382             }
2383 
2384             cache_range
2385         } else {
2386             None
2387         };
2388 
2389         // DAX is not supported, so we override the config by disabling the option.
2390         fs_cfg.dax = false;
2391 
2392         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2393             let cache = if fs_cfg.dax {
2394                 let (cache_base, cache_size) = if let Some((base, size)) = cache_range {
2395                     // The memory needs to be 2MiB aligned in order to support
2396                     // hugepages.
2397                     self.pci_segments[fs_cfg.pci_segment as usize]
2398                         .allocator
2399                         .lock()
2400                         .unwrap()
2401                         .allocate(
2402                             Some(GuestAddress(base)),
2403                             size as GuestUsize,
2404                             Some(0x0020_0000),
2405                         )
2406                         .ok_or(DeviceManagerError::FsRangeAllocation)?;
2407 
2408                     (base, size)
2409                 } else {
2410                     let size = fs_cfg.cache_size;
2411                     // The memory needs to be 2MiB aligned in order to support
2412                     // hugepages.
2413                     let base = self.pci_segments[fs_cfg.pci_segment as usize]
2414                         .allocator
2415                         .lock()
2416                         .unwrap()
2417                         .allocate(None, size as GuestUsize, Some(0x0020_0000))
2418                         .ok_or(DeviceManagerError::FsRangeAllocation)?;
2419 
2420                     (base.raw_value(), size)
2421                 };
2422 
2423                 // Update the node with correct resource information.
2424                 node.resources.push(Resource::MmioAddressRange {
2425                     base: cache_base,
2426                     size: cache_size,
2427                 });
2428 
2429                 let mmap_region = MmapRegion::build(
2430                     None,
2431                     cache_size as usize,
2432                     libc::PROT_NONE,
2433                     libc::MAP_ANONYMOUS | libc::MAP_PRIVATE,
2434                 )
2435                 .map_err(DeviceManagerError::NewMmapRegion)?;
2436                 let host_addr: u64 = mmap_region.as_ptr() as u64;
2437 
2438                 let mem_slot = self
2439                     .memory_manager
2440                     .lock()
2441                     .unwrap()
2442                     .create_userspace_mapping(
2443                         cache_base, cache_size, host_addr, false, false, false,
2444                     )
2445                     .map_err(DeviceManagerError::MemoryManager)?;
2446 
2447                 let region_list = vec![VirtioSharedMemory {
2448                     offset: 0,
2449                     len: cache_size,
2450                 }];
2451 
2452                 Some((
2453                     VirtioSharedMemoryList {
2454                         host_addr,
2455                         mem_slot,
2456                         addr: GuestAddress(cache_base),
2457                         len: cache_size as GuestUsize,
2458                         region_list,
2459                     },
2460                     mmap_region,
2461                 ))
2462             } else {
2463                 None
2464             };
2465 
2466             let virtio_fs_device = Arc::new(Mutex::new(
2467                 virtio_devices::vhost_user::Fs::new(
2468                     id.clone(),
2469                     fs_socket,
2470                     &fs_cfg.tag,
2471                     fs_cfg.num_queues,
2472                     fs_cfg.queue_size,
2473                     cache,
2474                     self.seccomp_action.clone(),
2475                     self.restoring,
2476                     self.exit_evt
2477                         .try_clone()
2478                         .map_err(DeviceManagerError::EventFd)?,
2479                     self.force_iommu,
2480                 )
2481                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2482             ));
2483 
2484             // Update the device tree with the migratable device.
2485             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2486             self.device_tree.lock().unwrap().insert(id.clone(), node);
2487 
2488             Ok(MetaVirtioDevice {
2489                 virtio_device: Arc::clone(&virtio_fs_device)
2490                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2491                 iommu: false,
2492                 id,
2493                 pci_segment: fs_cfg.pci_segment,
2494                 dma_handler: None,
2495             })
2496         } else {
2497             Err(DeviceManagerError::NoVirtioFsSock)
2498         }
2499     }
2500 
2501     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2502         let mut devices = Vec::new();
2503 
2504         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2505         if let Some(fs_list_cfg) = &mut fs_devices {
2506             for fs_cfg in fs_list_cfg.iter_mut() {
2507                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2508             }
2509         }
2510         self.config.lock().unwrap().fs = fs_devices;
2511 
2512         Ok(devices)
2513     }
2514 
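    // virtio-pmem maps a host file (or an anonymous O_TMPFILE when a directory
    // plus an explicit size is given) into the guest physical address space.
    // The backing size must be a multiple of 2MiB and the region is allocated
    // 2MiB-aligned for hugepage friendliness; writes either reach the file
    // (MAP_SHARED) or are discarded (MAP_PRIVATE) depending on discard_writes.
    // Illustrative config (example values): --pmem file=/var/img/pmem.img,discard_writes=on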
2515     fn make_virtio_pmem_device(
2516         &mut self,
2517         pmem_cfg: &mut PmemConfig,
2518     ) -> DeviceManagerResult<MetaVirtioDevice> {
2519         let id = if let Some(id) = &pmem_cfg.id {
2520             id.clone()
2521         } else {
2522             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2523             pmem_cfg.id = Some(id.clone());
2524             id
2525         };
2526 
2527         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2528 
2529         let mut node = device_node!(id);
2530 
2531         // Look for the id in the device tree. If it can be found, that means
2532         // the device is being restored, otherwise it's created from scratch.
2533         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2534             info!("Restoring virtio-pmem {} resources", id);
2535 
2536             let mut region_range: Option<(u64, u64)> = None;
2537             for resource in node.resources.iter() {
2538                 match resource {
2539                     Resource::MmioAddressRange { base, size } => {
2540                         if region_range.is_some() {
2541                             return Err(DeviceManagerError::ResourceAlreadyExists);
2542                         }
2543 
2544                         region_range = Some((*base, *size));
2545                     }
2546                     _ => {
2547                         error!("Unexpected resource {:?} for {}", resource, id);
2548                     }
2549                 }
2550             }
2551 
2552             if region_range.is_none() {
2553                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2554             }
2555 
2556             region_range
2557         } else {
2558             None
2559         };
2560 
2561         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2562             if pmem_cfg.size.is_none() {
2563                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2564             }
2565             (O_TMPFILE, true)
2566         } else {
2567             (0, false)
2568         };
2569 
2570         let mut file = OpenOptions::new()
2571             .read(true)
2572             .write(!pmem_cfg.discard_writes)
2573             .custom_flags(custom_flags)
2574             .open(&pmem_cfg.file)
2575             .map_err(DeviceManagerError::PmemFileOpen)?;
2576 
2577         let size = if let Some(size) = pmem_cfg.size {
2578             if set_len {
2579                 file.set_len(size)
2580                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2581             }
2582             size
2583         } else {
2584             file.seek(SeekFrom::End(0))
2585                 .map_err(DeviceManagerError::PmemFileSetLen)?
2586         };
2587 
2588         if size % 0x20_0000 != 0 {
2589             return Err(DeviceManagerError::PmemSizeNotAligned);
2590         }
2591 
2592         let (region_base, region_size) = if let Some((base, size)) = region_range {
2593             // The memory needs to be 2MiB aligned in order to support
2594             // hugepages.
2595             self.pci_segments[pmem_cfg.pci_segment as usize]
2596                 .allocator
2597                 .lock()
2598                 .unwrap()
2599                 .allocate(
2600                     Some(GuestAddress(base)),
2601                     size as GuestUsize,
2602                     Some(0x0020_0000),
2603                 )
2604                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2605 
2606             (base, size)
2607         } else {
2608             // The memory needs to be 2MiB aligned in order to support
2609             // hugepages.
2610             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2611                 .allocator
2612                 .lock()
2613                 .unwrap()
2614                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
2615                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2616 
2617             (base.raw_value(), size)
2618         };
2619 
2620         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2621         let mmap_region = MmapRegion::build(
2622             Some(FileOffset::new(cloned_file, 0)),
2623             region_size as usize,
2624             PROT_READ | PROT_WRITE,
2625             MAP_NORESERVE
2626                 | if pmem_cfg.discard_writes {
2627                     MAP_PRIVATE
2628                 } else {
2629                     MAP_SHARED
2630                 },
2631         )
2632         .map_err(DeviceManagerError::NewMmapRegion)?;
2633         let host_addr: u64 = mmap_region.as_ptr() as u64;
2634 
2635         let mem_slot = self
2636             .memory_manager
2637             .lock()
2638             .unwrap()
2639             .create_userspace_mapping(
2640                 region_base,
2641                 region_size,
2642                 host_addr,
2643                 pmem_cfg.mergeable,
2644                 false,
2645                 false,
2646             )
2647             .map_err(DeviceManagerError::MemoryManager)?;
2648 
2649         let mapping = virtio_devices::UserspaceMapping {
2650             host_addr,
2651             mem_slot,
2652             addr: GuestAddress(region_base),
2653             len: region_size,
2654             mergeable: pmem_cfg.mergeable,
2655         };
2656 
2657         let virtio_pmem_device = Arc::new(Mutex::new(
2658             virtio_devices::Pmem::new(
2659                 id.clone(),
2660                 file,
2661                 GuestAddress(region_base),
2662                 mapping,
2663                 mmap_region,
2664                 self.force_iommu | pmem_cfg.iommu,
2665                 self.seccomp_action.clone(),
2666                 self.exit_evt
2667                     .try_clone()
2668                     .map_err(DeviceManagerError::EventFd)?,
2669             )
2670             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2671         ));
2672 
2673         // Update the device tree with correct resource information and with
2674         // the migratable device.
2675         node.resources.push(Resource::MmioAddressRange {
2676             base: region_base,
2677             size: region_size,
2678         });
2679         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2680         self.device_tree.lock().unwrap().insert(id.clone(), node);
2681 
2682         Ok(MetaVirtioDevice {
2683             virtio_device: Arc::clone(&virtio_pmem_device)
2684                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2685             iommu: pmem_cfg.iommu,
2686             id,
2687             pci_segment: pmem_cfg.pci_segment,
2688             dma_handler: None,
2689         })
2690     }
2691 
2692     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2693         let mut devices = Vec::new();
2694         // Add virtio-pmem if required
2695         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2696         if let Some(pmem_list_cfg) = &mut pmem_devices {
2697             for pmem_cfg in pmem_list_cfg.iter_mut() {
2698                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2699             }
2700         }
2701         self.config.lock().unwrap().pmem = pmem_devices;
2702 
2703         Ok(devices)
2704     }
2705 
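    // virtio-vsock bridges guest AF_VSOCK traffic to a host UNIX socket via
    // VsockUnixBackend. Illustrative config (example values):
    // --vsock cid=3,socket=/tmp/ch.vsock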
2706     fn make_virtio_vsock_device(
2707         &mut self,
2708         vsock_cfg: &mut VsockConfig,
2709     ) -> DeviceManagerResult<MetaVirtioDevice> {
2710         let id = if let Some(id) = &vsock_cfg.id {
2711             id.clone()
2712         } else {
2713             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2714             vsock_cfg.id = Some(id.clone());
2715             id
2716         };
2717 
2718         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2719 
2720         let socket_path = vsock_cfg
2721             .socket
2722             .to_str()
2723             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2724         let backend =
2725             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2726                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2727 
2728         let vsock_device = Arc::new(Mutex::new(
2729             virtio_devices::Vsock::new(
2730                 id.clone(),
2731                 vsock_cfg.cid,
2732                 vsock_cfg.socket.clone(),
2733                 backend,
2734                 self.force_iommu | vsock_cfg.iommu,
2735                 self.seccomp_action.clone(),
2736                 self.exit_evt
2737                     .try_clone()
2738                     .map_err(DeviceManagerError::EventFd)?,
2739             )
2740             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2741         ));
2742 
2743         // Fill the device tree with a new node. In case of restore, we
2744         // know there is nothing to do, so we can simply override the
2745         // existing entry.
2746         self.device_tree
2747             .lock()
2748             .unwrap()
2749             .insert(id.clone(), device_node!(id, vsock_device));
2750 
2751         Ok(MetaVirtioDevice {
2752             virtio_device: Arc::clone(&vsock_device)
2753                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2754             iommu: vsock_cfg.iommu,
2755             id,
2756             pci_segment: vsock_cfg.pci_segment,
2757             dma_handler: None,
2758         })
2759     }
2760 
2761     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2762         let mut devices = Vec::new();
2763 
2764         let mut vsock = self.config.lock().unwrap().vsock.clone();
2765         if let Some(ref mut vsock_cfg) = &mut vsock {
2766             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
2767         }
2768         self.config.lock().unwrap().vsock = vsock;
2769 
2770         Ok(devices)
2771     }
2772 
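    // One virtio-mem device is created per memory zone that carries a
    // virtio-mem region, wiring up the zone's resize handler and, when it can
    // be resolved, its NUMA node id, so memory can be hot(un)plugged within
    // that zone.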
2773     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2774         let mut devices = Vec::new();
2775 
2776         let mm = self.memory_manager.clone();
2777         let mm = mm.lock().unwrap();
2778         for (memory_zone_id, memory_zone) in mm.memory_zones().iter() {
2779             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() {
2780                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
2781 
2782                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
2783                     .map(|i| i as u16);
2784 
2785                 let virtio_mem_device = Arc::new(Mutex::new(
2786                     virtio_devices::Mem::new(
2787                         memory_zone_id.clone(),
2788                         virtio_mem_zone.region(),
2789                         virtio_mem_zone
2790                             .resize_handler()
2791                             .new_resize_sender()
2792                             .map_err(DeviceManagerError::CreateResizeSender)?,
2793                         self.seccomp_action.clone(),
2794                         node_id,
2795                         virtio_mem_zone.hotplugged_size(),
2796                         virtio_mem_zone.hugepages(),
2797                         self.exit_evt
2798                             .try_clone()
2799                             .map_err(DeviceManagerError::EventFd)?,
2800                         virtio_mem_zone.blocks_state().clone(),
2801                     )
2802                     .map_err(DeviceManagerError::CreateVirtioMem)?,
2803                 ));
2804 
2805                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
2806 
2807                 devices.push(MetaVirtioDevice {
2808                     virtio_device: Arc::clone(&virtio_mem_device)
2809                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2810                     iommu: false,
2811                     id: memory_zone_id.clone(),
2812                     pci_segment: 0,
2813                     dma_handler: None,
2814                 });
2815 
2816                 // Fill the device tree with a new node. In case of restore, we
2817                 // know there is nothing to do, so we can simply override the
2818                 // existing entry.
2819                 self.device_tree.lock().unwrap().insert(
2820                     memory_zone_id.clone(),
2821                     device_node!(memory_zone_id, virtio_mem_device),
2822                 );
2823             }
2824         }
2825 
2826         Ok(devices)
2827     }
2828 
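    // virtio-balloon is only created when a balloon section is present in the
    // config; it carries the initial size plus the deflate-on-OOM and free
    // page reporting toggles, and a handle is kept in self.balloon (presumably
    // for later resize requests). Illustrative config (example values):
    // --balloon size=1G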
2829     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2830         let mut devices = Vec::new();
2831 
2832         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
2833             let id = String::from(BALLOON_DEVICE_NAME);
2834             info!("Creating virtio-balloon device: id = {}", id);
2835 
2836             let virtio_balloon_device = Arc::new(Mutex::new(
2837                 virtio_devices::Balloon::new(
2838                     id.clone(),
2839                     balloon_config.size,
2840                     balloon_config.deflate_on_oom,
2841                     balloon_config.free_page_reporting,
2842                     self.seccomp_action.clone(),
2843                     self.exit_evt
2844                         .try_clone()
2845                         .map_err(DeviceManagerError::EventFd)?,
2846                 )
2847                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
2848             ));
2849 
2850             self.balloon = Some(virtio_balloon_device.clone());
2851 
2852             devices.push(MetaVirtioDevice {
2853                 virtio_device: Arc::clone(&virtio_balloon_device)
2854                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2855                 iommu: false,
2856                 id: id.clone(),
2857                 pci_segment: 0,
2858                 dma_handler: None,
2859             });
2860 
2861             self.device_tree
2862                 .lock()
2863                 .unwrap()
2864                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
2865         }
2866 
2867         Ok(devices)
2868     }
2869 
2870     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2871         let mut devices = Vec::new();
2872 
2873         if !self.config.lock().unwrap().watchdog {
2874             return Ok(devices);
2875         }
2876 
2877         let id = String::from(WATCHDOG_DEVICE_NAME);
2878         info!("Creating virtio-watchdog device: id = {}", id);
2879 
2880         let virtio_watchdog_device = Arc::new(Mutex::new(
2881             virtio_devices::Watchdog::new(
2882                 id.clone(),
2883                 self.reset_evt.try_clone().unwrap(),
2884                 self.seccomp_action.clone(),
2885                 self.exit_evt
2886                     .try_clone()
2887                     .map_err(DeviceManagerError::EventFd)?,
2888             )
2889             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
2890         ));
2891         devices.push(MetaVirtioDevice {
2892             virtio_device: Arc::clone(&virtio_watchdog_device)
2893                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2894             iommu: false,
2895             id: id.clone(),
2896             pci_segment: 0,
2897             dma_handler: None,
2898         });
2899 
2900         self.device_tree
2901             .lock()
2902             .unwrap()
2903             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
2904 
2905         Ok(devices)
2906     }
2907 
2908     fn make_vdpa_device(
2909         &mut self,
2910         vdpa_cfg: &mut VdpaConfig,
2911     ) -> DeviceManagerResult<MetaVirtioDevice> {
2912         let id = if let Some(id) = &vdpa_cfg.id {
2913             id.clone()
2914         } else {
2915             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
2916             vdpa_cfg.id = Some(id.clone());
2917             id
2918         };
2919 
2920         info!("Creating vDPA device: {:?}", vdpa_cfg);
2921 
2922         let device_path = vdpa_cfg
2923             .path
2924             .to_str()
2925             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
2926 
2927         let vdpa_device = Arc::new(Mutex::new(
2928             virtio_devices::Vdpa::new(
2929                 id.clone(),
2930                 device_path,
2931                 self.memory_manager.lock().unwrap().guest_memory(),
2932                 vdpa_cfg.num_queues as u16,
2933             )
2934             .map_err(DeviceManagerError::CreateVdpa)?,
2935         ));
2936 
2937         // Create the DMA handler that is required by the vDPA device
2938         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
2939             Arc::clone(&vdpa_device),
2940             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
2941         ));
2942 
2943         self.device_tree
2944             .lock()
2945             .unwrap()
2946             .insert(id.clone(), device_node!(id));
2947 
2948         Ok(MetaVirtioDevice {
2949             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2950             iommu: vdpa_cfg.iommu,
2951             id,
2952             pci_segment: vdpa_cfg.pci_segment,
2953             dma_handler: Some(vdpa_mapping),
2954         })
2955     }
2956 
2957     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2958         let mut devices = Vec::new();
2959         // Add vdpa if required
2960         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
2961         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
2962             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
2963                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
2964             }
2965         }
2966         self.config.lock().unwrap().vdpa = vdpa_devices;
2967 
2968         Ok(devices)
2969     }
2970 
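         // A short sketch of the ID allocator below: names are generated as
         // "<prefix><counter>", skipping any name already present in the device
         // tree, and the lookup only fails once the counter wraps back to its
         // starting value. Illustrative call (the prefix is an example):
         //
         //     let id = self.next_device_name("_disk")?; // e.g. "_disk0", "_disk1", ...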
2971     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
2972         let start_id = self.device_id_cnt;
2973         loop {
2974             // Generate the temporary name.
2975             let name = format!("{}{}", prefix, self.device_id_cnt);
2976             // Increment the counter.
2977             self.device_id_cnt += Wrapping(1);
2978             // Check if the name is already in use.
2979             if !self.device_tree.lock().unwrap().contains_key(&name) {
2980                 return Ok(name);
2981             }
2982 
2983             if self.device_id_cnt == start_id {
2984                 // We went through a full loop and there's nothing else we can
2985                 // do.
2986                 break;
2987             }
2988         }
2989         Err(DeviceManagerError::NoAvailableDeviceName)
2990     }
2991 
2992     fn add_passthrough_device(
2993         &mut self,
2994         device_cfg: &mut DeviceConfig,
2995     ) -> DeviceManagerResult<(PciBdf, String)> {
2996         // If the passthrough device has not been created yet, it is created
2997         // here and stored in the DeviceManager structure for future needs.
2998         if self.passthrough_device.is_none() {
2999             self.passthrough_device = Some(
3000                 self.address_manager
3001                     .vm
3002                     .create_passthrough_device()
3003                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
3004             );
3005         }
3006 
3007         self.add_vfio_device(device_cfg)
3008     }
3009 
3010     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
3011         let passthrough_device = self
3012             .passthrough_device
3013             .as_ref()
3014             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
3015 
3016         // Safe because we know the RawFd is valid.
3017         //
3018         // This dup() is mandatory to be able to give full ownership of the
3019         // file descriptor to the DeviceFd::from_raw_fd() function later in
3020         // the code.
3021         //
3022         // This is particularly needed so that VfioContainer will still have
3023         // a valid file descriptor even if DeviceManager, and therefore the
3024         // passthrough_device, are dropped. In case of Drop, the file descriptor
3025         // would be closed, but Linux would still have the duplicated file
3026         // descriptor opened from DeviceFd, preventing unexpected behavior
3027         // where the VfioContainer would try to use a closed file descriptor.
3028         let dup_device_fd = unsafe { libc::dup(passthrough_device.as_raw_fd()) };
3029         if dup_device_fd == -1 {
3030             return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
3031         }
3032 
3033         // SAFETY: the raw fd conversion here is safe because:
3034         //   1. When running on KVM or MSHV, passthrough_device wraps around DeviceFd.
3035         //   2. The conversion here extracts the raw fd and then turns the raw fd into a DeviceFd
3036         //      of the same (correct) type.
3037         Ok(Arc::new(
3038             VfioContainer::new(Arc::new(unsafe { DeviceFd::from_raw_fd(dup_device_fd) }))
3039                 .map_err(DeviceManagerError::VfioCreate)?,
3040         ))
3041     }
3042 
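         // Summary of the VFIO passthrough path below: pick a BDF on the
         // requested segment, select or create a VFIO container, set up DMA
         // mappings (either directly or through the vIOMMU), wire up the
         // legacy/MSI interrupts, then register the device on the PCI bus and
         // in the device tree.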
3043     fn add_vfio_device(
3044         &mut self,
3045         device_cfg: &mut DeviceConfig,
3046     ) -> DeviceManagerResult<(PciBdf, String)> {
3047         let pci_segment_id = device_cfg.pci_segment;
3048         let pci_device_bdf = self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3049 
3050         let mut needs_dma_mapping = false;
3051 
3052         // Here we create a new VFIO container for two reasons. Either this is
3053         // the first VFIO device, meaning we need a new VFIO container, which
3054         // will be shared with other VFIO devices. Or the new VFIO device is
3055         // attached to a vIOMMU, meaning we must create a dedicated VFIO
3056         // container. In the vIOMMU use case, we can't put all devices under
3057         // the same VFIO container since we couldn't map/unmap memory for each
3058         // device individually. That's because the map/unmap operations happen
3059         // VFIO container level.
3060         let vfio_container = if device_cfg.iommu {
3061             let vfio_container = self.create_vfio_container()?;
3062 
3063             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3064                 Arc::clone(&vfio_container),
3065                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3066             ));
3067 
3068             if let Some(iommu) = &self.iommu_device {
3069                 iommu
3070                     .lock()
3071                     .unwrap()
3072                     .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
3073             } else {
3074                 return Err(DeviceManagerError::MissingVirtualIommu);
3075             }
3076 
3077             vfio_container
3078         } else if let Some(vfio_container) = &self.vfio_container {
3079             Arc::clone(vfio_container)
3080         } else {
3081             let vfio_container = self.create_vfio_container()?;
3082             needs_dma_mapping = true;
3083             self.vfio_container = Some(Arc::clone(&vfio_container));
3084 
3085             vfio_container
3086         };
3087 
3088         let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
3089             .map_err(DeviceManagerError::VfioCreate)?;
3090 
3091         if needs_dma_mapping {
3092             // Register DMA mapping in IOMMU.
3093             // Do not register virtio-mem regions, as they are handled directly by
3094             // the virtio-mem device itself.
3095             for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3096                 for region in zone.regions() {
3097                     vfio_container
3098                         .vfio_dma_map(
3099                             region.start_addr().raw_value(),
3100                             region.len() as u64,
3101                             region.as_ptr() as u64,
3102                         )
3103                         .map_err(DeviceManagerError::VfioDmaMap)?;
3104                 }
3105             }
3106 
3107             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3108                 Arc::clone(&vfio_container),
3109                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3110             ));
3111 
3112             for virtio_mem_device in self.virtio_mem_devices.iter() {
3113                 virtio_mem_device
3114                     .lock()
3115                     .unwrap()
3116                     .add_dma_mapping_handler(
3117                         VirtioMemMappingSource::Container,
3118                         vfio_mapping.clone(),
3119                     )
3120                     .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3121             }
3122         }
3123 
3124         let legacy_interrupt_group =
3125             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3126                 Some(
3127                     legacy_interrupt_manager
3128                         .create_group(LegacyIrqGroupConfig {
3129                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3130                                 [pci_device_bdf.device() as usize]
3131                                 as InterruptIndex,
3132                         })
3133                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3134                 )
3135             } else {
3136                 None
3137             };
3138 
3139         let vfio_pci_device = VfioPciDevice::new(
3140             &self.address_manager.vm,
3141             vfio_device,
3142             vfio_container,
3143             &self.msi_interrupt_manager,
3144             legacy_interrupt_group,
3145             device_cfg.iommu,
3146             pci_device_bdf,
3147         )
3148         .map_err(DeviceManagerError::VfioPciCreate)?;
3149 
3150         let vfio_name = if let Some(id) = &device_cfg.id {
3151             if self.device_tree.lock().unwrap().contains_key(id) {
3152                 return Err(DeviceManagerError::DeviceIdAlreadyInUse);
3153             }
3154 
3155             id.clone()
3156         } else {
3157             let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
3158             device_cfg.id = Some(id.clone());
3159             id
3160         };
3161 
3162         let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));
3163 
3164         self.add_pci_device(
3165             vfio_pci_device.clone(),
3166             vfio_pci_device.clone(),
3167             pci_segment_id,
3168             pci_device_bdf,
3169         )?;
3170 
3171         vfio_pci_device
3172             .lock()
3173             .unwrap()
3174             .map_mmio_regions(&self.address_manager.vm, || {
3175                 self.memory_manager.lock().unwrap().allocate_memory_slot()
3176             })
3177             .map_err(DeviceManagerError::VfioMapRegion)?;
3178 
3179         let mut node = device_node!(vfio_name);
3180 
3181         for region in vfio_pci_device.lock().unwrap().mmio_regions() {
3182             node.resources.push(Resource::MmioAddressRange {
3183                 base: region.start.0,
3184                 size: region.length as u64,
3185             });
3186         }
3187 
3188         node.pci_bdf = Some(pci_device_bdf);
3189         node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));
3190 
3191         self.device_tree
3192             .lock()
3193             .unwrap()
3194             .insert(vfio_name.clone(), node);
3195 
3196         Ok((pci_device_bdf, vfio_name))
3197     }
3198 
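         // Helper shared by the VFIO, vfio-user and virtio-pci paths: it
         // allocates the device's BARs from the system and per-segment
         // allocators, adds the device to the PCI bus, and registers its BAR
         // ranges on the I/O and MMIO buses. The returned vector describes
         // each BAR as (base address, size, region type).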
3199     fn add_pci_device(
3200         &mut self,
3201         bus_device: Arc<Mutex<dyn BusDevice>>,
3202         pci_device: Arc<Mutex<dyn PciDevice>>,
3203         segment_id: u16,
3204         bdf: PciBdf,
3205     ) -> DeviceManagerResult<Vec<(GuestAddress, GuestUsize, PciBarRegionType)>> {
3206         let bars = pci_device
3207             .lock()
3208             .unwrap()
3209             .allocate_bars(
3210                 &self.address_manager.allocator,
3211                 &mut self.pci_segments[segment_id as usize]
3212                     .allocator
3213                     .lock()
3214                     .unwrap(),
3215             )
3216             .map_err(DeviceManagerError::AllocateBars)?;
3217 
3218         let mut pci_bus = self.pci_segments[segment_id as usize]
3219             .pci_bus
3220             .lock()
3221             .unwrap();
3222 
3223         pci_bus
3224             .add_device(bdf.device() as u32, pci_device)
3225             .map_err(DeviceManagerError::AddPciDevice)?;
3226 
3227         self.bus_devices.push(Arc::clone(&bus_device));
3228 
3229         pci_bus
3230             .register_mapping(
3231                 bus_device,
3232                 #[cfg(target_arch = "x86_64")]
3233                 self.address_manager.io_bus.as_ref(),
3234                 self.address_manager.mmio_bus.as_ref(),
3235                 bars.clone(),
3236             )
3237             .map_err(DeviceManagerError::AddPciDevice)?;
3238 
3239         Ok(bars)
3240     }
3241 
3242     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3243         let mut iommu_attached_device_ids = Vec::new();
3244         let mut devices = self.config.lock().unwrap().devices.clone();
3245 
3246         if let Some(device_list_cfg) = &mut devices {
3247             for device_cfg in device_list_cfg.iter_mut() {
3248                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3249                 if device_cfg.iommu && self.iommu_device.is_some() {
3250                     iommu_attached_device_ids.push(device_id);
3251                 }
3252             }
3253         }
3254 
3255         // Update the list of devices
3256         self.config.lock().unwrap().devices = devices;
3257 
3258         Ok(iommu_attached_device_ids)
3259     }
3260 
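         // vfio-user follows the same overall flow as add_vfio_device(), but
         // the device is served by a separate user-space process reached over
         // the configured socket, so DMA mappings are handled through the
         // vfio-user client rather than a kernel VFIO container.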
3261     fn add_vfio_user_device(
3262         &mut self,
3263         device_cfg: &mut UserDeviceConfig,
3264     ) -> DeviceManagerResult<(PciBdf, String)> {
3265         let pci_segment_id = device_cfg.pci_segment;
3266         let pci_device_bdf = self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3267 
3268         let legacy_interrupt_group =
3269             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3270                 Some(
3271                     legacy_interrupt_manager
3272                         .create_group(LegacyIrqGroupConfig {
3273                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3274                                 [pci_device_bdf.device() as usize]
3275                                 as InterruptIndex,
3276                         })
3277                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3278                 )
3279             } else {
3280                 None
3281             };
3282 
3283         let client = Arc::new(Mutex::new(
3284             vfio_user::Client::new(&device_cfg.socket)
3285                 .map_err(DeviceManagerError::VfioUserCreateClient)?,
3286         ));
3287 
3288         let mut vfio_user_pci_device = VfioUserPciDevice::new(
3289             &self.address_manager.vm,
3290             client.clone(),
3291             &self.msi_interrupt_manager,
3292             legacy_interrupt_group,
3293             pci_device_bdf,
3294         )
3295         .map_err(DeviceManagerError::VfioUserCreate)?;
3296 
3297         vfio_user_pci_device
3298             .map_mmio_regions(&self.address_manager.vm, || {
3299                 self.memory_manager.lock().unwrap().allocate_memory_slot()
3300             })
3301             .map_err(DeviceManagerError::VfioUserMapRegion)?;
3302 
3303         let memory = self.memory_manager.lock().unwrap().guest_memory();
3304         let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
3305         for virtio_mem_device in self.virtio_mem_devices.iter() {
3306             virtio_mem_device
3307                 .lock()
3308                 .unwrap()
3309                 .add_dma_mapping_handler(
3310                     VirtioMemMappingSource::Device(pci_device_bdf.into()),
3311                     vfio_user_mapping.clone(),
3312                 )
3313                 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3314         }
3315 
3316         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3317             for region in zone.regions() {
3318                 vfio_user_pci_device
3319                     .dma_map(region)
3320                     .map_err(DeviceManagerError::VfioUserDmaMap)?;
3321             }
3322         }
3323 
3324         let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));
3325 
3326         let vfio_user_name = if let Some(id) = &device_cfg.id {
3327             if self.device_tree.lock().unwrap().contains_key(id) {
3328                 return Err(DeviceManagerError::DeviceIdAlreadyInUse);
3329             }
3330 
3331             id.clone()
3332         } else {
3333             let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
3334             device_cfg.id = Some(id.clone());
3335             id
3336         };
3337 
3338         self.add_pci_device(
3339             vfio_user_pci_device.clone(),
3340             vfio_user_pci_device.clone(),
3341             pci_segment_id,
3342             pci_device_bdf,
3343         )?;
3344 
3345         let mut node = device_node!(vfio_user_name);
3346 
3347         node.pci_bdf = Some(pci_device_bdf);
3348         node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));
3349 
3350         self.device_tree
3351             .lock()
3352             .unwrap()
3353             .insert(vfio_user_name.clone(), node);
3354 
3355         Ok((pci_device_bdf, vfio_user_name))
3356     }
3357 
3358     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3359         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3360 
3361         if let Some(device_list_cfg) = &mut user_devices {
3362             for device_cfg in device_list_cfg.iter_mut() {
3363                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3364             }
3365         }
3366 
3367         // Update the list of devices
3368         self.config.lock().unwrap().user_devices = user_devices;
3369 
3370         Ok(vec![])
3371     }
3372 
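         // Wraps a virtio device into its virtio-pci transport. If a node named
         // "<VIRTIO_PCI_DEVICE_NAME_PREFIX>-<id>" already exists in the device
         // tree this is a restore, so the previous BDF and config BAR address
         // are reused; otherwise a fresh BDF is allocated on the requested
         // segment.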
3373     fn add_virtio_pci_device(
3374         &mut self,
3375         virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3376         iommu_mapping: &Option<Arc<IommuMapping>>,
3377         virtio_device_id: String,
3378         pci_segment_id: u16,
3379         dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
3380     ) -> DeviceManagerResult<PciBdf> {
3381         let id = format!("{}-{}", VIRTIO_PCI_DEVICE_NAME_PREFIX, virtio_device_id);
3382 
3383         // Create the new virtio-pci node that will be added to the device tree.
3384         let mut node = device_node!(id);
3385         node.children = vec![virtio_device_id.clone()];
3386 
3387         // Look for the id in the device tree. If it can be found, that means
3388         // the device is being restored, otherwise it's created from scratch.
3389         let (pci_segment_id, pci_device_bdf, config_bar_addr) = if let Some(node) =
3390             self.device_tree.lock().unwrap().get(&id)
3391         {
3392             info!("Restoring virtio-pci {} resources", id);
3393             let pci_device_bdf: PciBdf = node
3394                 .pci_bdf
3395                 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3396             let pci_segment_id = pci_device_bdf.segment();
3397 
3398             self.pci_segments[pci_segment_id as usize]
3399                 .pci_bus
3400                 .lock()
3401                 .unwrap()
3402                 .get_device_id(pci_device_bdf.device() as usize)
3403                 .map_err(DeviceManagerError::GetPciDeviceId)?;
3404 
3405             if node.resources.is_empty() {
3406                 return Err(DeviceManagerError::MissingVirtioPciResources);
3407             }
3408 
3409             // We know the configuration BAR address is stored as the first
3410             // resource in the list.
3411             let config_bar_addr = match node.resources[0] {
3412                 Resource::MmioAddressRange { base, .. } => Some(base),
3413                 _ => {
3414                     error!("Unexpected resource {:?} for {}", node.resources[0], id);
3415                     return Err(DeviceManagerError::MissingVirtioPciResources);
3416                 }
3417             };
3418 
3419             (pci_segment_id, pci_device_bdf, config_bar_addr)
3420         } else {
3421             let pci_device_bdf = self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3422 
3423             (pci_segment_id, pci_device_bdf, None)
3424         };
3425 
3426         // Update the existing virtio node by setting the parent.
3427         if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
3428             node.parent = Some(id.clone());
3429         } else {
3430             return Err(DeviceManagerError::MissingNode);
3431         }
3432         // Allocate one MSI-X vector per queue, plus one additional vector
3433         // dedicated to notifying the guest about a virtio configuration
3434         // change.
3435         // about a virtio config change.
3436         let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;
3437 
3438         // Create an AccessPlatform instance backed by the IommuMapping
3439         // implementation. It provides address translation for any virtio device
3440         // sitting behind a vIOMMU.
3441         let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
3442         {
3443             Some(Arc::new(AccessPlatformMapping::new(
3444                 pci_device_bdf.into(),
3445                 mapping.clone(),
3446             )))
3447         } else {
3448             None
3449         };
3450 
3451         let memory = self.memory_manager.lock().unwrap().guest_memory();
3452 
3453         // If a DMA handler is available, either register it with the virtual
3454         // IOMMU (when the device sits behind one) or map the DMA ranges directly.
3455         if let Some(dma_handler) = &dma_handler {
3456             if iommu_mapping.is_some() {
3457                 if let Some(iommu) = &self.iommu_device {
3458                     iommu
3459                         .lock()
3460                         .unwrap()
3461                         .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
3462                 } else {
3463                     return Err(DeviceManagerError::MissingVirtualIommu);
3464                 }
3465             } else {
3466                 // Let every virtio-mem device handle the DMA map/unmap through the
3467                 // DMA handler provided.
3468                 for virtio_mem_device in self.virtio_mem_devices.iter() {
3469                     virtio_mem_device
3470                         .lock()
3471                         .unwrap()
3472                         .add_dma_mapping_handler(
3473                             VirtioMemMappingSource::Device(pci_device_bdf.into()),
3474                             dma_handler.clone(),
3475                         )
3476                         .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3477                 }
3478 
3479                 // Do not register virtio-mem regions, as they are handled directly by
3480                 // virtio-mem devices.
3481                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3482                     for region in zone.regions() {
3483                         let gpa = region.start_addr().0;
3484                         let size = region.len();
3485                         dma_handler
3486                             .map(gpa, gpa, size)
3487                             .map_err(DeviceManagerError::VirtioDmaMap)?;
3488                     }
3489                 }
3490             }
3491         }
3492 
3493         let device_type = virtio_device.lock().unwrap().device_type();
3494         let mut virtio_pci_device = VirtioPciDevice::new(
3495             id.clone(),
3496             memory,
3497             virtio_device,
3498             msix_num,
3499             access_platform,
3500             &self.msi_interrupt_manager,
3501             pci_device_bdf.into(),
3502             self.activate_evt
3503                 .try_clone()
3504                 .map_err(DeviceManagerError::EventFd)?,
3505             // All device types *except* virtio block devices should be allocated a 64-bit BAR.
3506             // Block devices are given a 32-bit BAR so that they are easily accessible
3507             // to firmware without requiring excessive identity mapping.
3508             // The exception is when the device is not on the default PCI segment.
3509             pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
3510             dma_handler,
3511         )
3512         .map_err(DeviceManagerError::VirtioDevice)?;
3513 
3514         // This is important as this will set the BAR address if it exists,
3515         // which is mandatory on the restore path.
3516         if let Some(addr) = config_bar_addr {
3517             virtio_pci_device.set_config_bar_addr(addr);
3518         }
3519 
3520         let virtio_pci_device = Arc::new(Mutex::new(virtio_pci_device));
3521         let bars = self.add_pci_device(
3522             virtio_pci_device.clone(),
3523             virtio_pci_device.clone(),
3524             pci_segment_id,
3525             pci_device_bdf,
3526         )?;
3527 
3528         let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3529         for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3530             let io_addr = IoEventAddress::Mmio(addr);
3531             self.address_manager
3532                 .vm
3533                 .register_ioevent(event, &io_addr, None)
3534                 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
3535         }
3536 
3537         // Update the device tree with correct resource information.
3538         for pci_bar in bars.iter() {
3539             node.resources.push(Resource::MmioAddressRange {
3540                 base: pci_bar.0.raw_value(),
3541                 size: pci_bar.1 as u64,
3542             });
3543         }
3544         node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
3545         node.pci_bdf = Some(pci_device_bdf);
3546         node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
3547         self.device_tree.lock().unwrap().insert(id, node);
3548 
3549         Ok(pci_device_bdf)
3550     }
3551 
3552     #[cfg(target_arch = "x86_64")]
3553     pub fn io_bus(&self) -> &Arc<Bus> {
3554         &self.address_manager.io_bus
3555     }
3556 
3557     pub fn mmio_bus(&self) -> &Arc<Bus> {
3558         &self.address_manager.mmio_bus
3559     }
3560 
3561     pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
3562         &self.address_manager.allocator
3563     }
3564 
3565     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3566         self.interrupt_controller
3567             .as_ref()
3568             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3569     }
3570 
3571     #[cfg(target_arch = "x86_64")]
3572     // Used to provide a fast path for handling PIO exits
3573     pub fn pci_config_io(&self) -> Arc<Mutex<PciConfigIo>> {
3574         Arc::clone(self.pci_segments[0].pci_config_io.as_ref().unwrap())
3575     }
3576 
3577     pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
3578         &self.pci_segments
3579     }
3580 
3581     pub fn console(&self) -> &Arc<Console> {
3582         &self.console
3583     }
3584 
3585     pub fn cmdline_additions(&self) -> &[String] {
3586         self.cmdline_additions.as_slice()
3587     }
3588 
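         // Called when a new guest memory region appears (e.g. memory hotplug):
         // every virtio device is told about the region, and the region is also
         // DMA-mapped for the shared VFIO container and for each vfio-user
         // device so passthrough devices keep a complete view of guest memory.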
3589     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3590         for handle in self.virtio_devices.iter() {
3591             handle
3592                 .virtio_device
3593                 .lock()
3594                 .unwrap()
3595                 .add_memory_region(new_region)
3596                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3597 
3598             if let Some(dma_handler) = &handle.dma_handler {
3599                 if !handle.iommu {
3600                     let gpa = new_region.start_addr().0;
3601                     let size = new_region.len();
3602                     dma_handler
3603                         .map(gpa, gpa, size)
3604                         .map_err(DeviceManagerError::VirtioDmaMap)?;
3605                 }
3606             }
3607         }
3608 
3609         // Take care of updating the memory for VFIO PCI devices.
3610         if let Some(vfio_container) = &self.vfio_container {
3611             vfio_container
3612                 .vfio_dma_map(
3613                     new_region.start_addr().raw_value(),
3614                     new_region.len() as u64,
3615                     new_region.as_ptr() as u64,
3616                 )
3617                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3618         }
3619 
3620         // Take care of updating the memory for vfio-user devices.
3621         {
3622             let device_tree = self.device_tree.lock().unwrap();
3623             for pci_device_node in device_tree.pci_devices() {
3624                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
3625                     .pci_device_handle
3626                     .as_ref()
3627                     .ok_or(DeviceManagerError::MissingPciDevice)?
3628                 {
3629                     vfio_user_pci_device
3630                         .lock()
3631                         .unwrap()
3632                         .dma_map(new_region)
3633                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
3634                 }
3635             }
3636         }
3637 
3638         Ok(())
3639     }
3640 
3641     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3642         // Find virtio pci devices and activate any pending ones
3643         let device_tree = self.device_tree.lock().unwrap();
3644         for pci_device_node in device_tree.pci_devices() {
3645             #[allow(irrefutable_let_patterns)]
3646             if let PciDeviceHandle::Virtio(virtio_pci_device) = &pci_device_node
3647                 .pci_device_handle
3648                 .as_ref()
3649                 .ok_or(DeviceManagerError::MissingPciDevice)?
3650             {
3651                 virtio_pci_device.lock().unwrap().maybe_activate();
3652             }
3653         }
3654         Ok(())
3655     }
3656 
3657     pub fn notify_hotplug(
3658         &self,
3659         _notification_type: AcpiNotificationFlags,
3660     ) -> DeviceManagerResult<()> {
3661         return self
3662             .ged_notification_device
3663             .as_ref()
3664             .unwrap()
3665             .lock()
3666             .unwrap()
3667             .notify(_notification_type)
3668             .map_err(DeviceManagerError::HotPlugNotification);
3669     }
3670 
3671     pub fn add_device(
3672         &mut self,
3673         device_cfg: &mut DeviceConfig,
3674     ) -> DeviceManagerResult<PciDeviceInfo> {
3675         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
3676             return Err(DeviceManagerError::InvalidIommuHotplug);
3677         }
3678 
3679         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
3680 
3681         // Update the PCIU bitmap
3682         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3683 
3684         Ok(PciDeviceInfo {
3685             id: device_name,
3686             bdf,
3687         })
3688     }
3689 
3690     pub fn add_user_device(
3691         &mut self,
3692         device_cfg: &mut UserDeviceConfig,
3693     ) -> DeviceManagerResult<PciDeviceInfo> {
3694         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
3695 
3696         // Update the PCIU bitmap
3697         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3698 
3699         Ok(PciDeviceInfo {
3700             id: device_name,
3701             bdf,
3702         })
3703     }
3704 
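         // First half of hot-unplug: validate the request and flag the device
         // in the PCID ("devices down") bitmap. The actual teardown happens in
         // eject_device() once the guest acknowledges the ejection by writing
         // the device bit into the B0EJ field of the ACPI hotplug controller.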
3705     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
3706         // The node can be directly a PCI node in case the 'id' refers to a
3707         // VFIO device or a virtio-pci one.
3708         // In case the 'id' refers to a virtio device, we must find the PCI
3709         // node by looking at the parent.
3710         let device_tree = self.device_tree.lock().unwrap();
3711         let node = device_tree
3712             .get(&id)
3713             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
3714 
3715         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
3716             node
3717         } else {
3718             let parent = node
3719                 .parent
3720                 .as_ref()
3721                 .ok_or(DeviceManagerError::MissingNode)?;
3722             device_tree
3723                 .get(parent)
3724                 .ok_or(DeviceManagerError::MissingNode)?
3725         };
3726 
3727         let pci_device_bdf: PciBdf = pci_device_node
3728             .pci_bdf
3729             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3730         let pci_segment_id = pci_device_bdf.segment();
3731 
3732         let pci_device_handle = pci_device_node
3733             .pci_device_handle
3734             .as_ref()
3735             .ok_or(DeviceManagerError::MissingPciDevice)?;
3736         #[allow(irrefutable_let_patterns)]
3737         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
3738             let device_type = VirtioDeviceType::from(
3739                 virtio_pci_device
3740                     .lock()
3741                     .unwrap()
3742                     .virtio_device()
3743                     .lock()
3744                     .unwrap()
3745                     .device_type(),
3746             );
3747             match device_type {
3748                 VirtioDeviceType::Net
3749                 | VirtioDeviceType::Block
3750                 | VirtioDeviceType::Pmem
3751                 | VirtioDeviceType::Fs
3752                 | VirtioDeviceType::Vsock => {}
3753                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
3754             }
3755         }
3756 
3757         // Update the PCID bitmap
3758         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
3759 
3760         Ok(())
3761     }
3762 
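         // Second half of hot-unplug, triggered by the guest writing to B0EJ.
         // As an illustration, ejecting device_id 3 on segment 0 corresponds to
         // BDF 0000:00:03.0, i.e. a guest write of 1 << 3 to B0EJ.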
3763     pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
3764         info!(
3765             "Ejecting device_id = {} on segment_id = {}",
3766             device_id, pci_segment_id
3767         );
3768 
3769         // Convert the device ID into the corresponding b/d/f.
3770         let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);
3771 
3772         // Give the PCI device ID back to the PCI bus.
3773         self.pci_segments[pci_segment_id as usize]
3774             .pci_bus
3775             .lock()
3776             .unwrap()
3777             .put_device_id(device_id as usize)
3778             .map_err(DeviceManagerError::PutPciDeviceId)?;
3779 
3780         // Remove the device from the device tree along with its children.
3781         let mut device_tree = self.device_tree.lock().unwrap();
3782         let pci_device_node = device_tree
3783             .remove_node_by_pci_bdf(pci_device_bdf)
3784             .ok_or(DeviceManagerError::MissingPciDevice)?;
3785         for child in pci_device_node.children.iter() {
3786             device_tree.remove(child);
3787         }
3788 
3789         let mut iommu_attached = false;
3790         if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
3791             if iommu_attached_devices.contains(&pci_device_bdf) {
3792                 iommu_attached = true;
3793             }
3794         }
3795 
3796         let pci_device_handle = pci_device_node
3797             .pci_device_handle
3798             .ok_or(DeviceManagerError::MissingPciDevice)?;
3799         let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
3800             // No need to remove any virtio-mem mapping here as the container outlives all devices
3801             PciDeviceHandle::Vfio(vfio_pci_device) => (
3802                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
3803                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
3804                 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
3805                 false,
3806             ),
3807             PciDeviceHandle::Virtio(virtio_pci_device) => {
3808                 let dev = virtio_pci_device.lock().unwrap();
3809                 let bar_addr = dev.config_bar_addr();
3810                 for (event, addr) in dev.ioeventfds(bar_addr) {
3811                     let io_addr = IoEventAddress::Mmio(addr);
3812                     self.address_manager
3813                         .vm
3814                         .unregister_ioevent(event, &io_addr)
3815                         .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
3816                 }
3817 
3818                 if let Some(dma_handler) = dev.dma_handler() {
3819                     if !iommu_attached {
3820                         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3821                             for region in zone.regions() {
3822                                 let iova = region.start_addr().0;
3823                                 let size = region.len();
3824                                 dma_handler
3825                                     .unmap(iova, size)
3826                                     .map_err(DeviceManagerError::VirtioDmaUnmap)?;
3827                             }
3828                         }
3829                     }
3830                 }
3831 
3832                 (
3833                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
3834                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
3835                     Some(dev.virtio_device()),
3836                     dev.dma_handler().is_some() && !iommu_attached,
3837                 )
3838             }
3839             PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
3840                 let mut dev = vfio_user_pci_device.lock().unwrap();
3841                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3842                     for region in zone.regions() {
3843                         dev.dma_unmap(region)
3844                             .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
3845                     }
3846                 }
3847 
3848                 (
3849                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
3850                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
3851                     None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
3852                     true,
3853                 )
3854             }
3855         };
3856 
3857         if remove_dma_handler {
3858             for virtio_mem_device in self.virtio_mem_devices.iter() {
3859                 virtio_mem_device
3860                     .lock()
3861                     .unwrap()
3862                     .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
3863                         pci_device_bdf.into(),
3864                     ))
3865                     .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
3866             }
3867         }
3868 
3869         // Free the allocated BARs
3870         pci_device
3871             .lock()
3872             .unwrap()
3873             .free_bars(
3874                 &mut self.address_manager.allocator.lock().unwrap(),
3875                 &mut self.pci_segments[pci_segment_id as usize]
3876                     .allocator
3877                     .lock()
3878                     .unwrap(),
3879             )
3880             .map_err(DeviceManagerError::FreePciBars)?;
3881 
3882         // Remove the device from the PCI bus
3883         self.pci_segments[pci_segment_id as usize]
3884             .pci_bus
3885             .lock()
3886             .unwrap()
3887             .remove_by_device(&pci_device)
3888             .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;
3889 
3890         #[cfg(target_arch = "x86_64")]
3891         // Remove the device from the IO bus
3892         self.io_bus()
3893             .remove_by_device(&bus_device)
3894             .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;
3895 
3896         // Remove the device from the MMIO bus
3897         self.mmio_bus()
3898             .remove_by_device(&bus_device)
3899             .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;
3900 
3901         // Remove the device from the list of BusDevice held by the
3902         // DeviceManager.
3903         self.bus_devices
3904             .retain(|dev| !Arc::ptr_eq(dev, &bus_device));
3905 
3906         // Shutdown and remove the underlying virtio-device if present
3907         if let Some(virtio_device) = virtio_device {
3908             for mapping in virtio_device.lock().unwrap().userspace_mappings() {
3909                 self.memory_manager
3910                     .lock()
3911                     .unwrap()
3912                     .remove_userspace_mapping(
3913                         mapping.addr.raw_value(),
3914                         mapping.len,
3915                         mapping.host_addr,
3916                         mapping.mergeable,
3917                         mapping.mem_slot,
3918                     )
3919                     .map_err(DeviceManagerError::MemoryManager)?;
3920             }
3921 
3922             virtio_device.lock().unwrap().shutdown();
3923 
3924             self.virtio_devices
3925                 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
3926         }
3927 
3928         // At this point, the device has been removed from all the lists and
3929         // buses where it was stored. At the end of this function, once
3930         // virtio_device, bus_device and pci_device are released, the actual
3931         // device will be dropped.
3932         Ok(())
3933     }
3934 
3935     fn hotplug_virtio_pci_device(
3936         &mut self,
3937         handle: MetaVirtioDevice,
3938     ) -> DeviceManagerResult<PciDeviceInfo> {
3939         // Add the virtio device to the device manager list. This is important
3940         // as the list is used to notify virtio devices about memory updates
3941         // for instance.
3942         self.virtio_devices.push(handle.clone());
3943 
3944         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
3945             self.iommu_mapping.clone()
3946         } else {
3947             None
3948         };
3949 
3950         let bdf = self.add_virtio_pci_device(
3951             handle.virtio_device,
3952             &mapping,
3953             handle.id.clone(),
3954             handle.pci_segment,
3955             handle.dma_handler,
3956         )?;
3957 
3958         // Update the PCIU bitmap
3959         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3960 
3961         Ok(PciDeviceInfo { id: handle.id, bdf })
3962     }
3963 
3964     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
3965         self.config
3966             .lock()
3967             .as_ref()
3968             .unwrap()
3969             .platform
3970             .as_ref()
3971             .map(|pc| {
3972                 pc.iommu_segments
3973                     .as_ref()
3974                     .map(|v| v.contains(&pci_segment_id))
3975                     .unwrap_or_default()
3976             })
3977             .unwrap_or_default()
3978     }
3979 
3980     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
3981         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
3982             return Err(DeviceManagerError::InvalidIommuHotplug);
3983         }
3984 
3985         let device = self.make_virtio_block_device(disk_cfg)?;
3986         self.hotplug_virtio_pci_device(device)
3987     }
3988 
3989     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
3990         let device = self.make_virtio_fs_device(fs_cfg)?;
3991         self.hotplug_virtio_pci_device(device)
3992     }
3993 
3994     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
3995         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
3996             return Err(DeviceManagerError::InvalidIommuHotplug);
3997         }
3998 
3999         let device = self.make_virtio_pmem_device(pmem_cfg)?;
4000         self.hotplug_virtio_pci_device(device)
4001     }
4002 
4003     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
4004         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
4005             return Err(DeviceManagerError::InvalidIommuHotplug);
4006         }
4007 
4008         let device = self.make_virtio_net_device(net_cfg)?;
4009         self.hotplug_virtio_pci_device(device)
4010     }
4011 
4012     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
4013         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
4014             return Err(DeviceManagerError::InvalidIommuHotplug);
4015         }
4016 
4017         let device = self.make_vdpa_device(vdpa_cfg)?;
4018         self.hotplug_virtio_pci_device(device)
4019     }
4020 
4021     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4022         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4023             return Err(DeviceManagerError::InvalidIommuHotplug);
4024         }
4025 
4026         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4027         self.hotplug_virtio_pci_device(device)
4028     }
4029 
4030     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4031         let mut counters = HashMap::new();
4032 
4033         for handle in &self.virtio_devices {
4034             let virtio_device = handle.virtio_device.lock().unwrap();
4035             if let Some(device_counters) = virtio_device.counters() {
4036                 counters.insert(handle.id.clone(), device_counters.clone());
4037             }
4038         }
4039 
4040         counters
4041     }
4042 
4043     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4044         if let Some(balloon) = &self.balloon {
4045             return balloon
4046                 .lock()
4047                 .unwrap()
4048                 .resize(size)
4049                 .map_err(DeviceManagerError::VirtioBalloonResize);
4050         }
4051 
4052         warn!("No balloon setup: Can't resize the balloon");
4053         Err(DeviceManagerError::MissingVirtioBalloon)
4054     }
4055 
4056     pub fn balloon_size(&self) -> u64 {
4057         if let Some(balloon) = &self.balloon {
4058             return balloon.lock().unwrap().get_actual();
4059         }
4060 
4061         0
4062     }
4063 
4064     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4065         self.device_tree.clone()
4066     }
4067 
4068     pub fn restore_devices(
4069         &mut self,
4070         snapshot: Snapshot,
4071     ) -> std::result::Result<(), MigratableError> {
4072         // Finally, restore all devices associated with the DeviceManager.
4073         // It's important to restore devices in the right order; traversing
4074         // the device tree in reverse breadth-first order ensures a child is
4075         // restored before its parent node.
4076         for node in self
4077             .device_tree
4078             .lock()
4079             .unwrap()
4080             .breadth_first_traversal()
4081             .rev()
4082         {
4083             // Restore the node
4084             if let Some(migratable) = &node.migratable {
4085                 info!("Restoring {} from DeviceManager", node.id);
4086                 if let Some(snapshot) = snapshot.snapshots.get(&node.id) {
4087                     migratable.lock().unwrap().pause()?;
4088                     migratable.lock().unwrap().restore(*snapshot.clone())?;
4089                 } else {
4090                     return Err(MigratableError::Restore(anyhow!(
4091                         "Missing device {}",
4092                         node.id
4093                     )));
4094                 }
4095             }
4096         }
4097 
4098         // The devices have been fully restored, so we can now update the
4099         // restoring state of the DeviceManager.
4100         self.restoring = false;
4101 
4102         Ok(())
4103     }
4104 
4105     #[cfg(target_arch = "x86_64")]
4106     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4107         self.ged_notification_device
4108             .as_ref()
4109             .unwrap()
4110             .lock()
4111             .unwrap()
4112             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4113             .map_err(DeviceManagerError::PowerButtonNotification)
4114     }
4115 
4116     #[cfg(target_arch = "aarch64")]
4117     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4118         // There are two use cases:
4119         // 1. Users will use direct kernel boot with device tree.
4120         // 2. Users will use ACPI+UEFI boot.
4121 
4122         // Trigger a GPIO pin 3 event to satisfy use case 1.
4123         self.gpio_device
4124             .as_ref()
4125             .unwrap()
4126             .lock()
4127             .unwrap()
4128             .trigger_key(3)
4129             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4130         // Trigger a GED power button event to satisfy use case 2.
4131         return self
4132             .ged_notification_device
4133             .as_ref()
4134             .unwrap()
4135             .lock()
4136             .unwrap()
4137             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4138             .map_err(DeviceManagerError::PowerButtonNotification);
4139     }
4140 
4141     pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
4142         &self.iommu_attached_devices
4143     }
4144 }
4145 
4146 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4147     for (numa_node_id, numa_node) in numa_nodes.iter() {
4148         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4149             return Some(*numa_node_id);
4150         }
4151     }
4152 
4153     None
4154 }
4155 
4156 impl Aml for DeviceManager {
4157     fn append_aml_bytes(&self, bytes: &mut Vec<u8>) {
4158         #[cfg(target_arch = "aarch64")]
4159         use arch::aarch64::DeviceInfoForFdt;
4160 
4161         let mut pci_scan_methods = Vec::new();
4162         for i in 0..self.pci_segments.len() {
4163             pci_scan_methods.push(aml::MethodCall::new(
4164                 format!("\\_SB_.PCI{:X}.PCNT", i).as_str().into(),
4165                 vec![],
4166             ));
4167         }
4168         let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
4169         for method in &pci_scan_methods {
4170             pci_scan_inner.push(method)
4171         }
4172 
4173         // PCI hotplug controller
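             // The hotplug controller exposes four 32-bit registers through the
             // PCST OpRegion defined below: PCIU (devices recently plugged in),
             // PCID (devices pending ejection), B0EJ (write 1 << device to eject
             // it) and PSEG (selects which PCI segment the other fields apply to).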
4174         aml::Device::new(
4175             "_SB_.PHPR".into(),
4176             vec![
4177                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
4178                 &aml::Name::new("_STA".into(), &0x0bu8),
4179                 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
4180                 &aml::Mutex::new("BLCK".into(), 0),
4181                 &aml::Name::new(
4182                     "_CRS".into(),
4183                     &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
4184                         aml::AddressSpaceCachable::NotCacheable,
4185                         true,
4186                         self.acpi_address.0 as u64,
4187                         self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
4188                     )]),
4189                 ),
4190                 // The OpRegion and Field map the hotplug MMIO range onto individual named field values
4191                 &aml::OpRegion::new(
4192                     "PCST".into(),
4193                     aml::OpRegionSpace::SystemMemory,
4194                     self.acpi_address.0 as usize,
4195                     DEVICE_MANAGER_ACPI_SIZE,
4196                 ),
4197                 &aml::Field::new(
4198                     "PCST".into(),
4199                     aml::FieldAccessType::DWord,
4200                     aml::FieldUpdateRule::WriteAsZeroes,
4201                     vec![
4202                         aml::FieldEntry::Named(*b"PCIU", 32),
4203                         aml::FieldEntry::Named(*b"PCID", 32),
4204                         aml::FieldEntry::Named(*b"B0EJ", 32),
4205                         aml::FieldEntry::Named(*b"PSEG", 32),
4206                     ],
4207                 ),
4208                 &aml::Method::new(
4209                     "PCEJ".into(),
4210                     2,
4211                     true,
4212                     vec![
4213                         // Take lock defined above
4214                         &aml::Acquire::new("BLCK".into(), 0xffff),
4215                         // Select the PCI segment (passed in the second argument)
4216                         &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
4217                         // Set the bit for the slot (first argument) in the B0EJ field to trigger the eject
4218                         &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
4219                         // Release lock
4220                         &aml::Release::new("BLCK".into()),
4221                         // Return 0
4222                         &aml::Return::new(&aml::ZERO),
4223                     ],
4224                 ),
4225                 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
4226             ],
4227         )
4228         .append_aml_bytes(bytes);
4229 
4230         for segment in &self.pci_segments {
4231             segment.append_aml_bytes(bytes);
4232         }
4233 
4234         let mut mbrd_memory = Vec::new();
4235 
4236         for segment in &self.pci_segments {
4237             mbrd_memory.push(aml::Memory32Fixed::new(
4238                 true,
4239                 segment.mmio_config_address as u32,
4240                 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
4241             ))
4242         }
4243 
4244         let mut mbrd_memory_refs = Vec::new();
4245         for mbrd_memory_ref in &mbrd_memory {
4246             mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
4247         }
4248 
4249         aml::Device::new(
4250             "_SB_.MBRD".into(),
4251             vec![
4252                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")),
4253                 &aml::Name::new("_UID".into(), &aml::ZERO),
4254                 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
4255             ],
4256         )
4257         .append_aml_bytes(bytes);
4258 
4259         // Serial device
4260         #[cfg(target_arch = "x86_64")]
4261         let serial_irq = 4;
4262         #[cfg(target_arch = "aarch64")]
4263         let serial_irq =
4264             if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
4265                 self.get_device_info()
4266                     .clone()
4267                     .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
4268                     .unwrap()
4269                     .irq()
4270             } else {
4271                 // If the serial port is turned off, use a placeholder (invalid) IRQ; no COM1 device is generated below in that case.
4272                 31
4273             };
4274         if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
4275             aml::Device::new(
4276                 "_SB_.COM1".into(),
4277                 vec![
4278                     &aml::Name::new(
4279                         "_HID".into(),
4280                         #[cfg(target_arch = "x86_64")]
4281                         &aml::EisaName::new("PNP0501"),
4282                         #[cfg(target_arch = "aarch64")]
4283                         &"ARMH0011",
4284                     ),
4285                     &aml::Name::new("_UID".into(), &aml::ZERO),
4286                     &aml::Name::new("_DDN".into(), &"COM1"),
4287                     &aml::Name::new(
4288                         "_CRS".into(),
4289                         &aml::ResourceTemplate::new(vec![
4290                             &aml::Interrupt::new(true, true, false, false, serial_irq),
4291                             #[cfg(target_arch = "x86_64")]
4292                             &aml::Io::new(0x3f8, 0x3f8, 0, 0x8),
4293                             #[cfg(target_arch = "aarch64")]
4294                             &aml::Memory32Fixed::new(
4295                                 true,
4296                                 arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
4297                                 MMIO_LEN as u32,
4298                             ),
4299                         ]),
4300                     ),
4301                 ],
4302             )
4303             .append_aml_bytes(bytes);
4304         }
4305 
4306         aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).append_aml_bytes(bytes);
4307 
4308         aml::Device::new(
4309             "_SB_.PWRB".into(),
4310             vec![
4311                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")),
4312                 &aml::Name::new("_UID".into(), &aml::ZERO),
4313             ],
4314         )
4315         .append_aml_bytes(bytes);
4316 
4317         self.ged_notification_device
4318             .as_ref()
4319             .unwrap()
4320             .lock()
4321             .unwrap()
4322             .append_aml_bytes(bytes);
4323     }
4324 }
4325 
4326 impl Pausable for DeviceManager {
4327     fn pause(&mut self) -> result::Result<(), MigratableError> {
4328         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4329             if let Some(migratable) = &device_node.migratable {
4330                 migratable.lock().unwrap().pause()?;
4331             }
4332         }
4333         // On AArch64, pausing the device manager must also "pause" the
4334         // GIC, which flushes the GIC pending tables and the ITS tables
4335         // to guest RAM.
4336         #[cfg(target_arch = "aarch64")]
4337         {
4338             let gic_device = Arc::clone(
4339                 self.get_interrupt_controller()
4340                     .unwrap()
4341                     .lock()
4342                     .unwrap()
4343                     .get_gic_device()
4344                     .unwrap(),
4345             );
4346             if let Some(gicv3_its) = gic_device
4347                 .lock()
4348                 .unwrap()
4349                 .as_any_concrete_mut()
4350                 .downcast_mut::<KvmGicV3Its>()
4351             {
4352                 gicv3_its.pause()?;
4353             } else {
4354                 return Err(MigratableError::Pause(anyhow!(
4355                     "GicDevice downcast to KvmGicV3Its failed when pausing device manager!"
4356                 )));
4357             };
4358         };
4359 
4360         Ok(())
4361     }
4362 
4363     fn resume(&mut self) -> result::Result<(), MigratableError> {
4364         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4365             if let Some(migratable) = &device_node.migratable {
4366                 migratable.lock().unwrap().resume()?;
4367             }
4368         }
4369 
4370         Ok(())
4371     }
4372 }
4373 
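// A DeviceManager snapshot aggregates the snapshot of every migratable device
// from the device tree, plus the DeviceManager's own state. Restoring applies
// that state and then recreates the devices from the configuration.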
4374 impl Snapshottable for DeviceManager {
4375     fn id(&self) -> String {
4376         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4377     }
4378 
4379     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4380         let mut snapshot = Snapshot::new(DEVICE_MANAGER_SNAPSHOT_ID);
4381 
4382         // We aggregate all the device snapshots.
4383         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4384             if let Some(migratable) = &device_node.migratable {
4385                 let device_snapshot = migratable.lock().unwrap().snapshot()?;
4386                 snapshot.add_snapshot(device_snapshot);
4387             }
4388         }
4389 
4390         // Then we store the DeviceManager state.
4391         snapshot.add_data_section(SnapshotDataSection::new_from_state(
4392             DEVICE_MANAGER_SNAPSHOT_ID,
4393             &self.state(),
4394         )?);
4395 
4396         Ok(snapshot)
4397     }
4398 
4399     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
4400         // Let's first restore the DeviceManager.
4401 
4402         self.set_state(&snapshot.to_state(DEVICE_MANAGER_SNAPSHOT_ID)?);
4403 
4404         // Now that the DeviceManager is updated with the restored state,
4405         // it's time to create the devices based on the configuration.
4406         self.create_devices(None, None, None)
4407             .map_err(|e| MigratableError::Restore(anyhow!("Could not create devices {:?}", e)))?;
4408 
4409         Ok(())
4410     }
4411 }
4412 
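// Nothing device-manager specific to send or receive: rely on the default
// Transportable methods.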
4413 impl Transportable for DeviceManager {}
4414 
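// The migration hooks simply fan out to every migratable device registered in
// the device tree; dirty ranges are merged into a single MemoryRangeTable.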
4415 impl Migratable for DeviceManager {
4416     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4417         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4418             if let Some(migratable) = &device_node.migratable {
4419                 migratable.lock().unwrap().start_dirty_log()?;
4420             }
4421         }
4422         Ok(())
4423     }
4424 
4425     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4426         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4427             if let Some(migratable) = &device_node.migratable {
4428                 migratable.lock().unwrap().stop_dirty_log()?;
4429             }
4430         }
4431         Ok(())
4432     }
4433 
4434     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4435         let mut tables = Vec::new();
4436         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4437             if let Some(migratable) = &device_node.migratable {
4438                 tables.push(migratable.lock().unwrap().dirty_log()?);
4439             }
4440         }
4441         Ok(MemoryRangeTable::new_from_tables(tables))
4442     }
4443 
4444     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4445         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4446             if let Some(migratable) = &device_node.migratable {
4447                 migratable.lock().unwrap().start_migration()?;
4448             }
4449         }
4450         Ok(())
4451     }
4452 
4453     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4454         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4455             if let Some(migratable) = &device_node.migratable {
4456                 migratable.lock().unwrap().complete_migration()?;
4457             }
4458         }
4459         Ok(())
4460     }
4461 }
4462 
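// Layout of the PCI hotplug (PHPR) MMIO region backing the OpRegion/Field
// definitions above: PCIU (slots with newly plugged devices), PCID (slots with
// devices to remove), B0EJ (slot eject trigger) and PSEG (currently selected
// PCI segment). Each field is 32 bits wide.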
4463 const PCIU_FIELD_OFFSET: u64 = 0;
4464 const PCID_FIELD_OFFSET: u64 = 4;
4465 const B0EJ_FIELD_OFFSET: u64 = 8;
4466 const PSEG_FIELD_OFFSET: u64 = 12;
4467 const PCIU_FIELD_SIZE: usize = 4;
4468 const PCID_FIELD_SIZE: usize = 4;
4469 const B0EJ_FIELD_SIZE: usize = 4;
4470 const PSEG_FIELD_SIZE: usize = 4;
4471 
4472 impl BusDevice for DeviceManager {
4473     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4474         match offset {
4475             PCIU_FIELD_OFFSET => {
4476                 assert!(data.len() == PCIU_FIELD_SIZE);
4477                 data.copy_from_slice(
4478                     &self.pci_segments[self.selected_segment]
4479                         .pci_devices_up
4480                         .to_le_bytes(),
4481                 );
4482                 // Clear the PCIU bitmap
4483                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4484             }
4485             PCID_FIELD_OFFSET => {
4486                 assert!(data.len() == PCID_FIELD_SIZE);
4487                 data.copy_from_slice(
4488                     &self.pci_segments[self.selected_segment]
4489                         .pci_devices_down
4490                         .to_le_bytes(),
4491                 );
4492                 // Clear the PCID bitmap
4493                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4494             }
4495             B0EJ_FIELD_OFFSET => {
4496                 assert!(data.len() == B0EJ_FIELD_SIZE);
4497                 // Always return an empty bitmap since ejections are
4498                 // handled immediately during the write access.
4499                 data.fill(0);
4500             }
4501             PSEG_FIELD_OFFSET => {
4502                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4503                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4504             }
4505             _ => error!(
4506                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4507                 base, offset
4508             ),
4509         }
4510 
4511         debug!(
4512             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4513             base, offset, data
4514         )
4515     }
4516 
4517     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4518         match offset {
4519             B0EJ_FIELD_OFFSET => {
4520                 assert!(data.len() == B0EJ_FIELD_SIZE);
4521                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4522                 data_array.copy_from_slice(data);
4523                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4524 
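                // Each bit set in the bitmap identifies a slot to eject on the
                // currently selected segment.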
4525                 while slot_bitmap > 0 {
4526                     let slot_id = slot_bitmap.trailing_zeros();
4527                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4528                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4529                     }
4530                     slot_bitmap &= !(1 << slot_id);
4531                 }
4532             }
4533             PSEG_FIELD_OFFSET => {
4534                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4535                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4536                 data_array.copy_from_slice(data);
4537                 let selected_segment = u32::from_le_bytes(data_array) as usize;
4538                 if selected_segment >= self.pci_segments.len() {
4539                     error!(
4540                         "Segment selection out of range: {} >= {}",
4541                         selected_segment,
4542                         self.pci_segments.len()
4543                     );
4544                     return None;
4545                 }
4546                 self.selected_segment = selected_segment;
4547             }
4548             _ => error!(
4549                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4550                 base, offset
4551             ),
4552         }
4553 
4554         debug!(
4555             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4556             base, offset, data
4557         );
4558 
4559         None
4560     }
4561 }
4562 
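// Make sure every virtio device is cleanly shut down when the DeviceManager
// goes away.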
4563 impl Drop for DeviceManager {
4564     fn drop(&mut self) {
4565         for handle in self.virtio_devices.drain(..) {
4566             handle.virtio_device.lock().unwrap().shutdown();
4567         }
4568     }
4569 }
4570