xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision b686a5bb24f949e3b201308d69b01e85c14f1ad6)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use std::collections::{BTreeMap, BTreeSet, HashMap};
13 use std::fs::{File, OpenOptions};
14 use std::io::{self, stdout, IsTerminal, Seek, SeekFrom};
15 use std::num::Wrapping;
16 use std::os::unix::fs::OpenOptionsExt;
17 use std::os::unix::io::{AsRawFd, FromRawFd};
18 use std::path::PathBuf;
19 use std::result;
20 use std::sync::{Arc, Mutex};
21 #[cfg(not(target_arch = "riscv64"))]
22 use std::time::Instant;
23 
24 use acpi_tables::sdt::GenericAddress;
25 #[cfg(not(target_arch = "riscv64"))]
26 use acpi_tables::{aml, Aml};
27 #[cfg(not(target_arch = "riscv64"))]
28 use anyhow::anyhow;
29 #[cfg(target_arch = "x86_64")]
30 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
31 use arch::{layout, NumaNodes};
32 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
33 use arch::{DeviceType, MmioDeviceInfo};
34 use block::async_io::DiskFile;
35 use block::fixed_vhd_sync::FixedVhdDiskSync;
36 use block::qcow_sync::QcowDiskSync;
37 use block::raw_async_aio::RawFileDiskAio;
38 use block::raw_sync::RawFileDiskSync;
39 use block::vhdx_sync::VhdxDiskSync;
40 use block::{
41     block_aio_is_supported, block_io_uring_is_supported, detect_image_type, qcow, vhdx, ImageType,
42 };
43 #[cfg(feature = "io_uring")]
44 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk};
45 #[cfg(target_arch = "riscv64")]
46 use devices::aia;
47 #[cfg(target_arch = "x86_64")]
48 use devices::debug_console;
49 #[cfg(target_arch = "x86_64")]
50 use devices::debug_console::DebugConsole;
51 #[cfg(target_arch = "aarch64")]
52 use devices::gic;
53 use devices::interrupt_controller::InterruptController;
54 #[cfg(target_arch = "x86_64")]
55 use devices::ioapic;
56 #[cfg(target_arch = "aarch64")]
57 use devices::legacy::Pl011;
58 #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))]
59 use devices::legacy::Serial;
60 #[cfg(feature = "pvmemcontrol")]
61 use devices::pvmemcontrol::{PvmemcontrolBusDevice, PvmemcontrolPciDevice};
62 use devices::{interrupt_controller, AcpiNotificationFlags};
63 use hypervisor::IoEventAddress;
64 use libc::{
65     tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE,
66     TCSANOW,
67 };
68 use pci::{
69     DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping,
70     VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError,
71 };
72 use rate_limiter::group::RateLimiterGroup;
73 use seccompiler::SeccompAction;
74 use serde::{Deserialize, Serialize};
75 use tracer::trace_scoped;
76 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd};
77 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator, VirtioTransport};
78 use virtio_devices::vhost_user::VhostUserConfig;
79 use virtio_devices::{
80     AccessPlatformMapping, ActivateError, Endpoint, IommuMapping, VdpaDmaMapping,
81     VirtioMemMappingSource,
82 };
83 use vm_allocator::{AddressAllocator, SystemAllocator};
84 use vm_device::dma_mapping::ExternalDmaMapping;
85 use vm_device::interrupt::{
86     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
87 };
88 use vm_device::{Bus, BusDevice, BusDeviceSync, Resource};
89 use vm_memory::guest_memory::FileOffset;
90 use vm_memory::{Address, GuestAddress, GuestMemoryRegion, GuestUsize, MmapRegion};
91 #[cfg(target_arch = "x86_64")]
92 use vm_memory::{GuestAddressSpace, GuestMemory};
93 use vm_migration::protocol::MemoryRangeTable;
94 use vm_migration::{
95     snapshot_from_id, state_from_id, Migratable, MigratableError, Pausable, Snapshot, SnapshotData,
96     Snapshottable, Transportable,
97 };
98 use vm_virtio::{AccessPlatform, VirtioDeviceType};
99 use vmm_sys_util::eventfd::EventFd;
100 
101 use crate::console_devices::{ConsoleDeviceError, ConsoleInfo, ConsoleOutput};
102 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE};
103 use crate::device_tree::{DeviceNode, DeviceTree};
104 use crate::interrupt::{LegacyUserspaceInterruptManager, MsiInterruptManager};
105 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
106 use crate::pci_segment::PciSegment;
107 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
108 use crate::vm_config::{
109     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
110     VdpaConfig, VhostMode, VmConfig, VsockConfig, DEFAULT_IOMMU_ADDRESS_WIDTH_BITS,
111     DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT,
112 };
113 use crate::{device_node, GuestRegionMmap, PciDeviceInfo, DEVICE_MANAGER_SNAPSHOT_ID};
114 
115 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
116 const MMIO_LEN: u64 = 0x1000;
117 
118 // Singleton devices / devices the user cannot name
119 #[cfg(target_arch = "x86_64")]
120 const IOAPIC_DEVICE_NAME: &str = "__ioapic";
121 const SERIAL_DEVICE_NAME: &str = "__serial";
122 #[cfg(target_arch = "x86_64")]
123 const DEBUGCON_DEVICE_NAME: &str = "__debug_console";
124 #[cfg(target_arch = "aarch64")]
125 const GPIO_DEVICE_NAME: &str = "__gpio";
126 const RNG_DEVICE_NAME: &str = "__rng";
127 const IOMMU_DEVICE_NAME: &str = "__iommu";
128 #[cfg(feature = "pvmemcontrol")]
129 const PVMEMCONTROL_DEVICE_NAME: &str = "__pvmemcontrol";
130 const BALLOON_DEVICE_NAME: &str = "__balloon";
131 const CONSOLE_DEVICE_NAME: &str = "__console";
132 const PVPANIC_DEVICE_NAME: &str = "__pvpanic";
133 
134 // Devices that the user may name and for which we generate
135 // identifiers if the user doesn't give one
136 const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
137 const FS_DEVICE_NAME_PREFIX: &str = "_fs";
138 const NET_DEVICE_NAME_PREFIX: &str = "_net";
139 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
140 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
141 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
142 const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
143 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
144 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
145 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
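
// Note (illustrative, not exhaustive): when the user omits an id, a name is
// generated from one of these prefixes plus the shared device_id_cnt counter,
// so a first unnamed disk typically ends up with a name like "_disk0".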
146 
147 /// Errors associated with device manager
148 #[derive(Debug)]
149 pub enum DeviceManagerError {
150     /// Cannot create EventFd.
151     EventFd(io::Error),
152 
153     /// Cannot open disk path
154     Disk(io::Error),
155 
156     /// Cannot create vhost-user-net device
157     CreateVhostUserNet(virtio_devices::vhost_user::Error),
158 
159     /// Cannot create virtio-blk device
160     CreateVirtioBlock(io::Error),
161 
162     /// Cannot create virtio-net device
163     CreateVirtioNet(virtio_devices::net::Error),
164 
165     /// Cannot create virtio-console device
166     CreateVirtioConsole(io::Error),
167 
168     /// Cannot create virtio-rng device
169     CreateVirtioRng(io::Error),
170 
171     /// Cannot create virtio-fs device
172     CreateVirtioFs(virtio_devices::vhost_user::Error),
173 
174     /// Virtio-fs device was created without a socket.
175     NoVirtioFsSock,
176 
177     /// Cannot create vhost-user-blk device
178     CreateVhostUserBlk(virtio_devices::vhost_user::Error),
179 
180     /// Cannot create virtio-pmem device
181     CreateVirtioPmem(io::Error),
182 
183     /// Cannot create vDPA device
184     CreateVdpa(virtio_devices::vdpa::Error),
185 
186     /// Cannot create virtio-vsock device
187     CreateVirtioVsock(io::Error),
188 
189     /// Cannot create tpm device
190     CreateTpmDevice(anyhow::Error),
191 
192     /// Failed to convert Path to &str for the vDPA device.
193     CreateVdpaConvertPath,
194 
195     /// Failed to convert Path to &str for the virtio-vsock device.
196     CreateVsockConvertPath,
197 
198     /// Cannot create virtio-vsock backend
199     CreateVsockBackend(virtio_devices::vsock::VsockUnixError),
200 
201     /// Cannot create virtio-iommu device
202     CreateVirtioIommu(io::Error),
203 
204     /// Cannot create virtio-balloon device
205     CreateVirtioBalloon(io::Error),
206 
207     /// Cannot create pvmemcontrol device
208     #[cfg(feature = "pvmemcontrol")]
209     CreatePvmemcontrol(io::Error),
210 
211     /// Cannot create virtio-watchdog device
212     CreateVirtioWatchdog(io::Error),
213 
214     /// Failed to parse disk image format
215     DetectImageType(io::Error),
216 
217     /// Cannot open qcow disk path
218     QcowDeviceCreate(qcow::Error),
219 
220     /// Cannot create serial manager
221     CreateSerialManager(SerialManagerError),
222 
223     /// Cannot spawn the serial manager thread
224     SpawnSerialManager(SerialManagerError),
225 
226     /// Cannot open tap interface
227     OpenTap(net_util::TapError),
228 
229     /// Cannot allocate IRQ.
230     AllocateIrq,
231 
232     /// Cannot configure the IRQ.
233     Irq(vmm_sys_util::errno::Error),
234 
235     /// Cannot allocate PCI BARs
236     AllocateBars(pci::PciDeviceError),
237 
238     /// Could not free the BARs associated with a PCI device.
239     FreePciBars(pci::PciDeviceError),
240 
241     /// Cannot register ioevent.
242     RegisterIoevent(anyhow::Error),
243 
244     /// Cannot unregister ioevent.
245     UnRegisterIoevent(anyhow::Error),
246 
247     /// Cannot create virtio device
248     VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),
249 
250     /// Cannot add PCI device
251     AddPciDevice(pci::PciRootError),
252 
253     /// Cannot open persistent memory file
254     PmemFileOpen(io::Error),
255 
256     /// Cannot set persistent memory file size
257     PmemFileSetLen(io::Error),
258 
259     /// Cannot find a memory range for persistent memory
260     PmemRangeAllocation,
261 
262     /// Cannot find a memory range for virtio-fs
263     FsRangeAllocation,
264 
265     /// Error creating serial output file
266     SerialOutputFileOpen(io::Error),
267 
268     #[cfg(target_arch = "x86_64")]
269     /// Error creating debug-console output file
270     DebugconOutputFileOpen(io::Error),
271 
272     /// Error creating console output file
273     ConsoleOutputFileOpen(io::Error),
274 
275     /// Error creating serial pty
276     SerialPtyOpen(io::Error),
277 
278     /// Error creating console pty
279     ConsolePtyOpen(io::Error),
280 
281     /// Error creating debug-console pty
282     DebugconPtyOpen(io::Error),
283 
284     /// Error setting pty raw mode
285     SetPtyRaw(ConsoleDeviceError),
286 
287     /// Error getting pty peer
288     GetPtyPeer(vmm_sys_util::errno::Error),
289 
290     /// Cannot create a VFIO device
291     VfioCreate(vfio_ioctls::VfioError),
292 
293     /// Cannot create a VFIO PCI device
294     VfioPciCreate(pci::VfioPciError),
295 
296     /// Failed to map VFIO MMIO region.
297     VfioMapRegion(pci::VfioPciError),
298 
299     /// Failed to DMA map VFIO device.
300     VfioDmaMap(vfio_ioctls::VfioError),
301 
302     /// Failed to DMA unmap VFIO device.
303     VfioDmaUnmap(pci::VfioPciError),
304 
305     /// Failed to create the passthrough device.
306     CreatePassthroughDevice(anyhow::Error),
307 
308     /// Failed to memory map.
309     Mmap(io::Error),
310 
311     /// Cannot add legacy device to Bus.
312     BusError(vm_device::BusError),
313 
314     /// Failed to allocate IO port
315     AllocateIoPort,
316 
317     /// Failed to allocate MMIO address
318     AllocateMmioAddress,
319 
320     /// Failed to make hotplug notification
321     HotPlugNotification(io::Error),
322 
323     /// Error from a memory manager operation
324     MemoryManager(MemoryManagerError),
325 
326     /// Failed to create new interrupt source group.
327     CreateInterruptGroup(io::Error),
328 
329     /// Failed to update interrupt source group.
330     UpdateInterruptGroup(io::Error),
331 
332     /// Failed to create interrupt controller.
333     CreateInterruptController(interrupt_controller::Error),
334 
335     /// Failed to create a new MmapRegion instance.
336     NewMmapRegion(vm_memory::mmap::MmapRegionError),
337 
338     /// Failed to clone a File.
339     CloneFile(io::Error),
340 
341     /// Failed to create socket file
342     CreateSocketFile(io::Error),
343 
344     /// Failed to spawn the network backend
345     SpawnNetBackend(io::Error),
346 
347     /// Failed to spawn the block backend
348     SpawnBlockBackend(io::Error),
349 
350     /// Missing PCI bus.
351     NoPciBus,
352 
353     /// Could not find an available device name.
354     NoAvailableDeviceName,
355 
356     /// Missing PCI device.
357     MissingPciDevice,
358 
359     /// Failed to remove a PCI device from the PCI bus.
360     RemoveDeviceFromPciBus(pci::PciRootError),
361 
362     /// Failed to remove a bus device from the IO bus.
363     RemoveDeviceFromIoBus(vm_device::BusError),
364 
365     /// Failed to remove a bus device from the MMIO bus.
366     RemoveDeviceFromMmioBus(vm_device::BusError),
367 
368     /// Failed to find the device corresponding to a specific PCI b/d/f.
369     UnknownPciBdf(u32),
370 
371     /// Not allowed to remove this type of device from the VM.
372     RemovalNotAllowed(vm_virtio::VirtioDeviceType),
373 
374     /// Failed to find device corresponding to the given identifier.
375     UnknownDeviceId(String),
376 
377     /// Failed to find an available PCI device ID.
378     NextPciDeviceId(pci::PciRootError),
379 
380     /// Could not reserve the PCI device ID.
381     GetPciDeviceId(pci::PciRootError),
382 
383     /// Could not give the PCI device ID back.
384     PutPciDeviceId(pci::PciRootError),
385 
386     /// No disk path was specified when one was expected
387     NoDiskPath,
388 
389     /// Failed to update guest memory for virtio device.
390     UpdateMemoryForVirtioDevice(virtio_devices::Error),
391 
392     /// Cannot create virtio-mem device
393     CreateVirtioMem(io::Error),
394 
395     /// Cannot find a memory range for virtio-mem memory
396     VirtioMemRangeAllocation,
397 
398     /// Failed to update guest memory for VFIO PCI device.
399     UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),
400 
401     /// Trying to use a directory for pmem but no size specified
402     PmemWithDirectorySizeMissing,
403 
404     /// Trying to use a size that is not a multiple of 2 MiB
405     PmemSizeNotAligned,
406 
407     /// Could not find the node in the device tree.
408     MissingNode,
409 
410     /// Resource was already found.
411     ResourceAlreadyExists,
412 
413     /// Expected resources for virtio-pmem could not be found.
414     MissingVirtioPmemResources,
415 
416     /// Missing PCI b/d/f from the DeviceNode.
417     MissingDeviceNodePciBdf,
418 
419     /// No support for device passthrough
420     NoDevicePassthroughSupport,
421 
422     /// No socket option support for console device
423     NoSocketOptionSupportForConsoleDevice,
424 
425     /// Failed to resize virtio-balloon
426     VirtioBalloonResize(virtio_devices::balloon::Error),
427 
428     /// Missing virtio-balloon, can't proceed as expected.
429     MissingVirtioBalloon,
430 
431     /// Missing virtual IOMMU device
432     MissingVirtualIommu,
433 
434     /// Failed to do power button notification
435     PowerButtonNotification(io::Error),
436 
437     /// Failed to do AArch64 GPIO power button notification
438     #[cfg(target_arch = "aarch64")]
439     AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),
440 
441     /// Failed to set O_DIRECT flag to file descriptor
442     SetDirectIo,
443 
444     /// Failed to create FixedVhdDiskAsync
445     CreateFixedVhdDiskAsync(io::Error),
446 
447     /// Failed to create FixedVhdDiskSync
448     CreateFixedVhdDiskSync(io::Error),
449 
450     /// Failed to create QcowDiskSync
451     CreateQcowDiskSync(qcow::Error),
452 
453     /// Failed to create FixedVhdxDiskSync
454     CreateFixedVhdxDiskSync(vhdx::VhdxError),
455 
456     /// Failed to add DMA mapping handler to virtio-mem device.
457     AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
458 
459     /// Failed to remove DMA mapping handler from virtio-mem device.
460     RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
461 
462     /// Failed to create vfio-user client
463     VfioUserCreateClient(vfio_user::Error),
464 
465     /// Failed to create VFIO user device
466     VfioUserCreate(VfioUserPciDeviceError),
467 
468     /// Failed to map region from VFIO user device into guest
469     VfioUserMapRegion(VfioUserPciDeviceError),
470 
471     /// Failed to DMA map VFIO user device.
472     VfioUserDmaMap(VfioUserPciDeviceError),
473 
474     /// Failed to DMA unmap VFIO user device.
475     VfioUserDmaUnmap(VfioUserPciDeviceError),
476 
477     /// Failed to update memory mappings for VFIO user device
478     UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),
479 
480     /// Cannot duplicate file descriptor
481     DupFd(vmm_sys_util::errno::Error),
482 
483     /// Failed to DMA map virtio device.
484     VirtioDmaMap(std::io::Error),
485 
486     /// Failed to DMA unmap virtio device.
487     VirtioDmaUnmap(std::io::Error),
488 
489     /// Cannot hotplug device behind vIOMMU
490     InvalidIommuHotplug,
491 
492     /// Invalid identifier as it is not unique.
493     IdentifierNotUnique(String),
494 
495     /// Invalid identifier
496     InvalidIdentifier(String),
497 
498     /// Error activating virtio device
499     VirtioActivate(ActivateError),
500 
501     /// Failed retrieving device state from snapshot
502     RestoreGetState(MigratableError),
503 
504     /// Cannot create a PvPanic device
505     PvPanicCreate(devices::pvpanic::PvPanicError),
506 
507     /// Cannot create a RateLimiterGroup
508     RateLimiterGroupCreate(rate_limiter::group::Error),
509 
510     /// Cannot start sigwinch listener
511     StartSigwinchListener(std::io::Error),
512 
513     /// Invalid console info
514     InvalidConsoleInfo,
515 
516     /// Invalid console fd
517     InvalidConsoleFd,
518 }
519 
520 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;
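
// Illustrative sketch (hypothetical helper, not part of this file) of the
// error-handling idiom used throughout: fallible calls are mapped into a
// DeviceManagerError variant and propagated with `?`:
//
//     fn make_exit_evt(exit_evt: &EventFd) -> DeviceManagerResult<EventFd> {
//         exit_evt.try_clone().map_err(DeviceManagerError::EventFd)
//     }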
521 
522 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;
523 
524 #[derive(Default)]
525 pub struct Console {
526     console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
527 }
528 
529 impl Console {
530     pub fn need_resize(&self) -> bool {
531         if let Some(_resizer) = self.console_resizer.as_ref() {
532             return true;
533         }
534 
535         false
536     }
537 
538     pub fn update_console_size(&self) {
539         if let Some(resizer) = self.console_resizer.as_ref() {
540             resizer.update_console_size()
541         }
542     }
543 }
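
// Illustrative usage sketch (hypothetical caller): on a terminal resize event
// the VMM would typically do:
//
//     if console.need_resize() {
//         console.update_console_size();
//     }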
544 
545 pub(crate) struct AddressManager {
546     pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
547     pub(crate) io_bus: Arc<Bus>,
548     pub(crate) mmio_bus: Arc<Bus>,
549     pub(crate) vm: Arc<dyn hypervisor::Vm>,
550     device_tree: Arc<Mutex<DeviceTree>>,
551     pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
552     pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
553 }
554 
555 impl DeviceRelocation for AddressManager {
556     fn move_bar(
557         &self,
558         old_base: u64,
559         new_base: u64,
560         len: u64,
561         pci_dev: &mut dyn PciDevice,
562         region_type: PciBarRegionType,
563     ) -> std::result::Result<(), std::io::Error> {
564         match region_type {
565             PciBarRegionType::IoRegion => {
566                 // Update system allocator
567                 self.allocator
568                     .lock()
569                     .unwrap()
570                     .free_io_addresses(GuestAddress(old_base), len as GuestUsize);
571 
572                 self.allocator
573                     .lock()
574                     .unwrap()
575                     .allocate_io_addresses(Some(GuestAddress(new_base)), len as GuestUsize, None)
576                     .ok_or_else(|| io::Error::other("failed allocating new IO range"))?;
577 
578                 // Update PIO bus
579                 self.io_bus
580                     .update_range(old_base, len, new_base, len)
581                     .map_err(io::Error::other)?;
582             }
583             PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
584                 let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
585                     &self.pci_mmio32_allocators
586                 } else {
587                     &self.pci_mmio64_allocators
588                 };
589 
590                 // Find the specific allocator that this BAR was allocated from and use it for the new one
591                 for allocator in allocators {
592                     let allocator_base = allocator.lock().unwrap().base();
593                     let allocator_end = allocator.lock().unwrap().end();
594 
595                     if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
596                         allocator
597                             .lock()
598                             .unwrap()
599                             .free(GuestAddress(old_base), len as GuestUsize);
600 
601                         allocator
602                             .lock()
603                             .unwrap()
604                             .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
605                             .ok_or_else(|| io::Error::other("failed allocating new MMIO range"))?;
606 
607                         break;
608                     }
609                 }
610 
611                 // Update MMIO bus
612                 self.mmio_bus
613                     .update_range(old_base, len, new_base, len)
614                     .map_err(io::Error::other)?;
615             }
616         }
617 
618         // Update the device_tree resources associated with the device
619         if let Some(id) = pci_dev.id() {
620             if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
621                 let mut resource_updated = false;
622                 for resource in node.resources.iter_mut() {
623                     if let Resource::PciBar { base, type_, .. } = resource {
624                         if PciBarRegionType::from(*type_) == region_type && *base == old_base {
625                             *base = new_base;
626                             resource_updated = true;
627                             break;
628                         }
629                     }
630                 }
631 
632                 if !resource_updated {
633                     return Err(io::Error::other(format!(
634                         "Couldn't find a resource with base 0x{old_base:x} for device {id}"
635                     )));
636                 }
637             } else {
638                 return Err(io::Error::other(format!(
639                     "Couldn't find device {id} from device tree"
640                 )));
641             }
642         }
643 
644         let any_dev = pci_dev.as_any_mut();
645         if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
646             let bar_addr = virtio_pci_dev.config_bar_addr();
647             if bar_addr == new_base {
648                 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
649                     let io_addr = IoEventAddress::Mmio(addr);
650                     self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
651                         io::Error::other(format!("failed to unregister ioevent: {e:?}"))
652                     })?;
653                 }
654                 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
655                     let io_addr = IoEventAddress::Mmio(addr);
656                     self.vm
657                         .register_ioevent(event, &io_addr, None)
658                         .map_err(|e| {
659                             io::Error::other(format!("failed to register ioevent: {e:?}"))
660                         })?;
661                 }
662             } else {
663                 let virtio_dev = virtio_pci_dev.virtio_device();
664                 let mut virtio_dev = virtio_dev.lock().unwrap();
665                 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
666                     if shm_regions.addr.raw_value() == old_base {
667                         let mem_region = self.vm.make_user_memory_region(
668                             shm_regions.mem_slot,
669                             old_base,
670                             shm_regions.len,
671                             shm_regions.host_addr,
672                             false,
673                             false,
674                         );
675 
676                         self.vm.remove_user_memory_region(mem_region).map_err(|e| {
677                             io::Error::other(format!("failed to remove user memory region: {e:?}"))
678                         })?;
679 
680                         // Create new mapping by inserting new region to KVM.
681                         let mem_region = self.vm.make_user_memory_region(
682                             shm_regions.mem_slot,
683                             new_base,
684                             shm_regions.len,
685                             shm_regions.host_addr,
686                             false,
687                             false,
688                         );
689 
690                         self.vm.create_user_memory_region(mem_region).map_err(|e| {
691                             io::Error::other(format!("failed to create user memory regions: {e:?}"))
692                         })?;
693 
694                         // Update shared memory regions to reflect the new mapping.
695                         shm_regions.addr = GuestAddress(new_base);
696                         virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
697                             io::Error::other(format!(
698                                 "failed to update shared memory regions: {e:?}"
699                             ))
700                         })?;
701                     }
702                 }
703             }
704         }
705 
706         pci_dev.move_bar(old_base, new_base)
707     }
708 }
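
// Note: DeviceManager does not call move_bar() directly. The AddressManager is
// registered with each PCI segment as its DeviceRelocation handler, and the PCI
// configuration-space code invokes move_bar() when the guest reprograms a BAR,
// keeping the allocators, buses, device tree and ioevent/memory-region
// registrations above consistent with the new BAR address.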
709 
710 #[derive(Serialize, Deserialize)]
711 struct DeviceManagerState {
712     device_tree: DeviceTree,
713     device_id_cnt: Wrapping<usize>,
714 }
715 
716 #[derive(Debug)]
717 pub struct PtyPair {
718     pub main: File,
719     pub path: PathBuf,
720 }
721 
722 impl Clone for PtyPair {
723     fn clone(&self) -> Self {
724         PtyPair {
725             main: self.main.try_clone().unwrap(),
726             path: self.path.clone(),
727         }
728     }
729 }
730 
731 #[derive(Clone)]
732 pub enum PciDeviceHandle {
733     Vfio(Arc<Mutex<VfioPciDevice>>),
734     Virtio(Arc<Mutex<VirtioPciDevice>>),
735     VfioUser(Arc<Mutex<VfioUserPciDevice>>),
736 }
737 
738 #[derive(Clone)]
739 struct MetaVirtioDevice {
740     virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
741     iommu: bool,
742     id: String,
743     pci_segment: u16,
744     dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
745 }
746 
747 #[derive(Default)]
748 pub struct AcpiPlatformAddresses {
749     pub pm_timer_address: Option<GenericAddress>,
750     pub reset_reg_address: Option<GenericAddress>,
751     pub sleep_control_reg_address: Option<GenericAddress>,
752     pub sleep_status_reg_address: Option<GenericAddress>,
753 }
754 
755 #[cfg(all(feature = "mshv", feature = "sev_snp"))]
756 struct SevSnpPageAccessProxy {
757     vm: Arc<dyn hypervisor::Vm>,
758 }
759 
760 #[cfg(all(feature = "mshv", feature = "sev_snp"))]
761 impl std::fmt::Debug for SevSnpPageAccessProxy {
762     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
763         write!(f, "SNP Page access proxy")
764     }
765 }
766 
767 #[cfg(all(feature = "mshv", feature = "sev_snp"))]
768 impl SevSnpPageAccessProxy {
769     fn new(vm: Arc<dyn hypervisor::Vm>) -> SevSnpPageAccessProxy {
770         SevSnpPageAccessProxy { vm }
771     }
772 }
773 
774 #[cfg(all(feature = "mshv", feature = "sev_snp"))]
775 impl AccessPlatform for SevSnpPageAccessProxy {
776     fn translate_gpa(&self, base: u64, _size: u64) -> std::result::Result<u64, std::io::Error> {
777         Ok(base)
778     }
779 
780     fn translate_gva(&self, base: u64, size: u64) -> std::result::Result<u64, std::io::Error> {
781         self.vm
782             .gain_page_access(base, size as u32)
783             .map_err(io::Error::other)?;
784         Ok(base)
785     }
786 }
787 
788 pub struct DeviceManager {
789     // Manage address space related to devices
790     address_manager: Arc<AddressManager>,
791 
792     // Console abstraction
793     console: Arc<Console>,
794 
795     // Serial Manager
796     serial_manager: Option<Arc<SerialManager>>,
797 
798     // Pipe used to signal console/pty resize events to the VMM
799     console_resize_pipe: Option<Arc<File>>,
800 
801     // Original termios settings, to restore on exit.
802     original_termios_opt: Arc<Mutex<Option<termios>>>,
803 
804     // Interrupt controller
805     #[cfg(target_arch = "x86_64")]
806     interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
807     #[cfg(target_arch = "aarch64")]
808     interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,
809     #[cfg(target_arch = "riscv64")]
810     interrupt_controller: Option<Arc<Mutex<aia::Aia>>>,
811 
812     // Things to be added to the commandline (e.g. aarch64 or riscv64 early console)
813     #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
814     cmdline_additions: Vec<String>,
815 
816     // ACPI GED notification device
817     ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,
818 
819     // VM configuration
820     config: Arc<Mutex<VmConfig>>,
821 
822     // Memory Manager
823     memory_manager: Arc<Mutex<MemoryManager>>,
824 
825     // CPU Manager
826     cpu_manager: Arc<Mutex<CpuManager>>,
827 
828     // The virtio devices on the system
829     virtio_devices: Vec<MetaVirtioDevice>,
830 
831     // List of bus devices
832     // Let the DeviceManager keep strong references to the BusDevice devices.
833     // This allows the IO and MMIO buses to be provided with Weak references,
834     // which prevents cyclic dependencies.
835     bus_devices: Vec<Arc<dyn BusDeviceSync>>,
836 
837     // Counter to keep track of the consumed device IDs.
838     device_id_cnt: Wrapping<usize>,
839 
840     pci_segments: Vec<PciSegment>,
841 
842     #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
843     // MSI Interrupt Manager
844     msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
845 
846     #[cfg_attr(feature = "mshv", allow(dead_code))]
847     // Legacy Interrupt Manager
848     legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,
849 
850     // Passthrough device handle
851     passthrough_device: Option<VfioDeviceFd>,
852 
853     // VFIO container
854     // Only one container can be created, therefore it is stored as part of the
855     // DeviceManager to be reused.
856     vfio_container: Option<Arc<VfioContainer>>,
857 
858     // Paravirtualized IOMMU
859     iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
860     iommu_mapping: Option<Arc<IommuMapping>>,
861 
862     // PCI information about devices attached to the paravirtualized IOMMU
863     // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
864     // representing the devices attached to the virtual IOMMU. This is useful
865     // information for filling the ACPI VIOT table.
866     iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,
867 
868     // Tree of devices, representing the dependencies between devices.
869     // Useful for introspection, snapshot and restore.
870     device_tree: Arc<Mutex<DeviceTree>>,
871 
872     // Exit event
873     exit_evt: EventFd,
874     reset_evt: EventFd,
875 
876     #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
877     id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,
878 
879     // seccomp action
880     seccomp_action: SeccompAction,
881 
882     // List of guest NUMA nodes.
883     numa_nodes: NumaNodes,
884 
885     // Possible handle to the virtio-balloon device
886     balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,
887 
888     // Virtio Device activation EventFd to allow the VMM thread to trigger device
889     // activation and thus start the threads from the VMM thread
890     activate_evt: EventFd,
891 
892     #[cfg(not(target_arch = "riscv64"))]
893     acpi_address: GuestAddress,
894 
895     selected_segment: usize,
896 
897     // Possible handle to the virtio-mem device
898     virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,
899 
900     #[cfg(target_arch = "aarch64")]
901     // GPIO device for AArch64
902     gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,
903 
904     #[cfg(feature = "pvmemcontrol")]
905     pvmemcontrol_devices: Option<(
906         Arc<PvmemcontrolBusDevice>,
907         Arc<Mutex<PvmemcontrolPciDevice>>,
908     )>,
909 
910     // pvpanic device
911     pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,
912 
913     // Flag to force setting the iommu on virtio devices
914     force_iommu: bool,
915 
916     // io_uring availability if detected
917     io_uring_supported: Option<bool>,
918 
919     // aio availability if detected
920     aio_supported: Option<bool>,
921 
922     // List of unique identifiers provided at boot through the configuration.
923     boot_id_list: BTreeSet<String>,
924 
925     #[cfg(not(target_arch = "riscv64"))]
926     // Start time of the VM
927     timestamp: Instant,
928 
929     // Pending activations
930     pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,
931 
932     #[cfg(not(target_arch = "riscv64"))]
933     // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
934     acpi_platform_addresses: AcpiPlatformAddresses,
935 
936     snapshot: Option<Snapshot>,
937 
938     rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,
939 
940     mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
941 }
942 
943 fn create_mmio_allocators(
944     start: u64,
945     end: u64,
946     num_pci_segments: u16,
947     weights: Vec<u32>,
948     alignment: u64,
949 ) -> Vec<Arc<Mutex<AddressAllocator>>> {
950     let total_weight: u32 = weights.iter().sum();
951 
952     // Start each PCI segment mmio range on an aligned boundary
953     let pci_segment_mmio_size = (end - start + 1) / (alignment * total_weight as u64) * alignment;
954 
955     let mut mmio_allocators = vec![];
956     let mut i = 0;
957     for segment_id in 0..num_pci_segments as u64 {
958         let weight = weights[segment_id as usize] as u64;
959         let mmio_start = start + i * pci_segment_mmio_size;
960         let mmio_size = pci_segment_mmio_size * weight;
961         let allocator = Arc::new(Mutex::new(
962             AddressAllocator::new(GuestAddress(mmio_start), mmio_size).unwrap(),
963         ));
964         mmio_allocators.push(allocator);
965         i += weight;
966     }
967 
968     mmio_allocators
969 }
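
// A minimal, self-contained sketch (illustrative test with hypothetical
// addresses and weights) of the weighted aperture split performed by
// create_mmio_allocators(): a 1.5 GiB window shared 1:2 between two PCI
// segments yields a 512 MiB and a 1 GiB allocator, laid out back to back.
#[cfg(test)]
mod mmio_allocator_split_example {
    use super::*;

    #[test]
    fn weighted_split() {
        // Hypothetical 1.5 GiB aperture starting at 4 GiB, 4 KiB alignment.
        let allocators =
            create_mmio_allocators(0x1_0000_0000, 0x1_5fff_ffff, 2, vec![1, 2], 4 << 10);

        assert_eq!(allocators.len(), 2);
        // Segment 0 (weight 1) starts at the bottom of the window...
        assert_eq!(
            allocators[0].lock().unwrap().base(),
            GuestAddress(0x1_0000_0000)
        );
        // ...and segment 1 (weight 2) starts 512 MiB above it.
        assert_eq!(
            allocators[1].lock().unwrap().base(),
            GuestAddress(0x1_2000_0000)
        );
    }
}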
970 
971 impl DeviceManager {
972     #[allow(clippy::too_many_arguments)]
973     pub fn new(
974         io_bus: Arc<Bus>,
975         mmio_bus: Arc<Bus>,
976         vm: Arc<dyn hypervisor::Vm>,
977         config: Arc<Mutex<VmConfig>>,
978         memory_manager: Arc<Mutex<MemoryManager>>,
979         cpu_manager: Arc<Mutex<CpuManager>>,
980         exit_evt: EventFd,
981         reset_evt: EventFd,
982         seccomp_action: SeccompAction,
983         numa_nodes: NumaNodes,
984         activate_evt: &EventFd,
985         force_iommu: bool,
986         boot_id_list: BTreeSet<String>,
987         #[cfg(not(target_arch = "riscv64"))] timestamp: Instant,
988         snapshot: Option<Snapshot>,
989         dynamic: bool,
990     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
991         trace_scoped!("DeviceManager::new");
992 
993         let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
994             let state: DeviceManagerState = snapshot.to_state().unwrap();
995             (
996                 Arc::new(Mutex::new(state.device_tree.clone())),
997                 state.device_id_cnt,
998             )
999         } else {
1000             (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
1001         };
1002 
1003         let num_pci_segments =
1004             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
1005                 platform_config.num_pci_segments
1006             } else {
1007                 1
1008             };
1009 
1010         let mut mmio32_aperture_weights: Vec<u32> =
1011             std::iter::repeat_n(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT, num_pci_segments.into())
1012                 .collect();
1013         if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
1014             for pci_segment in pci_segments.iter() {
1015                 mmio32_aperture_weights[pci_segment.pci_segment as usize] =
1016                     pci_segment.mmio32_aperture_weight
1017             }
1018         }
1019 
1020         let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0;
1021         let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE;
1022         let pci_mmio32_allocators = create_mmio_allocators(
1023             start_of_mmio32_area,
1024             end_of_mmio32_area,
1025             num_pci_segments,
1026             mmio32_aperture_weights,
1027             4 << 10,
1028         );
1029 
1030         let mut mmio64_aperture_weights: Vec<u32> =
1031             std::iter::repeat_n(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT, num_pci_segments.into())
1032                 .collect();
1033         if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
1034             for pci_segment in pci_segments.iter() {
1035                 mmio64_aperture_weights[pci_segment.pci_segment as usize] =
1036                     pci_segment.mmio64_aperture_weight
1037             }
1038         }
1039 
1040         let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0;
1041         let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0;
1042         let pci_mmio64_allocators = create_mmio_allocators(
1043             start_of_mmio64_area,
1044             end_of_mmio64_area,
1045             num_pci_segments,
1046             mmio64_aperture_weights,
1047             4 << 30,
1048         );
1049 
1050         let address_manager = Arc::new(AddressManager {
1051             allocator: memory_manager.lock().unwrap().allocator(),
1052             io_bus,
1053             mmio_bus,
1054             vm: vm.clone(),
1055             device_tree: Arc::clone(&device_tree),
1056             pci_mmio32_allocators,
1057             pci_mmio64_allocators,
1058         });
1059 
1060         // First we create the MSI interrupt manager; the legacy one is created
1061         // later, after the IOAPIC device creation.
1062         // We create the MSI one first because the IOAPIC needs it, and the
1063         // legacy interrupt manager in turn needs the IOAPIC, so we are handling
1064         // a linear dependency chain:
1065         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1066         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1067             Arc::new(MsiInterruptManager::new(
1068                 Arc::clone(&address_manager.allocator),
1069                 vm,
1070             ));
1071 
1072         let acpi_address = address_manager
1073             .allocator
1074             .lock()
1075             .unwrap()
1076             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1077             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1078 
1079         let mut pci_irq_slots = [0; 32];
1080         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1081             &address_manager,
1082             &mut pci_irq_slots,
1083         )?;
1084 
1085         let mut pci_segments = vec![PciSegment::new_default_segment(
1086             &address_manager,
1087             Arc::clone(&address_manager.pci_mmio32_allocators[0]),
1088             Arc::clone(&address_manager.pci_mmio64_allocators[0]),
1089             &pci_irq_slots,
1090         )?];
1091 
1092         for i in 1..num_pci_segments as usize {
1093             pci_segments.push(PciSegment::new(
1094                 i as u16,
1095                 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
1096                 &address_manager,
1097                 Arc::clone(&address_manager.pci_mmio32_allocators[i]),
1098                 Arc::clone(&address_manager.pci_mmio64_allocators[i]),
1099                 &pci_irq_slots,
1100             )?);
1101         }
1102 
1103         if dynamic {
1104             let acpi_address = address_manager
1105                 .allocator
1106                 .lock()
1107                 .unwrap()
1108                 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
1109                 .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1110 
1111             address_manager
1112                 .mmio_bus
1113                 .insert(
1114                     cpu_manager.clone(),
1115                     acpi_address.0,
1116                     CPU_MANAGER_ACPI_SIZE as u64,
1117                 )
1118                 .map_err(DeviceManagerError::BusError)?;
1119 
1120             cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
1121         }
1122 
1123         let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new();
1124         if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() {
1125             for rate_limit_group_cfg in rate_limit_groups_cfg {
1126                 let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config;
1127                 let bw = rate_limit_cfg.bandwidth.unwrap_or_default();
1128                 let ops = rate_limit_cfg.ops.unwrap_or_default();
1129                 let mut rate_limit_group = RateLimiterGroup::new(
1130                     &rate_limit_group_cfg.id,
1131                     bw.size,
1132                     bw.one_time_burst.unwrap_or(0),
1133                     bw.refill_time,
1134                     ops.size,
1135                     ops.one_time_burst.unwrap_or(0),
1136                     ops.refill_time,
1137                 )
1138                 .map_err(DeviceManagerError::RateLimiterGroupCreate)?;
1139 
1140                 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?;
1141 
1142                 rate_limit_group.start_thread(exit_evt).unwrap();
1143                 rate_limit_groups
1144                     .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group));
1145             }
1146         }
1147 
1148         let device_manager = DeviceManager {
1149             address_manager: Arc::clone(&address_manager),
1150             console: Arc::new(Console::default()),
1151             interrupt_controller: None,
1152             #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
1153             cmdline_additions: Vec::new(),
1154             ged_notification_device: None,
1155             config,
1156             memory_manager,
1157             cpu_manager,
1158             virtio_devices: Vec::new(),
1159             bus_devices: Vec::new(),
1160             device_id_cnt,
1161             msi_interrupt_manager,
1162             legacy_interrupt_manager: None,
1163             passthrough_device: None,
1164             vfio_container: None,
1165             iommu_device: None,
1166             iommu_mapping: None,
1167             iommu_attached_devices: None,
1168             pci_segments,
1169             device_tree,
1170             exit_evt,
1171             reset_evt,
1172             #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
1173             id_to_dev_info: HashMap::new(),
1174             seccomp_action,
1175             numa_nodes,
1176             balloon: None,
1177             activate_evt: activate_evt
1178                 .try_clone()
1179                 .map_err(DeviceManagerError::EventFd)?,
1180             #[cfg(not(target_arch = "riscv64"))]
1181             acpi_address,
1182             selected_segment: 0,
1183             serial_manager: None,
1184             console_resize_pipe: None,
1185             original_termios_opt: Arc::new(Mutex::new(None)),
1186             virtio_mem_devices: Vec::new(),
1187             #[cfg(target_arch = "aarch64")]
1188             gpio_device: None,
1189             #[cfg(feature = "pvmemcontrol")]
1190             pvmemcontrol_devices: None,
1191             pvpanic_device: None,
1192             force_iommu,
1193             io_uring_supported: None,
1194             aio_supported: None,
1195             boot_id_list,
1196             #[cfg(not(target_arch = "riscv64"))]
1197             timestamp,
1198             pending_activations: Arc::new(Mutex::new(Vec::default())),
1199             #[cfg(not(target_arch = "riscv64"))]
1200             acpi_platform_addresses: AcpiPlatformAddresses::default(),
1201             snapshot,
1202             rate_limit_groups,
1203             mmio_regions: Arc::new(Mutex::new(Vec::new())),
1204         };
1205 
1206         let device_manager = Arc::new(Mutex::new(device_manager));
1207 
1208         address_manager
1209             .mmio_bus
1210             .insert(
1211                 Arc::clone(&device_manager) as Arc<dyn BusDeviceSync>,
1212                 acpi_address.0,
1213                 DEVICE_MANAGER_ACPI_SIZE as u64,
1214             )
1215             .map_err(DeviceManagerError::BusError)?;
1216 
1217         Ok(device_manager)
1218     }
1219 
1220     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1221         self.console_resize_pipe.clone()
1222     }
1223 
1224     pub fn create_devices(
1225         &mut self,
1226         console_info: Option<ConsoleInfo>,
1227         console_resize_pipe: Option<Arc<File>>,
1228         original_termios_opt: Arc<Mutex<Option<termios>>>,
1229     ) -> DeviceManagerResult<()> {
1230         trace_scoped!("create_devices");
1231 
1232         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1233 
1234         let interrupt_controller = self.add_interrupt_controller()?;
1235 
1236         self.cpu_manager
1237             .lock()
1238             .unwrap()
1239             .set_interrupt_controller(interrupt_controller.clone());
1240 
1241         // Now we can create the legacy interrupt manager, which needs the freshly
1242         // formed IOAPIC device.
1243         let legacy_interrupt_manager: Arc<
1244             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1245         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1246             &interrupt_controller,
1247         )));
1248 
1249         {
1250             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1251                 self.address_manager
1252                     .mmio_bus
1253                     .insert(
1254                         Arc::clone(&self.memory_manager) as Arc<dyn BusDeviceSync>,
1255                         acpi_address.0,
1256                         MEMORY_MANAGER_ACPI_SIZE as u64,
1257                     )
1258                     .map_err(DeviceManagerError::BusError)?;
1259             }
1260         }
1261 
1262         #[cfg(target_arch = "x86_64")]
1263         self.add_legacy_devices(
1264             self.reset_evt
1265                 .try_clone()
1266                 .map_err(DeviceManagerError::EventFd)?,
1267         )?;
1268 
1269         #[cfg(target_arch = "aarch64")]
1270         self.add_legacy_devices(&legacy_interrupt_manager)?;
1271 
1272         {
1273             self.ged_notification_device = self.add_acpi_devices(
1274                 &legacy_interrupt_manager,
1275                 self.reset_evt
1276                     .try_clone()
1277                     .map_err(DeviceManagerError::EventFd)?,
1278                 self.exit_evt
1279                     .try_clone()
1280                     .map_err(DeviceManagerError::EventFd)?,
1281             )?;
1282         }
1283 
1284         self.original_termios_opt = original_termios_opt;
1285 
1286         self.console = self.add_console_devices(
1287             &legacy_interrupt_manager,
1288             &mut virtio_devices,
1289             console_info,
1290             console_resize_pipe,
1291         )?;
1292 
1293         #[cfg(not(target_arch = "riscv64"))]
1294         if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
1295             let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
1296             self.bus_devices
1297                 .push(Arc::clone(&tpm_dev) as Arc<dyn BusDeviceSync>)
1298         }
1299         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1300 
1301         virtio_devices.append(&mut self.make_virtio_devices()?);
1302 
1303         self.add_pci_devices(virtio_devices.clone())?;
1304 
1305         self.virtio_devices = virtio_devices;
1306 
1307         // Add pvmemcontrol if required
1308         #[cfg(feature = "pvmemcontrol")]
1309         {
1310             if self.config.lock().unwrap().pvmemcontrol.is_some() {
1311                 let (pvmemcontrol_bus_device, pvmemcontrol_pci_device) =
1312                     self.make_pvmemcontrol_device()?;
1313                 self.pvmemcontrol_devices =
1314                     Some((pvmemcontrol_bus_device, pvmemcontrol_pci_device));
1315             }
1316         }
1317 
1318         if self.config.clone().lock().unwrap().pvpanic {
1319             self.pvpanic_device = self.add_pvpanic_device()?;
1320         }
1321 
1322         Ok(())
1323     }
1324 
1325     fn state(&self) -> DeviceManagerState {
1326         DeviceManagerState {
1327             device_tree: self.device_tree.lock().unwrap().clone(),
1328             device_id_cnt: self.device_id_cnt,
1329         }
1330     }
1331 
1332     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1333         #[cfg(target_arch = "aarch64")]
1334         {
1335             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1336             let vgic_config = gic::Gic::create_default_config(vcpus.into());
1337             (
1338                 vgic_config.msi_addr,
1339                 vgic_config.msi_addr + vgic_config.msi_size - 1,
1340             )
1341         }
1342         #[cfg(target_arch = "riscv64")]
1343         {
1344             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1345             let vaia_config = aia::Aia::create_default_config(vcpus.into());
1346             (
1347                 vaia_config.imsic_addr,
1348                 vaia_config.imsic_addr + vaia_config.vcpu_count as u64 * arch::layout::IMSIC_SIZE
1349                     - 1,
1350             )
1351         }
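        // On x86_64 the MSI IOVA window is the fixed APIC MSI address range
        // (0xfee0_0000..=0xfeef_ffff), so no per-VM computation is needed.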
1352         #[cfg(target_arch = "x86_64")]
1353         (0xfee0_0000, 0xfeef_ffff)
1354     }
1355 
1356     #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
1357     /// Gets the information of the devices registered up to some point in time.
1358     pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
1359         &self.id_to_dev_info
1360     }
1361 
1362     #[allow(unused_variables)]
1363     fn add_pci_devices(
1364         &mut self,
1365         virtio_devices: Vec<MetaVirtioDevice>,
1366     ) -> DeviceManagerResult<()> {
1367         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1368 
1369         let iommu_address_width_bits =
1370             if let Some(ref platform) = self.config.lock().unwrap().platform {
1371                 platform.iommu_address_width_bits
1372             } else {
1373                 DEFAULT_IOMMU_ADDRESS_WIDTH_BITS
1374             };
1375 
1376         let iommu_device = if self.config.lock().unwrap().iommu {
1377             let (device, mapping) = virtio_devices::Iommu::new(
1378                 iommu_id.clone(),
1379                 self.seccomp_action.clone(),
1380                 self.exit_evt
1381                     .try_clone()
1382                     .map_err(DeviceManagerError::EventFd)?,
1383                 self.get_msi_iova_space(),
1384                 iommu_address_width_bits,
1385                 state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
1386                     .map_err(DeviceManagerError::RestoreGetState)?,
1387             )
1388             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1389             let device = Arc::new(Mutex::new(device));
1390             self.iommu_device = Some(Arc::clone(&device));
1391             self.iommu_mapping = Some(mapping);
1392 
1393             // Fill the device tree with a new node. In case of restore, we
1394             // know there is nothing to do, so we can simply override the
1395             // existing entry.
1396             self.device_tree
1397                 .lock()
1398                 .unwrap()
1399                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1400 
1401             Some(device)
1402         } else {
1403             None
1404         };
1405 
1406         let mut iommu_attached_devices = Vec::new();
1407         {
1408             for handle in virtio_devices {
1409                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1410                     self.iommu_mapping.clone()
1411                 } else {
1412                     None
1413                 };
1414 
1415                 let dev_id = self.add_virtio_pci_device(
1416                     handle.virtio_device,
1417                     &mapping,
1418                     handle.id,
1419                     handle.pci_segment,
1420                     handle.dma_handler,
1421                 )?;
1422 
1423                 if handle.iommu {
1424                     iommu_attached_devices.push(dev_id);
1425                 }
1426             }
1427 
1428             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1429             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1430 
1431             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1432             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1433 
1434             // Add all devices from forced iommu segments
1435             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1436                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1437                     for segment in iommu_segments {
1438                         for device in 0..32 {
1439                             let bdf = PciBdf::new(*segment, 0, device, 0);
1440                             if !iommu_attached_devices.contains(&bdf) {
1441                                 iommu_attached_devices.push(bdf);
1442                             }
1443                         }
1444                     }
1445                 }
1446             }
1447 
1448             if let Some(iommu_device) = iommu_device {
1449                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1450                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1451             }
1452         }
1453 
1454         for segment in &self.pci_segments {
1455             #[cfg(target_arch = "x86_64")]
1456             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1457                 self.bus_devices
1458                     .push(Arc::clone(pci_config_io) as Arc<dyn BusDeviceSync>);
1459             }
1460 
1461             self.bus_devices
1462                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<dyn BusDeviceSync>);
1463         }
1464 
1465         Ok(())
1466     }
1467 
1468     #[cfg(target_arch = "aarch64")]
1469     fn add_interrupt_controller(
1470         &mut self,
1471     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1472         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1473             gic::Gic::new(
1474                 self.config.lock().unwrap().cpus.boot_vcpus,
1475                 Arc::clone(&self.msi_interrupt_manager),
1476                 self.address_manager.vm.clone(),
1477             )
1478             .map_err(DeviceManagerError::CreateInterruptController)?,
1479         ));
1480 
1481         self.interrupt_controller = Some(interrupt_controller.clone());
1482 
1483         // Restore the vGic if the VM is in the process of being restored
1484         let id = String::from(gic::GIC_SNAPSHOT_ID);
1485         if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
1486             // PMU support is optional. Nothing should be impacted if the PMU initialization fails.
1487             if self
1488                 .cpu_manager
1489                 .lock()
1490                 .unwrap()
1491                 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
1492                 .is_err()
1493             {
1494                 info!("Failed to initialize PMU");
1495             }
1496 
1497             let vgic_state = vgic_snapshot
1498                 .to_state()
1499                 .map_err(DeviceManagerError::RestoreGetState)?;
1500             let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
1501             interrupt_controller
1502                 .lock()
1503                 .unwrap()
1504                 .restore_vgic(vgic_state, &saved_vcpu_states)
1505                 .unwrap();
1506         }
1507 
1508         self.device_tree
1509             .lock()
1510             .unwrap()
1511             .insert(id.clone(), device_node!(id, interrupt_controller));
1512 
1513         Ok(interrupt_controller)
1514     }
1515 
1516     #[cfg(target_arch = "aarch64")]
1517     pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
1518         self.interrupt_controller.as_ref()
1519     }
1520 
1521     #[cfg(target_arch = "riscv64")]
1522     fn add_interrupt_controller(
1523         &mut self,
1524     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1525         let interrupt_controller: Arc<Mutex<aia::Aia>> = Arc::new(Mutex::new(
1526             aia::Aia::new(
1527                 self.config.lock().unwrap().cpus.boot_vcpus,
1528                 Arc::clone(&self.msi_interrupt_manager),
1529                 self.address_manager.vm.clone(),
1530             )
1531             .map_err(DeviceManagerError::CreateInterruptController)?,
1532         ));
1533 
1534         self.interrupt_controller = Some(interrupt_controller.clone());
1535 
1536         // Restore the vAia if the VM is in the process of being restored
1537         let id = String::from(aia::_AIA_SNAPSHOT_ID);
1538         if let Some(_vaia_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
1539             // TODO: vAia snapshotting and restoration is scheduled for the next stage of riscv64 support.
1540             // TODO: PMU support is scheduled for the next stage of riscv64 support.
1541             // PMU support is optional. Nothing should be impacted if the PMU initialization fails.
1542             unimplemented!()
1543         }
1544 
1545         self.device_tree
1546             .lock()
1547             .unwrap()
1548             .insert(id.clone(), device_node!(id, interrupt_controller));
1549 
1550         Ok(interrupt_controller)
1551     }
1552 
1553     #[cfg(target_arch = "riscv64")]
1554     pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<aia::Aia>>> {
1555         self.interrupt_controller.as_ref()
1556     }
1557 
1558     #[cfg(target_arch = "x86_64")]
1559     fn add_interrupt_controller(
1560         &mut self,
1561     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1562         let id = String::from(IOAPIC_DEVICE_NAME);
1563 
1564         // Create IOAPIC
1565         let interrupt_controller = Arc::new(Mutex::new(
1566             ioapic::Ioapic::new(
1567                 id.clone(),
1568                 APIC_START,
1569                 Arc::clone(&self.msi_interrupt_manager),
1570                 state_from_id(self.snapshot.as_ref(), id.as_str())
1571                     .map_err(DeviceManagerError::RestoreGetState)?,
1572             )
1573             .map_err(DeviceManagerError::CreateInterruptController)?,
1574         ));
1575 
1576         self.interrupt_controller = Some(interrupt_controller.clone());
1577 
1578         self.address_manager
1579             .mmio_bus
1580             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1581             .map_err(DeviceManagerError::BusError)?;
1582 
1583         self.bus_devices
1584             .push(Arc::clone(&interrupt_controller) as Arc<dyn BusDeviceSync>);
1585 
1586         // Fill the device tree with a new node. In case of restore, we
1587         // know there is nothing to do, so we can simply override the
1588         // existing entry.
1589         self.device_tree
1590             .lock()
1591             .unwrap()
1592             .insert(id.clone(), device_node!(id, interrupt_controller));
1593 
1594         Ok(interrupt_controller)
1595     }
1596 
1597     fn add_acpi_devices(
1598         &mut self,
1599         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1600         reset_evt: EventFd,
1601         exit_evt: EventFd,
1602     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1603         let vcpus_kill_signalled = self
1604             .cpu_manager
1605             .lock()
1606             .unwrap()
1607             .vcpus_kill_signalled()
1608             .clone();
1609         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1610             exit_evt,
1611             reset_evt,
1612             vcpus_kill_signalled,
1613         )));
1614 
1615         self.bus_devices
1616             .push(Arc::clone(&shutdown_device) as Arc<dyn BusDeviceSync>);
1617 
1618         #[cfg(target_arch = "x86_64")]
1619         {
1620             let shutdown_pio_address: u16 = 0x600;
1621 
1622             self.address_manager
1623                 .allocator
1624                 .lock()
1625                 .unwrap()
1626                 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
1627                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1628 
1629             self.address_manager
1630                 .io_bus
1631                 .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
1632                 .map_err(DeviceManagerError::BusError)?;
1633 
1634             self.acpi_platform_addresses.sleep_control_reg_address =
1635                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1636             self.acpi_platform_addresses.sleep_status_reg_address =
1637                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1638             self.acpi_platform_addresses.reset_reg_address =
1639                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1640         }
1641 
1642         let ged_irq = self
1643             .address_manager
1644             .allocator
1645             .lock()
1646             .unwrap()
1647             .allocate_irq()
1648             .unwrap();
1649         let interrupt_group = interrupt_manager
1650             .create_group(LegacyIrqGroupConfig {
1651                 irq: ged_irq as InterruptIndex,
1652             })
1653             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1654         let ged_address = self
1655             .address_manager
1656             .allocator
1657             .lock()
1658             .unwrap()
1659             .allocate_platform_mmio_addresses(
1660                 None,
1661                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1662                 None,
1663             )
1664             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1665         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1666             interrupt_group,
1667             ged_irq,
1668             ged_address,
1669         )));
1670         self.address_manager
1671             .mmio_bus
1672             .insert(
1673                 ged_device.clone(),
1674                 ged_address.0,
1675                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1676             )
1677             .map_err(DeviceManagerError::BusError)?;
1678         self.bus_devices
1679             .push(Arc::clone(&ged_device) as Arc<dyn BusDeviceSync>);
1680 
1681         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1682 
1683         self.bus_devices
1684             .push(Arc::clone(&pm_timer_device) as Arc<dyn BusDeviceSync>);
1685 
1686         #[cfg(target_arch = "x86_64")]
1687         {
1688             let pm_timer_pio_address: u16 = 0x608;
1689 
1690             self.address_manager
1691                 .allocator
1692                 .lock()
1693                 .unwrap()
1694                 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
1695                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1696 
1697             self.address_manager
1698                 .io_bus
1699                 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
1700                 .map_err(DeviceManagerError::BusError)?;
1701 
1702             self.acpi_platform_addresses.pm_timer_address =
1703                 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
1704         }
1705 
1706         Ok(Some(ged_device))
1707     }
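    // A minimal sketch of the fixed x86_64 PIO layout chosen above: the ACPI
    // shutdown/sleep/reset register block is allocated at port 0x600 (8 ports
    // reserved, 4 decoded) and the ACPI PM timer at port 0x608, so the two
    // ranges do not overlap. `example_acpi_pio_ranges` is a hypothetical
    // helper for illustration; the real allocations go through the
    // SystemAllocator above.
    #[allow(dead_code)]
    fn example_acpi_pio_ranges() -> [(u16, u16); 2] {
        let shutdown = (0x600u16, 0x8u16); // (base, reserved length)
        let pm_timer = (0x608u16, 0x4u16);
        // The PM timer starts exactly where the shutdown reservation ends.
        assert_eq!(shutdown.0 + shutdown.1, pm_timer.0);
        [shutdown, pm_timer]
    }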
1708 
1709     #[cfg(target_arch = "x86_64")]
1710     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1711         let vcpus_kill_signalled = self
1712             .cpu_manager
1713             .lock()
1714             .unwrap()
1715             .vcpus_kill_signalled()
1716             .clone();
1717         // Add a shutdown device (i8042)
1718         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1719             reset_evt.try_clone().unwrap(),
1720             vcpus_kill_signalled.clone(),
1721         )));
1722 
1723         self.bus_devices
1724             .push(Arc::clone(&i8042) as Arc<dyn BusDeviceSync>);
1725 
1726         self.address_manager
1727             .io_bus
1728             .insert(i8042, 0x61, 0x4)
1729             .map_err(DeviceManagerError::BusError)?;
1730         {
1731             // Add an emulated CMOS device
1732             let mem_size = self
1733                 .memory_manager
1734                 .lock()
1735                 .unwrap()
1736                 .guest_memory()
1737                 .memory()
1738                 .last_addr()
1739                 .0
1740                 + 1;
1741             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1742             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1743 
1744             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1745                 mem_below_4g,
1746                 mem_above_4g,
1747                 reset_evt,
1748                 Some(vcpus_kill_signalled),
1749             )));
1750 
1751             self.bus_devices
1752                 .push(Arc::clone(&cmos) as Arc<dyn BusDeviceSync>);
1753 
1754             self.address_manager
1755                 .io_bus
1756                 .insert(cmos, 0x70, 0x2)
1757                 .map_err(DeviceManagerError::BusError)?;
1758 
1759             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1760 
1761             self.bus_devices
1762                 .push(Arc::clone(&fwdebug) as Arc<dyn BusDeviceSync>);
1763 
1764             self.address_manager
1765                 .io_bus
1766                 .insert(fwdebug, 0x402, 0x1)
1767                 .map_err(DeviceManagerError::BusError)?;
1768         }
1769 
1770         // 0x80 debug port
1771         let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
1772         self.bus_devices
1773             .push(Arc::clone(&debug_port) as Arc<dyn BusDeviceSync>);
1774         self.address_manager
1775             .io_bus
1776             .insert(debug_port, 0x80, 0x1)
1777             .map_err(DeviceManagerError::BusError)?;
1778 
1779         Ok(())
1780     }
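    // A worked sketch of the below/above-4G split computed for the CMOS device
    // above, under the assumption that the 32-bit reserved hole starts at 3 GiB
    // and 64-bit RAM resumes at 4 GiB; the real values come from arch::layout
    // (MEM_32BIT_RESERVED_START / RAM_64BIT_START) and may differ. For example,
    // a 6 GiB guest yields (3 GiB below, 2 GiB above).
    #[allow(dead_code)]
    fn example_cmos_memory_split(mem_size: u64) -> (u64, u64) {
        const EXAMPLE_MEM_32BIT_RESERVED_START: u64 = 3 << 30; // assumed 3 GiB
        const EXAMPLE_RAM_64BIT_START: u64 = 4 << 30; // assumed 4 GiB
        let mem_below_4g = std::cmp::min(EXAMPLE_MEM_32BIT_RESERVED_START, mem_size);
        let mem_above_4g = mem_size.saturating_sub(EXAMPLE_RAM_64BIT_START);
        (mem_below_4g, mem_above_4g)
    }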
1781 
1782     #[cfg(target_arch = "aarch64")]
1783     fn add_legacy_devices(
1784         &mut self,
1785         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1786     ) -> DeviceManagerResult<()> {
1787         // Add an RTC device
1788         let rtc_irq = self
1789             .address_manager
1790             .allocator
1791             .lock()
1792             .unwrap()
1793             .allocate_irq()
1794             .unwrap();
1795 
1796         let interrupt_group = interrupt_manager
1797             .create_group(LegacyIrqGroupConfig {
1798                 irq: rtc_irq as InterruptIndex,
1799             })
1800             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1801 
1802         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1803 
1804         self.bus_devices
1805             .push(Arc::clone(&rtc_device) as Arc<dyn BusDeviceSync>);
1806 
1807         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1808 
1809         self.address_manager
1810             .mmio_bus
1811             .insert(rtc_device, addr.0, MMIO_LEN)
1812             .map_err(DeviceManagerError::BusError)?;
1813 
1814         self.id_to_dev_info.insert(
1815             (DeviceType::Rtc, "rtc".to_string()),
1816             MmioDeviceInfo {
1817                 addr: addr.0,
1818                 len: MMIO_LEN,
1819                 irq: rtc_irq,
1820             },
1821         );
1822 
1823         // Add a GPIO device
1824         let id = String::from(GPIO_DEVICE_NAME);
1825         let gpio_irq = self
1826             .address_manager
1827             .allocator
1828             .lock()
1829             .unwrap()
1830             .allocate_irq()
1831             .unwrap();
1832 
1833         let interrupt_group = interrupt_manager
1834             .create_group(LegacyIrqGroupConfig {
1835                 irq: gpio_irq as InterruptIndex,
1836             })
1837             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1838 
1839         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1840             id.clone(),
1841             interrupt_group,
1842             state_from_id(self.snapshot.as_ref(), id.as_str())
1843                 .map_err(DeviceManagerError::RestoreGetState)?,
1844         )));
1845 
1846         self.bus_devices
1847             .push(Arc::clone(&gpio_device) as Arc<dyn BusDeviceSync>);
1848 
1849         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1850 
1851         self.address_manager
1852             .mmio_bus
1853             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1854             .map_err(DeviceManagerError::BusError)?;
1855 
1856         self.gpio_device = Some(gpio_device.clone());
1857 
1858         self.id_to_dev_info.insert(
1859             (DeviceType::Gpio, "gpio".to_string()),
1860             MmioDeviceInfo {
1861                 addr: addr.0,
1862                 len: MMIO_LEN,
1863                 irq: gpio_irq,
1864             },
1865         );
1866 
1867         self.device_tree
1868             .lock()
1869             .unwrap()
1870             .insert(id.clone(), device_node!(id, gpio_device));
1871 
1872         Ok(())
1873     }
1874 
1875     #[cfg(target_arch = "x86_64")]
1876     fn add_debug_console_device(
1877         &mut self,
1878         debug_console_writer: Box<dyn io::Write + Send>,
1879     ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> {
1880         let id = String::from(DEBUGCON_DEVICE_NAME);
1881         let debug_console = Arc::new(Mutex::new(DebugConsole::new(
1882             id.clone(),
1883             debug_console_writer,
1884         )));
1885 
1886         let port = self
1887             .config
1888             .lock()
1889             .unwrap()
1890             .debug_console
1891             .clone()
1892             .iobase
1893             .map(|port| port as u64)
1894             .unwrap_or(debug_console::DEFAULT_PORT);
1895 
1896         self.bus_devices
1897             .push(Arc::clone(&debug_console) as Arc<dyn BusDeviceSync>);
1898 
1899         self.address_manager
1900             .allocator
1901             .lock()
1902             .unwrap()
1903             .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None)
1904             .ok_or(DeviceManagerError::AllocateIoPort)?;
1905 
1906         self.address_manager
1907             .io_bus
1908             .insert(debug_console.clone(), port, 0x1)
1909             .map_err(DeviceManagerError::BusError)?;
1910 
1911         // Fill the device tree with a new node. In case of restore, we
1912         // know there is nothing to do, so we can simply override the
1913         // existing entry.
1914         self.device_tree
1915             .lock()
1916             .unwrap()
1917             .insert(id.clone(), device_node!(id, debug_console));
1918 
1919         Ok(debug_console)
1920     }
1921 
1922     #[cfg(target_arch = "x86_64")]
1923     fn add_serial_device(
1924         &mut self,
1925         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1926         serial_writer: Option<Box<dyn io::Write + Send>>,
1927     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1928         // Serial is tied to IRQ #4
1929         let serial_irq = 4;
1930 
1931         let id = String::from(SERIAL_DEVICE_NAME);
1932 
1933         let interrupt_group = interrupt_manager
1934             .create_group(LegacyIrqGroupConfig {
1935                 irq: serial_irq as InterruptIndex,
1936             })
1937             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1938 
1939         let serial = Arc::new(Mutex::new(Serial::new(
1940             id.clone(),
1941             interrupt_group,
1942             serial_writer,
1943             state_from_id(self.snapshot.as_ref(), id.as_str())
1944                 .map_err(DeviceManagerError::RestoreGetState)?,
1945         )));
1946 
1947         self.bus_devices
1948             .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);
1949 
1950         self.address_manager
1951             .allocator
1952             .lock()
1953             .unwrap()
1954             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1955             .ok_or(DeviceManagerError::AllocateIoPort)?;
1956 
1957         self.address_manager
1958             .io_bus
1959             .insert(serial.clone(), 0x3f8, 0x8)
1960             .map_err(DeviceManagerError::BusError)?;
1961 
1962         // Fill the device tree with a new node. In case of restore, we
1963         // know there is nothing to do, so we can simply override the
1964         // existing entry.
1965         self.device_tree
1966             .lock()
1967             .unwrap()
1968             .insert(id.clone(), device_node!(id, serial));
1969 
1970         Ok(serial)
1971     }
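    // The fixed 0x3f8/IRQ4 pair above matches the legacy PC COM1 assignment.
    // An illustrative table of the conventional COM port bases and IRQs (a
    // sketch for reference only; only COM1 is emulated here).
    #[allow(dead_code)]
    fn example_legacy_com_ports() -> [(&'static str, u16, u8); 4] {
        [
            ("COM1", 0x3f8, 4),
            ("COM2", 0x2f8, 3),
            ("COM3", 0x3e8, 4),
            ("COM4", 0x2e8, 3),
        ]
    }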
1972 
1973     #[cfg(target_arch = "aarch64")]
1974     fn add_serial_device(
1975         &mut self,
1976         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1977         serial_writer: Option<Box<dyn io::Write + Send>>,
1978     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1979         let id = String::from(SERIAL_DEVICE_NAME);
1980 
1981         let serial_irq = self
1982             .address_manager
1983             .allocator
1984             .lock()
1985             .unwrap()
1986             .allocate_irq()
1987             .unwrap();
1988 
1989         let interrupt_group = interrupt_manager
1990             .create_group(LegacyIrqGroupConfig {
1991                 irq: serial_irq as InterruptIndex,
1992             })
1993             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1994 
1995         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1996             id.clone(),
1997             interrupt_group,
1998             serial_writer,
1999             self.timestamp,
2000             state_from_id(self.snapshot.as_ref(), id.as_str())
2001                 .map_err(DeviceManagerError::RestoreGetState)?,
2002         )));
2003 
2004         self.bus_devices
2005             .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);
2006 
2007         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
2008 
2009         self.address_manager
2010             .mmio_bus
2011             .insert(serial.clone(), addr.0, MMIO_LEN)
2012             .map_err(DeviceManagerError::BusError)?;
2013 
2014         self.id_to_dev_info.insert(
2015             (DeviceType::Serial, DeviceType::Serial.to_string()),
2016             MmioDeviceInfo {
2017                 addr: addr.0,
2018                 len: MMIO_LEN,
2019                 irq: serial_irq,
2020             },
2021         );
2022 
2023         self.cmdline_additions
2024             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
2025 
2026         // Fill the device tree with a new node. In case of restore, we
2027         // know there is nothing to do, so we can simply override the
2028         // existing entry.
2029         self.device_tree
2030             .lock()
2031             .unwrap()
2032             .insert(id.clone(), device_node!(id, serial));
2033 
2034         Ok(serial)
2035     }
2036 
2037     #[cfg(target_arch = "riscv64")]
2038     fn add_serial_device(
2039         &mut self,
2040         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
2041         serial_writer: Option<Box<dyn io::Write + Send>>,
2042     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
2043         let id = String::from(SERIAL_DEVICE_NAME);
2044 
2045         let serial_irq = self
2046             .address_manager
2047             .allocator
2048             .lock()
2049             .unwrap()
2050             .allocate_irq()
2051             .unwrap();
2052 
2053         let interrupt_group = interrupt_manager
2054             .create_group(LegacyIrqGroupConfig {
2055                 irq: serial_irq as InterruptIndex,
2056             })
2057             .map_err(DeviceManagerError::CreateInterruptGroup)?;
2058 
2059         let serial = Arc::new(Mutex::new(Serial::new(
2060             id.clone(),
2061             interrupt_group,
2062             serial_writer,
2063             state_from_id(self.snapshot.as_ref(), id.as_str())
2064                 .map_err(DeviceManagerError::RestoreGetState)?,
2065         )));
2066 
2067         self.bus_devices
2068             .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);
2069 
2070         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
2071 
2072         self.address_manager
2073             .mmio_bus
2074             .insert(serial.clone(), addr.0, MMIO_LEN)
2075             .map_err(DeviceManagerError::BusError)?;
2076 
2077         self.id_to_dev_info.insert(
2078             (DeviceType::Serial, DeviceType::Serial.to_string()),
2079             MmioDeviceInfo {
2080                 addr: addr.0,
2081                 len: MMIO_LEN,
2082                 irq: serial_irq,
2083             },
2084         );
2085 
2086         self.cmdline_additions
2087             .push(format!("earlycon=uart,mmio,0x{:08x}", addr.0));
2088 
2089         // Fill the device tree with a new node. In case of restore, we
2090         // know there is nothing to do, so we can simply override the
2091         // existing entry.
2092         self.device_tree
2093             .lock()
2094             .unwrap()
2095             .insert(id.clone(), device_node!(id, serial));
2096 
2097         Ok(serial)
2098     }
2099 
2100     fn add_virtio_console_device(
2101         &mut self,
2102         virtio_devices: &mut Vec<MetaVirtioDevice>,
2103         console_fd: ConsoleOutput,
2104         resize_pipe: Option<Arc<File>>,
2105     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
2106         let console_config = self.config.lock().unwrap().console.clone();
2107         let endpoint = match console_fd {
2108             ConsoleOutput::File(file) => Endpoint::File(file),
2109             ConsoleOutput::Pty(file) => {
2110                 self.console_resize_pipe = resize_pipe;
2111                 Endpoint::PtyPair(Arc::new(file.try_clone().unwrap()), file)
2112             }
2113             ConsoleOutput::Tty(stdout) => {
2114                 if stdout.is_terminal() {
2115                     self.console_resize_pipe = resize_pipe;
2116                 }
2117 
2118                 // If this is an interactive TTY, we can accept input
2119                 // SAFETY: FFI call. Trivially safe.
2120                 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
2121                     // SAFETY: FFI call to dup. Trivially safe.
2122                     let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
2123                     if stdin == -1 {
2124                         return vmm_sys_util::errno::errno_result()
2125                             .map_err(DeviceManagerError::DupFd);
2126                     }
2127                     // SAFETY: stdin is valid and owned solely by us.
2128                     let stdin = unsafe { File::from_raw_fd(stdin) };
2129                     Endpoint::FilePair(stdout, Arc::new(stdin))
2130                 } else {
2131                     Endpoint::File(stdout)
2132                 }
2133             }
2134             ConsoleOutput::Socket(_) => {
2135                 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
2136             }
2137             ConsoleOutput::Null => Endpoint::Null,
2138             ConsoleOutput::Off => return Ok(None),
2139         };
2140         let id = String::from(CONSOLE_DEVICE_NAME);
2141 
2142         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
2143             id.clone(),
2144             endpoint,
2145             self.console_resize_pipe
2146                 .as_ref()
2147                 .map(|p| p.try_clone().unwrap()),
2148             self.force_iommu | console_config.iommu,
2149             self.seccomp_action.clone(),
2150             self.exit_evt
2151                 .try_clone()
2152                 .map_err(DeviceManagerError::EventFd)?,
2153             state_from_id(self.snapshot.as_ref(), id.as_str())
2154                 .map_err(DeviceManagerError::RestoreGetState)?,
2155         )
2156         .map_err(DeviceManagerError::CreateVirtioConsole)?;
2157         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
2158         virtio_devices.push(MetaVirtioDevice {
2159             virtio_device: Arc::clone(&virtio_console_device)
2160                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2161             iommu: console_config.iommu,
2162             id: id.clone(),
2163             pci_segment: 0,
2164             dma_handler: None,
2165         });
2166 
2167         // Fill the device tree with a new node. In case of restore, we
2168         // know there is nothing to do, so we can simply override the
2169         // existing entry.
2170         self.device_tree
2171             .lock()
2172             .unwrap()
2173             .insert(id.clone(), device_node!(id, virtio_console_device));
2174 
2175         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
2176         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
2177             Some(console_resizer)
2178         } else {
2179             None
2180         })
2181     }
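    // A minimal standalone sketch of the stdin duplication done in the Tty
    // branch above: dup(2) the descriptor, check for failure, then take
    // ownership of the copy as a File. `example_dup_stdin` is a hypothetical
    // helper for illustration only.
    #[allow(dead_code)]
    fn example_dup_stdin() -> std::io::Result<File> {
        use std::os::unix::io::FromRawFd;
        // SAFETY: FFI call with no invariants beyond a valid fd number.
        let fd = unsafe { libc::dup(libc::STDIN_FILENO) };
        if fd == -1 {
            return Err(std::io::Error::last_os_error());
        }
        // SAFETY: `fd` is a freshly duplicated descriptor owned solely by us.
        Ok(unsafe { File::from_raw_fd(fd) })
    }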
2182 
2183     /// Adds all devices that behave like a console with respect to the VM
2184     /// configuration. This includes:
2185     /// - debug-console
2186     /// - serial-console
2187     /// - virtio-console
2188     fn add_console_devices(
2189         &mut self,
2190         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
2191         virtio_devices: &mut Vec<MetaVirtioDevice>,
2192         console_info: Option<ConsoleInfo>,
2193         console_resize_pipe: Option<Arc<File>>,
2194     ) -> DeviceManagerResult<Arc<Console>> {
2195         let serial_config = self.config.lock().unwrap().serial.clone();
2196         if console_info.is_none() {
2197             return Err(DeviceManagerError::InvalidConsoleInfo);
2198         }
2199 
2200         // SAFETY: console_info is Some, so it's safe to unwrap.
2201         let console_info = console_info.unwrap();
2202 
2203         let serial_writer: Option<Box<dyn io::Write + Send>> = match console_info.serial_main_fd {
2204             ConsoleOutput::File(ref file) | ConsoleOutput::Tty(ref file) => {
2205                 Some(Box::new(Arc::clone(file)))
2206             }
2207             ConsoleOutput::Off
2208             | ConsoleOutput::Null
2209             | ConsoleOutput::Pty(_)
2210             | ConsoleOutput::Socket(_) => None,
2211         };
2212 
2213         if !matches!(console_info.serial_main_fd, ConsoleOutput::Off) {
2214             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
2215             self.serial_manager = match console_info.serial_main_fd {
2216                 ConsoleOutput::Pty(_) | ConsoleOutput::Tty(_) | ConsoleOutput::Socket(_) => {
2217                     let serial_manager = SerialManager::new(
2218                         serial,
2219                         console_info.serial_main_fd,
2220                         serial_config.socket,
2221                     )
2222                     .map_err(DeviceManagerError::CreateSerialManager)?;
2223                     if let Some(mut serial_manager) = serial_manager {
2224                         serial_manager
2225                             .start_thread(
2226                                 self.exit_evt
2227                                     .try_clone()
2228                                     .map_err(DeviceManagerError::EventFd)?,
2229                             )
2230                             .map_err(DeviceManagerError::SpawnSerialManager)?;
2231                         Some(Arc::new(serial_manager))
2232                     } else {
2233                         None
2234                     }
2235                 }
2236                 _ => None,
2237             };
2238         }
2239 
2240         #[cfg(target_arch = "x86_64")]
2241         {
2242             let debug_console_writer: Option<Box<dyn io::Write + Send>> =
2243                 match console_info.debug_main_fd {
2244                     ConsoleOutput::File(file) | ConsoleOutput::Tty(file) => Some(Box::new(file)),
2245                     ConsoleOutput::Off
2246                     | ConsoleOutput::Null
2247                     | ConsoleOutput::Pty(_)
2248                     | ConsoleOutput::Socket(_) => None,
2249                 };
2250             if let Some(writer) = debug_console_writer {
2251                 let _ = self.add_debug_console_device(writer)?;
2252             }
2253         }
2254 
2255         let console_resizer = self.add_virtio_console_device(
2256             virtio_devices,
2257             console_info.console_main_fd,
2258             console_resize_pipe,
2259         )?;
2260 
2261         Ok(Arc::new(Console { console_resizer }))
2262     }
2263 
2264     #[cfg(not(target_arch = "riscv64"))]
2265     fn add_tpm_device(
2266         &mut self,
2267         tpm_path: PathBuf,
2268     ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
2269         // Create TPM Device
2270         let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
2271             DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
2272         })?;
2273         let tpm = Arc::new(Mutex::new(tpm));
2274 
2275         // Add TPM Device to mmio
2276         self.address_manager
2277             .mmio_bus
2278             .insert(
2279                 tpm.clone(),
2280                 arch::layout::TPM_START.0,
2281                 arch::layout::TPM_SIZE,
2282             )
2283             .map_err(DeviceManagerError::BusError)?;
2284 
2285         Ok(tpm)
2286     }
2287 
2288     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2289         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
2290 
2291         // Create "standard" virtio devices (net/block/rng)
2292         devices.append(&mut self.make_virtio_block_devices()?);
2293         devices.append(&mut self.make_virtio_net_devices()?);
2294         devices.append(&mut self.make_virtio_rng_devices()?);
2295 
2296         // Add virtio-fs if required
2297         devices.append(&mut self.make_virtio_fs_devices()?);
2298 
2299         // Add virtio-pmem if required
2300         devices.append(&mut self.make_virtio_pmem_devices()?);
2301 
2302         // Add virtio-vsock if required
2303         devices.append(&mut self.make_virtio_vsock_devices()?);
2304 
2305         devices.append(&mut self.make_virtio_mem_devices()?);
2306 
2307         // Add virtio-balloon if required
2308         devices.append(&mut self.make_virtio_balloon_devices()?);
2309 
2310         // Add virtio-watchdog device
2311         devices.append(&mut self.make_virtio_watchdog_devices()?);
2312 
2313         // Add vDPA devices if required
2314         devices.append(&mut self.make_vdpa_devices()?);
2315 
2316         Ok(devices)
2317     }
2318 
2319     // Cache whether aio is supported to avoid checking for every block device
2320     fn aio_is_supported(&mut self) -> bool {
2321         if let Some(supported) = self.aio_supported {
2322             return supported;
2323         }
2324 
2325         let supported = block_aio_is_supported();
2326         self.aio_supported = Some(supported);
2327         supported
2328     }
2329 
2330     // Cache whether io_uring is supported to avoid probing for every block device
2331     fn io_uring_is_supported(&mut self) -> bool {
2332         if let Some(supported) = self.io_uring_supported {
2333             return supported;
2334         }
2335 
2336         let supported = block_io_uring_is_supported();
2337         self.io_uring_supported = Some(supported);
2338         supported
2339     }
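    // A generic sketch of the probe-once-and-cache pattern used by
    // aio_is_supported()/io_uring_is_supported() above: the (possibly
    // expensive) host capability probe runs at most once, with the result
    // memoized in an Option<bool>. The type and function are hypothetical and
    // only illustrate the pattern.
    #[allow(dead_code)]
    fn example_capability_cache_demo() {
        struct ExampleCapabilityCache {
            cached: Option<bool>,
        }
        impl ExampleCapabilityCache {
            fn is_supported(&mut self, probe: impl FnOnce() -> bool) -> bool {
                *self.cached.get_or_insert_with(probe)
            }
        }
        let mut cache = ExampleCapabilityCache { cached: None };
        // The probe closure runs only on the first call; later calls reuse the result.
        let first = cache.is_supported(|| true);
        let second = cache.is_supported(|| unreachable!("probe must not run twice"));
        assert!(first && second);
    }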
2340 
2341     fn make_virtio_block_device(
2342         &mut self,
2343         disk_cfg: &mut DiskConfig,
2344     ) -> DeviceManagerResult<MetaVirtioDevice> {
2345         let id = if let Some(id) = &disk_cfg.id {
2346             id.clone()
2347         } else {
2348             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
2349             disk_cfg.id = Some(id.clone());
2350             id
2351         };
2352 
2353         info!("Creating virtio-block device: {:?}", disk_cfg);
2354 
2355         let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
2356             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
2357             let vu_cfg = VhostUserConfig {
2358                 socket,
2359                 num_queues: disk_cfg.num_queues,
2360                 queue_size: disk_cfg.queue_size,
2361             };
2362             let vhost_user_block = Arc::new(Mutex::new(
2363                 match virtio_devices::vhost_user::Blk::new(
2364                     id.clone(),
2365                     vu_cfg,
2366                     self.seccomp_action.clone(),
2367                     self.exit_evt
2368                         .try_clone()
2369                         .map_err(DeviceManagerError::EventFd)?,
2370                     self.force_iommu,
2371                     state_from_id(self.snapshot.as_ref(), id.as_str())
2372                         .map_err(DeviceManagerError::RestoreGetState)?,
2373                 ) {
2374                     Ok(vub_device) => vub_device,
2375                     Err(e) => {
2376                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2377                     }
2378                 },
2379             ));
2380 
2381             (
2382                 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2383                 vhost_user_block as Arc<Mutex<dyn Migratable>>,
2384             )
2385         } else {
2386             let mut options = OpenOptions::new();
2387             options.read(true);
2388             options.write(!disk_cfg.readonly);
2389             if disk_cfg.direct {
2390                 options.custom_flags(libc::O_DIRECT);
2391             }
2392             // Open block device path
2393             let mut file: File = options
2394                 .open(
2395                     disk_cfg
2396                         .path
2397                         .as_ref()
2398                         .ok_or(DeviceManagerError::NoDiskPath)?
2399                         .clone(),
2400                 )
2401                 .map_err(DeviceManagerError::Disk)?;
2402             let image_type =
2403                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2404 
2405             let image = match image_type {
2406                 ImageType::FixedVhd => {
2407                     // Use an asynchronous backend relying on io_uring if the
2408                     // syscalls are supported.
2409                     if cfg!(feature = "io_uring")
2410                         && !disk_cfg.disable_io_uring
2411                         && self.io_uring_is_supported()
2412                     {
2413                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2414 
2415                         #[cfg(not(feature = "io_uring"))]
2416                         unreachable!("Checked in if statement above");
2417                         #[cfg(feature = "io_uring")]
2418                         {
2419                             Box::new(
2420                                 FixedVhdDiskAsync::new(file)
2421                                     .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2422                             ) as Box<dyn DiskFile>
2423                         }
2424                     } else {
2425                         info!("Using synchronous fixed VHD disk file");
2426                         Box::new(
2427                             FixedVhdDiskSync::new(file)
2428                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2429                         ) as Box<dyn DiskFile>
2430                     }
2431                 }
2432                 ImageType::Raw => {
2433                     // Use an asynchronous backend relying on io_uring if the
2434                     // syscalls are supported.
2435                     if cfg!(feature = "io_uring")
2436                         && !disk_cfg.disable_io_uring
2437                         && self.io_uring_is_supported()
2438                     {
2439                         info!("Using asynchronous RAW disk file (io_uring)");
2440 
2441                         #[cfg(not(feature = "io_uring"))]
2442                         unreachable!("Checked in if statement above");
2443                         #[cfg(feature = "io_uring")]
2444                         {
2445                             Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2446                         }
2447                     } else if !disk_cfg.disable_aio && self.aio_is_supported() {
2448                         info!("Using asynchronous RAW disk file (aio)");
2449                         Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
2450                     } else {
2451                         info!("Using synchronous RAW disk file");
2452                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2453                     }
2454                 }
2455                 ImageType::Qcow2 => {
2456                     info!("Using synchronous QCOW disk file");
2457                     Box::new(
2458                         QcowDiskSync::new(file, disk_cfg.direct)
2459                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2460                     ) as Box<dyn DiskFile>
2461                 }
2462                 ImageType::Vhdx => {
2463                     info!("Using synchronous VHDX disk file");
2464                     Box::new(
2465                         VhdxDiskSync::new(file)
2466                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2467                     ) as Box<dyn DiskFile>
2468                 }
2469             };
2470 
2471             let rate_limit_group =
2472                 if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
2473                     // Create an anonymous RateLimiterGroup that is dropped when the Disk
2474                     // is dropped.
2475                     let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
2476                     let ops = rate_limiter_cfg.ops.unwrap_or_default();
2477                     let mut rate_limit_group = RateLimiterGroup::new(
2478                         disk_cfg.id.as_ref().unwrap(),
2479                         bw.size,
2480                         bw.one_time_burst.unwrap_or(0),
2481                         bw.refill_time,
2482                         ops.size,
2483                         ops.one_time_burst.unwrap_or(0),
2484                         ops.refill_time,
2485                     )
2486                     .map_err(DeviceManagerError::RateLimiterGroupCreate)?;
2487 
2488                     rate_limit_group
2489                         .start_thread(
2490                             self.exit_evt
2491                                 .try_clone()
2492                                 .map_err(DeviceManagerError::EventFd)?,
2493                         )
2494                         .unwrap();
2495 
2496                     Some(Arc::new(rate_limit_group))
2497                 } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
2498                     self.rate_limit_groups.get(rate_limit_group).cloned()
2499                 } else {
2500                     None
2501                 };
2502 
2503             let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() {
2504                 queue_affinity
2505                     .iter()
2506                     .map(|a| (a.queue_index, a.host_cpus.clone()))
2507                     .collect()
2508             } else {
2509                 BTreeMap::new()
2510             };
2511 
2512             let virtio_block = Arc::new(Mutex::new(
2513                 virtio_devices::Block::new(
2514                     id.clone(),
2515                     image,
2516                     disk_cfg
2517                         .path
2518                         .as_ref()
2519                         .ok_or(DeviceManagerError::NoDiskPath)?
2520                         .clone(),
2521                     disk_cfg.readonly,
2522                     self.force_iommu | disk_cfg.iommu,
2523                     disk_cfg.num_queues,
2524                     disk_cfg.queue_size,
2525                     disk_cfg.serial.clone(),
2526                     self.seccomp_action.clone(),
2527                     rate_limit_group,
2528                     self.exit_evt
2529                         .try_clone()
2530                         .map_err(DeviceManagerError::EventFd)?,
2531                     state_from_id(self.snapshot.as_ref(), id.as_str())
2532                         .map_err(DeviceManagerError::RestoreGetState)?,
2533                     queue_affinity,
2534                 )
2535                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2536             ));
2537 
2538             (
2539                 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2540                 virtio_block as Arc<Mutex<dyn Migratable>>,
2541             )
2542         };
2543 
2544         // Fill the device tree with a new node. In case of restore, we
2545         // know there is nothing to do, so we can simply override the
2546         // existing entry.
2547         self.device_tree
2548             .lock()
2549             .unwrap()
2550             .insert(id.clone(), device_node!(id, migratable_device));
2551 
2552         Ok(MetaVirtioDevice {
2553             virtio_device,
2554             iommu: disk_cfg.iommu,
2555             id,
2556             pci_segment: disk_cfg.pci_segment,
2557             dma_handler: None,
2558         })
2559     }
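    // A simplified sketch of the raw-image backend selection performed above:
    // prefer io_uring when compiled in, not disabled for this disk and
    // supported by the host; otherwise fall back to AIO, then to synchronous
    // I/O. The function is hypothetical and only mirrors the decision order,
    // not the actual DiskFile construction.
    #[allow(dead_code)]
    fn example_raw_disk_backend(
        disable_io_uring: bool,
        disable_aio: bool,
        io_uring_supported: bool,
        aio_supported: bool,
    ) -> &'static str {
        if cfg!(feature = "io_uring") && !disable_io_uring && io_uring_supported {
            "io_uring"
        } else if !disable_aio && aio_supported {
            "aio"
        } else {
            "sync"
        }
    }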
2560 
2561     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2562         let mut devices = Vec::new();
2563 
2564         let mut block_devices = self.config.lock().unwrap().disks.clone();
2565         if let Some(disk_list_cfg) = &mut block_devices {
2566             for disk_cfg in disk_list_cfg.iter_mut() {
2567                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2568             }
2569         }
2570         self.config.lock().unwrap().disks = block_devices;
2571 
2572         Ok(devices)
2573     }
2574 
2575     fn make_virtio_net_device(
2576         &mut self,
2577         net_cfg: &mut NetConfig,
2578     ) -> DeviceManagerResult<MetaVirtioDevice> {
2579         let id = if let Some(id) = &net_cfg.id {
2580             id.clone()
2581         } else {
2582             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2583             net_cfg.id = Some(id.clone());
2584             id
2585         };
2586         info!("Creating virtio-net device: {:?}", net_cfg);
2587 
2588         let (virtio_device, migratable_device) = if net_cfg.vhost_user {
2589             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2590             let vu_cfg = VhostUserConfig {
2591                 socket,
2592                 num_queues: net_cfg.num_queues,
2593                 queue_size: net_cfg.queue_size,
2594             };
2595             let server = match net_cfg.vhost_mode {
2596                 VhostMode::Client => false,
2597                 VhostMode::Server => true,
2598             };
2599             let vhost_user_net = Arc::new(Mutex::new(
2600                 match virtio_devices::vhost_user::Net::new(
2601                     id.clone(),
2602                     net_cfg.mac,
2603                     net_cfg.mtu,
2604                     vu_cfg,
2605                     server,
2606                     self.seccomp_action.clone(),
2607                     self.exit_evt
2608                         .try_clone()
2609                         .map_err(DeviceManagerError::EventFd)?,
2610                     self.force_iommu,
2611                     state_from_id(self.snapshot.as_ref(), id.as_str())
2612                         .map_err(DeviceManagerError::RestoreGetState)?,
2613                     net_cfg.offload_tso,
2614                     net_cfg.offload_ufo,
2615                     net_cfg.offload_csum,
2616                 ) {
2617                     Ok(vun_device) => vun_device,
2618                     Err(e) => {
2619                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2620                     }
2621                 },
2622             ));
2623 
2624             (
2625                 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2626                 vhost_user_net as Arc<Mutex<dyn Migratable>>,
2627             )
2628         } else {
2629             let state = state_from_id(self.snapshot.as_ref(), id.as_str())
2630                 .map_err(DeviceManagerError::RestoreGetState)?;
2631             let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
2632                 Arc::new(Mutex::new(
2633                     virtio_devices::Net::new(
2634                         id.clone(),
2635                         Some(tap_if_name),
2636                         Some(net_cfg.ip),
2637                         Some(net_cfg.mask),
2638                         Some(net_cfg.mac),
2639                         &mut net_cfg.host_mac,
2640                         net_cfg.mtu,
2641                         self.force_iommu | net_cfg.iommu,
2642                         net_cfg.num_queues,
2643                         net_cfg.queue_size,
2644                         self.seccomp_action.clone(),
2645                         net_cfg.rate_limiter_config,
2646                         self.exit_evt
2647                             .try_clone()
2648                             .map_err(DeviceManagerError::EventFd)?,
2649                         state,
2650                         net_cfg.offload_tso,
2651                         net_cfg.offload_ufo,
2652                         net_cfg.offload_csum,
2653                     )
2654                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2655                 ))
2656             } else if let Some(fds) = &net_cfg.fds {
2657                 let net = virtio_devices::Net::from_tap_fds(
2658                     id.clone(),
2659                     fds,
2660                     Some(net_cfg.mac),
2661                     net_cfg.mtu,
2662                     self.force_iommu | net_cfg.iommu,
2663                     net_cfg.queue_size,
2664                     self.seccomp_action.clone(),
2665                     net_cfg.rate_limiter_config,
2666                     self.exit_evt
2667                         .try_clone()
2668                         .map_err(DeviceManagerError::EventFd)?,
2669                     state,
2670                     net_cfg.offload_tso,
2671                     net_cfg.offload_ufo,
2672                     net_cfg.offload_csum,
2673                 )
2674                 .map_err(DeviceManagerError::CreateVirtioNet)?;
2675 
2676                 // SAFETY: 'fds' are valid because TAP devices are created successfully
2677                 unsafe {
2678                     self.config.lock().unwrap().add_preserved_fds(fds.clone());
2679                 }
2680 
2681                 Arc::new(Mutex::new(net))
2682             } else {
2683                 Arc::new(Mutex::new(
2684                     virtio_devices::Net::new(
2685                         id.clone(),
2686                         None,
2687                         Some(net_cfg.ip),
2688                         Some(net_cfg.mask),
2689                         Some(net_cfg.mac),
2690                         &mut net_cfg.host_mac,
2691                         net_cfg.mtu,
2692                         self.force_iommu | net_cfg.iommu,
2693                         net_cfg.num_queues,
2694                         net_cfg.queue_size,
2695                         self.seccomp_action.clone(),
2696                         net_cfg.rate_limiter_config,
2697                         self.exit_evt
2698                             .try_clone()
2699                             .map_err(DeviceManagerError::EventFd)?,
2700                         state,
2701                         net_cfg.offload_tso,
2702                         net_cfg.offload_ufo,
2703                         net_cfg.offload_csum,
2704                     )
2705                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2706                 ))
2707             };
2708 
2709             (
2710                 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2711                 virtio_net as Arc<Mutex<dyn Migratable>>,
2712             )
2713         };
2714 
2715         // Fill the device tree with a new node. In case of restore, we
2716         // know there is nothing to do, so we can simply override the
2717         // existing entry.
2718         self.device_tree
2719             .lock()
2720             .unwrap()
2721             .insert(id.clone(), device_node!(id, migratable_device));
2722 
2723         Ok(MetaVirtioDevice {
2724             virtio_device,
2725             iommu: net_cfg.iommu,
2726             id,
2727             pci_segment: net_cfg.pci_segment,
2728             dma_handler: None,
2729         })
2730     }
2731 
2732     /// Add virtio-net and vhost-user-net devices
2733     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2734         let mut devices = Vec::new();
2735         let mut net_devices = self.config.lock().unwrap().net.clone();
2736         if let Some(net_list_cfg) = &mut net_devices {
2737             for net_cfg in net_list_cfg.iter_mut() {
2738                 devices.push(self.make_virtio_net_device(net_cfg)?);
2739             }
2740         }
2741         self.config.lock().unwrap().net = net_devices;
2742 
2743         Ok(devices)
2744     }
2745 
2746     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2747         let mut devices = Vec::new();
2748 
2749         // Add virtio-rng if required
2750         let rng_config = self.config.lock().unwrap().rng.clone();
2751         if let Some(rng_path) = rng_config.src.to_str() {
2752             info!("Creating virtio-rng device: {:?}", rng_config);
2753             let id = String::from(RNG_DEVICE_NAME);
2754 
2755             let virtio_rng_device = Arc::new(Mutex::new(
2756                 virtio_devices::Rng::new(
2757                     id.clone(),
2758                     rng_path,
2759                     self.force_iommu | rng_config.iommu,
2760                     self.seccomp_action.clone(),
2761                     self.exit_evt
2762                         .try_clone()
2763                         .map_err(DeviceManagerError::EventFd)?,
2764                     state_from_id(self.snapshot.as_ref(), id.as_str())
2765                         .map_err(DeviceManagerError::RestoreGetState)?,
2766                 )
2767                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2768             ));
2769             devices.push(MetaVirtioDevice {
2770                 virtio_device: Arc::clone(&virtio_rng_device)
2771                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2772                 iommu: rng_config.iommu,
2773                 id: id.clone(),
2774                 pci_segment: 0,
2775                 dma_handler: None,
2776             });
2777 
2778             // Fill the device tree with a new node. In case of restore, we
2779             // know there is nothing to do, so we can simply override the
2780             // existing entry.
2781             self.device_tree
2782                 .lock()
2783                 .unwrap()
2784                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2785         }
2786 
2787         Ok(devices)
2788     }
2789 
2790     fn make_virtio_fs_device(
2791         &mut self,
2792         fs_cfg: &mut FsConfig,
2793     ) -> DeviceManagerResult<MetaVirtioDevice> {
2794         let id = if let Some(id) = &fs_cfg.id {
2795             id.clone()
2796         } else {
2797             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2798             fs_cfg.id = Some(id.clone());
2799             id
2800         };
2801 
2802         info!("Creating virtio-fs device: {:?}", fs_cfg);
2803 
2804         let mut node = device_node!(id);
2805 
2806         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2807             let virtio_fs_device = Arc::new(Mutex::new(
2808                 virtio_devices::vhost_user::Fs::new(
2809                     id.clone(),
2810                     fs_socket,
2811                     &fs_cfg.tag,
2812                     fs_cfg.num_queues,
2813                     fs_cfg.queue_size,
2814                     None,
2815                     self.seccomp_action.clone(),
2816                     self.exit_evt
2817                         .try_clone()
2818                         .map_err(DeviceManagerError::EventFd)?,
2819                     self.force_iommu,
2820                     state_from_id(self.snapshot.as_ref(), id.as_str())
2821                         .map_err(DeviceManagerError::RestoreGetState)?,
2822                 )
2823                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2824             ));
2825 
2826             // Update the device tree with the migratable device.
2827             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2828             self.device_tree.lock().unwrap().insert(id.clone(), node);
2829 
2830             Ok(MetaVirtioDevice {
2831                 virtio_device: Arc::clone(&virtio_fs_device)
2832                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2833                 iommu: false,
2834                 id,
2835                 pci_segment: fs_cfg.pci_segment,
2836                 dma_handler: None,
2837             })
2838         } else {
2839             Err(DeviceManagerError::NoVirtioFsSock)
2840         }
2841     }
2842 
2843     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2844         let mut devices = Vec::new();
2845 
2846         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2847         if let Some(fs_list_cfg) = &mut fs_devices {
2848             for fs_cfg in fs_list_cfg.iter_mut() {
2849                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2850             }
2851         }
2852         self.config.lock().unwrap().fs = fs_devices;
2853 
2854         Ok(devices)
2855     }
2856 
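         /// Creates a virtio-pmem device backed by `pmem_cfg.file` (a regular file,
         /// or an O_TMPFILE-backed temporary file when a directory is given), maps
         /// it into the VMM, registers the mapping as a userspace memory slot and
         /// records the allocated MMIO range in the device tree. The backing size
         /// must be a multiple of 2 MiB.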
2857     fn make_virtio_pmem_device(
2858         &mut self,
2859         pmem_cfg: &mut PmemConfig,
2860     ) -> DeviceManagerResult<MetaVirtioDevice> {
2861         let id = if let Some(id) = &pmem_cfg.id {
2862             id.clone()
2863         } else {
2864             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2865             pmem_cfg.id = Some(id.clone());
2866             id
2867         };
2868 
2869         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2870 
2871         let mut node = device_node!(id);
2872 
2873         // Look for the id in the device tree. If it is found, the device is
2874         // being restored; otherwise it is created from scratch.
2875         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2876             info!("Restoring virtio-pmem {} resources", id);
2877 
2878             let mut region_range: Option<(u64, u64)> = None;
2879             for resource in node.resources.iter() {
2880                 match resource {
2881                     Resource::MmioAddressRange { base, size } => {
2882                         if region_range.is_some() {
2883                             return Err(DeviceManagerError::ResourceAlreadyExists);
2884                         }
2885 
2886                         region_range = Some((*base, *size));
2887                     }
2888                     _ => {
2889                         error!("Unexpected resource {:?} for {}", resource, id);
2890                     }
2891                 }
2892             }
2893 
2894             if region_range.is_none() {
2895                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2896             }
2897 
2898             region_range
2899         } else {
2900             None
2901         };
2902 
2903         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2904             if pmem_cfg.size.is_none() {
2905                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2906             }
2907             (O_TMPFILE, true)
2908         } else {
2909             (0, false)
2910         };
2911 
2912         let mut file = OpenOptions::new()
2913             .read(true)
2914             .write(!pmem_cfg.discard_writes)
2915             .custom_flags(custom_flags)
2916             .open(&pmem_cfg.file)
2917             .map_err(DeviceManagerError::PmemFileOpen)?;
2918 
2919         let size = if let Some(size) = pmem_cfg.size {
2920             if set_len {
2921                 file.set_len(size)
2922                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2923             }
2924             size
2925         } else {
2926             file.seek(SeekFrom::End(0))
2927                 .map_err(DeviceManagerError::PmemFileSetLen)?
2928         };
2929 
2930         if size % 0x20_0000 != 0 {
2931             return Err(DeviceManagerError::PmemSizeNotAligned);
2932         }
2933 
2934         let (region_base, region_size) = if let Some((base, size)) = region_range {
2935             // The memory needs to be 2MiB aligned in order to support
2936             // hugepages.
2937             self.pci_segments[pmem_cfg.pci_segment as usize]
2938                 .mem64_allocator
2939                 .lock()
2940                 .unwrap()
2941                 .allocate(
2942                     Some(GuestAddress(base)),
2943                     size as GuestUsize,
2944                     Some(0x0020_0000),
2945                 )
2946                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2947 
2948             (base, size)
2949         } else {
2950             // The memory needs to be 2MiB aligned in order to support
2951             // hugepages.
2952             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2953                 .mem64_allocator
2954                 .lock()
2955                 .unwrap()
2956                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
2957                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2958 
2959             (base.raw_value(), size)
2960         };
2961 
2962         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2963         let mmap_region = MmapRegion::build(
2964             Some(FileOffset::new(cloned_file, 0)),
2965             region_size as usize,
2966             PROT_READ | PROT_WRITE,
2967             MAP_NORESERVE
2968                 | if pmem_cfg.discard_writes {
2969                     MAP_PRIVATE
2970                 } else {
2971                     MAP_SHARED
2972                 },
2973         )
2974         .map_err(DeviceManagerError::NewMmapRegion)?;
2975         let host_addr: u64 = mmap_region.as_ptr() as u64;
2976 
2977         let mem_slot = self
2978             .memory_manager
2979             .lock()
2980             .unwrap()
2981             .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
2982             .map_err(DeviceManagerError::MemoryManager)?;
2983 
2984         let mapping = virtio_devices::UserspaceMapping {
2985             host_addr,
2986             mem_slot,
2987             addr: GuestAddress(region_base),
2988             len: region_size,
2989             mergeable: false,
2990         };
2991 
2992         let virtio_pmem_device = Arc::new(Mutex::new(
2993             virtio_devices::Pmem::new(
2994                 id.clone(),
2995                 file,
2996                 GuestAddress(region_base),
2997                 mapping,
2998                 mmap_region,
2999                 self.force_iommu | pmem_cfg.iommu,
3000                 self.seccomp_action.clone(),
3001                 self.exit_evt
3002                     .try_clone()
3003                     .map_err(DeviceManagerError::EventFd)?,
3004                 state_from_id(self.snapshot.as_ref(), id.as_str())
3005                     .map_err(DeviceManagerError::RestoreGetState)?,
3006             )
3007             .map_err(DeviceManagerError::CreateVirtioPmem)?,
3008         ));
3009 
3010         // Update the device tree with correct resource information and with
3011         // the migratable device.
3012         node.resources.push(Resource::MmioAddressRange {
3013             base: region_base,
3014             size: region_size,
3015         });
3016         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
3017         self.device_tree.lock().unwrap().insert(id.clone(), node);
3018 
3019         Ok(MetaVirtioDevice {
3020             virtio_device: Arc::clone(&virtio_pmem_device)
3021                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3022             iommu: pmem_cfg.iommu,
3023             id,
3024             pci_segment: pmem_cfg.pci_segment,
3025             dma_handler: None,
3026         })
3027     }
3028 
3029     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3030         let mut devices = Vec::new();
3031         // Add virtio-pmem if required
3032         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
3033         if let Some(pmem_list_cfg) = &mut pmem_devices {
3034             for pmem_cfg in pmem_list_cfg.iter_mut() {
3035                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
3036             }
3037         }
3038         self.config.lock().unwrap().pmem = pmem_devices;
3039 
3040         Ok(devices)
3041     }
3042 
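         /// Creates a virtio-vsock device for `vsock_cfg.cid`, backed by a Unix
         /// socket at `vsock_cfg.socket`, and adds the corresponding node to the
         /// device tree.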
3043     fn make_virtio_vsock_device(
3044         &mut self,
3045         vsock_cfg: &mut VsockConfig,
3046     ) -> DeviceManagerResult<MetaVirtioDevice> {
3047         let id = if let Some(id) = &vsock_cfg.id {
3048             id.clone()
3049         } else {
3050             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
3051             vsock_cfg.id = Some(id.clone());
3052             id
3053         };
3054 
3055         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
3056 
3057         let socket_path = vsock_cfg
3058             .socket
3059             .to_str()
3060             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
3061         let backend =
3062             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
3063                 .map_err(DeviceManagerError::CreateVsockBackend)?;
3064 
3065         let vsock_device = Arc::new(Mutex::new(
3066             virtio_devices::Vsock::new(
3067                 id.clone(),
3068                 vsock_cfg.cid,
3069                 vsock_cfg.socket.clone(),
3070                 backend,
3071                 self.force_iommu | vsock_cfg.iommu,
3072                 self.seccomp_action.clone(),
3073                 self.exit_evt
3074                     .try_clone()
3075                     .map_err(DeviceManagerError::EventFd)?,
3076                 state_from_id(self.snapshot.as_ref(), id.as_str())
3077                     .map_err(DeviceManagerError::RestoreGetState)?,
3078             )
3079             .map_err(DeviceManagerError::CreateVirtioVsock)?,
3080         ));
3081 
3082         // Fill the device tree with a new node. In the restore case
3083         // there is nothing extra to do, so we can simply overwrite the
3084         // existing entry.
3085         self.device_tree
3086             .lock()
3087             .unwrap()
3088             .insert(id.clone(), device_node!(id, vsock_device));
3089 
3090         Ok(MetaVirtioDevice {
3091             virtio_device: Arc::clone(&vsock_device)
3092                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3093             iommu: vsock_cfg.iommu,
3094             id,
3095             pci_segment: vsock_cfg.pci_segment,
3096             dma_handler: None,
3097         })
3098     }
3099 
3100     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3101         let mut devices = Vec::new();
3102 
3103         let mut vsock = self.config.lock().unwrap().vsock.clone();
3104         if let Some(ref mut vsock_cfg) = &mut vsock {
3105             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
3106         }
3107         self.config.lock().unwrap().vsock = vsock;
3108 
3109         Ok(devices)
3110     }
3111 
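         /// Creates one virtio-mem device per memory zone that carries a virtio-mem
         /// region, wiring each device back into its zone so that later resize
         /// requests can be forwarded to it.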
3112     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3113         let mut devices = Vec::new();
3114 
3115         let mm = self.memory_manager.clone();
3116         let mut mm = mm.lock().unwrap();
3117         for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
3118             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
3119                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
3120 
3121                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
3122                     .map(|i| i as u16);
3123 
3124                 let virtio_mem_device = Arc::new(Mutex::new(
3125                     virtio_devices::Mem::new(
3126                         memory_zone_id.clone(),
3127                         virtio_mem_zone.region(),
3128                         self.seccomp_action.clone(),
3129                         node_id,
3130                         virtio_mem_zone.hotplugged_size(),
3131                         virtio_mem_zone.hugepages(),
3132                         self.exit_evt
3133                             .try_clone()
3134                             .map_err(DeviceManagerError::EventFd)?,
3135                         virtio_mem_zone.blocks_state().clone(),
3136                         state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
3137                             .map_err(DeviceManagerError::RestoreGetState)?,
3138                     )
3139                     .map_err(DeviceManagerError::CreateVirtioMem)?,
3140                 ));
3141 
3142                 // Update the virtio-mem zone so that it holds a handle to the
3143                 // virtio-mem device, which will be used for triggering a resize
3144                 // if needed.
3145                 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));
3146 
3147                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
3148 
3149                 devices.push(MetaVirtioDevice {
3150                     virtio_device: Arc::clone(&virtio_mem_device)
3151                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3152                     iommu: false,
3153                     id: memory_zone_id.clone(),
3154                     pci_segment: 0,
3155                     dma_handler: None,
3156                 });
3157 
3158                 // Fill the device tree with a new node. In the restore case
3159                 // there is nothing extra to do, so we can simply overwrite the
3160                 // existing entry.
3161                 self.device_tree.lock().unwrap().insert(
3162                     memory_zone_id.clone(),
3163                     device_node!(memory_zone_id, virtio_mem_device),
3164                 );
3165             }
3166         }
3167 
3168         Ok(devices)
3169     }
3170 
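         /// Creates the pvmemcontrol device pair (PCI device plus the bus device
         /// servicing guest requests) and places it on PCI segment 0.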
3171     #[cfg(feature = "pvmemcontrol")]
3172     fn make_pvmemcontrol_device(
3173         &mut self,
3174     ) -> DeviceManagerResult<(
3175         Arc<PvmemcontrolBusDevice>,
3176         Arc<Mutex<PvmemcontrolPciDevice>>,
3177     )> {
3178         let id = String::from(PVMEMCONTROL_DEVICE_NAME);
3179         let pci_segment_id = 0x0_u16;
3180 
3181         let (pci_segment_id, pci_device_bdf, resources) =
3182             self.pci_resources(&id, pci_segment_id)?;
3183 
3184         info!("Creating pvmemcontrol device: id = {}", id);
3185         let (pvmemcontrol_pci_device, pvmemcontrol_bus_device) =
3186             devices::pvmemcontrol::PvmemcontrolDevice::make_device(
3187                 id.clone(),
3188                 self.memory_manager.lock().unwrap().guest_memory(),
3189             );
3190 
3191         let pvmemcontrol_pci_device = Arc::new(Mutex::new(pvmemcontrol_pci_device));
3192         let pvmemcontrol_bus_device = Arc::new(pvmemcontrol_bus_device);
3193 
3194         let new_resources = self.add_pci_device(
3195             pvmemcontrol_bus_device.clone(),
3196             pvmemcontrol_pci_device.clone(),
3197             pci_segment_id,
3198             pci_device_bdf,
3199             resources,
3200         )?;
3201 
3202         let mut node = device_node!(id, pvmemcontrol_pci_device);
3203 
3204         node.resources = new_resources;
3205         node.pci_bdf = Some(pci_device_bdf);
3206         node.pci_device_handle = None;
3207 
3208         self.device_tree.lock().unwrap().insert(id, node);
3209 
3210         Ok((pvmemcontrol_bus_device, pvmemcontrol_pci_device))
3211     }
3212 
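         /// Creates the virtio-balloon device when a balloon configuration is
         /// present, keeping a handle in `self.balloon` for later balloon
         /// operations such as resizing.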
3213     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3214         let mut devices = Vec::new();
3215 
3216         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
3217             let id = String::from(BALLOON_DEVICE_NAME);
3218             info!("Creating virtio-balloon device: id = {}", id);
3219 
3220             let virtio_balloon_device = Arc::new(Mutex::new(
3221                 virtio_devices::Balloon::new(
3222                     id.clone(),
3223                     balloon_config.size,
3224                     balloon_config.deflate_on_oom,
3225                     balloon_config.free_page_reporting,
3226                     self.seccomp_action.clone(),
3227                     self.exit_evt
3228                         .try_clone()
3229                         .map_err(DeviceManagerError::EventFd)?,
3230                     state_from_id(self.snapshot.as_ref(), id.as_str())
3231                         .map_err(DeviceManagerError::RestoreGetState)?,
3232                 )
3233                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
3234             ));
3235 
3236             self.balloon = Some(virtio_balloon_device.clone());
3237 
3238             devices.push(MetaVirtioDevice {
3239                 virtio_device: Arc::clone(&virtio_balloon_device)
3240                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3241                 iommu: false,
3242                 id: id.clone(),
3243                 pci_segment: 0,
3244                 dma_handler: None,
3245             });
3246 
3247             self.device_tree
3248                 .lock()
3249                 .unwrap()
3250                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
3251         }
3252 
3253         Ok(devices)
3254     }
3255 
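         /// Creates the virtio-watchdog device when `watchdog` is enabled in the
         /// VM configuration; the device is handed `reset_evt` so it can trigger
         /// a VM reset.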
3256     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3257         let mut devices = Vec::new();
3258 
3259         if !self.config.lock().unwrap().watchdog {
3260             return Ok(devices);
3261         }
3262 
3263         let id = String::from(WATCHDOG_DEVICE_NAME);
3264         info!("Creating virtio-watchdog device: id = {}", id);
3265 
3266         let virtio_watchdog_device = Arc::new(Mutex::new(
3267             virtio_devices::Watchdog::new(
3268                 id.clone(),
3269                 self.reset_evt.try_clone().unwrap(),
3270                 self.seccomp_action.clone(),
3271                 self.exit_evt
3272                     .try_clone()
3273                     .map_err(DeviceManagerError::EventFd)?,
3274                 state_from_id(self.snapshot.as_ref(), id.as_str())
3275                     .map_err(DeviceManagerError::RestoreGetState)?,
3276             )
3277             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
3278         ));
3279         devices.push(MetaVirtioDevice {
3280             virtio_device: Arc::clone(&virtio_watchdog_device)
3281                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3282             iommu: false,
3283             id: id.clone(),
3284             pci_segment: 0,
3285             dma_handler: None,
3286         });
3287 
3288         self.device_tree
3289             .lock()
3290             .unwrap()
3291             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
3292 
3293         Ok(devices)
3294     }
3295 
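         /// Creates a vDPA device from `vdpa_cfg`, along with the `VdpaDmaMapping`
         /// handler returned as its DMA handler and used later to forward guest
         /// DMA mappings to the vDPA device.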
3296     fn make_vdpa_device(
3297         &mut self,
3298         vdpa_cfg: &mut VdpaConfig,
3299     ) -> DeviceManagerResult<MetaVirtioDevice> {
3300         let id = if let Some(id) = &vdpa_cfg.id {
3301             id.clone()
3302         } else {
3303             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
3304             vdpa_cfg.id = Some(id.clone());
3305             id
3306         };
3307 
3308         info!("Creating vDPA device: {:?}", vdpa_cfg);
3309 
3310         let device_path = vdpa_cfg
3311             .path
3312             .to_str()
3313             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
3314 
3315         let vdpa_device = Arc::new(Mutex::new(
3316             virtio_devices::Vdpa::new(
3317                 id.clone(),
3318                 device_path,
3319                 self.memory_manager.lock().unwrap().guest_memory(),
3320                 vdpa_cfg.num_queues as u16,
3321                 state_from_id(self.snapshot.as_ref(), id.as_str())
3322                     .map_err(DeviceManagerError::RestoreGetState)?,
3323             )
3324             .map_err(DeviceManagerError::CreateVdpa)?,
3325         ));
3326 
3327         // Create the DMA handler that is required by the vDPA device
3328         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
3329             Arc::clone(&vdpa_device),
3330             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3331         ));
3332 
3333         self.device_tree
3334             .lock()
3335             .unwrap()
3336             .insert(id.clone(), device_node!(id, vdpa_device));
3337 
3338         Ok(MetaVirtioDevice {
3339             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3340             iommu: vdpa_cfg.iommu,
3341             id,
3342             pci_segment: vdpa_cfg.pci_segment,
3343             dma_handler: Some(vdpa_mapping),
3344         })
3345     }
3346 
3347     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3348         let mut devices = Vec::new();
3349         // Add vdpa if required
3350         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
3351         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
3352             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
3353                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
3354             }
3355         }
3356         self.config.lock().unwrap().vdpa = vdpa_devices;
3357 
3358         Ok(devices)
3359     }
3360 
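         /// Returns the next available device name for `prefix`, skipping names
         /// already present in the boot id list or in the device tree, and failing
         /// once the wrapping counter has gone full circle without a match.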
3361     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
3362         let start_id = self.device_id_cnt;
3363         loop {
3364             // Generate the temporary name.
3365             let name = format!("{}{}", prefix, self.device_id_cnt);
3366             // Increment the counter.
3367             self.device_id_cnt += Wrapping(1);
3368             // Check if the name is already in use.
3369             if !self.boot_id_list.contains(&name)
3370                 && !self.device_tree.lock().unwrap().contains_key(&name)
3371             {
3372                 return Ok(name);
3373             }
3374 
3375             if self.device_id_cnt == start_id {
3376                 // We went through a full loop and there's nothing else we can
3377                 // do.
3378                 break;
3379             }
3380         }
3381         Err(DeviceManagerError::NoAvailableDeviceName)
3382     }
3383 
3384     fn add_passthrough_device(
3385         &mut self,
3386         device_cfg: &mut DeviceConfig,
3387     ) -> DeviceManagerResult<(PciBdf, String)> {
3388         // If the passthrough device has not been created yet, it is created
3389         // here and stored in the DeviceManager structure for future needs.
3390         if self.passthrough_device.is_none() {
3391             self.passthrough_device = Some(
3392                 self.address_manager
3393                     .vm
3394                     .create_passthrough_device()
3395                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
3396             );
3397         }
3398 
3399         self.add_vfio_device(device_cfg)
3400     }
3401 
3402     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
3403         let passthrough_device = self
3404             .passthrough_device
3405             .as_ref()
3406             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
3407 
3408         let dup = passthrough_device
3409             .try_clone()
3410             .map_err(DeviceManagerError::VfioCreate)?;
3411 
3412         Ok(Arc::new(
3413             VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
3414         ))
3415     }
3416 
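         /// Adds a VFIO passthrough device: selects or creates the VFIO container
         /// (a dedicated one when the device sits behind the vIOMMU), sets up the
         /// guest DMA mappings, creates the `VfioPciDevice`, plugs it into its PCI
         /// segment and records it in the device tree.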
3417     fn add_vfio_device(
3418         &mut self,
3419         device_cfg: &mut DeviceConfig,
3420     ) -> DeviceManagerResult<(PciBdf, String)> {
3421         let vfio_name = if let Some(id) = &device_cfg.id {
3422             id.clone()
3423         } else {
3424             let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
3425             device_cfg.id = Some(id.clone());
3426             id
3427         };
3428 
3429         let (pci_segment_id, pci_device_bdf, resources) =
3430             self.pci_resources(&vfio_name, device_cfg.pci_segment)?;
3431 
3432         let mut needs_dma_mapping = false;
3433 
3434         // Here we create a new VFIO container for one of two reasons. Either
3435         // this is the first VFIO device, meaning we need a new VFIO container,
3436         // which will be shared with other VFIO devices. Or the new VFIO device
3437         // is attached to a vIOMMU, meaning we must create a dedicated VFIO
3438         // container. In the vIOMMU use case, we can't put every device under
3439         // the same VFIO container since memory couldn't be mapped/unmapped on
3440         // a per-device basis. That's simply because the map/unmap operations
3441         // happen at the VFIO container level.
3442         let vfio_container = if device_cfg.iommu {
3443             let vfio_container = self.create_vfio_container()?;
3444 
3445             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3446                 Arc::clone(&vfio_container),
3447                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3448                 Arc::clone(&self.mmio_regions),
3449             ));
3450 
3451             if let Some(iommu) = &self.iommu_device {
3452                 iommu
3453                     .lock()
3454                     .unwrap()
3455                     .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
3456             } else {
3457                 return Err(DeviceManagerError::MissingVirtualIommu);
3458             }
3459 
3460             vfio_container
3461         } else if let Some(vfio_container) = &self.vfio_container {
3462             Arc::clone(vfio_container)
3463         } else {
3464             let vfio_container = self.create_vfio_container()?;
3465             needs_dma_mapping = true;
3466             self.vfio_container = Some(Arc::clone(&vfio_container));
3467 
3468             vfio_container
3469         };
3470 
3471         let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
3472             .map_err(DeviceManagerError::VfioCreate)?;
3473 
3474         if needs_dma_mapping {
3475             // Register the DMA mapping with the IOMMU.
3476             // Do not register virtio-mem regions, as they are handled directly by
3477             // the virtio-mem device itself.
3478             for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3479                 for region in zone.regions() {
3480                     vfio_container
3481                         .vfio_dma_map(
3482                             region.start_addr().raw_value(),
3483                             region.len(),
3484                             region.as_ptr() as u64,
3485                         )
3486                         .map_err(DeviceManagerError::VfioDmaMap)?;
3487                 }
3488             }
3489 
3490             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3491                 Arc::clone(&vfio_container),
3492                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3493                 Arc::clone(&self.mmio_regions),
3494             ));
3495 
3496             for virtio_mem_device in self.virtio_mem_devices.iter() {
3497                 virtio_mem_device
3498                     .lock()
3499                     .unwrap()
3500                     .add_dma_mapping_handler(
3501                         VirtioMemMappingSource::Container,
3502                         vfio_mapping.clone(),
3503                     )
3504                     .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3505             }
3506         }
3507 
3508         let legacy_interrupt_group =
3509             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3510                 Some(
3511                     legacy_interrupt_manager
3512                         .create_group(LegacyIrqGroupConfig {
3513                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3514                                 [pci_device_bdf.device() as usize]
3515                                 as InterruptIndex,
3516                         })
3517                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3518                 )
3519             } else {
3520                 None
3521             };
3522 
3523         let memory_manager = self.memory_manager.clone();
3524 
3525         let vfio_pci_device = VfioPciDevice::new(
3526             vfio_name.clone(),
3527             &self.address_manager.vm,
3528             vfio_device,
3529             vfio_container,
3530             self.msi_interrupt_manager.clone(),
3531             legacy_interrupt_group,
3532             device_cfg.iommu,
3533             pci_device_bdf,
3534             memory_manager.lock().unwrap().memory_slot_allocator(),
3535             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
3536             device_cfg.x_nv_gpudirect_clique,
3537             device_cfg.path.clone(),
3538         )
3539         .map_err(DeviceManagerError::VfioPciCreate)?;
3540 
3541         let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));
3542 
3543         let new_resources = self.add_pci_device(
3544             vfio_pci_device.clone(),
3545             vfio_pci_device.clone(),
3546             pci_segment_id,
3547             pci_device_bdf,
3548             resources,
3549         )?;
3550 
3551         vfio_pci_device
3552             .lock()
3553             .unwrap()
3554             .map_mmio_regions()
3555             .map_err(DeviceManagerError::VfioMapRegion)?;
3556 
3557         for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
3558             self.mmio_regions.lock().unwrap().push(mmio_region);
3559         }
3560 
3561         let mut node = device_node!(vfio_name, vfio_pci_device);
3562 
3563         // Update the device tree with correct resource information.
3564         node.resources = new_resources;
3565         node.pci_bdf = Some(pci_device_bdf);
3566         node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));
3567 
3568         self.device_tree
3569             .lock()
3570             .unwrap()
3571             .insert(vfio_name.clone(), node);
3572 
3573         Ok((pci_device_bdf, vfio_name))
3574     }
3575 
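         /// Common PCI plumbing: allocates the BARs (reusing restored resources
         /// when provided), adds the device to its PCI bus, registers the BAR
         /// mappings on the I/O and MMIO buses, and returns the resulting
         /// `Resource::PciBar` list.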
3576     fn add_pci_device(
3577         &mut self,
3578         bus_device: Arc<dyn BusDeviceSync>,
3579         pci_device: Arc<Mutex<dyn PciDevice>>,
3580         segment_id: u16,
3581         bdf: PciBdf,
3582         resources: Option<Vec<Resource>>,
3583     ) -> DeviceManagerResult<Vec<Resource>> {
3584         let bars = pci_device
3585             .lock()
3586             .unwrap()
3587             .allocate_bars(
3588                 &self.address_manager.allocator,
3589                 &mut self.pci_segments[segment_id as usize]
3590                     .mem32_allocator
3591                     .lock()
3592                     .unwrap(),
3593                 &mut self.pci_segments[segment_id as usize]
3594                     .mem64_allocator
3595                     .lock()
3596                     .unwrap(),
3597                 resources,
3598             )
3599             .map_err(DeviceManagerError::AllocateBars)?;
3600 
3601         let mut pci_bus = self.pci_segments[segment_id as usize]
3602             .pci_bus
3603             .lock()
3604             .unwrap();
3605 
3606         pci_bus
3607             .add_device(bdf.device() as u32, pci_device)
3608             .map_err(DeviceManagerError::AddPciDevice)?;
3609 
3610         self.bus_devices.push(Arc::clone(&bus_device));
3611 
3612         pci_bus
3613             .register_mapping(
3614                 bus_device,
3615                 self.address_manager.io_bus.as_ref(),
3616                 self.address_manager.mmio_bus.as_ref(),
3617                 bars.clone(),
3618             )
3619             .map_err(DeviceManagerError::AddPciDevice)?;
3620 
3621         let mut new_resources = Vec::new();
3622         for bar in bars {
3623             new_resources.push(Resource::PciBar {
3624                 index: bar.idx(),
3625                 base: bar.addr(),
3626                 size: bar.size(),
3627                 type_: bar.region_type().into(),
3628                 prefetchable: bar.prefetchable().into(),
3629             });
3630         }
3631 
3632         Ok(new_resources)
3633     }
3634 
3635     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3636         let mut iommu_attached_device_ids = Vec::new();
3637         let mut devices = self.config.lock().unwrap().devices.clone();
3638 
3639         if let Some(device_list_cfg) = &mut devices {
3640             for device_cfg in device_list_cfg.iter_mut() {
3641                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3642                 if device_cfg.iommu && self.iommu_device.is_some() {
3643                     iommu_attached_device_ids.push(device_id);
3644                 }
3645             }
3646         }
3647 
3648         // Update the list of devices
3649         self.config.lock().unwrap().devices = devices;
3650 
3651         Ok(iommu_attached_device_ids)
3652     }
3653 
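         /// Adds a vfio-user device backed by the socket from `device_cfg`:
         /// creates the client and the `VfioUserPciDevice`, registers the DMA
         /// mapping handler with every virtio-mem device, and maps the existing
         /// guest memory zones for DMA.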
3654     fn add_vfio_user_device(
3655         &mut self,
3656         device_cfg: &mut UserDeviceConfig,
3657     ) -> DeviceManagerResult<(PciBdf, String)> {
3658         let vfio_user_name = if let Some(id) = &device_cfg.id {
3659             id.clone()
3660         } else {
3661             let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
3662             device_cfg.id = Some(id.clone());
3663             id
3664         };
3665 
3666         let (pci_segment_id, pci_device_bdf, resources) =
3667             self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;
3668 
3669         let legacy_interrupt_group =
3670             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3671                 Some(
3672                     legacy_interrupt_manager
3673                         .create_group(LegacyIrqGroupConfig {
3674                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3675                                 [pci_device_bdf.device() as usize]
3676                                 as InterruptIndex,
3677                         })
3678                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3679                 )
3680             } else {
3681                 None
3682             };
3683 
3684         let client = Arc::new(Mutex::new(
3685             vfio_user::Client::new(&device_cfg.socket)
3686                 .map_err(DeviceManagerError::VfioUserCreateClient)?,
3687         ));
3688 
3689         let memory_manager = self.memory_manager.clone();
3690 
3691         let mut vfio_user_pci_device = VfioUserPciDevice::new(
3692             vfio_user_name.clone(),
3693             &self.address_manager.vm,
3694             client.clone(),
3695             self.msi_interrupt_manager.clone(),
3696             legacy_interrupt_group,
3697             pci_device_bdf,
3698             memory_manager.lock().unwrap().memory_slot_allocator(),
3699             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
3700         )
3701         .map_err(DeviceManagerError::VfioUserCreate)?;
3702 
3703         let memory = self.memory_manager.lock().unwrap().guest_memory();
3704         let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
3705         for virtio_mem_device in self.virtio_mem_devices.iter() {
3706             virtio_mem_device
3707                 .lock()
3708                 .unwrap()
3709                 .add_dma_mapping_handler(
3710                     VirtioMemMappingSource::Device(pci_device_bdf.into()),
3711                     vfio_user_mapping.clone(),
3712                 )
3713                 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3714         }
3715 
3716         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3717             for region in zone.regions() {
3718                 vfio_user_pci_device
3719                     .dma_map(region)
3720                     .map_err(DeviceManagerError::VfioUserDmaMap)?;
3721             }
3722         }
3723 
3724         let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));
3725 
3726         let new_resources = self.add_pci_device(
3727             vfio_user_pci_device.clone(),
3728             vfio_user_pci_device.clone(),
3729             pci_segment_id,
3730             pci_device_bdf,
3731             resources,
3732         )?;
3733 
3734         // Note: 'add_pci_device()' must be called beforehand so that the list of
3735         // MMIO regions is provisioned correctly.
3736         vfio_user_pci_device
3737             .lock()
3738             .unwrap()
3739             .map_mmio_regions()
3740             .map_err(DeviceManagerError::VfioUserMapRegion)?;
3741 
3742         let mut node = device_node!(vfio_user_name, vfio_user_pci_device);
3743 
3744         // Update the device tree with correct resource information.
3745         node.resources = new_resources;
3746         node.pci_bdf = Some(pci_device_bdf);
3747         node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));
3748 
3749         self.device_tree
3750             .lock()
3751             .unwrap()
3752             .insert(vfio_user_name.clone(), node);
3753 
3754         Ok((pci_device_bdf, vfio_user_name))
3755     }
3756 
3757     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3758         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3759 
3760         if let Some(device_list_cfg) = &mut user_devices {
3761             for device_cfg in device_list_cfg.iter_mut() {
3762                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3763             }
3764         }
3765 
3766         // Update the list of devices
3767         self.config.lock().unwrap().user_devices = user_devices;
3768 
3769         Ok(vec![])
3770     }
3771 
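         /// Wraps a virtio device into a `VirtioPciDevice` transport: resolves the
         /// BDF (reusing a restored one when available), sizes the MSI-X table,
         /// sets up optional IOMMU/SEV-SNP address translation and DMA mappings,
         /// registers the ioeventfds and links the transport node to its virtio
         /// child node in the device tree.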
3772     fn add_virtio_pci_device(
3773         &mut self,
3774         virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3775         iommu_mapping: &Option<Arc<IommuMapping>>,
3776         virtio_device_id: String,
3777         pci_segment_id: u16,
3778         dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
3779     ) -> DeviceManagerResult<PciBdf> {
3780         let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");
3781 
3782         // Add the new virtio-pci node to the device tree.
3783         let mut node = device_node!(id);
3784         node.children = vec![virtio_device_id.clone()];
3785 
3786         let (pci_segment_id, pci_device_bdf, resources) =
3787             self.pci_resources(&id, pci_segment_id)?;
3788 
3789         // Update the existing virtio node by setting the parent.
3790         if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
3791             node.parent = Some(id.clone());
3792         } else {
3793             return Err(DeviceManagerError::MissingNode);
3794         }
3795 
3796         // This allows support for one MSI-X vector per queue. One extra vector
3797         // is added to take into account the dedicated vector used for notifying
3798         // the guest about a virtio configuration change.
3799         let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;
3800 
3801         // Create the AccessPlatform object from the IommuMapping implementation.
3802         // This will provide address translation for any virtio device sitting
3803         // behind a vIOMMU.
3804         let mut access_platform: Option<Arc<dyn AccessPlatform>> = None;
3805 
3806         if let Some(mapping) = iommu_mapping {
3807             access_platform = Some(Arc::new(AccessPlatformMapping::new(
3808                 pci_device_bdf.into(),
3809                 mapping.clone(),
3810             )));
3811         }
3812 
3813         // If SEV-SNP is enabled, create the AccessPlatform from SevSnpPageAccessProxy.
3814         #[cfg(feature = "sev_snp")]
3815         if self.config.lock().unwrap().is_sev_snp_enabled() {
3816             access_platform = Some(Arc::new(SevSnpPageAccessProxy::new(
3817                 self.address_manager.vm.clone(),
3818             )));
3819         }
3820 
3821         let memory = self.memory_manager.lock().unwrap().guest_memory();
3822 
3823         // If a DMA handler is available, either hand it to the virtual IOMMU
3824         // (when the device sits behind one) or map the guest DMA ranges directly.
3825         if let Some(dma_handler) = &dma_handler {
3826             if iommu_mapping.is_some() {
3827                 if let Some(iommu) = &self.iommu_device {
3828                     iommu
3829                         .lock()
3830                         .unwrap()
3831                         .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
3832                 } else {
3833                     return Err(DeviceManagerError::MissingVirtualIommu);
3834                 }
3835             } else {
3836                 // Let every virtio-mem device handle the DMA map/unmap through the
3837                 // DMA handler provided.
3838                 for virtio_mem_device in self.virtio_mem_devices.iter() {
3839                     virtio_mem_device
3840                         .lock()
3841                         .unwrap()
3842                         .add_dma_mapping_handler(
3843                             VirtioMemMappingSource::Device(pci_device_bdf.into()),
3844                             dma_handler.clone(),
3845                         )
3846                         .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3847                 }
3848 
3849                 // Do not register virtio-mem regions, as they are handled directly by
3850                 // virtio-mem devices.
3851                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3852                     for region in zone.regions() {
3853                         let gpa = region.start_addr().0;
3854                         let size = region.len();
3855                         dma_handler
3856                             .map(gpa, gpa, size)
3857                             .map_err(DeviceManagerError::VirtioDmaMap)?;
3858                     }
3859                 }
3860             }
3861         }
3862 
3863         let device_type = virtio_device.lock().unwrap().device_type();
3864         let virtio_pci_device = Arc::new(Mutex::new(
3865             VirtioPciDevice::new(
3866                 id.clone(),
3867                 memory,
3868                 virtio_device,
3869                 msix_num,
3870                 access_platform,
3871                 &self.msi_interrupt_manager,
3872                 pci_device_bdf.into(),
3873                 self.activate_evt
3874                     .try_clone()
3875                     .map_err(DeviceManagerError::EventFd)?,
3876                 // All device types *except* virtio-block devices are allocated a 64-bit BAR.
3877                 // Block devices are given a 32-bit BAR so that they remain easily accessible
3878                 // to firmware without requiring excessive identity mapping. The exception is
3879                 // a block device placed on a non-default PCI segment, which also gets a 64-bit BAR.
3880                 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
3881                 dma_handler,
3882                 self.pending_activations.clone(),
3883                 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
3884             )
3885             .map_err(DeviceManagerError::VirtioDevice)?,
3886         ));
3887 
3888         let new_resources = self.add_pci_device(
3889             virtio_pci_device.clone(),
3890             virtio_pci_device.clone(),
3891             pci_segment_id,
3892             pci_device_bdf,
3893             resources,
3894         )?;
3895 
3896         let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3897         for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3898             let io_addr = IoEventAddress::Mmio(addr);
3899             self.address_manager
3900                 .vm
3901                 .register_ioevent(event, &io_addr, None)
3902                 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
3903         }
3904 
3905         // Update the device tree with correct resource information.
3906         node.resources = new_resources;
3907         node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
3908         node.pci_bdf = Some(pci_device_bdf);
3909         node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
3910         self.device_tree.lock().unwrap().insert(id, node);
3911 
3912         Ok(pci_device_bdf)
3913     }
3914 
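         /// Creates the pvpanic PCI device on segment 0, giving the guest a way to
         /// report panic events to the VMM.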
3915     fn add_pvpanic_device(
3916         &mut self,
3917     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
3918         let id = String::from(PVPANIC_DEVICE_NAME);
3919         let pci_segment_id = 0x0_u16;
3920 
3921         info!("Creating pvpanic device {}", id);
3922 
3923         let (pci_segment_id, pci_device_bdf, resources) =
3924             self.pci_resources(&id, pci_segment_id)?;
3925 
3926         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
3927 
3928         let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
3929             .map_err(DeviceManagerError::PvPanicCreate)?;
3930 
3931         let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));
3932 
3933         let new_resources = self.add_pci_device(
3934             pvpanic_device.clone(),
3935             pvpanic_device.clone(),
3936             pci_segment_id,
3937             pci_device_bdf,
3938             resources,
3939         )?;
3940 
3941         let mut node = device_node!(id, pvpanic_device);
3942 
3943         node.resources = new_resources;
3944         node.pci_bdf = Some(pci_device_bdf);
3945         node.pci_device_handle = None;
3946 
3947         self.device_tree.lock().unwrap().insert(id, node);
3948 
3949         Ok(Some(pvpanic_device))
3950     }
3951 
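         /// Resolves the PCI segment, BDF and any previously saved resources for
         /// `id`: a restored device reuses the BDF recorded in the device tree,
         /// while a new one gets the next free BDF on `pci_segment_id`.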
3952     fn pci_resources(
3953         &self,
3954         id: &str,
3955         pci_segment_id: u16,
3956     ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
3957         // Look for the id in the device tree. If it is found, the device is
3958         // being restored; otherwise it is created from scratch.
3959         let (pci_device_bdf, resources) =
3960             if let Some(node) = self.device_tree.lock().unwrap().get(id) {
3961                 info!("Restoring PCI device {} resources", id);
3962                 let pci_device_bdf: PciBdf = node
3963                     .pci_bdf
3964                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3965                 (Some(pci_device_bdf), Some(node.resources.clone()))
3966             } else {
3967                 (None, None)
3968             };
3969 
3970         Ok(if let Some(pci_device_bdf) = pci_device_bdf {
3971             let pci_segment_id = pci_device_bdf.segment();
3972 
3973             self.pci_segments[pci_segment_id as usize]
3974                 .pci_bus
3975                 .lock()
3976                 .unwrap()
3977                 .get_device_id(pci_device_bdf.device() as usize)
3978                 .map_err(DeviceManagerError::GetPciDeviceId)?;
3979 
3980             (pci_segment_id, pci_device_bdf, resources)
3981         } else {
3982             let pci_device_bdf = self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3983 
3984             (pci_segment_id, pci_device_bdf, None)
3985         })
3986     }
3987 
3988     #[cfg(target_arch = "x86_64")]
3989     pub fn io_bus(&self) -> &Arc<Bus> {
3990         &self.address_manager.io_bus
3991     }
3992 
3993     pub fn mmio_bus(&self) -> &Arc<Bus> {
3994         &self.address_manager.mmio_bus
3995     }
3996 
3997     pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
3998         &self.address_manager.allocator
3999     }
4000 
4001     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
4002         self.interrupt_controller
4003             .as_ref()
4004             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
4005     }
4006 
4007     pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
4008         &self.pci_segments
4009     }
4010 
4011     #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
4012     pub fn cmdline_additions(&self) -> &[String] {
4013         self.cmdline_additions.as_slice()
4014     }
4015 
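         /// Propagates a newly added guest memory region to every virtio device,
         /// their DMA handlers, the VFIO container and all vfio-user devices so
         /// that the new RAM is usable for DMA.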
4016     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
4017         for handle in self.virtio_devices.iter() {
4018             handle
4019                 .virtio_device
4020                 .lock()
4021                 .unwrap()
4022                 .add_memory_region(new_region)
4023                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
4024 
4025             if let Some(dma_handler) = &handle.dma_handler {
4026                 if !handle.iommu {
4027                     let gpa = new_region.start_addr().0;
4028                     let size = new_region.len();
4029                     dma_handler
4030                         .map(gpa, gpa, size)
4031                         .map_err(DeviceManagerError::VirtioDmaMap)?;
4032                 }
4033             }
4034         }
4035 
4036         // Take care of updating the memory for VFIO PCI devices.
4037         if let Some(vfio_container) = &self.vfio_container {
4038             vfio_container
4039                 .vfio_dma_map(
4040                     new_region.start_addr().raw_value(),
4041                     new_region.len(),
4042                     new_region.as_ptr() as u64,
4043                 )
4044                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
4045         }
4046 
4047         // Take care of updating the memory for vfio-user devices.
4048         {
4049             let device_tree = self.device_tree.lock().unwrap();
4050             for pci_device_node in device_tree.pci_devices() {
4051                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
4052                     .pci_device_handle
4053                     .as_ref()
4054                     .ok_or(DeviceManagerError::MissingPciDevice)?
4055                 {
4056                     vfio_user_pci_device
4057                         .lock()
4058                         .unwrap()
4059                         .dma_map(new_region)
4060                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
4061                 }
4062             }
4063         }
4064 
4065         Ok(())
4066     }
4067 
4068     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
4069         for mut activator in self.pending_activations.lock().unwrap().drain(..) {
4070             activator
4071                 .activate()
4072                 .map_err(DeviceManagerError::VirtioActivate)?;
4073         }
4074         Ok(())
4075     }
4076 
4077     pub fn notify_hotplug(
4078         &self,
4079         _notification_type: AcpiNotificationFlags,
4080     ) -> DeviceManagerResult<()> {
4081         // The GED notification device is expected to be present here, hence the unwrap.
4082         self.ged_notification_device
4083             .as_ref()
4084             .unwrap()
4085             .lock()
4086             .unwrap()
4087             .notify(_notification_type)
4088             .map_err(DeviceManagerError::HotPlugNotification)
4089     }
4090 
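         /// Hot-plugs a VFIO passthrough device into the running VM and flags it
         /// in the PCIU (device up) bitmap used for the ACPI hotplug notification.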
4091     pub fn add_device(
4092         &mut self,
4093         device_cfg: &mut DeviceConfig,
4094     ) -> DeviceManagerResult<PciDeviceInfo> {
4095         self.validate_identifier(&device_cfg.id)?;
4096 
4097         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
4098             return Err(DeviceManagerError::InvalidIommuHotplug);
4099         }
4100 
4101         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
4102 
4103         // Update the PCIU bitmap
4104         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4105 
4106         Ok(PciDeviceInfo {
4107             id: device_name,
4108             bdf,
4109         })
4110     }
4111 
4112     pub fn add_user_device(
4113         &mut self,
4114         device_cfg: &mut UserDeviceConfig,
4115     ) -> DeviceManagerResult<PciDeviceInfo> {
4116         self.validate_identifier(&device_cfg.id)?;
4117 
4118         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
4119 
4120         // Update the PCIU bitmap
4121         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4122 
4123         Ok(PciDeviceInfo {
4124             id: device_name,
4125             bdf,
4126         })
4127     }
4128 
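         /// Flags the PCI device identified by `id` for removal by setting its bit
         /// in the PCID (device down) bitmap; the actual ejection is expected to
         /// happen later through `eject_device` once the guest has reacted.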
4129     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
4130         // The node can be directly a PCI node in case the 'id' refers to a
4131         // VFIO device or a virtio-pci one.
4132         // In case the 'id' refers to a virtio device, we must find the PCI
4133         // node by looking at the parent.
4134         let device_tree = self.device_tree.lock().unwrap();
4135         let node = device_tree
4136             .get(&id)
4137             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
4138 
4139         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
4140             node
4141         } else {
4142             let parent = node
4143                 .parent
4144                 .as_ref()
4145                 .ok_or(DeviceManagerError::MissingNode)?;
4146             device_tree
4147                 .get(parent)
4148                 .ok_or(DeviceManagerError::MissingNode)?
4149         };
4150 
4151         let pci_device_bdf: PciBdf = pci_device_node
4152             .pci_bdf
4153             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
4154         let pci_segment_id = pci_device_bdf.segment();
4155 
4156         let pci_device_handle = pci_device_node
4157             .pci_device_handle
4158             .as_ref()
4159             .ok_or(DeviceManagerError::MissingPciDevice)?;
4160         #[allow(irrefutable_let_patterns)]
4161         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
4162             let device_type = VirtioDeviceType::from(
4163                 virtio_pci_device
4164                     .lock()
4165                     .unwrap()
4166                     .virtio_device()
4167                     .lock()
4168                     .unwrap()
4169                     .device_type(),
4170             );
4171             match device_type {
4172                 VirtioDeviceType::Net
4173                 | VirtioDeviceType::Block
4174                 | VirtioDeviceType::Pmem
4175                 | VirtioDeviceType::Fs
4176                 | VirtioDeviceType::Vsock => {}
4177                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
4178             }
4179         }
4180 
4181         // Update the PCID bitmap
4182         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
4183 
4184         Ok(())
4185     }
4186 
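         /// Ejects a previously removed PCI device: gives its device id back to
         /// the PCI bus, drops its node (and children) from the device tree, then
         /// tears down the underlying handle (VFIO MMIO regions, virtio
         /// ioeventfds, DMA mappings, ...).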
4187     pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
4188         info!(
4189             "Ejecting device_id = {} on segment_id={}",
4190             device_id, pci_segment_id
4191         );
4192 
4193         // Convert the device ID into the corresponding b/d/f.
4194         let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);
4195 
4196         // Give the PCI device ID back to the PCI bus.
4197         self.pci_segments[pci_segment_id as usize]
4198             .pci_bus
4199             .lock()
4200             .unwrap()
4201             .put_device_id(device_id as usize)
4202             .map_err(DeviceManagerError::PutPciDeviceId)?;
4203 
4204         let (pci_device_handle, id) = {
4205             // Remove the device from the device tree along with its children.
4206             let mut device_tree = self.device_tree.lock().unwrap();
4207             let pci_device_node = device_tree
4208                 .remove_node_by_pci_bdf(pci_device_bdf)
4209                 .ok_or(DeviceManagerError::MissingPciDevice)?;
4210 
4211             // For VFIO and vfio-user devices the PCI device node id is the id.
4212             // For virtio devices we overwrite it below, as we want the id of the
4213             // underlying device taken from the child node.
4214             let mut id = pci_device_node.id;
4215             let pci_device_handle = pci_device_node
4216                 .pci_device_handle
4217                 .ok_or(DeviceManagerError::MissingPciDevice)?;
4218             if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
4219                 // The virtio-pci device has a single child
4220                 if !pci_device_node.children.is_empty() {
4221                     assert_eq!(pci_device_node.children.len(), 1);
4222                     let child_id = &pci_device_node.children[0];
4223                     id.clone_from(child_id);
4224                 }
4225             }
4226             for child in pci_device_node.children.iter() {
4227                 device_tree.remove(child);
4228             }
4229 
4230             (pci_device_handle, id)
4231         };
4232 
4233         let mut iommu_attached = false;
4234         if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
4235             if iommu_attached_devices.contains(&pci_device_bdf) {
4236                 iommu_attached = true;
4237             }
4238         }
4239 
4240         let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
4241             // No need to remove any virtio-mem mapping here as the container outlives all devices
4242             PciDeviceHandle::Vfio(vfio_pci_device) => {
4243                 for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
4244                     self.mmio_regions
4245                         .lock()
4246                         .unwrap()
4247                         .retain(|x| x.start != mmio_region.start)
4248                 }
4249 
4250                 (
4251                     Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
4252                     Arc::clone(&vfio_pci_device) as Arc<dyn BusDeviceSync>,
4253                     None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
4254                     false,
4255                 )
4256             }
4257             PciDeviceHandle::Virtio(virtio_pci_device) => {
4258                 let dev = virtio_pci_device.lock().unwrap();
4259                 let bar_addr = dev.config_bar_addr();
4260                 for (event, addr) in dev.ioeventfds(bar_addr) {
4261                     let io_addr = IoEventAddress::Mmio(addr);
4262                     self.address_manager
4263                         .vm
4264                         .unregister_ioevent(event, &io_addr)
4265                         .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
4266                 }
4267 
4268                 if let Some(dma_handler) = dev.dma_handler() {
4269                     if !iommu_attached {
4270                         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
4271                             for region in zone.regions() {
4272                                 let iova = region.start_addr().0;
4273                                 let size = region.len();
4274                                 dma_handler
4275                                     .unmap(iova, size)
4276                                     .map_err(DeviceManagerError::VirtioDmaUnmap)?;
4277                             }
4278                         }
4279                     }
4280                 }
4281 
4282                 (
4283                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
4284                     Arc::clone(&virtio_pci_device) as Arc<dyn BusDeviceSync>,
4285                     Some(dev.virtio_device()),
4286                     dev.dma_handler().is_some() && !iommu_attached,
4287                 )
4288             }
4289             PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
4290                 let mut dev = vfio_user_pci_device.lock().unwrap();
4291                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
4292                     for region in zone.regions() {
4293                         dev.dma_unmap(region)
4294                             .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
4295                     }
4296                 }
4297 
4298                 (
4299                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
4300                     Arc::clone(&vfio_user_pci_device) as Arc<dyn BusDeviceSync>,
4301                     None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
4302                     true,
4303                 )
4304             }
4305         };
4306 
4307         if remove_dma_handler {
4308             for virtio_mem_device in self.virtio_mem_devices.iter() {
4309                 virtio_mem_device
4310                     .lock()
4311                     .unwrap()
4312                     .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
4313                         pci_device_bdf.into(),
4314                     ))
4315                     .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
4316             }
4317         }
4318 
4319         // Free the allocated BARs
4320         pci_device
4321             .lock()
4322             .unwrap()
4323             .free_bars(
4324                 &mut self.address_manager.allocator.lock().unwrap(),
4325                 &mut self.pci_segments[pci_segment_id as usize]
4326                     .mem32_allocator
4327                     .lock()
4328                     .unwrap(),
4329                 &mut self.pci_segments[pci_segment_id as usize]
4330                     .mem64_allocator
4331                     .lock()
4332                     .unwrap(),
4333             )
4334             .map_err(DeviceManagerError::FreePciBars)?;
4335 
4336         // Remove the device from the PCI bus
4337         self.pci_segments[pci_segment_id as usize]
4338             .pci_bus
4339             .lock()
4340             .unwrap()
4341             .remove_by_device(&pci_device)
4342             .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;
4343 
4344         #[cfg(target_arch = "x86_64")]
4345         // Remove the device from the IO bus
4346         self.io_bus()
4347             .remove_by_device(&bus_device)
4348             .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;
4349 
4350         // Remove the device from the MMIO bus
4351         self.mmio_bus()
4352             .remove_by_device(&bus_device)
4353             .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;
4354 
4355         // Remove the device from the list of BusDevices held by the
4356         // DeviceManager.
4357         self.bus_devices
4358             .retain(|dev| !Arc::ptr_eq(dev, &bus_device));
4359 
4360         // Shutdown and remove the underlying virtio-device if present
4361         if let Some(virtio_device) = virtio_device {
4362             for mapping in virtio_device.lock().unwrap().userspace_mappings() {
4363                 self.memory_manager
4364                     .lock()
4365                     .unwrap()
4366                     .remove_userspace_mapping(
4367                         mapping.addr.raw_value(),
4368                         mapping.len,
4369                         mapping.host_addr,
4370                         mapping.mergeable,
4371                         mapping.mem_slot,
4372                     )
4373                     .map_err(DeviceManagerError::MemoryManager)?;
4374             }
4375 
4376             virtio_device.lock().unwrap().shutdown();
4377 
4378             self.virtio_devices
4379                 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
4380         }
4381 
4382         event!(
4383             "vm",
4384             "device-removed",
4385             "id",
4386             &id,
4387             "bdf",
4388             pci_device_bdf.to_string()
4389         );
4390 
4391         // At this point, the device has been removed from all the lists and
4392         // buses where it was stored. At the end of this function, once
4393         // virtio_device, bus_device and pci_device go out of scope, the
4394         // actual device will be dropped.
4395         Ok(())
4396     }
4397 
4398     fn hotplug_virtio_pci_device(
4399         &mut self,
4400         handle: MetaVirtioDevice,
4401     ) -> DeviceManagerResult<PciDeviceInfo> {
4402         // Add the virtio device to the device manager list. This is important
4403         // as the list is used, for instance, to notify virtio devices about
4404         // memory updates.
4405         self.virtio_devices.push(handle.clone());
4406 
4407         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
4408             self.iommu_mapping.clone()
4409         } else {
4410             None
4411         };
4412 
4413         let bdf = self.add_virtio_pci_device(
4414             handle.virtio_device,
4415             &mapping,
4416             handle.id.clone(),
4417             handle.pci_segment,
4418             handle.dma_handler,
4419         )?;
4420 
4421         // Update the PCIU bitmap
4422         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4423 
4424         Ok(PciDeviceInfo { id: handle.id, bdf })
4425     }
4426 
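    // Returns true when the platform configuration places the given PCI
    // segment behind the virtual IOMMU. For example (illustrative values), a
    // PlatformConfig carrying `iommu_segments: Some(vec![1])` makes
    // is_iommu_segment(1) return true, which is what allows hotplugging a
    // device configured with `iommu=on` on that segment.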
4427     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
4428         self.config
4429             .lock()
4430             .as_ref()
4431             .unwrap()
4432             .platform
4433             .as_ref()
4434             .map(|pc| {
4435                 pc.iommu_segments
4436                     .as_ref()
4437                     .map(|v| v.contains(&pci_segment_id))
4438                     .unwrap_or_default()
4439             })
4440             .unwrap_or_default()
4441     }
4442 
4443     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
4444         self.validate_identifier(&disk_cfg.id)?;
4445 
4446         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
4447             return Err(DeviceManagerError::InvalidIommuHotplug);
4448         }
4449 
4450         let device = self.make_virtio_block_device(disk_cfg)?;
4451         self.hotplug_virtio_pci_device(device)
4452     }
4453 
4454     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
4455         self.validate_identifier(&fs_cfg.id)?;
4456 
4457         let device = self.make_virtio_fs_device(fs_cfg)?;
4458         self.hotplug_virtio_pci_device(device)
4459     }
4460 
4461     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
4462         self.validate_identifier(&pmem_cfg.id)?;
4463 
4464         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
4465             return Err(DeviceManagerError::InvalidIommuHotplug);
4466         }
4467 
4468         let device = self.make_virtio_pmem_device(pmem_cfg)?;
4469         self.hotplug_virtio_pci_device(device)
4470     }
4471 
4472     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
4473         self.validate_identifier(&net_cfg.id)?;
4474 
4475         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
4476             return Err(DeviceManagerError::InvalidIommuHotplug);
4477         }
4478 
4479         let device = self.make_virtio_net_device(net_cfg)?;
4480         self.hotplug_virtio_pci_device(device)
4481     }
4482 
4483     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
4484         self.validate_identifier(&vdpa_cfg.id)?;
4485 
4486         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
4487             return Err(DeviceManagerError::InvalidIommuHotplug);
4488         }
4489 
4490         let device = self.make_vdpa_device(vdpa_cfg)?;
4491         self.hotplug_virtio_pci_device(device)
4492     }
4493 
4494     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4495         self.validate_identifier(&vsock_cfg.id)?;
4496 
4497         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4498             return Err(DeviceManagerError::InvalidIommuHotplug);
4499         }
4500 
4501         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4502         self.hotplug_virtio_pci_device(device)
4503     }
4504 
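    // Aggregates the per-device counters exposed by every virtio device,
    // keyed by device id. Illustrative shape of the result (the device ids
    // and counter names below are assumptions; they depend on the actual
    // configuration and device types):
    //
    //   {
    //       "_net0": { "rx_bytes": 1024, "tx_bytes": 2048, ... },
    //       "_disk0": { "read_bytes": 4096, "write_bytes": 8192, ... },
    //   }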
4505     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4506         let mut counters = HashMap::new();
4507 
4508         for handle in &self.virtio_devices {
4509             let virtio_device = handle.virtio_device.lock().unwrap();
4510             if let Some(device_counters) = virtio_device.counters() {
4511                 counters.insert(handle.id.clone(), device_counters.clone());
4512             }
4513         }
4514 
4515         counters
4516     }
4517 
4518     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4519         if let Some(balloon) = &self.balloon {
4520             return balloon
4521                 .lock()
4522                 .unwrap()
4523                 .resize(size)
4524                 .map_err(DeviceManagerError::VirtioBalloonResize);
4525         }
4526 
4527         warn!("No balloon setup: Can't resize the balloon");
4528         Err(DeviceManagerError::MissingVirtioBalloon)
4529     }
4530 
4531     pub fn balloon_size(&self) -> u64 {
4532         if let Some(balloon) = &self.balloon {
4533             return balloon.lock().unwrap().get_actual();
4534         }
4535 
4536         0
4537     }
4538 
4539     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4540         self.device_tree.clone()
4541     }
4542 
4543     #[cfg(target_arch = "x86_64")]
4544     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4545         self.ged_notification_device
4546             .as_ref()
4547             .unwrap()
4548             .lock()
4549             .unwrap()
4550             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4551             .map_err(DeviceManagerError::PowerButtonNotification)
4552     }
4553 
4554     #[cfg(target_arch = "aarch64")]
4555     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4556         // There are two use cases:
4557         // 1. Users will use direct kernel boot with device tree.
4558         // 2. Users will use ACPI+UEFI boot.
4559 
4560         // Trigger a GPIO pin 3 event to satisfy use case 1.
4561         self.gpio_device
4562             .as_ref()
4563             .unwrap()
4564             .lock()
4565             .unwrap()
4566             .trigger_key(3)
4567             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4568         // Trigger a GED power button event to satisfy use case 2.
4569         return self
4570             .ged_notification_device
4571             .as_ref()
4572             .unwrap()
4573             .lock()
4574             .unwrap()
4575             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4576             .map_err(DeviceManagerError::PowerButtonNotification);
4577     }
4578 
4579     pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
4580         &self.iommu_attached_devices
4581     }
4582 
4583     fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4584         if let Some(id) = id {
4585             if id.starts_with("__") {
4586                 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4587             }
4588 
4589             if self.device_tree.lock().unwrap().contains_key(id) {
4590                 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4591             }
4592         }
4593 
4594         Ok(())
4595     }
4596 
4597     #[cfg(not(target_arch = "riscv64"))]
4598     pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
4599         &self.acpi_platform_addresses
4600     }
4601 }
4602 
4603 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4604     for (numa_node_id, numa_node) in numa_nodes.iter() {
4605         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4606             return Some(*numa_node_id);
4607         }
4608     }
4609 
4610     None
4611 }
4612 
4613 fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
4614     for (numa_node_id, numa_node) in numa_nodes.iter() {
4615         if numa_node.pci_segments.contains(&pci_segment_id) {
4616             return *numa_node_id;
4617         }
4618     }
4619 
4620     0
4621 }
4622 
4623 #[cfg(not(target_arch = "riscv64"))]
4624 struct TpmDevice {}
4625 
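// The TpmDevice AML below corresponds roughly to the following ASL (sketch,
// with <TPM_START>/<TPM_SIZE> standing for the layout constants):
//
//   Device (TPM2) {
//       Name (_HID, "MSFT0101")
//       Name (_STA, 0x0F)
//       Name (_CRS, ResourceTemplate () {
//           Memory32Fixed (ReadWrite, <TPM_START>, <TPM_SIZE>)
//       })
//   }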
4626 #[cfg(not(target_arch = "riscv64"))]
4627 impl Aml for TpmDevice {
4628     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4629         aml::Device::new(
4630             "TPM2".into(),
4631             vec![
4632                 &aml::Name::new("_HID".into(), &"MSFT0101"),
4633                 &aml::Name::new("_STA".into(), &(0xF_usize)),
4634                 &aml::Name::new(
4635                     "_CRS".into(),
4636                     &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
4637                         true,
4638                         layout::TPM_START.0 as u32,
4639                         layout::TPM_SIZE as u32,
4640                     )]),
4641                 ),
4642             ],
4643         )
4644         .to_aml_bytes(sink)
4645     }
4646 }
4647 
4648 #[cfg(not(target_arch = "riscv64"))]
4649 impl Aml for DeviceManager {
4650     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4651         #[cfg(target_arch = "aarch64")]
4652         use arch::aarch64::DeviceInfoForFdt;
4653 
4654         let mut pci_scan_methods = Vec::new();
4655         for i in 0..self.pci_segments.len() {
4656             pci_scan_methods.push(aml::MethodCall::new(
4657                 format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
4658                 vec![],
4659             ));
4660         }
4661         let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
4662         for method in &pci_scan_methods {
4663             pci_scan_inner.push(method)
4664         }
4665 
4666         // PCI hotplug controller
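        // The device emitted below corresponds roughly to the following ASL
        // (sketch, with <ADDR>/<SIZE> standing for self.acpi_address and
        // DEVICE_MANAGER_ACPI_SIZE):
        //
        //   Device (\_SB.PHPR) {
        //       Name (_HID, EisaId ("PNP0A06"))
        //       Mutex (BLCK, 0)
        //       OperationRegion (PCST, SystemMemory, <ADDR>, <SIZE>)
        //       Field (PCST, DWordAcc, NoLock, WriteAsZeros) {
        //           PCIU, 32, PCID, 32, B0EJ, 32, PSEG, 32
        //       }
        //       Method (PCEJ, 2, Serialized) {
        //           Acquire (BLCK, 0xFFFF)
        //           PSEG = Arg1
        //           B0EJ = (One << Arg0)
        //           Release (BLCK)
        //           Return (Zero)
        //       }
        //       Method (PSCN, 0, Serialized) { \_SB.PC00.PCNT () ... }
        //   }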
4667         aml::Device::new(
4668             "_SB_.PHPR".into(),
4669             vec![
4670                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
4671                 &aml::Name::new("_STA".into(), &0x0bu8),
4672                 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
4673                 &aml::Mutex::new("BLCK".into(), 0),
4674                 &aml::Name::new(
4675                     "_CRS".into(),
4676                     &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
4677                         aml::AddressSpaceCacheable::NotCacheable,
4678                         true,
4679                         self.acpi_address.0,
4680                         self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
4681                         None,
4682                     )]),
4683                 ),
4684                 // OpRegion and Fields map MMIO range into individual field values
4685                 &aml::OpRegion::new(
4686                     "PCST".into(),
4687                     aml::OpRegionSpace::SystemMemory,
4688                     &(self.acpi_address.0 as usize),
4689                     &DEVICE_MANAGER_ACPI_SIZE,
4690                 ),
4691                 &aml::Field::new(
4692                     "PCST".into(),
4693                     aml::FieldAccessType::DWord,
4694                     aml::FieldLockRule::NoLock,
4695                     aml::FieldUpdateRule::WriteAsZeroes,
4696                     vec![
4697                         aml::FieldEntry::Named(*b"PCIU", 32),
4698                         aml::FieldEntry::Named(*b"PCID", 32),
4699                         aml::FieldEntry::Named(*b"B0EJ", 32),
4700                         aml::FieldEntry::Named(*b"PSEG", 32),
4701                     ],
4702                 ),
4703                 &aml::Method::new(
4704                     "PCEJ".into(),
4705                     2,
4706                     true,
4707                     vec![
4708                         // Take lock defined above
4709                         &aml::Acquire::new("BLCK".into(), 0xffff),
4710                         // Choose the current segment
4711                         // Select the target PCI segment (second argument)
4712                         &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
4713                         // Set the bit for the ejected device slot (first argument) in B0EJ
4714                         &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
4715                         &aml::Release::new("BLCK".into()),
4716                         // Return 0
4717                         &aml::Return::new(&aml::ZERO),
4718                     ],
4719                 ),
4720                 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
4721             ],
4722         )
4723         .to_aml_bytes(sink);
4724 
4725         for segment in &self.pci_segments {
4726             segment.to_aml_bytes(sink);
4727         }
4728 
4729         let mut mbrd_memory = Vec::new();
4730 
4731         for segment in &self.pci_segments {
4732             mbrd_memory.push(aml::Memory32Fixed::new(
4733                 true,
4734                 segment.mmio_config_address as u32,
4735                 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
4736             ))
4737         }
4738 
4739         let mut mbrd_memory_refs = Vec::new();
4740         for mbrd_memory_ref in &mbrd_memory {
4741             mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
4742         }
4743 
4744         aml::Device::new(
4745             "_SB_.MBRD".into(),
4746             vec![
4747                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
4748                 &aml::Name::new("_UID".into(), &aml::ZERO),
4749                 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
4750             ],
4751         )
4752         .to_aml_bytes(sink);
4753 
4754         // Serial device
4755         #[cfg(target_arch = "x86_64")]
4756         let serial_irq = 4;
4757         #[cfg(target_arch = "aarch64")]
4758         let serial_irq =
4759             if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
4760                 self.get_device_info()
4761                     .clone()
4762                     .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
4763                     .unwrap()
4764                     .irq()
4765             } else {
4766                 // If serial is turned off, add a fake device with invalid irq.
4767                 // If serial is turned off, use a placeholder irq (the COM1 device below is not added).
4768             };
4769         if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
4770             aml::Device::new(
4771                 "_SB_.COM1".into(),
4772                 vec![
4773                     &aml::Name::new(
4774                         "_HID".into(),
4775                         #[cfg(target_arch = "x86_64")]
4776                         &aml::EISAName::new("PNP0501"),
4777                         #[cfg(target_arch = "aarch64")]
4778                         &"ARMH0011",
4779                     ),
4780                     &aml::Name::new("_UID".into(), &aml::ZERO),
4781                     &aml::Name::new("_DDN".into(), &"COM1"),
4782                     &aml::Name::new(
4783                         "_CRS".into(),
4784                         &aml::ResourceTemplate::new(vec![
4785                             &aml::Interrupt::new(true, true, false, false, serial_irq),
4786                             #[cfg(target_arch = "x86_64")]
4787                             &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
4788                             #[cfg(target_arch = "aarch64")]
4789                             &aml::Memory32Fixed::new(
4790                                 true,
4791                                 arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
4792                                 MMIO_LEN as u32,
4793                             ),
4794                         ]),
4795                     ),
4796                 ],
4797             )
4798             .to_aml_bytes(sink);
4799         }
4800 
4801         aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);
4802 
4803         aml::Device::new(
4804             "_SB_.PWRB".into(),
4805             vec![
4806                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
4807                 &aml::Name::new("_UID".into(), &aml::ZERO),
4808             ],
4809         )
4810         .to_aml_bytes(sink);
4811 
4812         if self.config.lock().unwrap().tpm.is_some() {
4813             // Add tpm device
4814             TpmDevice {}.to_aml_bytes(sink);
4815         }
4816 
4817         self.ged_notification_device
4818             .as_ref()
4819             .unwrap()
4820             .lock()
4821             .unwrap()
4822             .to_aml_bytes(sink)
4823     }
4824 }
4825 
4826 impl Pausable for DeviceManager {
4827     fn pause(&mut self) -> result::Result<(), MigratableError> {
4828         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4829             if let Some(migratable) = &device_node.migratable {
4830                 migratable.lock().unwrap().pause()?;
4831             }
4832         }
4833         // On AArch64, the pause of device manager needs to trigger
4834         // On AArch64, pausing the device manager needs to trigger
4835         // a "pause" of the GIC, which flushes the GIC pending tables
4836         // and ITS tables to guest RAM.
4837         {
4838             self.get_interrupt_controller()
4839                 .unwrap()
4840                 .lock()
4841                 .unwrap()
4842                 .pause()?;
4843         };
4844 
4845         Ok(())
4846     }
4847 
4848     fn resume(&mut self) -> result::Result<(), MigratableError> {
4849         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4850             if let Some(migratable) = &device_node.migratable {
4851                 migratable.lock().unwrap().resume()?;
4852             }
4853         }
4854 
4855         Ok(())
4856     }
4857 }
4858 
4859 impl Snapshottable for DeviceManager {
4860     fn id(&self) -> String {
4861         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4862     }
4863 
4864     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4865         let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);
4866 
4867         // We aggregate the snapshots of all devices.
4868         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4869             if let Some(migratable) = &device_node.migratable {
4870                 let mut migratable = migratable.lock().unwrap();
4871                 snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
4872             }
4873         }
4874 
4875         Ok(snapshot)
4876     }
4877 }
4878 
4879 impl Transportable for DeviceManager {}
4880 
4881 impl Migratable for DeviceManager {
4882     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4883         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4884             if let Some(migratable) = &device_node.migratable {
4885                 migratable.lock().unwrap().start_dirty_log()?;
4886             }
4887         }
4888         Ok(())
4889     }
4890 
4891     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4892         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4893             if let Some(migratable) = &device_node.migratable {
4894                 migratable.lock().unwrap().stop_dirty_log()?;
4895             }
4896         }
4897         Ok(())
4898     }
4899 
4900     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4901         let mut tables = Vec::new();
4902         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4903             if let Some(migratable) = &device_node.migratable {
4904                 tables.push(migratable.lock().unwrap().dirty_log()?);
4905             }
4906         }
4907         Ok(MemoryRangeTable::new_from_tables(tables))
4908     }
4909 
4910     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4911         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4912             if let Some(migratable) = &device_node.migratable {
4913                 migratable.lock().unwrap().start_migration()?;
4914             }
4915         }
4916         Ok(())
4917     }
4918 
4919     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4920         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4921             if let Some(migratable) = &device_node.migratable {
4922                 migratable.lock().unwrap().complete_migration()?;
4923             }
4924         }
4925         Ok(())
4926     }
4927 }
4928 
4929 const PCIU_FIELD_OFFSET: u64 = 0;
4930 const PCID_FIELD_OFFSET: u64 = 4;
4931 const B0EJ_FIELD_OFFSET: u64 = 8;
4932 const PSEG_FIELD_OFFSET: u64 = 12;
4933 const PCIU_FIELD_SIZE: usize = 4;
4934 const PCID_FIELD_SIZE: usize = 4;
4935 const B0EJ_FIELD_SIZE: usize = 4;
4936 const PSEG_FIELD_SIZE: usize = 4;
4937 
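// Register layout of the PCI hotplug MMIO region backing the _SB_.PHPR ACPI
// device above. All registers are little-endian u32 values:
//
//   0x0  PCIU  read:  bitmap of slots hot-plugged since the last read (read clears)
//   0x4  PCID  read:  bitmap of slots pending removal (read clears)
//   0x8  B0EJ  write: bitmap of slots to eject (reads always return 0)
//   0xc  PSEG  read/write: currently selected PCI segment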
4938 impl BusDevice for DeviceManager {
4939     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4940         match offset {
4941             PCIU_FIELD_OFFSET => {
4942                 assert!(data.len() == PCIU_FIELD_SIZE);
4943                 data.copy_from_slice(
4944                     &self.pci_segments[self.selected_segment]
4945                         .pci_devices_up
4946                         .to_le_bytes(),
4947                 );
4948                 // Clear the PCIU bitmap
4949                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4950             }
4951             PCID_FIELD_OFFSET => {
4952                 assert!(data.len() == PCID_FIELD_SIZE);
4953                 data.copy_from_slice(
4954                     &self.pci_segments[self.selected_segment]
4955                         .pci_devices_down
4956                         .to_le_bytes(),
4957                 );
4958                 // Clear the PCID bitmap
4959                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4960             }
4961             B0EJ_FIELD_OFFSET => {
4962                 assert!(data.len() == B0EJ_FIELD_SIZE);
4963                 // Always return an empty bitmap since the eject is always
4964                 // taken care of right away during a write access.
4965                 data.fill(0);
4966             }
4967             PSEG_FIELD_OFFSET => {
4968                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4969                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4970             }
4971             _ => error!(
4972                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4973                 base, offset
4974             ),
4975         }
4976 
4977         debug!(
4978             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4979             base, offset, data
4980         )
4981     }
4982 
4983     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4984         match offset {
4985             B0EJ_FIELD_OFFSET => {
4986                 assert!(data.len() == B0EJ_FIELD_SIZE);
4987                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4988                 data_array.copy_from_slice(data);
4989                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4990 
4991                 while slot_bitmap > 0 {
4992                     let slot_id = slot_bitmap.trailing_zeros();
4993                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4994                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4995                     }
4996                     slot_bitmap &= !(1 << slot_id);
4997                 }
4998             }
4999             PSEG_FIELD_OFFSET => {
5000                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
5001                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
5002                 data_array.copy_from_slice(data);
5003                 let selected_segment = u32::from_le_bytes(data_array) as usize;
5004                 if selected_segment >= self.pci_segments.len() {
5005                     error!(
5006                         "Segment selection out of range: {} >= {}",
5007                         selected_segment,
5008                         self.pci_segments.len()
5009                     );
5010                     return None;
5011                 }
5012                 self.selected_segment = selected_segment;
5013             }
5014             _ => error!(
5015                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
5016                 base, offset
5017             ),
5018         }
5019 
5020         debug!(
5021             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
5022             base, offset, data
5023         );
5024 
5025         None
5026     }
5027 }
5028 
5029 impl Drop for DeviceManager {
5030     fn drop(&mut self) {
5031         // Wake up the DeviceManager threads (mainly virtio device workers)
5032         // to avoid deadlocking while waiting for paused/parked worker threads.
5033         if let Err(e) = self.resume() {
5034             error!("Error resuming DeviceManager: {:?}", e);
5035         }
5036 
5037         for handle in self.virtio_devices.drain(..) {
5038             handle.virtio_device.lock().unwrap().shutdown();
5039         }
5040 
5041         if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
5042             // SAFETY: FFI call
5043             let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
5044         }
5045     }
5046 }
5047 
5048 #[cfg(test)]
5049 mod tests {
5050     use super::*;
5051 
5052     #[test]
5053     fn test_create_mmio_allocators() {
5054         let res = create_mmio_allocators(0x100000, 0x400000, 1, vec![1], 4 << 10);
5055         assert_eq!(res.len(), 1);
5056         assert_eq!(
5057             res[0].lock().unwrap().base(),
5058             vm_memory::GuestAddress(0x100000)
5059         );
5060         assert_eq!(
5061             res[0].lock().unwrap().end(),
5062             vm_memory::GuestAddress(0x3fffff)
5063         );
5064 
5065         let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![1, 1], 4 << 10);
5066         assert_eq!(res.len(), 2);
5067         assert_eq!(
5068             res[0].lock().unwrap().base(),
5069             vm_memory::GuestAddress(0x100000)
5070         );
5071         assert_eq!(
5072             res[0].lock().unwrap().end(),
5073             vm_memory::GuestAddress(0x27ffff)
5074         );
5075         assert_eq!(
5076             res[1].lock().unwrap().base(),
5077             vm_memory::GuestAddress(0x280000)
5078         );
5079         assert_eq!(
5080             res[1].lock().unwrap().end(),
5081             vm_memory::GuestAddress(0x3fffff)
5082         );
5083 
5084         let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![2, 1], 4 << 10);
5085         assert_eq!(res.len(), 2);
5086         assert_eq!(
5087             res[0].lock().unwrap().base(),
5088             vm_memory::GuestAddress(0x100000)
5089         );
5090         assert_eq!(
5091             res[0].lock().unwrap().end(),
5092             vm_memory::GuestAddress(0x2fffff)
5093         );
5094         assert_eq!(
5095             res[1].lock().unwrap().base(),
5096             vm_memory::GuestAddress(0x300000)
5097         );
5098         assert_eq!(
5099             res[1].lock().unwrap().end(),
5100             vm_memory::GuestAddress(0x3fffff)
5101         );
5102     }
5103 }
5104