xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision 88a9f799449c04180c6b9a21d3b9c0c4b57e2bd6)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use std::collections::{BTreeMap, BTreeSet, HashMap};
13 use std::fs::{File, OpenOptions};
14 use std::io::{self, stdout, IsTerminal, Seek, SeekFrom};
15 use std::num::Wrapping;
16 use std::os::unix::fs::OpenOptionsExt;
17 use std::os::unix::io::{AsRawFd, FromRawFd};
18 use std::path::PathBuf;
19 use std::result;
20 use std::sync::{Arc, Mutex};
21 use std::time::Instant;
22 
23 use acpi_tables::sdt::GenericAddress;
24 use acpi_tables::{aml, Aml};
25 use anyhow::anyhow;
26 use arch::layout;
27 #[cfg(target_arch = "x86_64")]
28 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
29 use arch::NumaNodes;
30 #[cfg(target_arch = "aarch64")]
31 use arch::{DeviceType, MmioDeviceInfo};
32 use block::{
33     async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type,
34     fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio,
35     raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType,
36 };
37 #[cfg(feature = "io_uring")]
38 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk};
39 #[cfg(target_arch = "x86_64")]
40 use devices::debug_console::DebugConsole;
41 #[cfg(target_arch = "aarch64")]
42 use devices::gic;
43 #[cfg(target_arch = "x86_64")]
44 use devices::ioapic;
45 #[cfg(target_arch = "aarch64")]
46 use devices::legacy::Pl011;
47 #[cfg(feature = "pvmemcontrol")]
48 use devices::pvmemcontrol::{PvmemcontrolBusDevice, PvmemcontrolPciDevice};
49 use devices::{
50     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
51 };
52 use hypervisor::IoEventAddress;
53 use libc::{
54     tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE,
55     TCSANOW,
56 };
57 use pci::{
58     DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping,
59     VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError,
60 };
61 use rate_limiter::group::RateLimiterGroup;
62 use seccompiler::SeccompAction;
63 use serde::{Deserialize, Serialize};
64 use tracer::trace_scoped;
65 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd};
66 use virtio_devices::transport::VirtioTransport;
67 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator};
68 use virtio_devices::vhost_user::VhostUserConfig;
69 use virtio_devices::{
70     AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource,
71 };
72 use virtio_devices::{Endpoint, IommuMapping};
73 use vm_allocator::{AddressAllocator, SystemAllocator};
74 use vm_device::dma_mapping::ExternalDmaMapping;
75 use vm_device::interrupt::{
76     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
77 };
78 use vm_device::{Bus, BusDevice, BusDeviceSync, Resource};
79 use vm_memory::guest_memory::FileOffset;
80 use vm_memory::GuestMemoryRegion;
81 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
82 #[cfg(target_arch = "x86_64")]
83 use vm_memory::{GuestAddressSpace, GuestMemory};
84 use vm_migration::{
85     protocol::MemoryRangeTable, snapshot_from_id, state_from_id, Migratable, MigratableError,
86     Pausable, Snapshot, SnapshotData, Snapshottable, Transportable,
87 };
88 use vm_virtio::AccessPlatform;
89 use vm_virtio::VirtioDeviceType;
90 use vmm_sys_util::eventfd::EventFd;
91 #[cfg(target_arch = "x86_64")]
92 use {devices::debug_console, devices::legacy::Serial};
93 
94 use crate::config::{
95     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
96     VdpaConfig, VhostMode, VmConfig, VsockConfig,
97 };
98 use crate::console_devices::{ConsoleDeviceError, ConsoleInfo, ConsoleOutput};
99 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE};
100 use crate::device_tree::{DeviceNode, DeviceTree};
101 use crate::interrupt::LegacyUserspaceInterruptManager;
102 use crate::interrupt::MsiInterruptManager;
103 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
104 use crate::pci_segment::PciSegment;
105 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
106 use crate::vm_config::DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT;
107 use crate::GuestRegionMmap;
108 use crate::PciDeviceInfo;
109 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
110 
// MMIO length constant, 0x1000 bytes (4 KiB); only compiled on aarch64.
// NOTE(review): presumably the size of each MMIO device window — confirm
// against the users of this constant.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name.
// All of these identifiers start with a double underscore.
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "x86_64")]
const DEBUGCON_DEVICE_NAME: &str = "__debug_console";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
#[cfg(feature = "pvmemcontrol")]
const PVMEMCONTROL_DEVICE_NAME: &str = "__pvmemcontrol";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
const PVPANIC_DEVICE_NAME: &str = "__pvpanic";

// Devices that the user may name and for which we generate
// identifiers if the user doesn't give one.
// These are prefixes (single leading underscore).
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
// NOTE(review): unlike its neighbours this is a full name with the
// double-underscore singleton form, not a prefix — it looks like it belongs
// with the singleton group above.
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
142 
/// Errors associated with device manager
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Cannot create tpm device
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create pvmemcontrol device
    #[cfg(feature = "pvmemcontrol")]
    CreatePvmemcontrol(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    #[cfg(target_arch = "x86_64")]
    /// Error creating debug-console output file
    DebugconOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error creating debug-console pty
    DebugconPtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(ConsoleDeviceError),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// No socket option support for console device
    NoSocketOptionSupportForConsoleDevice,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier
    InvalidIdentifier(String),

    /// Error activating virtio device
    VirtioActivate(ActivateError),

    /// Failed retrieving device state from snapshot
    RestoreGetState(MigratableError),

    /// Cannot create a PvPanic device
    PvPanicCreate(devices::pvpanic::PvPanicError),

    /// Cannot create a RateLimiterGroup
    RateLimiterGroupCreate(rate_limiter::group::Error),

    /// Cannot start sigwinch listener
    StartSigwinchListener(std::io::Error),

    /// Invalid console info
    InvalidConsoleInfo,

    /// Invalid console fd
    InvalidConsoleFd,
}
515 
/// Result alias whose error type is [`DeviceManagerError`].
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// Size constant for the device manager's ACPI area: 0x10 (16).
// NOTE(review): presumably a byte length for an MMIO window reserved for the
// device manager's ACPI registers — confirm at the use site.
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;
519 
/// Console abstraction that optionally carries a handle to a virtio console
/// resizer.
///
/// The `Default` value has no resizer attached.
#[derive(Default)]
pub struct Console {
    // Shared resizer handle; `None` means there is nothing to resize.
    console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
}
524 
525 impl Console {
526     pub fn need_resize(&self) -> bool {
527         if let Some(_resizer) = self.console_resizer.as_ref() {
528             return true;
529         }
530 
531         false
532     }
533 
534     pub fn update_console_size(&self) {
535         if let Some(resizer) = self.console_resizer.as_ref() {
536             resizer.update_console_size()
537         }
538     }
539 }
540 
/// Bundles the allocators, buses and hypervisor VM handle that are needed to
/// relocate a PCI BAR (see the `DeviceRelocation` implementation below).
pub(crate) struct AddressManager {
    /// System-wide allocator; `move_bar` uses it to free and re-reserve
    /// I/O port ranges.
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    /// Port I/O bus (x86_64 only); updated when an I/O BAR moves.
    #[cfg(target_arch = "x86_64")]
    pub(crate) io_bus: Arc<Bus>,
    /// MMIO bus; updated when a 32-bit or 64-bit memory BAR moves.
    pub(crate) mmio_bus: Arc<Bus>,
    /// Hypervisor VM handle, used for ioevent (un)registration and for
    /// removing/creating user memory regions.
    pub(crate) vm: Arc<dyn hypervisor::Vm>,
    // Device tree whose `PciBar` resources are rewritten when a BAR moves.
    device_tree: Arc<Mutex<DeviceTree>>,
    // Allocators searched for the one that contains a 32-bit memory BAR.
    pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
    // Allocators searched for the one that contains a 64-bit memory BAR.
    pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}
551 
impl DeviceRelocation for AddressManager {
    /// Relocates a PCI BAR of `pci_dev` from `old_base` to `new_base`.
    ///
    /// The steps run in this order:
    /// 1. Free the old range and reserve the new one in the matching
    ///    allocator, then move the range on the PIO bus (I/O BARs, x86_64
    ///    only) or on the MMIO bus (32-bit/64-bit memory BARs).
    /// 2. If the device has an id, rewrite the base of the matching `PciBar`
    ///    resource in the device tree.
    /// 3. If the device is a `VirtioPciDevice`, either re-register its
    ///    ioeventfds at the new address or remap its shared memory region.
    /// 4. Delegate to `pci_dev.move_bar(old_base, new_base)`.
    ///
    /// # Errors
    ///
    /// Returns an `io::Error` of kind `Other` when the new range cannot be
    /// allocated, a bus update fails, the device or its BAR resource is not
    /// found in the device tree, or a hypervisor call fails. Steps already
    /// performed are not rolled back.
    ///
    /// # Panics
    ///
    /// Panics if one of the mutexes taken here is poisoned.
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                // On aarch64 this only logs: no error is returned and the
                // function carries on with the device tree update below.
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
                    &self.pci_mmio32_allocators
                } else {
                    &self.pci_mmio64_allocators
                };

                // Find the specific allocator that this BAR was allocated from and use it for new one
                // If no allocator contains `old_base`, no allocator is touched
                // and the MMIO bus is still updated below.
                for allocator in allocators {
                    let allocator_base = allocator.lock().unwrap().base();
                    let allocator_end = allocator.lock().unwrap().end();

                    // Both bounds are compared inclusively.
                    if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                        allocator
                            .lock()
                            .unwrap()
                            .free(GuestAddress(old_base), len as GuestUsize);

                        // The new range is requested at exactly `new_base`,
                        // with `len` also passed as the third argument.
                        allocator
                            .lock()
                            .unwrap()
                            .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
                            .ok_or_else(|| {
                                io::Error::new(
                                    io::ErrorKind::Other,
                                    "failed allocating new MMIO range",
                                )
                            })?;

                        break;
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        // Devices that report no id skip this step entirely.
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        // Only the first BAR matching both type and old base
                        // is rewritten.
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {id} from device tree"),
                ));
            }
        }

        // Extra work that only applies to virtio-pci devices.
        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The device reports `new_base` as its config BAR address:
                // drop the ioevents computed from the old base, then register
                // the ones computed from the new base.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {e:?}"),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {e:?}"),
                            )
                        })?;
                }
            } else {
                // Otherwise the moved BAR may back the device's shared memory
                // region; remap it only when its address equals `old_base`.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        // Describe the existing mapping (same slot, length and
                        // host address) so it can be removed.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {e:?}"),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {e:?}"),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {e:?}"),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the device itself record the new BAR address.
        pci_dev.move_bar(old_base, new_base)
    }
}
739 
/// Serializable device manager state: the device tree together with the
/// device id counter.
// NOTE(review): presumably the payload saved/restored on snapshot — confirm
// against the Snapshottable implementation.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    // Tree of devices.
    device_tree: DeviceTree,
    // Wrapping counter of consumed device ids.
    device_id_cnt: Wrapping<usize>,
}
745 
/// An open pty file bundled with a filesystem path.
#[derive(Debug)]
pub struct PtyPair {
    /// Open file handle for the `main` side.
    pub main: File,
    /// Path stored alongside the file handle.
    pub path: PathBuf,
}

impl Clone for PtyPair {
    /// Produces an independent pair: the file handle is duplicated with
    /// `File::try_clone` and the path is copied.
    ///
    /// # Panics
    ///
    /// Panics if the underlying file handle cannot be duplicated.
    fn clone(&self) -> Self {
        let main = self.main.try_clone().unwrap();
        let path = self.path.clone();
        Self { main, path }
    }
}
760 
/// Shared handle to one of the PCI device flavours handled here.
///
/// Cloning only clones the inner `Arc`, so clones refer to the same device.
#[derive(Clone)]
pub enum PciDeviceHandle {
    /// A VFIO PCI device.
    Vfio(Arc<Mutex<VfioPciDevice>>),
    /// A virtio PCI device.
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    /// A vfio-user PCI device.
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}
767 
/// A virtio device together with the metadata kept alongside it.
#[derive(Clone)]
struct MetaVirtioDevice {
    // Shared handle to the virtio device itself.
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // NOTE(review): presumably whether the device is placed behind the
    // virtual IOMMU — confirm against where this flag is consumed.
    iommu: bool,
    // Device identifier.
    id: String,
    // PCI segment number for the device.
    pci_segment: u16,
    // Optional external DMA mapping handler associated with the device.
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}
776 
/// Optional ACPI `GenericAddress` values for platform registers.
///
/// Every field is `None` in the `Default` value.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    /// Address of the PM timer, if any.
    pub pm_timer_address: Option<GenericAddress>,
    /// Address of the reset register, if any.
    pub reset_reg_address: Option<GenericAddress>,
    /// Address of the sleep control register, if any.
    pub sleep_control_reg_address: Option<GenericAddress>,
    /// Address of the sleep status register, if any.
    pub sleep_status_reg_address: Option<GenericAddress>,
}
784 
/// `AccessPlatform` implementation that wraps a hypervisor VM handle.
///
/// Only built when both the `mshv` and `sev_snp` features are enabled.
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
struct SevSnpPageAccessProxy {
    // VM handle on which `gain_page_access` is called during translation.
    vm: Arc<dyn hypervisor::Vm>,
}
789 
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl std::fmt::Debug for SevSnpPageAccessProxy {
    /// Emits a fixed label; the wrapped VM handle is not printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("SNP Page access proxy")
    }
}
796 
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl SevSnpPageAccessProxy {
    /// Builds a proxy around the given hypervisor VM handle.
    fn new(vm: Arc<dyn hypervisor::Vm>) -> Self {
        Self { vm }
    }
}
803 
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl AccessPlatform for SevSnpPageAccessProxy {
    /// Returns `base` unchanged; `_size` is ignored.
    fn translate_gpa(&self, base: u64, _size: u64) -> std::result::Result<u64, std::io::Error> {
        Ok(base)
    }

    /// Calls `gain_page_access` on the VM for the given range and, on
    /// success, returns `base` unchanged.
    ///
    /// # Errors
    ///
    /// A failure from `gain_page_access` is wrapped in an `io::Error` of kind
    /// `Other`.
    fn translate_gva(&self, base: u64, size: u64) -> std::result::Result<u64, std::io::Error> {
        // NOTE(review): `size as u32` silently truncates sizes above
        // `u32::MAX` — confirm callers never pass such a range.
        match self.vm.gain_page_access(base, size as u32) {
            Ok(_) => Ok(base),
            Err(e) => Err(io::Error::new(io::ErrorKind::Other, e)),
        }
    }
}
817 
// State needed to create and keep track of the devices of a VM: address
// space and interrupt management, the console, PCI segments, virtio and
// passthrough devices, and the device tree used for snapshot/restore.
// Instances are built by `DeviceManager::new`, which returns the manager
// wrapped in an `Arc<Mutex<_>>` and registers it on the MMIO bus.
pub struct DeviceManager {
    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // Console resize pipe, if any. Handed out by `console_resize_pipe()`.
    // NOTE(review): the previous comment read "pty foreground status", which
    // does not match the field name - confirm the intended description.
    console_resize_pipe: Option<Arc<File>>,

    // Original terminal settings, kept so they can be restored on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller (IOAPIC on x86_64, GIC on aarch64). `None` until
    // `add_interrupt_controller` has run.
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<dyn BusDeviceSync>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    // PCI segments. Index 0 is the default segment created in `new`; any
    // additional segments follow in segment-id order.
    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager. `None` until `create_devices` has built it
    // on top of the interrupt controller.
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU device and the mapping it shares with the
    // devices placed behind it.
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit and reset events. Clones are handed to the devices that need to
    // trigger a VM exit or reset.
    exit_evt: EventFd,
    reset_evt: EventFd,

    // MMIO device information keyed by device type and identifier string
    // (aarch64 only).
    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    // Base address of the MMIO region allocated in `new` on which the
    // DeviceManager itself is registered as a bus device.
    acpi_address: GuestAddress,

    // Currently selected PCI segment; starts at 0.
    // NOTE(review): presumably selected by the guest through the ACPI
    // region above - confirm against the BusDevice implementation.
    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // pvmemcontrol bus device and PCI device, set by `create_devices` when
    // pvmemcontrol is present in the configuration.
    #[cfg(feature = "pvmemcontrol")]
    pvmemcontrol_devices: Option<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // aio availability if detected
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    // Snapshot the devices are restored from, if any.
    snapshot: Option<Snapshot>,

    // Rate limiter groups from the configuration, keyed by group id.
    rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,

    // Shared list of MMIO regions; starts empty and is filled outside `new`.
    mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
}
967 
968 fn create_mmio_allocators(
969     start: u64,
970     end: u64,
971     num_pci_segments: u16,
972     weights: Vec<u32>,
973     alignment: u64,
974 ) -> Vec<Arc<Mutex<AddressAllocator>>> {
975     let total_weight: u32 = weights.iter().sum();
976 
977     // Start each PCI segment mmio range on an aligned boundary
978     let pci_segment_mmio_size = (end - start + 1) / (alignment * total_weight as u64) * alignment;
979 
980     let mut mmio_allocators = vec![];
981     let mut i = 0;
982     for segment_id in 0..num_pci_segments as u64 {
983         let weight = weights[segment_id as usize] as u64;
984         let mmio_start = start + i * pci_segment_mmio_size;
985         let mmio_size = pci_segment_mmio_size * weight;
986         let allocator = Arc::new(Mutex::new(
987             AddressAllocator::new(GuestAddress(mmio_start), mmio_size).unwrap(),
988         ));
989         mmio_allocators.push(allocator);
990         i += weight;
991     }
992 
993     mmio_allocators
994 }
995 
996 impl DeviceManager {
997     #[allow(clippy::too_many_arguments)]
998     pub fn new(
999         #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
1000         mmio_bus: Arc<Bus>,
1001         vm: Arc<dyn hypervisor::Vm>,
1002         config: Arc<Mutex<VmConfig>>,
1003         memory_manager: Arc<Mutex<MemoryManager>>,
1004         cpu_manager: Arc<Mutex<CpuManager>>,
1005         exit_evt: EventFd,
1006         reset_evt: EventFd,
1007         seccomp_action: SeccompAction,
1008         numa_nodes: NumaNodes,
1009         activate_evt: &EventFd,
1010         force_iommu: bool,
1011         boot_id_list: BTreeSet<String>,
1012         timestamp: Instant,
1013         snapshot: Option<Snapshot>,
1014         dynamic: bool,
1015     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
1016         trace_scoped!("DeviceManager::new");
1017 
1018         let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
1019             let state: DeviceManagerState = snapshot.to_state().unwrap();
1020             (
1021                 Arc::new(Mutex::new(state.device_tree.clone())),
1022                 state.device_id_cnt,
1023             )
1024         } else {
1025             (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
1026         };
1027 
1028         let num_pci_segments =
1029             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
1030                 platform_config.num_pci_segments
1031             } else {
1032                 1
1033             };
1034 
1035         let mut mmio32_aperture_weights: Vec<u32> =
1036             std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
1037                 .take(num_pci_segments.into())
1038                 .collect();
1039         if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
1040             for pci_segment in pci_segments.iter() {
1041                 mmio32_aperture_weights[pci_segment.pci_segment as usize] =
1042                     pci_segment.mmio32_aperture_weight
1043             }
1044         }
1045 
1046         let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0;
1047         let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE;
1048         let pci_mmio32_allocators = create_mmio_allocators(
1049             start_of_mmio32_area,
1050             end_of_mmio32_area,
1051             num_pci_segments,
1052             mmio32_aperture_weights,
1053             4 << 10,
1054         );
1055 
1056         let mut mmio64_aperture_weights: Vec<u32> =
1057             std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
1058                 .take(num_pci_segments.into())
1059                 .collect();
1060         if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
1061             for pci_segment in pci_segments.iter() {
1062                 mmio64_aperture_weights[pci_segment.pci_segment as usize] =
1063                     pci_segment.mmio64_aperture_weight
1064             }
1065         }
1066 
1067         let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0;
1068         let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0;
1069         let pci_mmio64_allocators = create_mmio_allocators(
1070             start_of_mmio64_area,
1071             end_of_mmio64_area,
1072             num_pci_segments,
1073             mmio64_aperture_weights,
1074             4 << 30,
1075         );
1076 
1077         let address_manager = Arc::new(AddressManager {
1078             allocator: memory_manager.lock().unwrap().allocator(),
1079             #[cfg(target_arch = "x86_64")]
1080             io_bus,
1081             mmio_bus,
1082             vm: vm.clone(),
1083             device_tree: Arc::clone(&device_tree),
1084             pci_mmio32_allocators,
1085             pci_mmio64_allocators,
1086         });
1087 
1088         // First we create the MSI interrupt manager, the legacy one is created
1089         // later, after the IOAPIC device creation.
1090         // The reason we create the MSI one first is because the IOAPIC needs it,
1091         // and then the legacy interrupt manager needs an IOAPIC. So we're
1092         // handling a linear dependency chain:
1093         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1094         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1095             Arc::new(MsiInterruptManager::new(
1096                 Arc::clone(&address_manager.allocator),
1097                 vm,
1098             ));
1099 
1100         let acpi_address = address_manager
1101             .allocator
1102             .lock()
1103             .unwrap()
1104             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1105             .ok_or(DeviceManagerError::AllocateIoPort)?;
1106 
1107         let mut pci_irq_slots = [0; 32];
1108         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1109             &address_manager,
1110             &mut pci_irq_slots,
1111         )?;
1112 
1113         let mut pci_segments = vec![PciSegment::new_default_segment(
1114             &address_manager,
1115             Arc::clone(&address_manager.pci_mmio32_allocators[0]),
1116             Arc::clone(&address_manager.pci_mmio64_allocators[0]),
1117             &pci_irq_slots,
1118         )?];
1119 
1120         for i in 1..num_pci_segments as usize {
1121             pci_segments.push(PciSegment::new(
1122                 i as u16,
1123                 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
1124                 &address_manager,
1125                 Arc::clone(&address_manager.pci_mmio32_allocators[i]),
1126                 Arc::clone(&address_manager.pci_mmio64_allocators[i]),
1127                 &pci_irq_slots,
1128             )?);
1129         }
1130 
1131         if dynamic {
1132             let acpi_address = address_manager
1133                 .allocator
1134                 .lock()
1135                 .unwrap()
1136                 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
1137                 .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1138 
1139             address_manager
1140                 .mmio_bus
1141                 .insert(
1142                     cpu_manager.clone(),
1143                     acpi_address.0,
1144                     CPU_MANAGER_ACPI_SIZE as u64,
1145                 )
1146                 .map_err(DeviceManagerError::BusError)?;
1147 
1148             cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
1149         }
1150 
1151         let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new();
1152         if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() {
1153             for rate_limit_group_cfg in rate_limit_groups_cfg {
1154                 let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config;
1155                 let bw = rate_limit_cfg.bandwidth.unwrap_or_default();
1156                 let ops = rate_limit_cfg.ops.unwrap_or_default();
1157                 let mut rate_limit_group = RateLimiterGroup::new(
1158                     &rate_limit_group_cfg.id,
1159                     bw.size,
1160                     bw.one_time_burst.unwrap_or(0),
1161                     bw.refill_time,
1162                     ops.size,
1163                     ops.one_time_burst.unwrap_or(0),
1164                     ops.refill_time,
1165                 )
1166                 .map_err(DeviceManagerError::RateLimiterGroupCreate)?;
1167 
1168                 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?;
1169 
1170                 rate_limit_group.start_thread(exit_evt).unwrap();
1171                 rate_limit_groups
1172                     .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group));
1173             }
1174         }
1175 
1176         let device_manager = DeviceManager {
1177             address_manager: Arc::clone(&address_manager),
1178             console: Arc::new(Console::default()),
1179             interrupt_controller: None,
1180             #[cfg(target_arch = "aarch64")]
1181             cmdline_additions: Vec::new(),
1182             ged_notification_device: None,
1183             config,
1184             memory_manager,
1185             cpu_manager,
1186             virtio_devices: Vec::new(),
1187             bus_devices: Vec::new(),
1188             device_id_cnt,
1189             msi_interrupt_manager,
1190             legacy_interrupt_manager: None,
1191             passthrough_device: None,
1192             vfio_container: None,
1193             iommu_device: None,
1194             iommu_mapping: None,
1195             iommu_attached_devices: None,
1196             pci_segments,
1197             device_tree,
1198             exit_evt,
1199             reset_evt,
1200             #[cfg(target_arch = "aarch64")]
1201             id_to_dev_info: HashMap::new(),
1202             seccomp_action,
1203             numa_nodes,
1204             balloon: None,
1205             activate_evt: activate_evt
1206                 .try_clone()
1207                 .map_err(DeviceManagerError::EventFd)?,
1208             acpi_address,
1209             selected_segment: 0,
1210             serial_manager: None,
1211             console_resize_pipe: None,
1212             original_termios_opt: Arc::new(Mutex::new(None)),
1213             virtio_mem_devices: Vec::new(),
1214             #[cfg(target_arch = "aarch64")]
1215             gpio_device: None,
1216             #[cfg(feature = "pvmemcontrol")]
1217             pvmemcontrol_devices: None,
1218             pvpanic_device: None,
1219             force_iommu,
1220             io_uring_supported: None,
1221             aio_supported: None,
1222             boot_id_list,
1223             timestamp,
1224             pending_activations: Arc::new(Mutex::new(Vec::default())),
1225             acpi_platform_addresses: AcpiPlatformAddresses::default(),
1226             snapshot,
1227             rate_limit_groups,
1228             mmio_regions: Arc::new(Mutex::new(Vec::new())),
1229         };
1230 
1231         let device_manager = Arc::new(Mutex::new(device_manager));
1232 
1233         address_manager
1234             .mmio_bus
1235             .insert(
1236                 Arc::clone(&device_manager) as Arc<dyn BusDeviceSync>,
1237                 acpi_address.0,
1238                 DEVICE_MANAGER_ACPI_SIZE as u64,
1239             )
1240             .map_err(DeviceManagerError::BusError)?;
1241 
1242         Ok(device_manager)
1243     }
1244 
1245     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1246         self.console_resize_pipe.clone()
1247     }
1248 
1249     pub fn create_devices(
1250         &mut self,
1251         console_info: Option<ConsoleInfo>,
1252         console_resize_pipe: Option<Arc<File>>,
1253         original_termios_opt: Arc<Mutex<Option<termios>>>,
1254     ) -> DeviceManagerResult<()> {
1255         trace_scoped!("create_devices");
1256 
1257         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1258 
1259         let interrupt_controller = self.add_interrupt_controller()?;
1260 
1261         self.cpu_manager
1262             .lock()
1263             .unwrap()
1264             .set_interrupt_controller(interrupt_controller.clone());
1265 
1266         // Now we can create the legacy interrupt manager, which needs the freshly
1267         // formed IOAPIC device.
1268         let legacy_interrupt_manager: Arc<
1269             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1270         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1271             &interrupt_controller,
1272         )));
1273 
1274         {
1275             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1276                 self.address_manager
1277                     .mmio_bus
1278                     .insert(
1279                         Arc::clone(&self.memory_manager) as Arc<dyn BusDeviceSync>,
1280                         acpi_address.0,
1281                         MEMORY_MANAGER_ACPI_SIZE as u64,
1282                     )
1283                     .map_err(DeviceManagerError::BusError)?;
1284             }
1285         }
1286 
1287         #[cfg(target_arch = "x86_64")]
1288         self.add_legacy_devices(
1289             self.reset_evt
1290                 .try_clone()
1291                 .map_err(DeviceManagerError::EventFd)?,
1292         )?;
1293 
1294         #[cfg(target_arch = "aarch64")]
1295         self.add_legacy_devices(&legacy_interrupt_manager)?;
1296 
1297         {
1298             self.ged_notification_device = self.add_acpi_devices(
1299                 &legacy_interrupt_manager,
1300                 self.reset_evt
1301                     .try_clone()
1302                     .map_err(DeviceManagerError::EventFd)?,
1303                 self.exit_evt
1304                     .try_clone()
1305                     .map_err(DeviceManagerError::EventFd)?,
1306             )?;
1307         }
1308 
1309         self.original_termios_opt = original_termios_opt;
1310 
1311         self.console = self.add_console_devices(
1312             &legacy_interrupt_manager,
1313             &mut virtio_devices,
1314             console_info,
1315             console_resize_pipe,
1316         )?;
1317 
1318         if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
1319             let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
1320             self.bus_devices
1321                 .push(Arc::clone(&tpm_dev) as Arc<dyn BusDeviceSync>)
1322         }
1323         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1324 
1325         virtio_devices.append(&mut self.make_virtio_devices()?);
1326 
1327         self.add_pci_devices(virtio_devices.clone())?;
1328 
1329         self.virtio_devices = virtio_devices;
1330 
1331         // Add pvmemcontrol if required
1332         #[cfg(feature = "pvmemcontrol")]
1333         {
1334             if self.config.lock().unwrap().pvmemcontrol.is_some() {
1335                 let (pvmemcontrol_bus_device, pvmemcontrol_pci_device) =
1336                     self.make_pvmemcontrol_device()?;
1337                 self.pvmemcontrol_devices =
1338                     Some((pvmemcontrol_bus_device, pvmemcontrol_pci_device));
1339             }
1340         }
1341 
1342         if self.config.clone().lock().unwrap().pvpanic {
1343             self.pvpanic_device = self.add_pvpanic_device()?;
1344         }
1345 
1346         Ok(())
1347     }
1348 
1349     fn state(&self) -> DeviceManagerState {
1350         DeviceManagerState {
1351             device_tree: self.device_tree.lock().unwrap().clone(),
1352             device_id_cnt: self.device_id_cnt,
1353         }
1354     }
1355 
1356     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1357         #[cfg(target_arch = "aarch64")]
1358         {
1359             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1360             let vgic_config = gic::Gic::create_default_config(vcpus.into());
1361             (
1362                 vgic_config.msi_addr,
1363                 vgic_config.msi_addr + vgic_config.msi_size - 1,
1364             )
1365         }
1366         #[cfg(target_arch = "x86_64")]
1367         (0xfee0_0000, 0xfeef_ffff)
1368     }
1369 
    #[cfg(target_arch = "aarch64")]
    /// Returns the MMIO device information recorded so far, keyed by device
    /// type and identifier string. The map reflects only the devices
    /// registered up to the time of the call.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }
1375 
1376     #[allow(unused_variables)]
1377     fn add_pci_devices(
1378         &mut self,
1379         virtio_devices: Vec<MetaVirtioDevice>,
1380     ) -> DeviceManagerResult<()> {
1381         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1382 
1383         let iommu_device = if self.config.lock().unwrap().iommu {
1384             let (device, mapping) = virtio_devices::Iommu::new(
1385                 iommu_id.clone(),
1386                 self.seccomp_action.clone(),
1387                 self.exit_evt
1388                     .try_clone()
1389                     .map_err(DeviceManagerError::EventFd)?,
1390                 self.get_msi_iova_space(),
1391                 state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
1392                     .map_err(DeviceManagerError::RestoreGetState)?,
1393             )
1394             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1395             let device = Arc::new(Mutex::new(device));
1396             self.iommu_device = Some(Arc::clone(&device));
1397             self.iommu_mapping = Some(mapping);
1398 
1399             // Fill the device tree with a new node. In case of restore, we
1400             // know there is nothing to do, so we can simply override the
1401             // existing entry.
1402             self.device_tree
1403                 .lock()
1404                 .unwrap()
1405                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1406 
1407             Some(device)
1408         } else {
1409             None
1410         };
1411 
1412         let mut iommu_attached_devices = Vec::new();
1413         {
1414             for handle in virtio_devices {
1415                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1416                     self.iommu_mapping.clone()
1417                 } else {
1418                     None
1419                 };
1420 
1421                 let dev_id = self.add_virtio_pci_device(
1422                     handle.virtio_device,
1423                     &mapping,
1424                     handle.id,
1425                     handle.pci_segment,
1426                     handle.dma_handler,
1427                 )?;
1428 
1429                 if handle.iommu {
1430                     iommu_attached_devices.push(dev_id);
1431                 }
1432             }
1433 
1434             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1435             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1436 
1437             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1438             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1439 
1440             // Add all devices from forced iommu segments
1441             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1442                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1443                     for segment in iommu_segments {
1444                         for device in 0..32 {
1445                             let bdf = PciBdf::new(*segment, 0, device, 0);
1446                             if !iommu_attached_devices.contains(&bdf) {
1447                                 iommu_attached_devices.push(bdf);
1448                             }
1449                         }
1450                     }
1451                 }
1452             }
1453 
1454             if let Some(iommu_device) = iommu_device {
1455                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1456                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1457             }
1458         }
1459 
1460         for segment in &self.pci_segments {
1461             #[cfg(target_arch = "x86_64")]
1462             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1463                 self.bus_devices
1464                     .push(Arc::clone(pci_config_io) as Arc<dyn BusDeviceSync>);
1465             }
1466 
1467             self.bus_devices
1468                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<dyn BusDeviceSync>);
1469         }
1470 
1471         Ok(())
1472     }
1473 
1474     #[cfg(target_arch = "aarch64")]
1475     fn add_interrupt_controller(
1476         &mut self,
1477     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1478         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1479             gic::Gic::new(
1480                 self.config.lock().unwrap().cpus.boot_vcpus,
1481                 Arc::clone(&self.msi_interrupt_manager),
1482                 self.address_manager.vm.clone(),
1483             )
1484             .map_err(DeviceManagerError::CreateInterruptController)?,
1485         ));
1486 
1487         self.interrupt_controller = Some(interrupt_controller.clone());
1488 
1489         // Restore the vGic if this is in the process of restoration
1490         let id = String::from(gic::GIC_SNAPSHOT_ID);
1491         if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
1492             // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
1493             if self
1494                 .cpu_manager
1495                 .lock()
1496                 .unwrap()
1497                 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
1498                 .is_err()
1499             {
1500                 info!("Failed to initialize PMU");
1501             }
1502 
1503             let vgic_state = vgic_snapshot
1504                 .to_state()
1505                 .map_err(DeviceManagerError::RestoreGetState)?;
1506             let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
1507             interrupt_controller
1508                 .lock()
1509                 .unwrap()
1510                 .restore_vgic(vgic_state, &saved_vcpu_states)
1511                 .unwrap();
1512         }
1513 
1514         self.device_tree
1515             .lock()
1516             .unwrap()
1517             .insert(id.clone(), device_node!(id, interrupt_controller));
1518 
1519         Ok(interrupt_controller)
1520     }
1521 
    /// Returns the GIC interrupt controller, or `None` when it has not been
    /// created yet (the field starts as `None` and is set by
    /// `add_interrupt_controller`). Only available on aarch64.
    #[cfg(target_arch = "aarch64")]
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }
1526 
1527     #[cfg(target_arch = "x86_64")]
1528     fn add_interrupt_controller(
1529         &mut self,
1530     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1531         let id = String::from(IOAPIC_DEVICE_NAME);
1532 
1533         // Create IOAPIC
1534         let interrupt_controller = Arc::new(Mutex::new(
1535             ioapic::Ioapic::new(
1536                 id.clone(),
1537                 APIC_START,
1538                 Arc::clone(&self.msi_interrupt_manager),
1539                 state_from_id(self.snapshot.as_ref(), id.as_str())
1540                     .map_err(DeviceManagerError::RestoreGetState)?,
1541             )
1542             .map_err(DeviceManagerError::CreateInterruptController)?,
1543         ));
1544 
1545         self.interrupt_controller = Some(interrupt_controller.clone());
1546 
1547         self.address_manager
1548             .mmio_bus
1549             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1550             .map_err(DeviceManagerError::BusError)?;
1551 
1552         self.bus_devices
1553             .push(Arc::clone(&interrupt_controller) as Arc<dyn BusDeviceSync>);
1554 
1555         // Fill the device tree with a new node. In case of restore, we
1556         // know there is nothing to do, so we can simply override the
1557         // existing entry.
1558         self.device_tree
1559             .lock()
1560             .unwrap()
1561             .insert(id.clone(), device_node!(id, interrupt_controller));
1562 
1563         Ok(interrupt_controller)
1564     }
1565 
1566     fn add_acpi_devices(
1567         &mut self,
1568         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1569         reset_evt: EventFd,
1570         exit_evt: EventFd,
1571     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1572         let vcpus_kill_signalled = self
1573             .cpu_manager
1574             .lock()
1575             .unwrap()
1576             .vcpus_kill_signalled()
1577             .clone();
1578         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1579             exit_evt,
1580             reset_evt,
1581             vcpus_kill_signalled,
1582         )));
1583 
1584         self.bus_devices
1585             .push(Arc::clone(&shutdown_device) as Arc<dyn BusDeviceSync>);
1586 
1587         #[cfg(target_arch = "x86_64")]
1588         {
1589             let shutdown_pio_address: u16 = 0x600;
1590 
1591             self.address_manager
1592                 .allocator
1593                 .lock()
1594                 .unwrap()
1595                 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
1596                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1597 
1598             self.address_manager
1599                 .io_bus
1600                 .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
1601                 .map_err(DeviceManagerError::BusError)?;
1602 
1603             self.acpi_platform_addresses.sleep_control_reg_address =
1604                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1605             self.acpi_platform_addresses.sleep_status_reg_address =
1606                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1607             self.acpi_platform_addresses.reset_reg_address =
1608                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1609         }
1610 
1611         let ged_irq = self
1612             .address_manager
1613             .allocator
1614             .lock()
1615             .unwrap()
1616             .allocate_irq()
1617             .unwrap();
1618         let interrupt_group = interrupt_manager
1619             .create_group(LegacyIrqGroupConfig {
1620                 irq: ged_irq as InterruptIndex,
1621             })
1622             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1623         let ged_address = self
1624             .address_manager
1625             .allocator
1626             .lock()
1627             .unwrap()
1628             .allocate_platform_mmio_addresses(
1629                 None,
1630                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1631                 None,
1632             )
1633             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1634         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1635             interrupt_group,
1636             ged_irq,
1637             ged_address,
1638         )));
1639         self.address_manager
1640             .mmio_bus
1641             .insert(
1642                 ged_device.clone(),
1643                 ged_address.0,
1644                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1645             )
1646             .map_err(DeviceManagerError::BusError)?;
1647         self.bus_devices
1648             .push(Arc::clone(&ged_device) as Arc<dyn BusDeviceSync>);
1649 
1650         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1651 
1652         self.bus_devices
1653             .push(Arc::clone(&pm_timer_device) as Arc<dyn BusDeviceSync>);
1654 
1655         #[cfg(target_arch = "x86_64")]
1656         {
1657             let pm_timer_pio_address: u16 = 0x608;
1658 
1659             self.address_manager
1660                 .allocator
1661                 .lock()
1662                 .unwrap()
1663                 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
1664                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1665 
1666             self.address_manager
1667                 .io_bus
1668                 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
1669                 .map_err(DeviceManagerError::BusError)?;
1670 
1671             self.acpi_platform_addresses.pm_timer_address =
1672                 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
1673         }
1674 
1675         Ok(Some(ged_device))
1676     }
1677 
1678     #[cfg(target_arch = "x86_64")]
1679     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1680         let vcpus_kill_signalled = self
1681             .cpu_manager
1682             .lock()
1683             .unwrap()
1684             .vcpus_kill_signalled()
1685             .clone();
1686         // Add a shutdown device (i8042)
1687         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1688             reset_evt.try_clone().unwrap(),
1689             vcpus_kill_signalled.clone(),
1690         )));
1691 
1692         self.bus_devices
1693             .push(Arc::clone(&i8042) as Arc<dyn BusDeviceSync>);
1694 
1695         self.address_manager
1696             .io_bus
1697             .insert(i8042, 0x61, 0x4)
1698             .map_err(DeviceManagerError::BusError)?;
1699         {
1700             // Add a CMOS emulated device
1701             let mem_size = self
1702                 .memory_manager
1703                 .lock()
1704                 .unwrap()
1705                 .guest_memory()
1706                 .memory()
1707                 .last_addr()
1708                 .0
1709                 + 1;
1710             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1711             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1712 
1713             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1714                 mem_below_4g,
1715                 mem_above_4g,
1716                 reset_evt,
1717                 Some(vcpus_kill_signalled),
1718             )));
1719 
1720             self.bus_devices
1721                 .push(Arc::clone(&cmos) as Arc<dyn BusDeviceSync>);
1722 
1723             self.address_manager
1724                 .io_bus
1725                 .insert(cmos, 0x70, 0x2)
1726                 .map_err(DeviceManagerError::BusError)?;
1727 
1728             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1729 
1730             self.bus_devices
1731                 .push(Arc::clone(&fwdebug) as Arc<dyn BusDeviceSync>);
1732 
1733             self.address_manager
1734                 .io_bus
1735                 .insert(fwdebug, 0x402, 0x1)
1736                 .map_err(DeviceManagerError::BusError)?;
1737         }
1738 
1739         // 0x80 debug port
1740         let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
1741         self.bus_devices
1742             .push(Arc::clone(&debug_port) as Arc<dyn BusDeviceSync>);
1743         self.address_manager
1744             .io_bus
1745             .insert(debug_port, 0x80, 0x1)
1746             .map_err(DeviceManagerError::BusError)?;
1747 
1748         Ok(())
1749     }
1750 
1751     #[cfg(target_arch = "aarch64")]
1752     fn add_legacy_devices(
1753         &mut self,
1754         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1755     ) -> DeviceManagerResult<()> {
1756         // Add a RTC device
1757         let rtc_irq = self
1758             .address_manager
1759             .allocator
1760             .lock()
1761             .unwrap()
1762             .allocate_irq()
1763             .unwrap();
1764 
1765         let interrupt_group = interrupt_manager
1766             .create_group(LegacyIrqGroupConfig {
1767                 irq: rtc_irq as InterruptIndex,
1768             })
1769             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1770 
1771         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1772 
1773         self.bus_devices
1774             .push(Arc::clone(&rtc_device) as Arc<dyn BusDeviceSync>);
1775 
1776         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1777 
1778         self.address_manager
1779             .mmio_bus
1780             .insert(rtc_device, addr.0, MMIO_LEN)
1781             .map_err(DeviceManagerError::BusError)?;
1782 
1783         self.id_to_dev_info.insert(
1784             (DeviceType::Rtc, "rtc".to_string()),
1785             MmioDeviceInfo {
1786                 addr: addr.0,
1787                 len: MMIO_LEN,
1788                 irq: rtc_irq,
1789             },
1790         );
1791 
1792         // Add a GPIO device
1793         let id = String::from(GPIO_DEVICE_NAME);
1794         let gpio_irq = self
1795             .address_manager
1796             .allocator
1797             .lock()
1798             .unwrap()
1799             .allocate_irq()
1800             .unwrap();
1801 
1802         let interrupt_group = interrupt_manager
1803             .create_group(LegacyIrqGroupConfig {
1804                 irq: gpio_irq as InterruptIndex,
1805             })
1806             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1807 
1808         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1809             id.clone(),
1810             interrupt_group,
1811             state_from_id(self.snapshot.as_ref(), id.as_str())
1812                 .map_err(DeviceManagerError::RestoreGetState)?,
1813         )));
1814 
1815         self.bus_devices
1816             .push(Arc::clone(&gpio_device) as Arc<dyn BusDeviceSync>);
1817 
1818         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1819 
1820         self.address_manager
1821             .mmio_bus
1822             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1823             .map_err(DeviceManagerError::BusError)?;
1824 
1825         self.gpio_device = Some(gpio_device.clone());
1826 
1827         self.id_to_dev_info.insert(
1828             (DeviceType::Gpio, "gpio".to_string()),
1829             MmioDeviceInfo {
1830                 addr: addr.0,
1831                 len: MMIO_LEN,
1832                 irq: gpio_irq,
1833             },
1834         );
1835 
1836         self.device_tree
1837             .lock()
1838             .unwrap()
1839             .insert(id.clone(), device_node!(id, gpio_device));
1840 
1841         Ok(())
1842     }
1843 
1844     #[cfg(target_arch = "x86_64")]
1845     fn add_debug_console_device(
1846         &mut self,
1847         debug_console_writer: Box<dyn io::Write + Send>,
1848     ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> {
1849         let id = String::from(DEBUGCON_DEVICE_NAME);
1850         let debug_console = Arc::new(Mutex::new(DebugConsole::new(
1851             id.clone(),
1852             debug_console_writer,
1853         )));
1854 
1855         let port = self
1856             .config
1857             .lock()
1858             .unwrap()
1859             .debug_console
1860             .clone()
1861             .iobase
1862             .map(|port| port as u64)
1863             .unwrap_or(debug_console::DEFAULT_PORT);
1864 
1865         self.bus_devices
1866             .push(Arc::clone(&debug_console) as Arc<dyn BusDeviceSync>);
1867 
1868         self.address_manager
1869             .allocator
1870             .lock()
1871             .unwrap()
1872             .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None)
1873             .ok_or(DeviceManagerError::AllocateIoPort)?;
1874 
1875         self.address_manager
1876             .io_bus
1877             .insert(debug_console.clone(), port, 0x1)
1878             .map_err(DeviceManagerError::BusError)?;
1879 
1880         // Fill the device tree with a new node. In case of restore, we
1881         // know there is nothing to do, so we can simply override the
1882         // existing entry.
1883         self.device_tree
1884             .lock()
1885             .unwrap()
1886             .insert(id.clone(), device_node!(id, debug_console));
1887 
1888         Ok(debug_console)
1889     }
1890 
1891     #[cfg(target_arch = "x86_64")]
1892     fn add_serial_device(
1893         &mut self,
1894         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1895         serial_writer: Option<Box<dyn io::Write + Send>>,
1896     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1897         // Serial is tied to IRQ #4
1898         let serial_irq = 4;
1899 
1900         let id = String::from(SERIAL_DEVICE_NAME);
1901 
1902         let interrupt_group = interrupt_manager
1903             .create_group(LegacyIrqGroupConfig {
1904                 irq: serial_irq as InterruptIndex,
1905             })
1906             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1907 
1908         let serial = Arc::new(Mutex::new(Serial::new(
1909             id.clone(),
1910             interrupt_group,
1911             serial_writer,
1912             state_from_id(self.snapshot.as_ref(), id.as_str())
1913                 .map_err(DeviceManagerError::RestoreGetState)?,
1914         )));
1915 
1916         self.bus_devices
1917             .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);
1918 
1919         self.address_manager
1920             .allocator
1921             .lock()
1922             .unwrap()
1923             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1924             .ok_or(DeviceManagerError::AllocateIoPort)?;
1925 
1926         self.address_manager
1927             .io_bus
1928             .insert(serial.clone(), 0x3f8, 0x8)
1929             .map_err(DeviceManagerError::BusError)?;
1930 
1931         // Fill the device tree with a new node. In case of restore, we
1932         // know there is nothing to do, so we can simply override the
1933         // existing entry.
1934         self.device_tree
1935             .lock()
1936             .unwrap()
1937             .insert(id.clone(), device_node!(id, serial));
1938 
1939         Ok(serial)
1940     }
1941 
1942     #[cfg(target_arch = "aarch64")]
1943     fn add_serial_device(
1944         &mut self,
1945         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1946         serial_writer: Option<Box<dyn io::Write + Send>>,
1947     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1948         let id = String::from(SERIAL_DEVICE_NAME);
1949 
1950         let serial_irq = self
1951             .address_manager
1952             .allocator
1953             .lock()
1954             .unwrap()
1955             .allocate_irq()
1956             .unwrap();
1957 
1958         let interrupt_group = interrupt_manager
1959             .create_group(LegacyIrqGroupConfig {
1960                 irq: serial_irq as InterruptIndex,
1961             })
1962             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1963 
1964         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1965             id.clone(),
1966             interrupt_group,
1967             serial_writer,
1968             self.timestamp,
1969             state_from_id(self.snapshot.as_ref(), id.as_str())
1970                 .map_err(DeviceManagerError::RestoreGetState)?,
1971         )));
1972 
1973         self.bus_devices
1974             .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);
1975 
1976         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
1977 
1978         self.address_manager
1979             .mmio_bus
1980             .insert(serial.clone(), addr.0, MMIO_LEN)
1981             .map_err(DeviceManagerError::BusError)?;
1982 
1983         self.id_to_dev_info.insert(
1984             (DeviceType::Serial, DeviceType::Serial.to_string()),
1985             MmioDeviceInfo {
1986                 addr: addr.0,
1987                 len: MMIO_LEN,
1988                 irq: serial_irq,
1989             },
1990         );
1991 
1992         self.cmdline_additions
1993             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1994 
1995         // Fill the device tree with a new node. In case of restore, we
1996         // know there is nothing to do, so we can simply override the
1997         // existing entry.
1998         self.device_tree
1999             .lock()
2000             .unwrap()
2001             .insert(id.clone(), device_node!(id, serial));
2002 
2003         Ok(serial)
2004     }
2005 
2006     fn add_virtio_console_device(
2007         &mut self,
2008         virtio_devices: &mut Vec<MetaVirtioDevice>,
2009         console_fd: ConsoleOutput,
2010         resize_pipe: Option<Arc<File>>,
2011     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
2012         let console_config = self.config.lock().unwrap().console.clone();
2013         let endpoint = match console_fd {
2014             ConsoleOutput::File(file) => Endpoint::File(file),
2015             ConsoleOutput::Pty(file) => {
2016                 self.console_resize_pipe = resize_pipe;
2017                 Endpoint::PtyPair(Arc::new(file.try_clone().unwrap()), file)
2018             }
2019             ConsoleOutput::Tty(stdout) => {
2020                 if stdout.is_terminal() {
2021                     self.console_resize_pipe = resize_pipe;
2022                 }
2023 
2024                 // If an interactive TTY then we can accept input
2025                 // SAFETY: FFI call. Trivially safe.
2026                 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
2027                     // SAFETY: FFI call to dup. Trivially safe.
2028                     let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
2029                     if stdin == -1 {
2030                         return vmm_sys_util::errno::errno_result()
2031                             .map_err(DeviceManagerError::DupFd);
2032                     }
2033                     // SAFETY: stdin is valid and owned solely by us.
2034                     let stdin = unsafe { File::from_raw_fd(stdin) };
2035                     Endpoint::FilePair(stdout, Arc::new(stdin))
2036                 } else {
2037                     Endpoint::File(stdout)
2038                 }
2039             }
2040             ConsoleOutput::Socket(_) => {
2041                 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
2042             }
2043             ConsoleOutput::Null => Endpoint::Null,
2044             ConsoleOutput::Off => return Ok(None),
2045         };
2046         let id = String::from(CONSOLE_DEVICE_NAME);
2047 
2048         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
2049             id.clone(),
2050             endpoint,
2051             self.console_resize_pipe
2052                 .as_ref()
2053                 .map(|p| p.try_clone().unwrap()),
2054             self.force_iommu | console_config.iommu,
2055             self.seccomp_action.clone(),
2056             self.exit_evt
2057                 .try_clone()
2058                 .map_err(DeviceManagerError::EventFd)?,
2059             state_from_id(self.snapshot.as_ref(), id.as_str())
2060                 .map_err(DeviceManagerError::RestoreGetState)?,
2061         )
2062         .map_err(DeviceManagerError::CreateVirtioConsole)?;
2063         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
2064         virtio_devices.push(MetaVirtioDevice {
2065             virtio_device: Arc::clone(&virtio_console_device)
2066                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2067             iommu: console_config.iommu,
2068             id: id.clone(),
2069             pci_segment: 0,
2070             dma_handler: None,
2071         });
2072 
2073         // Fill the device tree with a new node. In case of restore, we
2074         // know there is nothing to do, so we can simply override the
2075         // existing entry.
2076         self.device_tree
2077             .lock()
2078             .unwrap()
2079             .insert(id.clone(), device_node!(id, virtio_console_device));
2080 
2081         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
2082         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
2083             Some(console_resizer)
2084         } else {
2085             None
2086         })
2087     }
2088 
2089     /// Adds all devices that behave like a console with respect to the VM
2090     /// configuration. This includes:
2091     /// - debug-console
2092     /// - serial-console
2093     /// - virtio-console
2094     fn add_console_devices(
2095         &mut self,
2096         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
2097         virtio_devices: &mut Vec<MetaVirtioDevice>,
2098         console_info: Option<ConsoleInfo>,
2099         console_resize_pipe: Option<Arc<File>>,
2100     ) -> DeviceManagerResult<Arc<Console>> {
2101         let serial_config = self.config.lock().unwrap().serial.clone();
2102         if console_info.is_none() {
2103             return Err(DeviceManagerError::InvalidConsoleInfo);
2104         }
2105 
2106         // SAFETY: console_info is Some, so it's safe to unwrap.
2107         let console_info = console_info.unwrap();
2108 
2109         let serial_writer: Option<Box<dyn io::Write + Send>> = match console_info.serial_main_fd {
2110             ConsoleOutput::File(ref file) | ConsoleOutput::Tty(ref file) => {
2111                 Some(Box::new(Arc::clone(file)))
2112             }
2113             ConsoleOutput::Off
2114             | ConsoleOutput::Null
2115             | ConsoleOutput::Pty(_)
2116             | ConsoleOutput::Socket(_) => None,
2117         };
2118 
2119         if !matches!(console_info.serial_main_fd, ConsoleOutput::Off) {
2120             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
2121             self.serial_manager = match console_info.serial_main_fd {
2122                 ConsoleOutput::Pty(_) | ConsoleOutput::Tty(_) | ConsoleOutput::Socket(_) => {
2123                     let serial_manager = SerialManager::new(
2124                         serial,
2125                         console_info.serial_main_fd,
2126                         serial_config.socket,
2127                     )
2128                     .map_err(DeviceManagerError::CreateSerialManager)?;
2129                     if let Some(mut serial_manager) = serial_manager {
2130                         serial_manager
2131                             .start_thread(
2132                                 self.exit_evt
2133                                     .try_clone()
2134                                     .map_err(DeviceManagerError::EventFd)?,
2135                             )
2136                             .map_err(DeviceManagerError::SpawnSerialManager)?;
2137                         Some(Arc::new(serial_manager))
2138                     } else {
2139                         None
2140                     }
2141                 }
2142                 _ => None,
2143             };
2144         }
2145 
2146         #[cfg(target_arch = "x86_64")]
2147         {
2148             let debug_console_writer: Option<Box<dyn io::Write + Send>> =
2149                 match console_info.debug_main_fd {
2150                     ConsoleOutput::File(file) | ConsoleOutput::Tty(file) => Some(Box::new(file)),
2151                     ConsoleOutput::Off
2152                     | ConsoleOutput::Null
2153                     | ConsoleOutput::Pty(_)
2154                     | ConsoleOutput::Socket(_) => None,
2155                 };
2156             if let Some(writer) = debug_console_writer {
2157                 let _ = self.add_debug_console_device(writer)?;
2158             }
2159         }
2160 
2161         let console_resizer = self.add_virtio_console_device(
2162             virtio_devices,
2163             console_info.console_main_fd,
2164             console_resize_pipe,
2165         )?;
2166 
2167         Ok(Arc::new(Console { console_resizer }))
2168     }
2169 
2170     fn add_tpm_device(
2171         &mut self,
2172         tpm_path: PathBuf,
2173     ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
2174         // Create TPM Device
2175         let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
2176             DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
2177         })?;
2178         let tpm = Arc::new(Mutex::new(tpm));
2179 
2180         // Add TPM Device to mmio
2181         self.address_manager
2182             .mmio_bus
2183             .insert(
2184                 tpm.clone(),
2185                 arch::layout::TPM_START.0,
2186                 arch::layout::TPM_SIZE,
2187             )
2188             .map_err(DeviceManagerError::BusError)?;
2189 
2190         Ok(tpm)
2191     }
2192 
2193     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2194         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
2195 
2196         // Create "standard" virtio devices (net/block/rng)
2197         devices.append(&mut self.make_virtio_block_devices()?);
2198         devices.append(&mut self.make_virtio_net_devices()?);
2199         devices.append(&mut self.make_virtio_rng_devices()?);
2200 
2201         // Add virtio-fs if required
2202         devices.append(&mut self.make_virtio_fs_devices()?);
2203 
2204         // Add virtio-pmem if required
2205         devices.append(&mut self.make_virtio_pmem_devices()?);
2206 
2207         // Add virtio-vsock if required
2208         devices.append(&mut self.make_virtio_vsock_devices()?);
2209 
2210         devices.append(&mut self.make_virtio_mem_devices()?);
2211 
2212         // Add virtio-balloon if required
2213         devices.append(&mut self.make_virtio_balloon_devices()?);
2214 
2215         // Add virtio-watchdog device
2216         devices.append(&mut self.make_virtio_watchdog_devices()?);
2217 
2218         // Add vDPA devices if required
2219         devices.append(&mut self.make_vdpa_devices()?);
2220 
2221         Ok(devices)
2222     }
2223 
2224     // Cache whether aio is supported to avoid checking for very block device
2225     fn aio_is_supported(&mut self) -> bool {
2226         if let Some(supported) = self.aio_supported {
2227             return supported;
2228         }
2229 
2230         let supported = block_aio_is_supported();
2231         self.aio_supported = Some(supported);
2232         supported
2233     }
2234 
2235     // Cache whether io_uring is supported to avoid probing for very block device
2236     fn io_uring_is_supported(&mut self) -> bool {
2237         if let Some(supported) = self.io_uring_supported {
2238             return supported;
2239         }
2240 
2241         let supported = block_io_uring_is_supported();
2242         self.io_uring_supported = Some(supported);
2243         supported
2244     }
2245 
2246     fn make_virtio_block_device(
2247         &mut self,
2248         disk_cfg: &mut DiskConfig,
2249     ) -> DeviceManagerResult<MetaVirtioDevice> {
2250         let id = if let Some(id) = &disk_cfg.id {
2251             id.clone()
2252         } else {
2253             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
2254             disk_cfg.id = Some(id.clone());
2255             id
2256         };
2257 
2258         info!("Creating virtio-block device: {:?}", disk_cfg);
2259 
2260         let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
2261             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
2262             let vu_cfg = VhostUserConfig {
2263                 socket,
2264                 num_queues: disk_cfg.num_queues,
2265                 queue_size: disk_cfg.queue_size,
2266             };
2267             let vhost_user_block = Arc::new(Mutex::new(
2268                 match virtio_devices::vhost_user::Blk::new(
2269                     id.clone(),
2270                     vu_cfg,
2271                     self.seccomp_action.clone(),
2272                     self.exit_evt
2273                         .try_clone()
2274                         .map_err(DeviceManagerError::EventFd)?,
2275                     self.force_iommu,
2276                     state_from_id(self.snapshot.as_ref(), id.as_str())
2277                         .map_err(DeviceManagerError::RestoreGetState)?,
2278                 ) {
2279                     Ok(vub_device) => vub_device,
2280                     Err(e) => {
2281                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2282                     }
2283                 },
2284             ));
2285 
2286             (
2287                 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2288                 vhost_user_block as Arc<Mutex<dyn Migratable>>,
2289             )
2290         } else {
2291             let mut options = OpenOptions::new();
2292             options.read(true);
2293             options.write(!disk_cfg.readonly);
2294             if disk_cfg.direct {
2295                 options.custom_flags(libc::O_DIRECT);
2296             }
2297             // Open block device path
2298             let mut file: File = options
2299                 .open(
2300                     disk_cfg
2301                         .path
2302                         .as_ref()
2303                         .ok_or(DeviceManagerError::NoDiskPath)?
2304                         .clone(),
2305                 )
2306                 .map_err(DeviceManagerError::Disk)?;
2307             let image_type =
2308                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2309 
2310             let image = match image_type {
2311                 ImageType::FixedVhd => {
2312                     // Use asynchronous backend relying on io_uring if the
2313                     // syscalls are supported.
2314                     if cfg!(feature = "io_uring")
2315                         && !disk_cfg.disable_io_uring
2316                         && self.io_uring_is_supported()
2317                     {
2318                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2319 
2320                         #[cfg(not(feature = "io_uring"))]
2321                         unreachable!("Checked in if statement above");
2322                         #[cfg(feature = "io_uring")]
2323                         {
2324                             Box::new(
2325                                 FixedVhdDiskAsync::new(file)
2326                                     .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2327                             ) as Box<dyn DiskFile>
2328                         }
2329                     } else {
2330                         info!("Using synchronous fixed VHD disk file");
2331                         Box::new(
2332                             FixedVhdDiskSync::new(file)
2333                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2334                         ) as Box<dyn DiskFile>
2335                     }
2336                 }
2337                 ImageType::Raw => {
2338                     // Use asynchronous backend relying on io_uring if the
2339                     // syscalls are supported.
2340                     if cfg!(feature = "io_uring")
2341                         && !disk_cfg.disable_io_uring
2342                         && self.io_uring_is_supported()
2343                     {
2344                         info!("Using asynchronous RAW disk file (io_uring)");
2345 
2346                         #[cfg(not(feature = "io_uring"))]
2347                         unreachable!("Checked in if statement above");
2348                         #[cfg(feature = "io_uring")]
2349                         {
2350                             Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2351                         }
2352                     } else if !disk_cfg.disable_aio && self.aio_is_supported() {
2353                         info!("Using asynchronous RAW disk file (aio)");
2354                         Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
2355                     } else {
2356                         info!("Using synchronous RAW disk file");
2357                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2358                     }
2359                 }
2360                 ImageType::Qcow2 => {
2361                     info!("Using synchronous QCOW disk file");
2362                     Box::new(
2363                         QcowDiskSync::new(file, disk_cfg.direct)
2364                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2365                     ) as Box<dyn DiskFile>
2366                 }
2367                 ImageType::Vhdx => {
2368                     info!("Using synchronous VHDX disk file");
2369                     Box::new(
2370                         VhdxDiskSync::new(file)
2371                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2372                     ) as Box<dyn DiskFile>
2373                 }
2374             };
2375 
2376             let rate_limit_group =
2377                 if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
2378                     // Create an anonymous RateLimiterGroup that is dropped when the Disk
2379                     // is dropped.
2380                     let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
2381                     let ops = rate_limiter_cfg.ops.unwrap_or_default();
2382                     let mut rate_limit_group = RateLimiterGroup::new(
2383                         disk_cfg.id.as_ref().unwrap(),
2384                         bw.size,
2385                         bw.one_time_burst.unwrap_or(0),
2386                         bw.refill_time,
2387                         ops.size,
2388                         ops.one_time_burst.unwrap_or(0),
2389                         ops.refill_time,
2390                     )
2391                     .map_err(DeviceManagerError::RateLimiterGroupCreate)?;
2392 
2393                     rate_limit_group
2394                         .start_thread(
2395                             self.exit_evt
2396                                 .try_clone()
2397                                 .map_err(DeviceManagerError::EventFd)?,
2398                         )
2399                         .unwrap();
2400 
2401                     Some(Arc::new(rate_limit_group))
2402                 } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
2403                     self.rate_limit_groups.get(rate_limit_group).cloned()
2404                 } else {
2405                     None
2406                 };
2407 
2408             let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() {
2409                 queue_affinity
2410                     .iter()
2411                     .map(|a| (a.queue_index, a.host_cpus.clone()))
2412                     .collect()
2413             } else {
2414                 BTreeMap::new()
2415             };
2416 
2417             let virtio_block = Arc::new(Mutex::new(
2418                 virtio_devices::Block::new(
2419                     id.clone(),
2420                     image,
2421                     disk_cfg
2422                         .path
2423                         .as_ref()
2424                         .ok_or(DeviceManagerError::NoDiskPath)?
2425                         .clone(),
2426                     disk_cfg.readonly,
2427                     self.force_iommu | disk_cfg.iommu,
2428                     disk_cfg.num_queues,
2429                     disk_cfg.queue_size,
2430                     disk_cfg.serial.clone(),
2431                     self.seccomp_action.clone(),
2432                     rate_limit_group,
2433                     self.exit_evt
2434                         .try_clone()
2435                         .map_err(DeviceManagerError::EventFd)?,
2436                     state_from_id(self.snapshot.as_ref(), id.as_str())
2437                         .map_err(DeviceManagerError::RestoreGetState)?,
2438                     queue_affinity,
2439                 )
2440                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2441             ));
2442 
2443             (
2444                 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2445                 virtio_block as Arc<Mutex<dyn Migratable>>,
2446             )
2447         };
2448 
2449         // Fill the device tree with a new node. In case of restore, we
2450         // know there is nothing to do, so we can simply override the
2451         // existing entry.
2452         self.device_tree
2453             .lock()
2454             .unwrap()
2455             .insert(id.clone(), device_node!(id, migratable_device));
2456 
2457         Ok(MetaVirtioDevice {
2458             virtio_device,
2459             iommu: disk_cfg.iommu,
2460             id,
2461             pci_segment: disk_cfg.pci_segment,
2462             dma_handler: None,
2463         })
2464     }
2465 
2466     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2467         let mut devices = Vec::new();
2468 
2469         let mut block_devices = self.config.lock().unwrap().disks.clone();
2470         if let Some(disk_list_cfg) = &mut block_devices {
2471             for disk_cfg in disk_list_cfg.iter_mut() {
2472                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2473             }
2474         }
2475         self.config.lock().unwrap().disks = block_devices;
2476 
2477         Ok(devices)
2478     }
2479 
2480     fn make_virtio_net_device(
2481         &mut self,
2482         net_cfg: &mut NetConfig,
2483     ) -> DeviceManagerResult<MetaVirtioDevice> {
2484         let id = if let Some(id) = &net_cfg.id {
2485             id.clone()
2486         } else {
2487             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2488             net_cfg.id = Some(id.clone());
2489             id
2490         };
2491         info!("Creating virtio-net device: {:?}", net_cfg);
2492 
2493         let (virtio_device, migratable_device) = if net_cfg.vhost_user {
2494             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2495             let vu_cfg = VhostUserConfig {
2496                 socket,
2497                 num_queues: net_cfg.num_queues,
2498                 queue_size: net_cfg.queue_size,
2499             };
2500             let server = match net_cfg.vhost_mode {
2501                 VhostMode::Client => false,
2502                 VhostMode::Server => true,
2503             };
2504             let vhost_user_net = Arc::new(Mutex::new(
2505                 match virtio_devices::vhost_user::Net::new(
2506                     id.clone(),
2507                     net_cfg.mac,
2508                     net_cfg.mtu,
2509                     vu_cfg,
2510                     server,
2511                     self.seccomp_action.clone(),
2512                     self.exit_evt
2513                         .try_clone()
2514                         .map_err(DeviceManagerError::EventFd)?,
2515                     self.force_iommu,
2516                     state_from_id(self.snapshot.as_ref(), id.as_str())
2517                         .map_err(DeviceManagerError::RestoreGetState)?,
2518                     net_cfg.offload_tso,
2519                     net_cfg.offload_ufo,
2520                     net_cfg.offload_csum,
2521                 ) {
2522                     Ok(vun_device) => vun_device,
2523                     Err(e) => {
2524                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2525                     }
2526                 },
2527             ));
2528 
2529             (
2530                 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2531                 vhost_user_net as Arc<Mutex<dyn Migratable>>,
2532             )
2533         } else {
2534             let state = state_from_id(self.snapshot.as_ref(), id.as_str())
2535                 .map_err(DeviceManagerError::RestoreGetState)?;
2536             let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
2537                 Arc::new(Mutex::new(
2538                     virtio_devices::Net::new(
2539                         id.clone(),
2540                         Some(tap_if_name),
2541                         Some(net_cfg.ip),
2542                         Some(net_cfg.mask),
2543                         Some(net_cfg.mac),
2544                         &mut net_cfg.host_mac,
2545                         net_cfg.mtu,
2546                         self.force_iommu | net_cfg.iommu,
2547                         net_cfg.num_queues,
2548                         net_cfg.queue_size,
2549                         self.seccomp_action.clone(),
2550                         net_cfg.rate_limiter_config,
2551                         self.exit_evt
2552                             .try_clone()
2553                             .map_err(DeviceManagerError::EventFd)?,
2554                         state,
2555                         net_cfg.offload_tso,
2556                         net_cfg.offload_ufo,
2557                         net_cfg.offload_csum,
2558                     )
2559                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2560                 ))
2561             } else if let Some(fds) = &net_cfg.fds {
2562                 let net = virtio_devices::Net::from_tap_fds(
2563                     id.clone(),
2564                     fds,
2565                     Some(net_cfg.mac),
2566                     net_cfg.mtu,
2567                     self.force_iommu | net_cfg.iommu,
2568                     net_cfg.queue_size,
2569                     self.seccomp_action.clone(),
2570                     net_cfg.rate_limiter_config,
2571                     self.exit_evt
2572                         .try_clone()
2573                         .map_err(DeviceManagerError::EventFd)?,
2574                     state,
2575                     net_cfg.offload_tso,
2576                     net_cfg.offload_ufo,
2577                     net_cfg.offload_csum,
2578                 )
2579                 .map_err(DeviceManagerError::CreateVirtioNet)?;
2580 
2581                 // SAFETY: 'fds' are valid because TAP devices are created successfully
2582                 unsafe {
2583                     self.config.lock().unwrap().add_preserved_fds(fds.clone());
2584                 }
2585 
2586                 Arc::new(Mutex::new(net))
2587             } else {
2588                 Arc::new(Mutex::new(
2589                     virtio_devices::Net::new(
2590                         id.clone(),
2591                         None,
2592                         Some(net_cfg.ip),
2593                         Some(net_cfg.mask),
2594                         Some(net_cfg.mac),
2595                         &mut net_cfg.host_mac,
2596                         net_cfg.mtu,
2597                         self.force_iommu | net_cfg.iommu,
2598                         net_cfg.num_queues,
2599                         net_cfg.queue_size,
2600                         self.seccomp_action.clone(),
2601                         net_cfg.rate_limiter_config,
2602                         self.exit_evt
2603                             .try_clone()
2604                             .map_err(DeviceManagerError::EventFd)?,
2605                         state,
2606                         net_cfg.offload_tso,
2607                         net_cfg.offload_ufo,
2608                         net_cfg.offload_csum,
2609                     )
2610                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2611                 ))
2612             };
2613 
2614             (
2615                 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2616                 virtio_net as Arc<Mutex<dyn Migratable>>,
2617             )
2618         };
2619 
2620         // Fill the device tree with a new node. In case of restore, we
2621         // know there is nothing to do, so we can simply override the
2622         // existing entry.
2623         self.device_tree
2624             .lock()
2625             .unwrap()
2626             .insert(id.clone(), device_node!(id, migratable_device));
2627 
2628         Ok(MetaVirtioDevice {
2629             virtio_device,
2630             iommu: net_cfg.iommu,
2631             id,
2632             pci_segment: net_cfg.pci_segment,
2633             dma_handler: None,
2634         })
2635     }
2636 
2637     /// Add virto-net and vhost-user-net devices
2638     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2639         let mut devices = Vec::new();
2640         let mut net_devices = self.config.lock().unwrap().net.clone();
2641         if let Some(net_list_cfg) = &mut net_devices {
2642             for net_cfg in net_list_cfg.iter_mut() {
2643                 devices.push(self.make_virtio_net_device(net_cfg)?);
2644             }
2645         }
2646         self.config.lock().unwrap().net = net_devices;
2647 
2648         Ok(devices)
2649     }
2650 
2651     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2652         let mut devices = Vec::new();
2653 
2654         // Add virtio-rng if required
2655         let rng_config = self.config.lock().unwrap().rng.clone();
2656         if let Some(rng_path) = rng_config.src.to_str() {
2657             info!("Creating virtio-rng device: {:?}", rng_config);
2658             let id = String::from(RNG_DEVICE_NAME);
2659 
2660             let virtio_rng_device = Arc::new(Mutex::new(
2661                 virtio_devices::Rng::new(
2662                     id.clone(),
2663                     rng_path,
2664                     self.force_iommu | rng_config.iommu,
2665                     self.seccomp_action.clone(),
2666                     self.exit_evt
2667                         .try_clone()
2668                         .map_err(DeviceManagerError::EventFd)?,
2669                     state_from_id(self.snapshot.as_ref(), id.as_str())
2670                         .map_err(DeviceManagerError::RestoreGetState)?,
2671                 )
2672                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2673             ));
2674             devices.push(MetaVirtioDevice {
2675                 virtio_device: Arc::clone(&virtio_rng_device)
2676                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2677                 iommu: rng_config.iommu,
2678                 id: id.clone(),
2679                 pci_segment: 0,
2680                 dma_handler: None,
2681             });
2682 
2683             // Fill the device tree with a new node. In case of restore, we
2684             // know there is nothing to do, so we can simply override the
2685             // existing entry.
2686             self.device_tree
2687                 .lock()
2688                 .unwrap()
2689                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2690         }
2691 
2692         Ok(devices)
2693     }
2694 
2695     fn make_virtio_fs_device(
2696         &mut self,
2697         fs_cfg: &mut FsConfig,
2698     ) -> DeviceManagerResult<MetaVirtioDevice> {
2699         let id = if let Some(id) = &fs_cfg.id {
2700             id.clone()
2701         } else {
2702             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2703             fs_cfg.id = Some(id.clone());
2704             id
2705         };
2706 
2707         info!("Creating virtio-fs device: {:?}", fs_cfg);
2708 
2709         let mut node = device_node!(id);
2710 
2711         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2712             let virtio_fs_device = Arc::new(Mutex::new(
2713                 virtio_devices::vhost_user::Fs::new(
2714                     id.clone(),
2715                     fs_socket,
2716                     &fs_cfg.tag,
2717                     fs_cfg.num_queues,
2718                     fs_cfg.queue_size,
2719                     None,
2720                     self.seccomp_action.clone(),
2721                     self.exit_evt
2722                         .try_clone()
2723                         .map_err(DeviceManagerError::EventFd)?,
2724                     self.force_iommu,
2725                     state_from_id(self.snapshot.as_ref(), id.as_str())
2726                         .map_err(DeviceManagerError::RestoreGetState)?,
2727                 )
2728                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2729             ));
2730 
2731             // Update the device tree with the migratable device.
2732             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2733             self.device_tree.lock().unwrap().insert(id.clone(), node);
2734 
2735             Ok(MetaVirtioDevice {
2736                 virtio_device: Arc::clone(&virtio_fs_device)
2737                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2738                 iommu: false,
2739                 id,
2740                 pci_segment: fs_cfg.pci_segment,
2741                 dma_handler: None,
2742             })
2743         } else {
2744             Err(DeviceManagerError::NoVirtioFsSock)
2745         }
2746     }
2747 
2748     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2749         let mut devices = Vec::new();
2750 
2751         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2752         if let Some(fs_list_cfg) = &mut fs_devices {
2753             for fs_cfg in fs_list_cfg.iter_mut() {
2754                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2755             }
2756         }
2757         self.config.lock().unwrap().fs = fs_devices;
2758 
2759         Ok(devices)
2760     }
2761 
2762     fn make_virtio_pmem_device(
2763         &mut self,
2764         pmem_cfg: &mut PmemConfig,
2765     ) -> DeviceManagerResult<MetaVirtioDevice> {
2766         let id = if let Some(id) = &pmem_cfg.id {
2767             id.clone()
2768         } else {
2769             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2770             pmem_cfg.id = Some(id.clone());
2771             id
2772         };
2773 
2774         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2775 
2776         let mut node = device_node!(id);
2777 
2778         // Look for the id in the device tree. If it can be found, that means
2779         // the device is being restored, otherwise it's created from scratch.
2780         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2781             info!("Restoring virtio-pmem {} resources", id);
2782 
2783             let mut region_range: Option<(u64, u64)> = None;
2784             for resource in node.resources.iter() {
2785                 match resource {
2786                     Resource::MmioAddressRange { base, size } => {
2787                         if region_range.is_some() {
2788                             return Err(DeviceManagerError::ResourceAlreadyExists);
2789                         }
2790 
2791                         region_range = Some((*base, *size));
2792                     }
2793                     _ => {
2794                         error!("Unexpected resource {:?} for {}", resource, id);
2795                     }
2796                 }
2797             }
2798 
2799             if region_range.is_none() {
2800                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2801             }
2802 
2803             region_range
2804         } else {
2805             None
2806         };
2807 
2808         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2809             if pmem_cfg.size.is_none() {
2810                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2811             }
2812             (O_TMPFILE, true)
2813         } else {
2814             (0, false)
2815         };
2816 
2817         let mut file = OpenOptions::new()
2818             .read(true)
2819             .write(!pmem_cfg.discard_writes)
2820             .custom_flags(custom_flags)
2821             .open(&pmem_cfg.file)
2822             .map_err(DeviceManagerError::PmemFileOpen)?;
2823 
2824         let size = if let Some(size) = pmem_cfg.size {
2825             if set_len {
2826                 file.set_len(size)
2827                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2828             }
2829             size
2830         } else {
2831             file.seek(SeekFrom::End(0))
2832                 .map_err(DeviceManagerError::PmemFileSetLen)?
2833         };
2834 
2835         if size % 0x20_0000 != 0 {
2836             return Err(DeviceManagerError::PmemSizeNotAligned);
2837         }
2838 
2839         let (region_base, region_size) = if let Some((base, size)) = region_range {
2840             // The memory needs to be 2MiB aligned in order to support
2841             // hugepages.
2842             self.pci_segments[pmem_cfg.pci_segment as usize]
2843                 .mem64_allocator
2844                 .lock()
2845                 .unwrap()
2846                 .allocate(
2847                     Some(GuestAddress(base)),
2848                     size as GuestUsize,
2849                     Some(0x0020_0000),
2850                 )
2851                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2852 
2853             (base, size)
2854         } else {
2855             // The memory needs to be 2MiB aligned in order to support
2856             // hugepages.
2857             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2858                 .mem64_allocator
2859                 .lock()
2860                 .unwrap()
2861                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
2862                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2863 
2864             (base.raw_value(), size)
2865         };
2866 
2867         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2868         let mmap_region = MmapRegion::build(
2869             Some(FileOffset::new(cloned_file, 0)),
2870             region_size as usize,
2871             PROT_READ | PROT_WRITE,
2872             MAP_NORESERVE
2873                 | if pmem_cfg.discard_writes {
2874                     MAP_PRIVATE
2875                 } else {
2876                     MAP_SHARED
2877                 },
2878         )
2879         .map_err(DeviceManagerError::NewMmapRegion)?;
2880         let host_addr: u64 = mmap_region.as_ptr() as u64;
2881 
2882         let mem_slot = self
2883             .memory_manager
2884             .lock()
2885             .unwrap()
2886             .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
2887             .map_err(DeviceManagerError::MemoryManager)?;
2888 
2889         let mapping = virtio_devices::UserspaceMapping {
2890             host_addr,
2891             mem_slot,
2892             addr: GuestAddress(region_base),
2893             len: region_size,
2894             mergeable: false,
2895         };
2896 
2897         let virtio_pmem_device = Arc::new(Mutex::new(
2898             virtio_devices::Pmem::new(
2899                 id.clone(),
2900                 file,
2901                 GuestAddress(region_base),
2902                 mapping,
2903                 mmap_region,
2904                 self.force_iommu | pmem_cfg.iommu,
2905                 self.seccomp_action.clone(),
2906                 self.exit_evt
2907                     .try_clone()
2908                     .map_err(DeviceManagerError::EventFd)?,
2909                 state_from_id(self.snapshot.as_ref(), id.as_str())
2910                     .map_err(DeviceManagerError::RestoreGetState)?,
2911             )
2912             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2913         ));
2914 
2915         // Update the device tree with correct resource information and with
2916         // the migratable device.
2917         node.resources.push(Resource::MmioAddressRange {
2918             base: region_base,
2919             size: region_size,
2920         });
2921         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2922         self.device_tree.lock().unwrap().insert(id.clone(), node);
2923 
2924         Ok(MetaVirtioDevice {
2925             virtio_device: Arc::clone(&virtio_pmem_device)
2926                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2927             iommu: pmem_cfg.iommu,
2928             id,
2929             pci_segment: pmem_cfg.pci_segment,
2930             dma_handler: None,
2931         })
2932     }
2933 
2934     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2935         let mut devices = Vec::new();
2936         // Add virtio-pmem if required
2937         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2938         if let Some(pmem_list_cfg) = &mut pmem_devices {
2939             for pmem_cfg in pmem_list_cfg.iter_mut() {
2940                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2941             }
2942         }
2943         self.config.lock().unwrap().pmem = pmem_devices;
2944 
2945         Ok(devices)
2946     }
2947 
2948     fn make_virtio_vsock_device(
2949         &mut self,
2950         vsock_cfg: &mut VsockConfig,
2951     ) -> DeviceManagerResult<MetaVirtioDevice> {
2952         let id = if let Some(id) = &vsock_cfg.id {
2953             id.clone()
2954         } else {
2955             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2956             vsock_cfg.id = Some(id.clone());
2957             id
2958         };
2959 
2960         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2961 
2962         let socket_path = vsock_cfg
2963             .socket
2964             .to_str()
2965             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2966         let backend =
2967             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2968                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2969 
2970         let vsock_device = Arc::new(Mutex::new(
2971             virtio_devices::Vsock::new(
2972                 id.clone(),
2973                 vsock_cfg.cid,
2974                 vsock_cfg.socket.clone(),
2975                 backend,
2976                 self.force_iommu | vsock_cfg.iommu,
2977                 self.seccomp_action.clone(),
2978                 self.exit_evt
2979                     .try_clone()
2980                     .map_err(DeviceManagerError::EventFd)?,
2981                 state_from_id(self.snapshot.as_ref(), id.as_str())
2982                     .map_err(DeviceManagerError::RestoreGetState)?,
2983             )
2984             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2985         ));
2986 
2987         // Fill the device tree with a new node. In case of restore, we
2988         // know there is nothing to do, so we can simply override the
2989         // existing entry.
2990         self.device_tree
2991             .lock()
2992             .unwrap()
2993             .insert(id.clone(), device_node!(id, vsock_device));
2994 
2995         Ok(MetaVirtioDevice {
2996             virtio_device: Arc::clone(&vsock_device)
2997                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2998             iommu: vsock_cfg.iommu,
2999             id,
3000             pci_segment: vsock_cfg.pci_segment,
3001             dma_handler: None,
3002         })
3003     }
3004 
3005     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3006         let mut devices = Vec::new();
3007 
3008         let mut vsock = self.config.lock().unwrap().vsock.clone();
3009         if let Some(ref mut vsock_cfg) = &mut vsock {
3010             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
3011         }
3012         self.config.lock().unwrap().vsock = vsock;
3013 
3014         Ok(devices)
3015     }
3016 
3017     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3018         let mut devices = Vec::new();
3019 
3020         let mm = self.memory_manager.clone();
3021         let mut mm = mm.lock().unwrap();
3022         for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
3023             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
3024                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
3025 
3026                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
3027                     .map(|i| i as u16);
3028 
3029                 let virtio_mem_device = Arc::new(Mutex::new(
3030                     virtio_devices::Mem::new(
3031                         memory_zone_id.clone(),
3032                         virtio_mem_zone.region(),
3033                         self.seccomp_action.clone(),
3034                         node_id,
3035                         virtio_mem_zone.hotplugged_size(),
3036                         virtio_mem_zone.hugepages(),
3037                         self.exit_evt
3038                             .try_clone()
3039                             .map_err(DeviceManagerError::EventFd)?,
3040                         virtio_mem_zone.blocks_state().clone(),
3041                         state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
3042                             .map_err(DeviceManagerError::RestoreGetState)?,
3043                     )
3044                     .map_err(DeviceManagerError::CreateVirtioMem)?,
3045                 ));
3046 
3047                 // Update the virtio-mem zone so that it has a handle onto the
3048                 // virtio-mem device, which will be used for triggering a resize
3049                 // if needed.
3050                 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));
3051 
3052                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
3053 
3054                 devices.push(MetaVirtioDevice {
3055                     virtio_device: Arc::clone(&virtio_mem_device)
3056                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3057                     iommu: false,
3058                     id: memory_zone_id.clone(),
3059                     pci_segment: 0,
3060                     dma_handler: None,
3061                 });
3062 
3063                 // Fill the device tree with a new node. In case of restore, we
3064                 // know there is nothing to do, so we can simply override the
3065                 // existing entry.
3066                 self.device_tree.lock().unwrap().insert(
3067                     memory_zone_id.clone(),
3068                     device_node!(memory_zone_id, virtio_mem_device),
3069                 );
3070             }
3071         }
3072 
3073         Ok(devices)
3074     }
3075 
3076     #[cfg(feature = "pvmemcontrol")]
3077     fn make_pvmemcontrol_device(
3078         &mut self,
3079     ) -> DeviceManagerResult<(
3080         Arc<PvmemcontrolBusDevice>,
3081         Arc<Mutex<PvmemcontrolPciDevice>>,
3082     )> {
3083         let id = String::from(PVMEMCONTROL_DEVICE_NAME);
3084         let pci_segment_id = 0x0_u16;
3085 
3086         let (pci_segment_id, pci_device_bdf, resources) =
3087             self.pci_resources(&id, pci_segment_id)?;
3088 
3089         info!("Creating pvmemcontrol device: id = {}", id);
3090         let (pvmemcontrol_pci_device, pvmemcontrol_bus_device) =
3091             devices::pvmemcontrol::PvmemcontrolDevice::make_device(
3092                 id.clone(),
3093                 self.memory_manager.lock().unwrap().guest_memory(),
3094             );
3095 
3096         let pvmemcontrol_pci_device = Arc::new(Mutex::new(pvmemcontrol_pci_device));
3097         let pvmemcontrol_bus_device = Arc::new(pvmemcontrol_bus_device);
3098 
3099         let new_resources = self.add_pci_device(
3100             pvmemcontrol_bus_device.clone(),
3101             pvmemcontrol_pci_device.clone(),
3102             pci_segment_id,
3103             pci_device_bdf,
3104             resources,
3105         )?;
3106 
3107         let mut node = device_node!(id, pvmemcontrol_pci_device);
3108 
3109         node.resources = new_resources;
3110         node.pci_bdf = Some(pci_device_bdf);
3111         node.pci_device_handle = None;
3112 
3113         self.device_tree.lock().unwrap().insert(id, node);
3114 
3115         Ok((pvmemcontrol_bus_device, pvmemcontrol_pci_device))
3116     }
3117 
3118     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3119         let mut devices = Vec::new();
3120 
3121         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
3122             let id = String::from(BALLOON_DEVICE_NAME);
3123             info!("Creating virtio-balloon device: id = {}", id);
3124 
3125             let virtio_balloon_device = Arc::new(Mutex::new(
3126                 virtio_devices::Balloon::new(
3127                     id.clone(),
3128                     balloon_config.size,
3129                     balloon_config.deflate_on_oom,
3130                     balloon_config.free_page_reporting,
3131                     self.seccomp_action.clone(),
3132                     self.exit_evt
3133                         .try_clone()
3134                         .map_err(DeviceManagerError::EventFd)?,
3135                     state_from_id(self.snapshot.as_ref(), id.as_str())
3136                         .map_err(DeviceManagerError::RestoreGetState)?,
3137                 )
3138                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
3139             ));
3140 
3141             self.balloon = Some(virtio_balloon_device.clone());
3142 
3143             devices.push(MetaVirtioDevice {
3144                 virtio_device: Arc::clone(&virtio_balloon_device)
3145                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3146                 iommu: false,
3147                 id: id.clone(),
3148                 pci_segment: 0,
3149                 dma_handler: None,
3150             });
3151 
3152             self.device_tree
3153                 .lock()
3154                 .unwrap()
3155                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
3156         }
3157 
3158         Ok(devices)
3159     }
3160 
3161     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3162         let mut devices = Vec::new();
3163 
3164         if !self.config.lock().unwrap().watchdog {
3165             return Ok(devices);
3166         }
3167 
3168         let id = String::from(WATCHDOG_DEVICE_NAME);
3169         info!("Creating virtio-watchdog device: id = {}", id);
3170 
3171         let virtio_watchdog_device = Arc::new(Mutex::new(
3172             virtio_devices::Watchdog::new(
3173                 id.clone(),
3174                 self.reset_evt.try_clone().unwrap(),
3175                 self.seccomp_action.clone(),
3176                 self.exit_evt
3177                     .try_clone()
3178                     .map_err(DeviceManagerError::EventFd)?,
3179                 state_from_id(self.snapshot.as_ref(), id.as_str())
3180                     .map_err(DeviceManagerError::RestoreGetState)?,
3181             )
3182             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
3183         ));
3184         devices.push(MetaVirtioDevice {
3185             virtio_device: Arc::clone(&virtio_watchdog_device)
3186                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3187             iommu: false,
3188             id: id.clone(),
3189             pci_segment: 0,
3190             dma_handler: None,
3191         });
3192 
3193         self.device_tree
3194             .lock()
3195             .unwrap()
3196             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
3197 
3198         Ok(devices)
3199     }
3200 
3201     fn make_vdpa_device(
3202         &mut self,
3203         vdpa_cfg: &mut VdpaConfig,
3204     ) -> DeviceManagerResult<MetaVirtioDevice> {
3205         let id = if let Some(id) = &vdpa_cfg.id {
3206             id.clone()
3207         } else {
3208             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
3209             vdpa_cfg.id = Some(id.clone());
3210             id
3211         };
3212 
3213         info!("Creating vDPA device: {:?}", vdpa_cfg);
3214 
3215         let device_path = vdpa_cfg
3216             .path
3217             .to_str()
3218             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
3219 
3220         let vdpa_device = Arc::new(Mutex::new(
3221             virtio_devices::Vdpa::new(
3222                 id.clone(),
3223                 device_path,
3224                 self.memory_manager.lock().unwrap().guest_memory(),
3225                 vdpa_cfg.num_queues as u16,
3226                 state_from_id(self.snapshot.as_ref(), id.as_str())
3227                     .map_err(DeviceManagerError::RestoreGetState)?,
3228             )
3229             .map_err(DeviceManagerError::CreateVdpa)?,
3230         ));
3231 
3232         // Create the DMA handler that is required by the vDPA device
3233         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
3234             Arc::clone(&vdpa_device),
3235             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3236         ));
3237 
3238         self.device_tree
3239             .lock()
3240             .unwrap()
3241             .insert(id.clone(), device_node!(id, vdpa_device));
3242 
3243         Ok(MetaVirtioDevice {
3244             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3245             iommu: vdpa_cfg.iommu,
3246             id,
3247             pci_segment: vdpa_cfg.pci_segment,
3248             dma_handler: Some(vdpa_mapping),
3249         })
3250     }
3251 
3252     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3253         let mut devices = Vec::new();
3254         // Add vdpa if required
3255         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
3256         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
3257             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
3258                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
3259             }
3260         }
3261         self.config.lock().unwrap().vdpa = vdpa_devices;
3262 
3263         Ok(devices)
3264     }
3265 
3266     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
3267         let start_id = self.device_id_cnt;
3268         loop {
3269             // Generate the temporary name.
3270             let name = format!("{}{}", prefix, self.device_id_cnt);
3271             // Increment the counter.
3272             self.device_id_cnt += Wrapping(1);
3273             // Check if the name is already in use.
3274             if !self.boot_id_list.contains(&name)
3275                 && !self.device_tree.lock().unwrap().contains_key(&name)
3276             {
3277                 return Ok(name);
3278             }
3279 
3280             if self.device_id_cnt == start_id {
3281                 // We went through a full loop and there's nothing else we can
3282                 // do.
3283                 break;
3284             }
3285         }
3286         Err(DeviceManagerError::NoAvailableDeviceName)
3287     }
3288 
3289     fn add_passthrough_device(
3290         &mut self,
3291         device_cfg: &mut DeviceConfig,
3292     ) -> DeviceManagerResult<(PciBdf, String)> {
3293         // If the passthrough device has not been created yet, it is created
3294         // here and stored in the DeviceManager structure for future needs.
3295         if self.passthrough_device.is_none() {
3296             self.passthrough_device = Some(
3297                 self.address_manager
3298                     .vm
3299                     .create_passthrough_device()
3300                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
3301             );
3302         }
3303 
3304         self.add_vfio_device(device_cfg)
3305     }
3306 
3307     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
3308         let passthrough_device = self
3309             .passthrough_device
3310             .as_ref()
3311             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
3312 
3313         let dup = passthrough_device
3314             .try_clone()
3315             .map_err(DeviceManagerError::VfioCreate)?;
3316 
3317         Ok(Arc::new(
3318             VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
3319         ))
3320     }
3321 
    /// Creates a VFIO PCI device from `device_cfg`, plugs it on its PCI
    /// segment and records it in the device tree.
    ///
    /// Steps, in order:
    /// 1. Pick the device id (from the config, or generated from
    ///    `VFIO_DEVICE_NAME_PREFIX` and written back into `device_cfg`).
    /// 2. Obtain segment, BDF and optional resources through
    ///    `pci_resources()`.
    /// 3. Select the VFIO container: a dedicated one when `device_cfg.iommu`
    ///    is set, otherwise the shared one stored in `self.vfio_container`
    ///    (created on first use).
    /// 4. When the shared container was just created, DMA-map every region of
    ///    every memory zone and register a mapping handler with each
    ///    virtio-mem device.
    /// 5. Create the `VfioPciDevice`, add it to the PCI bus, map its MMIO
    ///    regions and insert the resulting node into the device tree.
    ///
    /// Returns the PCI BDF and the id of the new device.
    ///
    /// # Errors
    ///
    /// Returns `DeviceManagerError::MissingVirtualIommu` when `device_cfg.iommu`
    /// is set but no vIOMMU device exists; other failures are forwarded from
    /// the individual steps.
    fn add_vfio_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

        // Only set when the shared container is created by this call.
        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't let all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device. That's simply because the map/unmap operations happen at the
        // VFIO container level.
        let vfio_container = if device_cfg.iommu {
            let vfio_container = self.create_vfio_container()?;

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            // Hand the mapping to the vIOMMU device, keyed by this device's BDF.
            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }

            vfio_container
        } else if let Some(vfio_container) = &self.vfio_container {
            Arc::clone(vfio_container)
        } else {
            // NOTE(review): the shared container is stored here, before the
            // VFIO device is opened and before the DMA mappings below are set
            // up. If a later step of this call fails, the next call would find
            // the container and skip the mapping - confirm this is handled.
            let vfio_container = self.create_vfio_container()?;
            needs_dma_mapping = true;
            self.vfio_container = Some(Arc::clone(&vfio_container));

            vfio_container
        };

        let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
            .map_err(DeviceManagerError::VfioCreate)?;

        if needs_dma_mapping {
            // Register DMA mapping in IOMMU.
            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem device itself.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    vfio_container
                        .vfio_dma_map(
                            region.start_addr().raw_value(),
                            region.len(),
                            region.as_ptr() as u64,
                        )
                        .map_err(DeviceManagerError::VfioDmaMap)?;
                }
            }

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            // Every virtio-mem device gets the container-level handler.
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Container,
                        vfio_mapping.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }
        }

        // A legacy interrupt group is only created when a legacy interrupt
        // manager is present; the IRQ comes from the segment's slot table,
        // indexed by the device number of the BDF.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        // Cloned so the closure passed below can own its handle.
        let memory_manager = self.memory_manager.clone();

        let vfio_pci_device = VfioPciDevice::new(
            vfio_name.clone(),
            &self.address_manager.vm,
            vfio_device,
            vfio_container,
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            device_cfg.iommu,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
            device_cfg.x_nv_gpudirect_clique,
        )
        .map_err(DeviceManagerError::VfioPciCreate)?;

        let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

        // The same object is passed as both the bus device and the PCI device.
        let new_resources = self.add_pci_device(
            vfio_pci_device.clone(),
            vfio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        vfio_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioMapRegion)?;

        // Record the device's MMIO regions in the list shared with the
        // `VfioDmaMapping` instances created above.
        for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
            self.mmio_regions.lock().unwrap().push(mmio_region);
        }

        let mut node = device_node!(vfio_name, vfio_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_name.clone(), node);

        Ok((pci_device_bdf, vfio_name))
    }
3479 
3480     fn add_pci_device(
3481         &mut self,
3482         bus_device: Arc<dyn BusDeviceSync>,
3483         pci_device: Arc<Mutex<dyn PciDevice>>,
3484         segment_id: u16,
3485         bdf: PciBdf,
3486         resources: Option<Vec<Resource>>,
3487     ) -> DeviceManagerResult<Vec<Resource>> {
3488         let bars = pci_device
3489             .lock()
3490             .unwrap()
3491             .allocate_bars(
3492                 &self.address_manager.allocator,
3493                 &mut self.pci_segments[segment_id as usize]
3494                     .mem32_allocator
3495                     .lock()
3496                     .unwrap(),
3497                 &mut self.pci_segments[segment_id as usize]
3498                     .mem64_allocator
3499                     .lock()
3500                     .unwrap(),
3501                 resources,
3502             )
3503             .map_err(DeviceManagerError::AllocateBars)?;
3504 
3505         let mut pci_bus = self.pci_segments[segment_id as usize]
3506             .pci_bus
3507             .lock()
3508             .unwrap();
3509 
3510         pci_bus
3511             .add_device(bdf.device() as u32, pci_device)
3512             .map_err(DeviceManagerError::AddPciDevice)?;
3513 
3514         self.bus_devices.push(Arc::clone(&bus_device));
3515 
3516         pci_bus
3517             .register_mapping(
3518                 bus_device,
3519                 #[cfg(target_arch = "x86_64")]
3520                 self.address_manager.io_bus.as_ref(),
3521                 self.address_manager.mmio_bus.as_ref(),
3522                 bars.clone(),
3523             )
3524             .map_err(DeviceManagerError::AddPciDevice)?;
3525 
3526         let mut new_resources = Vec::new();
3527         for bar in bars {
3528             new_resources.push(Resource::PciBar {
3529                 index: bar.idx(),
3530                 base: bar.addr(),
3531                 size: bar.size(),
3532                 type_: bar.region_type().into(),
3533                 prefetchable: bar.prefetchable().into(),
3534             });
3535         }
3536 
3537         Ok(new_resources)
3538     }
3539 
3540     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3541         let mut iommu_attached_device_ids = Vec::new();
3542         let mut devices = self.config.lock().unwrap().devices.clone();
3543 
3544         if let Some(device_list_cfg) = &mut devices {
3545             for device_cfg in device_list_cfg.iter_mut() {
3546                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3547                 if device_cfg.iommu && self.iommu_device.is_some() {
3548                     iommu_attached_device_ids.push(device_id);
3549                 }
3550             }
3551         }
3552 
3553         // Update the list of devices
3554         self.config.lock().unwrap().devices = devices;
3555 
3556         Ok(iommu_attached_device_ids)
3557     }
3558 
    /// Creates a vfio-user PCI device from `device_cfg`, plugs it on its PCI
    /// segment and records it in the device tree.
    ///
    /// The device id is taken from the config or generated from
    /// `VFIO_USER_DEVICE_NAME_PREFIX` (and written back into `device_cfg`).
    /// A `vfio_user::Client` is created from `device_cfg.socket`, every
    /// virtio-mem device gets a DMA mapping handler for this device, and every
    /// region of every memory zone is DMA-mapped through the device before it
    /// is added to the PCI bus.
    ///
    /// Returns the PCI BDF and the id of the new device.
    fn add_vfio_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        let vfio_user_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;

        // A legacy interrupt group is only created when a legacy interrupt
        // manager is present; the IRQ comes from the segment's slot table,
        // indexed by the device number of the BDF.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        // The client is shared between the PCI device and the DMA mapping
        // handler created below.
        let client = Arc::new(Mutex::new(
            vfio_user::Client::new(&device_cfg.socket)
                .map_err(DeviceManagerError::VfioUserCreateClient)?,
        ));

        // Cloned so the closure passed below can own its handle.
        let memory_manager = self.memory_manager.clone();

        let mut vfio_user_pci_device = VfioUserPciDevice::new(
            vfio_user_name.clone(),
            &self.address_manager.vm,
            client.clone(),
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
        )
        .map_err(DeviceManagerError::VfioUserCreate)?;

        // Register a per-device DMA mapping handler with every virtio-mem
        // device.
        let memory = self.memory_manager.lock().unwrap().guest_memory();
        let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Device(pci_device_bdf.into()),
                    vfio_user_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }

        // DMA-map every region of every memory zone through the device.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_user_pci_device
                    .dma_map(region)
                    .map_err(DeviceManagerError::VfioUserDmaMap)?;
            }
        }

        let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

        // The same object is passed as both the bus device and the PCI device.
        let new_resources = self.add_pci_device(
            vfio_user_pci_device.clone(),
            vfio_user_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Note it is required to call 'add_pci_device()' in advance to have the list of
        // mmio regions provisioned correctly
        vfio_user_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioUserMapRegion)?;

        let mut node = device_node!(vfio_user_name, vfio_user_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_user_name.clone(), node);

        Ok((pci_device_bdf, vfio_user_name))
    }
3661 
3662     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3663         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3664 
3665         if let Some(device_list_cfg) = &mut user_devices {
3666             for device_cfg in device_list_cfg.iter_mut() {
3667                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3668             }
3669         }
3670 
3671         // Update the list of devices
3672         self.config.lock().unwrap().user_devices = user_devices;
3673 
3674         Ok(vec![])
3675     }
3676 
    /// Wraps a virtio device into a virtio-pci transport, plugs it on the PCI
    /// bus and records the transport in the device tree.
    ///
    /// * `virtio_device` - the virtio device to expose.
    /// * `iommu_mapping` - when `Some`, used to build the `AccessPlatform`
    ///   for the device and to route `dma_handler` to the vIOMMU device.
    /// * `virtio_device_id` - id of the existing virtio node in the device
    ///   tree; the transport node is named
    ///   `{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}`.
    /// * `pci_segment_id` - segment requested for a newly created device.
    /// * `dma_handler` - optional external DMA mapping handler.
    ///
    /// Returns the PCI BDF assigned to the transport.
    ///
    /// # Errors
    ///
    /// `DeviceManagerError::MissingNode` when `virtio_device_id` is not in the
    /// device tree, `DeviceManagerError::MissingVirtualIommu` when a DMA
    /// handler and an IOMMU mapping are given but no vIOMMU device exists;
    /// other failures are forwarded from the individual steps.
    fn add_virtio_pci_device(
        &mut self,
        virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu_mapping: &Option<Arc<IommuMapping>>,
        virtio_device_id: String,
        pci_segment_id: u16,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> DeviceManagerResult<PciBdf> {
        let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");

        // Add the new virtio-pci node to the device tree.
        let mut node = device_node!(id);
        node.children = vec![virtio_device_id.clone()];

        // From here on `pci_segment_id` is the value returned by
        // `pci_resources()`, which shadows the parameter.
        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        // Update the existing virtio node by setting the parent.
        if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
            node.parent = Some(id.clone());
        } else {
            return Err(DeviceManagerError::MissingNode);
        }

        // Allows support for one MSI-X vector per queue. It also adds 1
        // as we need to take into account the dedicated vector to notify
        // about a virtio config change.
        let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;

        // Create the AccessPlatform trait from the implementation IommuMapping.
        // This will provide address translation for any virtio device sitting
        // behind a vIOMMU.
        let mut access_platform: Option<Arc<dyn AccessPlatform>> = None;

        if let Some(mapping) = iommu_mapping {
            access_platform = Some(Arc::new(AccessPlatformMapping::new(
                pci_device_bdf.into(),
                mapping.clone(),
            )));
        }

        // If SEV-SNP is enabled create the AccessPlatform from SevSnpPageAccessProxy
        // (this replaces any value set from `iommu_mapping` above).
        #[cfg(feature = "sev_snp")]
        if self.config.lock().unwrap().is_sev_snp_enabled() {
            access_platform = Some(Arc::new(SevSnpPageAccessProxy::new(
                self.address_manager.vm.clone(),
            )));
        }

        let memory = self.memory_manager.lock().unwrap().guest_memory();

        // Map DMA ranges if a DMA handler is available and if the device is
        // not attached to a virtual IOMMU.
        if let Some(dma_handler) = &dma_handler {
            if iommu_mapping.is_some() {
                // Behind a vIOMMU: hand the handler to the vIOMMU device,
                // keyed by this device's BDF.
                if let Some(iommu) = &self.iommu_device {
                    iommu
                        .lock()
                        .unwrap()
                        .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
                } else {
                    return Err(DeviceManagerError::MissingVirtualIommu);
                }
            } else {
                // Let every virtio-mem device handle the DMA map/unmap through the
                // DMA handler provided.
                for virtio_mem_device in self.virtio_mem_devices.iter() {
                    virtio_mem_device
                        .lock()
                        .unwrap()
                        .add_dma_mapping_handler(
                            VirtioMemMappingSource::Device(pci_device_bdf.into()),
                            dma_handler.clone(),
                        )
                        .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
                }

                // Do not register virtio-mem regions, as they are handled directly by
                // virtio-mem devices.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        // The region's start address is passed for both of
                        // the first two arguments of `map()`.
                        let gpa = region.start_addr().0;
                        let size = region.len();
                        dma_handler
                            .map(gpa, gpa, size)
                            .map_err(DeviceManagerError::VirtioDmaMap)?;
                    }
                }
            }
        }

        let device_type = virtio_device.lock().unwrap().device_type();
        let virtio_pci_device = Arc::new(Mutex::new(
            VirtioPciDevice::new(
                id.clone(),
                memory,
                virtio_device,
                msix_num,
                access_platform,
                &self.msi_interrupt_manager,
                pci_device_bdf.into(),
                self.activate_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                // All device types *except* virtio block devices should be allocated a 64-bit bar
                // The block devices should be given a 32-bit BAR so that they are easily accessible
                // to firmware without requiring excessive identity mapping.
                // The exception being if not on the default PCI segment.
                pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
                dma_handler,
                self.pending_activations.clone(),
                vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
            )
            .map_err(DeviceManagerError::VirtioDevice)?,
        ));

        // The same object is passed as both the bus device and the PCI device.
        let new_resources = self.add_pci_device(
            virtio_pci_device.clone(),
            virtio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Register each (event, address) pair reported by the transport as an
        // MMIO ioevent with the VM.
        let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
        for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
            let io_addr = IoEventAddress::Mmio(addr);
            self.address_manager
                .vm
                .register_ioevent(event, &io_addr, None)
                .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
        }

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
        self.device_tree.lock().unwrap().insert(id, node);

        Ok(pci_device_bdf)
    }
3819 
3820     fn add_pvpanic_device(
3821         &mut self,
3822     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
3823         let id = String::from(PVPANIC_DEVICE_NAME);
3824         let pci_segment_id = 0x0_u16;
3825 
3826         info!("Creating pvpanic device {}", id);
3827 
3828         let (pci_segment_id, pci_device_bdf, resources) =
3829             self.pci_resources(&id, pci_segment_id)?;
3830 
3831         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
3832 
3833         let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
3834             .map_err(DeviceManagerError::PvPanicCreate)?;
3835 
3836         let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));
3837 
3838         let new_resources = self.add_pci_device(
3839             pvpanic_device.clone(),
3840             pvpanic_device.clone(),
3841             pci_segment_id,
3842             pci_device_bdf,
3843             resources,
3844         )?;
3845 
3846         let mut node = device_node!(id, pvpanic_device);
3847 
3848         node.resources = new_resources;
3849         node.pci_bdf = Some(pci_device_bdf);
3850         node.pci_device_handle = None;
3851 
3852         self.device_tree.lock().unwrap().insert(id, node);
3853 
3854         Ok(Some(pvpanic_device))
3855     }
3856 
3857     fn pci_resources(
3858         &self,
3859         id: &str,
3860         pci_segment_id: u16,
3861     ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
3862         // Look for the id in the device tree. If it can be found, that means
3863         // the device is being restored, otherwise it's created from scratch.
3864         Ok(
3865             if let Some(node) = self.device_tree.lock().unwrap().get(id) {
3866                 info!("Restoring virtio-pci {} resources", id);
3867                 let pci_device_bdf: PciBdf = node
3868                     .pci_bdf
3869                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3870                 let pci_segment_id = pci_device_bdf.segment();
3871 
3872                 self.pci_segments[pci_segment_id as usize]
3873                     .pci_bus
3874                     .lock()
3875                     .unwrap()
3876                     .get_device_id(pci_device_bdf.device() as usize)
3877                     .map_err(DeviceManagerError::GetPciDeviceId)?;
3878 
3879                 (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
3880             } else {
3881                 let pci_device_bdf =
3882                     self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3883 
3884                 (pci_segment_id, pci_device_bdf, None)
3885             },
3886         )
3887     }
3888 
    /// Returns the `io_bus` held by the address manager (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }
3893 
    /// Returns the `mmio_bus` held by the address manager.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }
3897 
    /// Returns the `SystemAllocator` held by the address manager.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }
3901 
3902     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3903         self.interrupt_controller
3904             .as_ref()
3905             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3906     }
3907 
    /// Returns the PCI segments owned by the device manager.
    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }
3911 
3912     #[cfg(target_arch = "aarch64")]
3913     pub fn cmdline_additions(&self) -> &[String] {
3914         self.cmdline_additions.as_slice()
3915     }
3916 
3917     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3918         for handle in self.virtio_devices.iter() {
3919             handle
3920                 .virtio_device
3921                 .lock()
3922                 .unwrap()
3923                 .add_memory_region(new_region)
3924                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3925 
3926             if let Some(dma_handler) = &handle.dma_handler {
3927                 if !handle.iommu {
3928                     let gpa = new_region.start_addr().0;
3929                     let size = new_region.len();
3930                     dma_handler
3931                         .map(gpa, gpa, size)
3932                         .map_err(DeviceManagerError::VirtioDmaMap)?;
3933                 }
3934             }
3935         }
3936 
3937         // Take care of updating the memory for VFIO PCI devices.
3938         if let Some(vfio_container) = &self.vfio_container {
3939             vfio_container
3940                 .vfio_dma_map(
3941                     new_region.start_addr().raw_value(),
3942                     new_region.len(),
3943                     new_region.as_ptr() as u64,
3944                 )
3945                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3946         }
3947 
3948         // Take care of updating the memory for vfio-user devices.
3949         {
3950             let device_tree = self.device_tree.lock().unwrap();
3951             for pci_device_node in device_tree.pci_devices() {
3952                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
3953                     .pci_device_handle
3954                     .as_ref()
3955                     .ok_or(DeviceManagerError::MissingPciDevice)?
3956                 {
3957                     vfio_user_pci_device
3958                         .lock()
3959                         .unwrap()
3960                         .dma_map(new_region)
3961                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
3962                 }
3963             }
3964         }
3965 
3966         Ok(())
3967     }
3968 
3969     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3970         for mut activator in self.pending_activations.lock().unwrap().drain(..) {
3971             activator
3972                 .activate()
3973                 .map_err(DeviceManagerError::VirtioActivate)?;
3974         }
3975         Ok(())
3976     }
3977 
3978     pub fn notify_hotplug(
3979         &self,
3980         _notification_type: AcpiNotificationFlags,
3981     ) -> DeviceManagerResult<()> {
3982         return self
3983             .ged_notification_device
3984             .as_ref()
3985             .unwrap()
3986             .lock()
3987             .unwrap()
3988             .notify(_notification_type)
3989             .map_err(DeviceManagerError::HotPlugNotification);
3990     }
3991 
3992     pub fn add_device(
3993         &mut self,
3994         device_cfg: &mut DeviceConfig,
3995     ) -> DeviceManagerResult<PciDeviceInfo> {
3996         self.validate_identifier(&device_cfg.id)?;
3997 
3998         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
3999             return Err(DeviceManagerError::InvalidIommuHotplug);
4000         }
4001 
4002         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
4003 
4004         // Update the PCIU bitmap
4005         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4006 
4007         Ok(PciDeviceInfo {
4008             id: device_name,
4009             bdf,
4010         })
4011     }
4012 
4013     pub fn add_user_device(
4014         &mut self,
4015         device_cfg: &mut UserDeviceConfig,
4016     ) -> DeviceManagerResult<PciDeviceInfo> {
4017         self.validate_identifier(&device_cfg.id)?;
4018 
4019         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
4020 
4021         // Update the PCIU bitmap
4022         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4023 
4024         Ok(PciDeviceInfo {
4025             id: device_name,
4026             bdf,
4027         })
4028     }
4029 
4030     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
4031         // The node can be directly a PCI node in case the 'id' refers to a
4032         // VFIO device or a virtio-pci one.
4033         // In case the 'id' refers to a virtio device, we must find the PCI
4034         // node by looking at the parent.
4035         let device_tree = self.device_tree.lock().unwrap();
4036         let node = device_tree
4037             .get(&id)
4038             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
4039 
4040         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
4041             node
4042         } else {
4043             let parent = node
4044                 .parent
4045                 .as_ref()
4046                 .ok_or(DeviceManagerError::MissingNode)?;
4047             device_tree
4048                 .get(parent)
4049                 .ok_or(DeviceManagerError::MissingNode)?
4050         };
4051 
4052         let pci_device_bdf: PciBdf = pci_device_node
4053             .pci_bdf
4054             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
4055         let pci_segment_id = pci_device_bdf.segment();
4056 
4057         let pci_device_handle = pci_device_node
4058             .pci_device_handle
4059             .as_ref()
4060             .ok_or(DeviceManagerError::MissingPciDevice)?;
4061         #[allow(irrefutable_let_patterns)]
4062         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
4063             let device_type = VirtioDeviceType::from(
4064                 virtio_pci_device
4065                     .lock()
4066                     .unwrap()
4067                     .virtio_device()
4068                     .lock()
4069                     .unwrap()
4070                     .device_type(),
4071             );
4072             match device_type {
4073                 VirtioDeviceType::Net
4074                 | VirtioDeviceType::Block
4075                 | VirtioDeviceType::Pmem
4076                 | VirtioDeviceType::Fs
4077                 | VirtioDeviceType::Vsock => {}
4078                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
4079             }
4080         }
4081 
4082         // Update the PCID bitmap
4083         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
4084 
4085         Ok(())
4086     }
4087 
    /// Completes the removal of the PCI device in slot `device_id` of segment
    /// `pci_segment_id`, releasing what the device manager holds for it.
    ///
    /// The steps, in order: give the device ID back to the PCI bus, remove the
    /// node (and its children) from the device tree, undo the state specific to
    /// the handle type (VFIO MMIO regions, virtio ioeventfds and DMA mappings,
    /// vfio-user DMA mappings), remove the virtio-mem DMA mapping handlers
    /// where applicable, free the BARs, detach the device from the PCI, I/O
    /// (x86_64 only) and MMIO buses, shut down the underlying virtio device if
    /// there is one, and finally emit a "device-removed" event.
    ///
    /// # Errors
    ///
    /// Returns the first error met. Steps already performed are not rolled
    /// back.
    ///
    /// # Panics
    ///
    /// Panics if `pci_segment_id` is not a valid index into `pci_segments`, if
    /// a mutex is poisoned, or if a virtio-pci node has more than one child.
    pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
        info!(
            "Ejecting device_id = {} on segment_id={}",
            device_id, pci_segment_id
        );

        // Convert the device ID into the corresponding b/d/f.
        let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);

        // Give the PCI device ID back to the PCI bus.
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .put_device_id(device_id as usize)
            .map_err(DeviceManagerError::PutPciDeviceId)?;

        // Remove the device from the device tree along with its children.
        let mut device_tree = self.device_tree.lock().unwrap();
        let pci_device_node = device_tree
            .remove_node_by_pci_bdf(pci_device_bdf)
            .ok_or(DeviceManagerError::MissingPciDevice)?;

        // For VFIO and vfio-user the PCI device id is the id.
        // For virtio we overwrite it later as we want the id of the
        // underlying device.
        let mut id = pci_device_node.id;
        let pci_device_handle = pci_device_node
            .pci_device_handle
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
            // The virtio-pci device has a single child
            if !pci_device_node.children.is_empty() {
                assert_eq!(pci_device_node.children.len(), 1);
                let child_id = &pci_device_node.children[0];
                id.clone_from(child_id);
            }
        }
        for child in pci_device_node.children.iter() {
            device_tree.remove(child);
        }

        // Whether this BDF is listed among the IOMMU-attached devices. Used
        // below to skip the virtio DMA unmapping for such devices.
        let mut iommu_attached = false;
        if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
            if iommu_attached_devices.contains(&pci_device_bdf) {
                iommu_attached = true;
            }
        }

        // Per handle type: undo the type-specific state, then hand back the
        // device as a PCI device and as a bus device, the underlying virtio
        // device (if any), and whether the virtio-mem DMA mapping handlers
        // must be removed for it.
        let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
            // No need to remove any virtio-mem mapping here as the container outlives all devices
            PciDeviceHandle::Vfio(vfio_pci_device) => {
                for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
                    self.mmio_regions
                        .lock()
                        .unwrap()
                        .retain(|x| x.start != mmio_region.start)
                }

                (
                    Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_pci_device) as Arc<dyn BusDeviceSync>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    false,
                )
            }
            PciDeviceHandle::Virtio(virtio_pci_device) => {
                let dev = virtio_pci_device.lock().unwrap();
                let bar_addr = dev.config_bar_addr();
                // Unregister every ioeventfd the device reports for its
                // config BAR.
                for (event, addr) in dev.ioeventfds(bar_addr) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.address_manager
                        .vm
                        .unregister_ioevent(event, &io_addr)
                        .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
                }

                // Unmap every region of every memory zone through the
                // device's DMA handler, unless the device is IOMMU-attached.
                if let Some(dma_handler) = dev.dma_handler() {
                    if !iommu_attached {
                        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                            for region in zone.regions() {
                                let iova = region.start_addr().0;
                                let size = region.len();
                                dma_handler
                                    .unmap(iova, size)
                                    .map_err(DeviceManagerError::VirtioDmaUnmap)?;
                            }
                        }
                    }
                }

                (
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&virtio_pci_device) as Arc<dyn BusDeviceSync>,
                    Some(dev.virtio_device()),
                    dev.dma_handler().is_some() && !iommu_attached,
                )
            }
            PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
                let mut dev = vfio_user_pci_device.lock().unwrap();
                // Unmap every region of every memory zone from the device.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        dev.dma_unmap(region)
                            .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
                    }
                }

                (
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_user_pci_device) as Arc<dyn BusDeviceSync>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    true,
                )
            }
        };

        // Remove this device's DMA mapping handler from every virtio-mem
        // device.
        if remove_dma_handler {
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
                        pci_device_bdf.into(),
                    ))
                    .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
            }
        }

        // Free the allocated BARs
        pci_device
            .lock()
            .unwrap()
            .free_bars(
                &mut self.address_manager.allocator.lock().unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .mem32_allocator
                    .lock()
                    .unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .mem64_allocator
                    .lock()
                    .unwrap(),
            )
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the lists and
        // buses where it was stored. At the end of this function, once the
        // remaining local handles (bus_device and pci_device) are released,
        // the actual device will be dropped.
        Ok(())
    }
4294 
4295     fn hotplug_virtio_pci_device(
4296         &mut self,
4297         handle: MetaVirtioDevice,
4298     ) -> DeviceManagerResult<PciDeviceInfo> {
4299         // Add the virtio device to the device manager list. This is important
4300         // as the list is used to notify virtio devices about memory updates
4301         // for instance.
4302         self.virtio_devices.push(handle.clone());
4303 
4304         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
4305             self.iommu_mapping.clone()
4306         } else {
4307             None
4308         };
4309 
4310         let bdf = self.add_virtio_pci_device(
4311             handle.virtio_device,
4312             &mapping,
4313             handle.id.clone(),
4314             handle.pci_segment,
4315             handle.dma_handler,
4316         )?;
4317 
4318         // Update the PCIU bitmap
4319         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4320 
4321         Ok(PciDeviceInfo { id: handle.id, bdf })
4322     }
4323 
4324     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
4325         self.config
4326             .lock()
4327             .as_ref()
4328             .unwrap()
4329             .platform
4330             .as_ref()
4331             .map(|pc| {
4332                 pc.iommu_segments
4333                     .as_ref()
4334                     .map(|v| v.contains(&pci_segment_id))
4335                     .unwrap_or_default()
4336             })
4337             .unwrap_or_default()
4338     }
4339 
4340     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
4341         self.validate_identifier(&disk_cfg.id)?;
4342 
4343         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
4344             return Err(DeviceManagerError::InvalidIommuHotplug);
4345         }
4346 
4347         let device = self.make_virtio_block_device(disk_cfg)?;
4348         self.hotplug_virtio_pci_device(device)
4349     }
4350 
4351     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
4352         self.validate_identifier(&fs_cfg.id)?;
4353 
4354         let device = self.make_virtio_fs_device(fs_cfg)?;
4355         self.hotplug_virtio_pci_device(device)
4356     }
4357 
4358     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
4359         self.validate_identifier(&pmem_cfg.id)?;
4360 
4361         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
4362             return Err(DeviceManagerError::InvalidIommuHotplug);
4363         }
4364 
4365         let device = self.make_virtio_pmem_device(pmem_cfg)?;
4366         self.hotplug_virtio_pci_device(device)
4367     }
4368 
4369     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
4370         self.validate_identifier(&net_cfg.id)?;
4371 
4372         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
4373             return Err(DeviceManagerError::InvalidIommuHotplug);
4374         }
4375 
4376         let device = self.make_virtio_net_device(net_cfg)?;
4377         self.hotplug_virtio_pci_device(device)
4378     }
4379 
4380     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
4381         self.validate_identifier(&vdpa_cfg.id)?;
4382 
4383         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
4384             return Err(DeviceManagerError::InvalidIommuHotplug);
4385         }
4386 
4387         let device = self.make_vdpa_device(vdpa_cfg)?;
4388         self.hotplug_virtio_pci_device(device)
4389     }
4390 
4391     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4392         self.validate_identifier(&vsock_cfg.id)?;
4393 
4394         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4395             return Err(DeviceManagerError::InvalidIommuHotplug);
4396         }
4397 
4398         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4399         self.hotplug_virtio_pci_device(device)
4400     }
4401 
4402     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4403         let mut counters = HashMap::new();
4404 
4405         for handle in &self.virtio_devices {
4406             let virtio_device = handle.virtio_device.lock().unwrap();
4407             if let Some(device_counters) = virtio_device.counters() {
4408                 counters.insert(handle.id.clone(), device_counters.clone());
4409             }
4410         }
4411 
4412         counters
4413     }
4414 
4415     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4416         if let Some(balloon) = &self.balloon {
4417             return balloon
4418                 .lock()
4419                 .unwrap()
4420                 .resize(size)
4421                 .map_err(DeviceManagerError::VirtioBalloonResize);
4422         }
4423 
4424         warn!("No balloon setup: Can't resize the balloon");
4425         Err(DeviceManagerError::MissingVirtioBalloon)
4426     }
4427 
4428     pub fn balloon_size(&self) -> u64 {
4429         if let Some(balloon) = &self.balloon {
4430             return balloon.lock().unwrap().get_actual();
4431         }
4432 
4433         0
4434     }
4435 
4436     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4437         self.device_tree.clone()
4438     }
4439 
4440     #[cfg(target_arch = "x86_64")]
4441     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4442         self.ged_notification_device
4443             .as_ref()
4444             .unwrap()
4445             .lock()
4446             .unwrap()
4447             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4448             .map_err(DeviceManagerError::PowerButtonNotification)
4449     }
4450 
4451     #[cfg(target_arch = "aarch64")]
4452     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4453         // There are two use cases:
4454         // 1. Users will use direct kernel boot with device tree.
4455         // 2. Users will use ACPI+UEFI boot.
4456 
4457         // Trigger a GPIO pin 3 event to satisfy use case 1.
4458         self.gpio_device
4459             .as_ref()
4460             .unwrap()
4461             .lock()
4462             .unwrap()
4463             .trigger_key(3)
4464             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4465         // Trigger a GED power button event to satisfy use case 2.
4466         return self
4467             .ged_notification_device
4468             .as_ref()
4469             .unwrap()
4470             .lock()
4471             .unwrap()
4472             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4473             .map_err(DeviceManagerError::PowerButtonNotification);
4474     }
4475 
    /// Returns the stored `iommu_attached_devices` value: when present, a
    /// `PciBdf` paired with a list of `PciBdf`s.
    // NOTE(review): presumably the first element is the BDF of the IOMMU
    // device itself and the list holds the devices attached to it; confirm
    // against where the field is populated.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }
4479 
4480     fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4481         if let Some(id) = id {
4482             if id.starts_with("__") {
4483                 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4484             }
4485 
4486             if self.device_tree.lock().unwrap().contains_key(id) {
4487                 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4488             }
4489         }
4490 
4491         Ok(())
4492     }
4493 
    /// Returns a reference to the stored ACPI platform addresses.
    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
4497 }
4498 
4499 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4500     for (numa_node_id, numa_node) in numa_nodes.iter() {
4501         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4502             return Some(*numa_node_id);
4503         }
4504     }
4505 
4506     None
4507 }
4508 
4509 fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
4510     for (numa_node_id, numa_node) in numa_nodes.iter() {
4511         if numa_node.pci_segments.contains(&pci_segment_id) {
4512             return *numa_node_id;
4513         }
4514     }
4515 
4516     0
4517 }
4518 
4519 struct TpmDevice {}
4520 
4521 impl Aml for TpmDevice {
4522     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4523         aml::Device::new(
4524             "TPM2".into(),
4525             vec![
4526                 &aml::Name::new("_HID".into(), &"MSFT0101"),
4527                 &aml::Name::new("_STA".into(), &(0xF_usize)),
4528                 &aml::Name::new(
4529                     "_CRS".into(),
4530                     &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
4531                         true,
4532                         layout::TPM_START.0 as u32,
4533                         layout::TPM_SIZE as u32,
4534                     )]),
4535                 ),
4536             ],
4537         )
4538         .to_aml_bytes(sink)
4539     }
4540 }
4541 
/// ACPI AML generation for the devices owned by the `DeviceManager`.
impl Aml for DeviceManager {
    /// Appends the device manager's AML to `sink`.
    ///
    /// Emitted in order: the PCI hotplug controller (`_SB_.PHPR`), every PCI
    /// segment, the `_SB_.MBRD` device listing each segment's PCI MMIO config
    /// range, the serial port (`_SB_.COM1`, only when the serial console mode
    /// is not `Off`), the `_S5_` package, the power button (`_SB_.PWRB`), the
    /// TPM device when one is configured, and the GED notification device.
    ///
    /// # Panics
    ///
    /// Panics if no GED notification device is set or if a mutex is poisoned.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // One call to each segment's `PCNT` method; together they form the
        // body of the `PSCN` method below.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        // Same calls, viewed as `&dyn Aml` as required by `aml::Method::new`.
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write a value derived from the first argument to the
                        // B0EJ field of the PCST region declared above.
                        // NOTE(review): presumably this stores (1 << Arg0),
                        // i.e. a one-bit mask for the slot to eject; confirm
                        // against the aml::ShiftLeft argument order.
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        // Each PCI segment emits its own AML.
        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        // One 32-bit fixed memory descriptor per segment, covering the
        // segment's PCI MMIO config range.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        // Same descriptors, viewed as `&dyn Aml` for the resource template.
        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        // The COM1 device is only described when the serial console is enabled.
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        // `_S5_` package with the single value 5.
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        // Power button device
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        // The GED notification device emits its own AML last.
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}
4718 
4719 impl Pausable for DeviceManager {
4720     fn pause(&mut self) -> result::Result<(), MigratableError> {
4721         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4722             if let Some(migratable) = &device_node.migratable {
4723                 migratable.lock().unwrap().pause()?;
4724             }
4725         }
4726         // On AArch64, the pause of device manager needs to trigger
4727         // a "pause" of GIC, which will flush the GIC pending tables
4728         // and ITS tables to guest RAM.
4729         #[cfg(target_arch = "aarch64")]
4730         {
4731             self.get_interrupt_controller()
4732                 .unwrap()
4733                 .lock()
4734                 .unwrap()
4735                 .pause()?;
4736         };
4737 
4738         Ok(())
4739     }
4740 
4741     fn resume(&mut self) -> result::Result<(), MigratableError> {
4742         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4743             if let Some(migratable) = &device_node.migratable {
4744                 migratable.lock().unwrap().resume()?;
4745             }
4746         }
4747 
4748         Ok(())
4749     }
4750 }
4751 
4752 impl Snapshottable for DeviceManager {
4753     fn id(&self) -> String {
4754         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4755     }
4756 
4757     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4758         let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);
4759 
4760         // We aggregate all devices snapshots.
4761         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4762             if let Some(migratable) = &device_node.migratable {
4763                 let mut migratable = migratable.lock().unwrap();
4764                 snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
4765             }
4766         }
4767 
4768         Ok(snapshot)
4769     }
4770 }
4771 
// Empty impl: nothing is overridden here, so the device manager uses
// whatever default behaviour the `Transportable` trait provides.
impl Transportable for DeviceManager {}
4773 
4774 impl Migratable for DeviceManager {
4775     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4776         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4777             if let Some(migratable) = &device_node.migratable {
4778                 migratable.lock().unwrap().start_dirty_log()?;
4779             }
4780         }
4781         Ok(())
4782     }
4783 
4784     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4785         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4786             if let Some(migratable) = &device_node.migratable {
4787                 migratable.lock().unwrap().stop_dirty_log()?;
4788             }
4789         }
4790         Ok(())
4791     }
4792 
4793     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4794         let mut tables = Vec::new();
4795         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4796             if let Some(migratable) = &device_node.migratable {
4797                 tables.push(migratable.lock().unwrap().dirty_log()?);
4798             }
4799         }
4800         Ok(MemoryRangeTable::new_from_tables(tables))
4801     }
4802 
4803     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4804         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4805             if let Some(migratable) = &device_node.migratable {
4806                 migratable.lock().unwrap().start_migration()?;
4807             }
4808         }
4809         Ok(())
4810     }
4811 
4812     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4813         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4814             if let Some(migratable) = &device_node.migratable {
4815                 migratable.lock().unwrap().complete_migration()?;
4816             }
4817         }
4818         Ok(())
4819     }
4820 }
4821 
// Layout of the PCI hotplug register block served by the `BusDevice`
// implementation of `DeviceManager`: four consecutive 4-byte fields.
//
// PCIU: read returns the selected segment's `pci_devices_up` bitmap and
// clears it.
const PCIU_FIELD_OFFSET: u64 = 0;
// PCID: read returns the selected segment's `pci_devices_down` bitmap and
// clears it.
const PCID_FIELD_OFFSET: u64 = 4;
// B0EJ: write carries a bitmap of slots to eject on the selected segment;
// read always returns zero.
const B0EJ_FIELD_OFFSET: u64 = 8;
// PSEG: read/write the index of the currently selected PCI segment.
const PSEG_FIELD_OFFSET: u64 = 12;
// Expected access width, in bytes, for each field; accesses are asserted
// against these sizes.
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;
4830 
4831 impl BusDevice for DeviceManager {
4832     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4833         match offset {
4834             PCIU_FIELD_OFFSET => {
4835                 assert!(data.len() == PCIU_FIELD_SIZE);
4836                 data.copy_from_slice(
4837                     &self.pci_segments[self.selected_segment]
4838                         .pci_devices_up
4839                         .to_le_bytes(),
4840                 );
4841                 // Clear the PCIU bitmap
4842                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4843             }
4844             PCID_FIELD_OFFSET => {
4845                 assert!(data.len() == PCID_FIELD_SIZE);
4846                 data.copy_from_slice(
4847                     &self.pci_segments[self.selected_segment]
4848                         .pci_devices_down
4849                         .to_le_bytes(),
4850                 );
4851                 // Clear the PCID bitmap
4852                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4853             }
4854             B0EJ_FIELD_OFFSET => {
4855                 assert!(data.len() == B0EJ_FIELD_SIZE);
4856                 // Always return an empty bitmap since the eject is always
4857                 // taken care of right away during a write access.
4858                 data.fill(0);
4859             }
4860             PSEG_FIELD_OFFSET => {
4861                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4862                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4863             }
4864             _ => error!(
4865                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4866                 base, offset
4867             ),
4868         }
4869 
4870         debug!(
4871             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4872             base, offset, data
4873         )
4874     }
4875 
4876     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4877         match offset {
4878             B0EJ_FIELD_OFFSET => {
4879                 assert!(data.len() == B0EJ_FIELD_SIZE);
4880                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4881                 data_array.copy_from_slice(data);
4882                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4883 
4884                 while slot_bitmap > 0 {
4885                     let slot_id = slot_bitmap.trailing_zeros();
4886                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4887                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4888                     }
4889                     slot_bitmap &= !(1 << slot_id);
4890                 }
4891             }
4892             PSEG_FIELD_OFFSET => {
4893                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4894                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4895                 data_array.copy_from_slice(data);
4896                 let selected_segment = u32::from_le_bytes(data_array) as usize;
4897                 if selected_segment >= self.pci_segments.len() {
4898                     error!(
4899                         "Segment selection out of range: {} >= {}",
4900                         selected_segment,
4901                         self.pci_segments.len()
4902                     );
4903                     return None;
4904                 }
4905                 self.selected_segment = selected_segment;
4906             }
4907             _ => error!(
4908                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4909                 base, offset
4910             ),
4911         }
4912 
4913         debug!(
4914             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4915             base, offset, data
4916         );
4917 
4918         None
4919     }
4920 }
4921 
4922 impl Drop for DeviceManager {
4923     fn drop(&mut self) {
4924         // Wake up the DeviceManager threads (mainly virtio device workers),
4925         // to avoid deadlock on waiting for paused/parked worker threads.
4926         if let Err(e) = self.resume() {
4927             error!("Error resuming DeviceManager: {:?}", e);
4928         }
4929 
4930         for handle in self.virtio_devices.drain(..) {
4931             handle.virtio_device.lock().unwrap().shutdown();
4932         }
4933 
4934         if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
4935             // SAFETY: FFI call
4936             let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
4937         }
4938     }
4939 }
4940 
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks how `create_mmio_allocators` splits the same address range
    /// across one segment, two equally weighted segments and two segments
    /// weighted 2:1.
    #[test]
    fn test_create_mmio_allocators() {
        use vm_memory::GuestAddress;

        // Captures the `(base, end)` pair of every allocator so a whole
        // layout can be compared in a single assertion.
        macro_rules! layout {
            ($allocators:expr) => {
                $allocators
                    .iter()
                    .map(|allocator| {
                        let allocator = allocator.lock().unwrap();
                        (allocator.base(), allocator.end())
                    })
                    .collect::<Vec<_>>()
            };
        }

        // One segment owns the whole range.
        let single = create_mmio_allocators(0x100000, 0x400000, 1, vec![1], 4 << 10);
        assert_eq!(single.len(), 1);
        assert_eq!(
            layout!(single),
            vec![(GuestAddress(0x100000), GuestAddress(0x3fffff))]
        );

        // Two segments with equal weights split the range in half.
        let even = create_mmio_allocators(0x100000, 0x400000, 2, vec![1, 1], 4 << 10);
        assert_eq!(even.len(), 2);
        assert_eq!(
            layout!(even),
            vec![
                (GuestAddress(0x100000), GuestAddress(0x27ffff)),
                (GuestAddress(0x280000), GuestAddress(0x3fffff)),
            ]
        );

        // With weights 2:1 the first segment gets two thirds of the range.
        let weighted = create_mmio_allocators(0x100000, 0x400000, 2, vec![2, 1], 4 << 10);
        assert_eq!(weighted.len(), 2);
        assert_eq!(
            layout!(weighted),
            vec![
                (GuestAddress(0x100000), GuestAddress(0x2fffff)),
                (GuestAddress(0x300000), GuestAddress(0x3fffff)),
            ]
        );
    }
}
4997