xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision 5a70d7ec69836ad66cdd1e4ea59414dcdaaeec8c)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use crate::config::{
13     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
14     VdpaConfig, VhostMode, VmConfig, VsockConfig,
15 };
16 use crate::console_devices::{ConsoleDeviceError, ConsoleInfo, ConsoleOutput};
17 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE};
18 use crate::device_tree::{DeviceNode, DeviceTree};
19 use crate::interrupt::LegacyUserspaceInterruptManager;
20 use crate::interrupt::MsiInterruptManager;
21 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
22 use crate::pci_segment::PciSegment;
23 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
24 use crate::vm_config::DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT;
25 use crate::GuestRegionMmap;
26 use crate::PciDeviceInfo;
27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
28 use acpi_tables::sdt::GenericAddress;
29 use acpi_tables::{aml, Aml};
30 use anyhow::anyhow;
31 use arch::layout;
32 #[cfg(target_arch = "x86_64")]
33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
34 use arch::NumaNodes;
35 #[cfg(target_arch = "aarch64")]
36 use arch::{DeviceType, MmioDeviceInfo};
37 use block::{
38     async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type,
39     fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio,
40     raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType,
41 };
42 #[cfg(feature = "io_uring")]
43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk};
44 #[cfg(target_arch = "x86_64")]
45 use devices::debug_console::DebugConsole;
46 #[cfg(target_arch = "aarch64")]
47 use devices::gic;
48 #[cfg(target_arch = "x86_64")]
49 use devices::ioapic;
50 #[cfg(target_arch = "aarch64")]
51 use devices::legacy::Pl011;
52 #[cfg(feature = "pvmemcontrol")]
53 use devices::pvmemcontrol::{PvmemcontrolBusDevice, PvmemcontrolPciDevice};
54 use devices::{
55     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
56 };
57 use hypervisor::IoEventAddress;
58 use libc::{
59     tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE,
60     TCSANOW,
61 };
62 use pci::{
63     DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping,
64     VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError,
65 };
66 use rate_limiter::group::RateLimiterGroup;
67 use seccompiler::SeccompAction;
68 use serde::{Deserialize, Serialize};
69 use std::collections::{BTreeMap, BTreeSet, HashMap};
70 use std::fs::{File, OpenOptions};
71 use std::io::{self, stdout, IsTerminal, Seek, SeekFrom};
72 use std::num::Wrapping;
73 use std::os::unix::fs::OpenOptionsExt;
74 use std::os::unix::io::{AsRawFd, FromRawFd};
75 use std::path::PathBuf;
76 use std::result;
77 use std::sync::{Arc, Mutex};
78 use std::time::Instant;
79 use tracer::trace_scoped;
80 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd};
81 use virtio_devices::transport::VirtioTransport;
82 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator};
83 use virtio_devices::vhost_user::VhostUserConfig;
84 use virtio_devices::{
85     AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource,
86 };
87 use virtio_devices::{Endpoint, IommuMapping};
88 use vm_allocator::{AddressAllocator, SystemAllocator};
89 use vm_device::dma_mapping::ExternalDmaMapping;
90 use vm_device::interrupt::{
91     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
92 };
93 use vm_device::{Bus, BusDevice, BusDeviceSync, Resource};
94 use vm_memory::guest_memory::FileOffset;
95 use vm_memory::GuestMemoryRegion;
96 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
97 #[cfg(target_arch = "x86_64")]
98 use vm_memory::{GuestAddressSpace, GuestMemory};
99 use vm_migration::{
100     protocol::MemoryRangeTable, snapshot_from_id, state_from_id, Migratable, MigratableError,
101     Pausable, Snapshot, SnapshotData, Snapshottable, Transportable,
102 };
103 use vm_virtio::AccessPlatform;
104 use vm_virtio::VirtioDeviceType;
105 use vmm_sys_util::eventfd::EventFd;
106 #[cfg(target_arch = "x86_64")]
107 use {devices::debug_console, devices::legacy::Serial};
108 
// presumably the length in bytes of one MMIO device window on aarch64 —
// TODO confirm against the code that consumes it
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name.
// Every identifier in this group starts with a double underscore.
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "x86_64")]
const DEBUGCON_DEVICE_NAME: &str = "__debug_console";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
#[cfg(feature = "pvmemcontrol")]
const PVMEMCONTROL_DEVICE_NAME: &str = "__pvmemcontrol";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
const PVPANIC_DEVICE_NAME: &str = "__pvpanic";

// Devices that the user may name and for which we generate
// identifiers if the user doesn't give one.
// The generated-name prefixes in this group start with a single underscore.
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
// NOTE(review): this is a full name using the double-underscore form of the
// singleton group rather than a prefix — confirm whether it belongs there.
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
140 
/// Errors associated with device manager.
///
/// Each variant names one failure and, where present, carries the underlying
/// error value of the operation that failed.
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Cannot create tpm device
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create pvmemcontrol device
    #[cfg(feature = "pvmemcontrol")]
    CreatePvmemcontrol(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    #[cfg(target_arch = "x86_64")]
    /// Error creating debug-console output file
    DebugconOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error creating debug-console pty
    DebugconPtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(ConsoleDeviceError),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// No socket option support for console device
    NoSocketOptionSupportForConsoleDevice,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier
    InvalidIdentifier(String),

    /// Error activating virtio device
    VirtioActivate(ActivateError),

    /// Failed retrieving device state from snapshot
    RestoreGetState(MigratableError),

    /// Cannot create a PvPanic device
    PvPanicCreate(devices::pvpanic::PvPanicError),

    /// Cannot create a RateLimiterGroup
    RateLimiterGroupCreate(rate_limiter::group::Error),

    /// Cannot start sigwinch listener
    StartSigwinchListener(std::io::Error),

    /// Invalid console info
    InvalidConsoleInfo,

    /// Invalid console fd
    InvalidConsoleFd,
}
513 
/// Result alias whose error type is [`DeviceManagerError`].
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// presumably the size in bytes of the device manager's ACPI register
// window — TODO confirm against the code that allocates it
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;
517 
/// Console abstraction holding an optional resizer.
///
/// The default value has no resizer attached.
#[derive(Default)]
pub struct Console {
    // Shared virtio console resizer; `None` when no resizer is attached.
    console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
}
522 
523 impl Console {
524     pub fn need_resize(&self) -> bool {
525         if let Some(_resizer) = self.console_resizer.as_ref() {
526             return true;
527         }
528 
529         false
530     }
531 
532     pub fn update_console_size(&self) {
533         if let Some(resizer) = self.console_resizer.as_ref() {
534             resizer.update_console_size()
535         }
536     }
537 }
538 
/// Shared handles needed to relocate device address ranges.
///
/// Its `DeviceRelocation` implementation uses these fields to move a PCI BAR:
/// allocators, buses, the hypervisor VM handle and the device tree.
pub(crate) struct AddressManager {
    // System-wide allocator; `move_bar` uses it for I/O port ranges.
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    // Port I/O bus, only present on x86_64.
    #[cfg(target_arch = "x86_64")]
    pub(crate) io_bus: Arc<Bus>,
    // Memory-mapped I/O bus.
    pub(crate) mmio_bus: Arc<Bus>,
    // Hypervisor VM handle, used for ioevents and user memory regions.
    pub(crate) vm: Arc<dyn hypervisor::Vm>,
    // Device tree whose `PciBar` resources are updated on relocation.
    device_tree: Arc<Mutex<DeviceTree>>,
    // Allocators searched when relocating a 32-bit memory BAR.
    pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
    // Allocators searched when relocating a 64-bit memory BAR.
    pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}
549 
550 impl DeviceRelocation for AddressManager {
551     fn move_bar(
552         &self,
553         old_base: u64,
554         new_base: u64,
555         len: u64,
556         pci_dev: &mut dyn PciDevice,
557         region_type: PciBarRegionType,
558     ) -> std::result::Result<(), std::io::Error> {
559         match region_type {
560             PciBarRegionType::IoRegion => {
561                 #[cfg(target_arch = "x86_64")]
562                 {
563                     // Update system allocator
564                     self.allocator
565                         .lock()
566                         .unwrap()
567                         .free_io_addresses(GuestAddress(old_base), len as GuestUsize);
568 
569                     self.allocator
570                         .lock()
571                         .unwrap()
572                         .allocate_io_addresses(
573                             Some(GuestAddress(new_base)),
574                             len as GuestUsize,
575                             None,
576                         )
577                         .ok_or_else(|| {
578                             io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
579                         })?;
580 
581                     // Update PIO bus
582                     self.io_bus
583                         .update_range(old_base, len, new_base, len)
584                         .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
585                 }
586                 #[cfg(target_arch = "aarch64")]
587                 error!("I/O region is not supported");
588             }
589             PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
590                 let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
591                     &self.pci_mmio32_allocators
592                 } else {
593                     &self.pci_mmio64_allocators
594                 };
595 
596                 // Find the specific allocator that this BAR was allocated from and use it for new one
597                 for allocator in allocators {
598                     let allocator_base = allocator.lock().unwrap().base();
599                     let allocator_end = allocator.lock().unwrap().end();
600 
601                     if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
602                         allocator
603                             .lock()
604                             .unwrap()
605                             .free(GuestAddress(old_base), len as GuestUsize);
606 
607                         allocator
608                             .lock()
609                             .unwrap()
610                             .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
611                             .ok_or_else(|| {
612                                 io::Error::new(
613                                     io::ErrorKind::Other,
614                                     "failed allocating new MMIO range",
615                                 )
616                             })?;
617 
618                         break;
619                     }
620                 }
621 
622                 // Update MMIO bus
623                 self.mmio_bus
624                     .update_range(old_base, len, new_base, len)
625                     .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
626             }
627         }
628 
629         // Update the device_tree resources associated with the device
630         if let Some(id) = pci_dev.id() {
631             if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
632                 let mut resource_updated = false;
633                 for resource in node.resources.iter_mut() {
634                     if let Resource::PciBar { base, type_, .. } = resource {
635                         if PciBarRegionType::from(*type_) == region_type && *base == old_base {
636                             *base = new_base;
637                             resource_updated = true;
638                             break;
639                         }
640                     }
641                 }
642 
643                 if !resource_updated {
644                     return Err(io::Error::new(
645                         io::ErrorKind::Other,
646                         format!(
647                             "Couldn't find a resource with base 0x{old_base:x} for device {id}"
648                         ),
649                     ));
650                 }
651             } else {
652                 return Err(io::Error::new(
653                     io::ErrorKind::Other,
654                     format!("Couldn't find device {id} from device tree"),
655                 ));
656             }
657         }
658 
659         let any_dev = pci_dev.as_any();
660         if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
661             let bar_addr = virtio_pci_dev.config_bar_addr();
662             if bar_addr == new_base {
663                 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
664                     let io_addr = IoEventAddress::Mmio(addr);
665                     self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
666                         io::Error::new(
667                             io::ErrorKind::Other,
668                             format!("failed to unregister ioevent: {e:?}"),
669                         )
670                     })?;
671                 }
672                 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
673                     let io_addr = IoEventAddress::Mmio(addr);
674                     self.vm
675                         .register_ioevent(event, &io_addr, None)
676                         .map_err(|e| {
677                             io::Error::new(
678                                 io::ErrorKind::Other,
679                                 format!("failed to register ioevent: {e:?}"),
680                             )
681                         })?;
682                 }
683             } else {
684                 let virtio_dev = virtio_pci_dev.virtio_device();
685                 let mut virtio_dev = virtio_dev.lock().unwrap();
686                 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
687                     if shm_regions.addr.raw_value() == old_base {
688                         let mem_region = self.vm.make_user_memory_region(
689                             shm_regions.mem_slot,
690                             old_base,
691                             shm_regions.len,
692                             shm_regions.host_addr,
693                             false,
694                             false,
695                         );
696 
697                         self.vm.remove_user_memory_region(mem_region).map_err(|e| {
698                             io::Error::new(
699                                 io::ErrorKind::Other,
700                                 format!("failed to remove user memory region: {e:?}"),
701                             )
702                         })?;
703 
704                         // Create new mapping by inserting new region to KVM.
705                         let mem_region = self.vm.make_user_memory_region(
706                             shm_regions.mem_slot,
707                             new_base,
708                             shm_regions.len,
709                             shm_regions.host_addr,
710                             false,
711                             false,
712                         );
713 
714                         self.vm.create_user_memory_region(mem_region).map_err(|e| {
715                             io::Error::new(
716                                 io::ErrorKind::Other,
717                                 format!("failed to create user memory regions: {e:?}"),
718                             )
719                         })?;
720 
721                         // Update shared memory regions to reflect the new mapping.
722                         shm_regions.addr = GuestAddress(new_base);
723                         virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
724                             io::Error::new(
725                                 io::ErrorKind::Other,
726                                 format!("failed to update shared memory regions: {e:?}"),
727                             )
728                         })?;
729                     }
730                 }
731             }
732         }
733 
734         pci_dev.move_bar(old_base, new_base)
735     }
736 }
737 
/// Serializable device manager state: the device tree and the device id
/// counter.
// presumably the payload saved and restored through snapshots — TODO confirm
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}
743 
/// An open pty file together with a filesystem path.
#[derive(Debug)]
pub struct PtyPair {
    /// Open file handle; named after the pty main end.
    pub main: File,
    /// presumably the path of the pty peer device — TODO confirm with callers
    pub path: PathBuf,
}

impl Clone for PtyPair {
    /// Duplicates the file handle with `File::try_clone` and copies the path.
    ///
    /// # Panics
    ///
    /// Panics if the file handle cannot be duplicated: `Clone::clone` has no
    /// way to report the failure to the caller.
    fn clone(&self) -> Self {
        let main = self
            .main
            .try_clone()
            .expect("failed to duplicate the pty main file descriptor");

        Self {
            main,
            path: self.path.clone(),
        }
    }
}
758 
/// Shared, lockable handle to one of the supported PCI device kinds.
///
/// Cloning copies the `Arc`, so every clone refers to the same device.
#[derive(Clone)]
pub enum PciDeviceHandle {
    /// VFIO PCI device.
    Vfio(Arc<Mutex<VfioPciDevice>>),
    /// Virtio PCI device.
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    /// vfio-user PCI device.
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}
765 
/// A virtio device together with the metadata kept alongside it.
#[derive(Clone)]
struct MetaVirtioDevice {
    // Shared handle to the virtio device itself.
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // presumably whether the device sits behind the virtual IOMMU — TODO confirm
    iommu: bool,
    // Device identifier.
    id: String,
    // presumably the index of the PCI segment hosting the device — TODO confirm
    pci_segment: u16,
    // Optional external DMA mapping handler for this device.
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}
774 
/// Optional ACPI generic addresses of platform registers.
///
/// Every field is `None` in the default value.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    /// Address of the PM timer, when set.
    pub pm_timer_address: Option<GenericAddress>,
    /// Address of the reset register, when set.
    pub reset_reg_address: Option<GenericAddress>,
    /// Address of the sleep control register, when set.
    pub sleep_control_reg_address: Option<GenericAddress>,
    /// Address of the sleep status register, when set.
    pub sleep_status_reg_address: Option<GenericAddress>,
}
782 
// Holds the hypervisor VM handle on which page access is requested.
// Only built when both the `mshv` and `sev_snp` features are enabled.
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
struct SevSnpPageAccessProxy {
    vm: Arc<dyn hypervisor::Vm>,
}
787 
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl std::fmt::Debug for SevSnpPageAccessProxy {
    // Prints a fixed label; the wrapped VM handle is not shown.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        formatter.write_str("SNP Page access proxy")
    }
}
794 
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl SevSnpPageAccessProxy {
    // Builds a proxy around the given hypervisor VM handle.
    fn new(vm: Arc<dyn hypervisor::Vm>) -> SevSnpPageAccessProxy {
        Self { vm }
    }
}
801 
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl AccessPlatform for SevSnpPageAccessProxy {
    // Returns `base` unchanged; the size is ignored.
    fn translate_gpa(&self, base: u64, _size: u64) -> std::result::Result<u64, std::io::Error> {
        Ok(base)
    }

    // Requests page access for `size` bytes at `base` from the VM, then
    // returns `base` unchanged.
    //
    // Errors: `InvalidInput` when `size` does not fit the 32-bit size passed
    // to the VM (a truncating cast would request access to a shorter range
    // than asked for); `Other` when the VM rejects the request.
    fn translate_gva(&self, base: u64, size: u64) -> std::result::Result<u64, std::io::Error> {
        if size > u64::from(u32::MAX) {
            return Err(io::Error::new(
                io::ErrorKind::InvalidInput,
                format!("page access size 0x{size:x} does not fit in 32 bits"),
            ));
        }

        self.vm
            // Lossless: the range was checked just above.
            .gain_page_access(base, size as u32)
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
        Ok(base)
    }
}
815 
/// Holds the device-related state of a VM: the address manager, the PCI
/// segments, the interrupt managers, and handles to the individual devices
/// that get created by `create_devices()`.
///
/// Instances are built by `DeviceManager::new()`, which returns the manager
/// wrapped in `Arc<Mutex<_>>` and also registers it on the MMIO bus.
pub struct DeviceManager {
    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // File handle returned by `console_resize_pipe()`; `None` after `new()`.
    // NOTE(review): presumably the pipe used to signal console resize events;
    // confirm against the console device code.
    console_resize_pipe: Option<Arc<File>>,

    // Original terminal settings, to restore on exit. Replaced by the value
    // passed to `create_devices()`.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller (IOAPIC on x86_64, GIC on aarch64). `None` until
    // `add_interrupt_controller()` has run.
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<dyn BusDeviceSync>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    // PCI segments. Index 0 is the default segment; the remaining ones are
    // created in increasing segment id order by `new()`.
    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager. `None` until `create_devices()` has built it
    // on top of the interrupt controller.
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU device and its mapping. Both are set together by
    // `add_pci_devices()` when the configuration enables the IOMMU.
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    exit_evt: EventFd,
    // Reset event
    reset_evt: EventFd,

    // MMIO device information keyed by (device type, device id); exposed
    // through `get_device_info()`.
    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    // MMIO address of the DeviceManager's own ACPI region
    // (DEVICE_MANAGER_ACPI_SIZE bytes), allocated and put on the MMIO bus by
    // `new()`.
    acpi_address: GuestAddress,

    // Initialized to 0 by `new()`.
    // NOTE(review): presumably the PCI segment currently selected through the
    // ACPI region above; confirm against the BusDevice implementation.
    selected_segment: usize,

    // Handles to the virtio-mem devices (possibly empty)
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // pvmemcontrol bus device and PCI device, set by `create_devices()` when
    // the configuration requests pvmemcontrol.
    #[cfg(feature = "pvmemcontrol")]
    pvmemcontrol_devices: Option<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // aio availability if detected
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    // Snapshot to restore device state from, if any. Looked up per device id
    // while the devices are being created.
    snapshot: Option<Snapshot>,

    // Rate limiter groups keyed by their configured id. Each group has its
    // thread started by `new()`.
    rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,

    // Starts out empty.
    // NOTE(review): presumably tracks MMIO regions exposed by devices; confirm
    // against the code that fills it.
    mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
}
965 
966 fn create_mmio_allocators(
967     start: u64,
968     end: u64,
969     num_pci_segments: u16,
970     weights: Vec<u32>,
971     alignment: u64,
972 ) -> Vec<Arc<Mutex<AddressAllocator>>> {
973     let total_weight: u32 = weights.iter().sum();
974 
975     // Start each PCI segment mmio range on an aligned boundary
976     let pci_segment_mmio_size = (end - start + 1) / (alignment * total_weight as u64) * alignment;
977 
978     let mut mmio_allocators = vec![];
979     let mut i = 0;
980     for segment_id in 0..num_pci_segments as u64 {
981         let weight = weights[segment_id as usize] as u64;
982         let mmio_start = start + i * pci_segment_mmio_size;
983         let mmio_size = pci_segment_mmio_size * weight;
984         let allocator = Arc::new(Mutex::new(
985             AddressAllocator::new(GuestAddress(mmio_start), mmio_size).unwrap(),
986         ));
987         mmio_allocators.push(allocator);
988         i += weight;
989     }
990 
991     mmio_allocators
992 }
993 
994 impl DeviceManager {
995     #[allow(clippy::too_many_arguments)]
996     pub fn new(
997         #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
998         mmio_bus: Arc<Bus>,
999         vm: Arc<dyn hypervisor::Vm>,
1000         config: Arc<Mutex<VmConfig>>,
1001         memory_manager: Arc<Mutex<MemoryManager>>,
1002         cpu_manager: Arc<Mutex<CpuManager>>,
1003         exit_evt: EventFd,
1004         reset_evt: EventFd,
1005         seccomp_action: SeccompAction,
1006         numa_nodes: NumaNodes,
1007         activate_evt: &EventFd,
1008         force_iommu: bool,
1009         boot_id_list: BTreeSet<String>,
1010         timestamp: Instant,
1011         snapshot: Option<Snapshot>,
1012         dynamic: bool,
1013     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
1014         trace_scoped!("DeviceManager::new");
1015 
1016         let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
1017             let state: DeviceManagerState = snapshot.to_state().unwrap();
1018             (
1019                 Arc::new(Mutex::new(state.device_tree.clone())),
1020                 state.device_id_cnt,
1021             )
1022         } else {
1023             (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
1024         };
1025 
1026         let num_pci_segments =
1027             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
1028                 platform_config.num_pci_segments
1029             } else {
1030                 1
1031             };
1032 
1033         let mut mmio32_aperture_weights: Vec<u32> =
1034             std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
1035                 .take(num_pci_segments.into())
1036                 .collect();
1037         if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
1038             for pci_segment in pci_segments.iter() {
1039                 mmio32_aperture_weights[pci_segment.pci_segment as usize] =
1040                     pci_segment.mmio32_aperture_weight
1041             }
1042         }
1043 
1044         let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0;
1045         let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE;
1046         let pci_mmio32_allocators = create_mmio_allocators(
1047             start_of_mmio32_area,
1048             end_of_mmio32_area,
1049             num_pci_segments,
1050             mmio32_aperture_weights,
1051             4 << 10,
1052         );
1053 
1054         let mut mmio64_aperture_weights: Vec<u32> =
1055             std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
1056                 .take(num_pci_segments.into())
1057                 .collect();
1058         if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
1059             for pci_segment in pci_segments.iter() {
1060                 mmio64_aperture_weights[pci_segment.pci_segment as usize] =
1061                     pci_segment.mmio64_aperture_weight
1062             }
1063         }
1064 
1065         let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0;
1066         let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0;
1067         let pci_mmio64_allocators = create_mmio_allocators(
1068             start_of_mmio64_area,
1069             end_of_mmio64_area,
1070             num_pci_segments,
1071             mmio64_aperture_weights,
1072             4 << 30,
1073         );
1074 
1075         let address_manager = Arc::new(AddressManager {
1076             allocator: memory_manager.lock().unwrap().allocator(),
1077             #[cfg(target_arch = "x86_64")]
1078             io_bus,
1079             mmio_bus,
1080             vm: vm.clone(),
1081             device_tree: Arc::clone(&device_tree),
1082             pci_mmio32_allocators,
1083             pci_mmio64_allocators,
1084         });
1085 
1086         // First we create the MSI interrupt manager, the legacy one is created
1087         // later, after the IOAPIC device creation.
1088         // The reason we create the MSI one first is because the IOAPIC needs it,
1089         // and then the legacy interrupt manager needs an IOAPIC. So we're
1090         // handling a linear dependency chain:
1091         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1092         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1093             Arc::new(MsiInterruptManager::new(
1094                 Arc::clone(&address_manager.allocator),
1095                 vm,
1096             ));
1097 
1098         let acpi_address = address_manager
1099             .allocator
1100             .lock()
1101             .unwrap()
1102             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1103             .ok_or(DeviceManagerError::AllocateIoPort)?;
1104 
1105         let mut pci_irq_slots = [0; 32];
1106         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1107             &address_manager,
1108             &mut pci_irq_slots,
1109         )?;
1110 
1111         let mut pci_segments = vec![PciSegment::new_default_segment(
1112             &address_manager,
1113             Arc::clone(&address_manager.pci_mmio32_allocators[0]),
1114             Arc::clone(&address_manager.pci_mmio64_allocators[0]),
1115             &pci_irq_slots,
1116         )?];
1117 
1118         for i in 1..num_pci_segments as usize {
1119             pci_segments.push(PciSegment::new(
1120                 i as u16,
1121                 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
1122                 &address_manager,
1123                 Arc::clone(&address_manager.pci_mmio32_allocators[i]),
1124                 Arc::clone(&address_manager.pci_mmio64_allocators[i]),
1125                 &pci_irq_slots,
1126             )?);
1127         }
1128 
1129         if dynamic {
1130             let acpi_address = address_manager
1131                 .allocator
1132                 .lock()
1133                 .unwrap()
1134                 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
1135                 .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1136 
1137             address_manager
1138                 .mmio_bus
1139                 .insert(
1140                     cpu_manager.clone(),
1141                     acpi_address.0,
1142                     CPU_MANAGER_ACPI_SIZE as u64,
1143                 )
1144                 .map_err(DeviceManagerError::BusError)?;
1145 
1146             cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
1147         }
1148 
1149         let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new();
1150         if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() {
1151             for rate_limit_group_cfg in rate_limit_groups_cfg {
1152                 let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config;
1153                 let bw = rate_limit_cfg.bandwidth.unwrap_or_default();
1154                 let ops = rate_limit_cfg.ops.unwrap_or_default();
1155                 let mut rate_limit_group = RateLimiterGroup::new(
1156                     &rate_limit_group_cfg.id,
1157                     bw.size,
1158                     bw.one_time_burst.unwrap_or(0),
1159                     bw.refill_time,
1160                     ops.size,
1161                     ops.one_time_burst.unwrap_or(0),
1162                     ops.refill_time,
1163                 )
1164                 .map_err(DeviceManagerError::RateLimiterGroupCreate)?;
1165 
1166                 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?;
1167 
1168                 rate_limit_group.start_thread(exit_evt).unwrap();
1169                 rate_limit_groups
1170                     .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group));
1171             }
1172         }
1173 
1174         let device_manager = DeviceManager {
1175             address_manager: Arc::clone(&address_manager),
1176             console: Arc::new(Console::default()),
1177             interrupt_controller: None,
1178             #[cfg(target_arch = "aarch64")]
1179             cmdline_additions: Vec::new(),
1180             ged_notification_device: None,
1181             config,
1182             memory_manager,
1183             cpu_manager,
1184             virtio_devices: Vec::new(),
1185             bus_devices: Vec::new(),
1186             device_id_cnt,
1187             msi_interrupt_manager,
1188             legacy_interrupt_manager: None,
1189             passthrough_device: None,
1190             vfio_container: None,
1191             iommu_device: None,
1192             iommu_mapping: None,
1193             iommu_attached_devices: None,
1194             pci_segments,
1195             device_tree,
1196             exit_evt,
1197             reset_evt,
1198             #[cfg(target_arch = "aarch64")]
1199             id_to_dev_info: HashMap::new(),
1200             seccomp_action,
1201             numa_nodes,
1202             balloon: None,
1203             activate_evt: activate_evt
1204                 .try_clone()
1205                 .map_err(DeviceManagerError::EventFd)?,
1206             acpi_address,
1207             selected_segment: 0,
1208             serial_manager: None,
1209             console_resize_pipe: None,
1210             original_termios_opt: Arc::new(Mutex::new(None)),
1211             virtio_mem_devices: Vec::new(),
1212             #[cfg(target_arch = "aarch64")]
1213             gpio_device: None,
1214             #[cfg(feature = "pvmemcontrol")]
1215             pvmemcontrol_devices: None,
1216             pvpanic_device: None,
1217             force_iommu,
1218             io_uring_supported: None,
1219             aio_supported: None,
1220             boot_id_list,
1221             timestamp,
1222             pending_activations: Arc::new(Mutex::new(Vec::default())),
1223             acpi_platform_addresses: AcpiPlatformAddresses::default(),
1224             snapshot,
1225             rate_limit_groups,
1226             mmio_regions: Arc::new(Mutex::new(Vec::new())),
1227         };
1228 
1229         let device_manager = Arc::new(Mutex::new(device_manager));
1230 
1231         address_manager
1232             .mmio_bus
1233             .insert(
1234                 Arc::clone(&device_manager) as Arc<dyn BusDeviceSync>,
1235                 acpi_address.0,
1236                 DEVICE_MANAGER_ACPI_SIZE as u64,
1237             )
1238             .map_err(DeviceManagerError::BusError)?;
1239 
1240         Ok(device_manager)
1241     }
1242 
1243     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1244         self.console_resize_pipe.clone()
1245     }
1246 
1247     pub fn create_devices(
1248         &mut self,
1249         console_info: Option<ConsoleInfo>,
1250         console_resize_pipe: Option<Arc<File>>,
1251         original_termios_opt: Arc<Mutex<Option<termios>>>,
1252     ) -> DeviceManagerResult<()> {
1253         trace_scoped!("create_devices");
1254 
1255         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1256 
1257         let interrupt_controller = self.add_interrupt_controller()?;
1258 
1259         self.cpu_manager
1260             .lock()
1261             .unwrap()
1262             .set_interrupt_controller(interrupt_controller.clone());
1263 
1264         // Now we can create the legacy interrupt manager, which needs the freshly
1265         // formed IOAPIC device.
1266         let legacy_interrupt_manager: Arc<
1267             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1268         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1269             &interrupt_controller,
1270         )));
1271 
1272         {
1273             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1274                 self.address_manager
1275                     .mmio_bus
1276                     .insert(
1277                         Arc::clone(&self.memory_manager) as Arc<dyn BusDeviceSync>,
1278                         acpi_address.0,
1279                         MEMORY_MANAGER_ACPI_SIZE as u64,
1280                     )
1281                     .map_err(DeviceManagerError::BusError)?;
1282             }
1283         }
1284 
1285         #[cfg(target_arch = "x86_64")]
1286         self.add_legacy_devices(
1287             self.reset_evt
1288                 .try_clone()
1289                 .map_err(DeviceManagerError::EventFd)?,
1290         )?;
1291 
1292         #[cfg(target_arch = "aarch64")]
1293         self.add_legacy_devices(&legacy_interrupt_manager)?;
1294 
1295         {
1296             self.ged_notification_device = self.add_acpi_devices(
1297                 &legacy_interrupt_manager,
1298                 self.reset_evt
1299                     .try_clone()
1300                     .map_err(DeviceManagerError::EventFd)?,
1301                 self.exit_evt
1302                     .try_clone()
1303                     .map_err(DeviceManagerError::EventFd)?,
1304             )?;
1305         }
1306 
1307         self.original_termios_opt = original_termios_opt;
1308 
1309         self.console = self.add_console_devices(
1310             &legacy_interrupt_manager,
1311             &mut virtio_devices,
1312             console_info,
1313             console_resize_pipe,
1314         )?;
1315 
1316         if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
1317             let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
1318             self.bus_devices
1319                 .push(Arc::clone(&tpm_dev) as Arc<dyn BusDeviceSync>)
1320         }
1321         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1322 
1323         virtio_devices.append(&mut self.make_virtio_devices()?);
1324 
1325         self.add_pci_devices(virtio_devices.clone())?;
1326 
1327         self.virtio_devices = virtio_devices;
1328 
1329         // Add pvmemcontrol if required
1330         #[cfg(feature = "pvmemcontrol")]
1331         {
1332             if self.config.lock().unwrap().pvmemcontrol.is_some() {
1333                 let (pvmemcontrol_bus_device, pvmemcontrol_pci_device) =
1334                     self.make_pvmemcontrol_device()?;
1335                 self.pvmemcontrol_devices =
1336                     Some((pvmemcontrol_bus_device, pvmemcontrol_pci_device));
1337             }
1338         }
1339 
1340         if self.config.clone().lock().unwrap().pvpanic {
1341             self.pvpanic_device = self.add_pvpanic_device()?;
1342         }
1343 
1344         Ok(())
1345     }
1346 
1347     fn state(&self) -> DeviceManagerState {
1348         DeviceManagerState {
1349             device_tree: self.device_tree.lock().unwrap().clone(),
1350             device_id_cnt: self.device_id_cnt,
1351         }
1352     }
1353 
1354     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1355         #[cfg(target_arch = "aarch64")]
1356         {
1357             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1358             let vgic_config = gic::Gic::create_default_config(vcpus.into());
1359             (
1360                 vgic_config.msi_addr,
1361                 vgic_config.msi_addr + vgic_config.msi_size - 1,
1362             )
1363         }
1364         #[cfg(target_arch = "x86_64")]
1365         (0xfee0_0000, 0xfeef_ffff)
1366     }
1367 
    /// Gets the information of the devices registered up to some point in time.
    ///
    /// Returns a borrow of the map keyed by `(DeviceType, String)`; nothing is
    /// copied, so the result reflects whatever has been stored in the manager
    /// at the time of the call.
    #[cfg(target_arch = "aarch64")]
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }
1373 
1374     #[allow(unused_variables)]
1375     fn add_pci_devices(
1376         &mut self,
1377         virtio_devices: Vec<MetaVirtioDevice>,
1378     ) -> DeviceManagerResult<()> {
1379         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1380 
1381         let iommu_device = if self.config.lock().unwrap().iommu {
1382             let (device, mapping) = virtio_devices::Iommu::new(
1383                 iommu_id.clone(),
1384                 self.seccomp_action.clone(),
1385                 self.exit_evt
1386                     .try_clone()
1387                     .map_err(DeviceManagerError::EventFd)?,
1388                 self.get_msi_iova_space(),
1389                 state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
1390                     .map_err(DeviceManagerError::RestoreGetState)?,
1391             )
1392             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1393             let device = Arc::new(Mutex::new(device));
1394             self.iommu_device = Some(Arc::clone(&device));
1395             self.iommu_mapping = Some(mapping);
1396 
1397             // Fill the device tree with a new node. In case of restore, we
1398             // know there is nothing to do, so we can simply override the
1399             // existing entry.
1400             self.device_tree
1401                 .lock()
1402                 .unwrap()
1403                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1404 
1405             Some(device)
1406         } else {
1407             None
1408         };
1409 
1410         let mut iommu_attached_devices = Vec::new();
1411         {
1412             for handle in virtio_devices {
1413                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1414                     self.iommu_mapping.clone()
1415                 } else {
1416                     None
1417                 };
1418 
1419                 let dev_id = self.add_virtio_pci_device(
1420                     handle.virtio_device,
1421                     &mapping,
1422                     handle.id,
1423                     handle.pci_segment,
1424                     handle.dma_handler,
1425                 )?;
1426 
1427                 if handle.iommu {
1428                     iommu_attached_devices.push(dev_id);
1429                 }
1430             }
1431 
1432             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1433             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1434 
1435             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1436             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1437 
1438             // Add all devices from forced iommu segments
1439             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1440                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1441                     for segment in iommu_segments {
1442                         for device in 0..32 {
1443                             let bdf = PciBdf::new(*segment, 0, device, 0);
1444                             if !iommu_attached_devices.contains(&bdf) {
1445                                 iommu_attached_devices.push(bdf);
1446                             }
1447                         }
1448                     }
1449                 }
1450             }
1451 
1452             if let Some(iommu_device) = iommu_device {
1453                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1454                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1455             }
1456         }
1457 
1458         for segment in &self.pci_segments {
1459             #[cfg(target_arch = "x86_64")]
1460             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1461                 self.bus_devices
1462                     .push(Arc::clone(pci_config_io) as Arc<dyn BusDeviceSync>);
1463             }
1464 
1465             self.bus_devices
1466                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<dyn BusDeviceSync>);
1467         }
1468 
1469         Ok(())
1470     }
1471 
1472     #[cfg(target_arch = "aarch64")]
1473     fn add_interrupt_controller(
1474         &mut self,
1475     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1476         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1477             gic::Gic::new(
1478                 self.config.lock().unwrap().cpus.boot_vcpus,
1479                 Arc::clone(&self.msi_interrupt_manager),
1480                 self.address_manager.vm.clone(),
1481             )
1482             .map_err(DeviceManagerError::CreateInterruptController)?,
1483         ));
1484 
1485         self.interrupt_controller = Some(interrupt_controller.clone());
1486 
1487         // Restore the vGic if this is in the process of restoration
1488         let id = String::from(gic::GIC_SNAPSHOT_ID);
1489         if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
1490             // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
1491             if self
1492                 .cpu_manager
1493                 .lock()
1494                 .unwrap()
1495                 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
1496                 .is_err()
1497             {
1498                 info!("Failed to initialize PMU");
1499             }
1500 
1501             let vgic_state = vgic_snapshot
1502                 .to_state()
1503                 .map_err(DeviceManagerError::RestoreGetState)?;
1504             let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
1505             interrupt_controller
1506                 .lock()
1507                 .unwrap()
1508                 .restore_vgic(vgic_state, &saved_vcpu_states)
1509                 .unwrap();
1510         }
1511 
1512         self.device_tree
1513             .lock()
1514             .unwrap()
1515             .insert(id.clone(), device_node!(id, interrupt_controller));
1516 
1517         Ok(interrupt_controller)
1518     }
1519 
    /// Returns a reference to the stored GIC handle, or `None` when no
    /// interrupt controller has been stored in the manager yet.
    #[cfg(target_arch = "aarch64")]
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }
1524 
1525     #[cfg(target_arch = "x86_64")]
1526     fn add_interrupt_controller(
1527         &mut self,
1528     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1529         let id = String::from(IOAPIC_DEVICE_NAME);
1530 
1531         // Create IOAPIC
1532         let interrupt_controller = Arc::new(Mutex::new(
1533             ioapic::Ioapic::new(
1534                 id.clone(),
1535                 APIC_START,
1536                 Arc::clone(&self.msi_interrupt_manager),
1537                 state_from_id(self.snapshot.as_ref(), id.as_str())
1538                     .map_err(DeviceManagerError::RestoreGetState)?,
1539             )
1540             .map_err(DeviceManagerError::CreateInterruptController)?,
1541         ));
1542 
1543         self.interrupt_controller = Some(interrupt_controller.clone());
1544 
1545         self.address_manager
1546             .mmio_bus
1547             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1548             .map_err(DeviceManagerError::BusError)?;
1549 
1550         self.bus_devices
1551             .push(Arc::clone(&interrupt_controller) as Arc<dyn BusDeviceSync>);
1552 
1553         // Fill the device tree with a new node. In case of restore, we
1554         // know there is nothing to do, so we can simply override the
1555         // existing entry.
1556         self.device_tree
1557             .lock()
1558             .unwrap()
1559             .insert(id.clone(), device_node!(id, interrupt_controller));
1560 
1561         Ok(interrupt_controller)
1562     }
1563 
1564     fn add_acpi_devices(
1565         &mut self,
1566         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1567         reset_evt: EventFd,
1568         exit_evt: EventFd,
1569     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1570         let vcpus_kill_signalled = self
1571             .cpu_manager
1572             .lock()
1573             .unwrap()
1574             .vcpus_kill_signalled()
1575             .clone();
1576         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1577             exit_evt,
1578             reset_evt,
1579             vcpus_kill_signalled,
1580         )));
1581 
1582         self.bus_devices
1583             .push(Arc::clone(&shutdown_device) as Arc<dyn BusDeviceSync>);
1584 
1585         #[cfg(target_arch = "x86_64")]
1586         {
1587             let shutdown_pio_address: u16 = 0x600;
1588 
1589             self.address_manager
1590                 .allocator
1591                 .lock()
1592                 .unwrap()
1593                 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
1594                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1595 
1596             self.address_manager
1597                 .io_bus
1598                 .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
1599                 .map_err(DeviceManagerError::BusError)?;
1600 
1601             self.acpi_platform_addresses.sleep_control_reg_address =
1602                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1603             self.acpi_platform_addresses.sleep_status_reg_address =
1604                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1605             self.acpi_platform_addresses.reset_reg_address =
1606                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1607         }
1608 
1609         let ged_irq = self
1610             .address_manager
1611             .allocator
1612             .lock()
1613             .unwrap()
1614             .allocate_irq()
1615             .unwrap();
1616         let interrupt_group = interrupt_manager
1617             .create_group(LegacyIrqGroupConfig {
1618                 irq: ged_irq as InterruptIndex,
1619             })
1620             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1621         let ged_address = self
1622             .address_manager
1623             .allocator
1624             .lock()
1625             .unwrap()
1626             .allocate_platform_mmio_addresses(
1627                 None,
1628                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1629                 None,
1630             )
1631             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1632         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1633             interrupt_group,
1634             ged_irq,
1635             ged_address,
1636         )));
1637         self.address_manager
1638             .mmio_bus
1639             .insert(
1640                 ged_device.clone(),
1641                 ged_address.0,
1642                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1643             )
1644             .map_err(DeviceManagerError::BusError)?;
1645         self.bus_devices
1646             .push(Arc::clone(&ged_device) as Arc<dyn BusDeviceSync>);
1647 
1648         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1649 
1650         self.bus_devices
1651             .push(Arc::clone(&pm_timer_device) as Arc<dyn BusDeviceSync>);
1652 
1653         #[cfg(target_arch = "x86_64")]
1654         {
1655             let pm_timer_pio_address: u16 = 0x608;
1656 
1657             self.address_manager
1658                 .allocator
1659                 .lock()
1660                 .unwrap()
1661                 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
1662                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1663 
1664             self.address_manager
1665                 .io_bus
1666                 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
1667                 .map_err(DeviceManagerError::BusError)?;
1668 
1669             self.acpi_platform_addresses.pm_timer_address =
1670                 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
1671         }
1672 
1673         Ok(Some(ged_device))
1674     }
1675 
1676     #[cfg(target_arch = "x86_64")]
1677     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1678         let vcpus_kill_signalled = self
1679             .cpu_manager
1680             .lock()
1681             .unwrap()
1682             .vcpus_kill_signalled()
1683             .clone();
1684         // Add a shutdown device (i8042)
1685         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1686             reset_evt.try_clone().unwrap(),
1687             vcpus_kill_signalled.clone(),
1688         )));
1689 
1690         self.bus_devices
1691             .push(Arc::clone(&i8042) as Arc<dyn BusDeviceSync>);
1692 
1693         self.address_manager
1694             .io_bus
1695             .insert(i8042, 0x61, 0x4)
1696             .map_err(DeviceManagerError::BusError)?;
1697         {
1698             // Add a CMOS emulated device
1699             let mem_size = self
1700                 .memory_manager
1701                 .lock()
1702                 .unwrap()
1703                 .guest_memory()
1704                 .memory()
1705                 .last_addr()
1706                 .0
1707                 + 1;
1708             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1709             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1710 
1711             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1712                 mem_below_4g,
1713                 mem_above_4g,
1714                 reset_evt,
1715                 Some(vcpus_kill_signalled),
1716             )));
1717 
1718             self.bus_devices
1719                 .push(Arc::clone(&cmos) as Arc<dyn BusDeviceSync>);
1720 
1721             self.address_manager
1722                 .io_bus
1723                 .insert(cmos, 0x70, 0x2)
1724                 .map_err(DeviceManagerError::BusError)?;
1725 
1726             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1727 
1728             self.bus_devices
1729                 .push(Arc::clone(&fwdebug) as Arc<dyn BusDeviceSync>);
1730 
1731             self.address_manager
1732                 .io_bus
1733                 .insert(fwdebug, 0x402, 0x1)
1734                 .map_err(DeviceManagerError::BusError)?;
1735         }
1736 
1737         // 0x80 debug port
1738         let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
1739         self.bus_devices
1740             .push(Arc::clone(&debug_port) as Arc<dyn BusDeviceSync>);
1741         self.address_manager
1742             .io_bus
1743             .insert(debug_port, 0x80, 0x1)
1744             .map_err(DeviceManagerError::BusError)?;
1745 
1746         Ok(())
1747     }
1748 
1749     #[cfg(target_arch = "aarch64")]
1750     fn add_legacy_devices(
1751         &mut self,
1752         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1753     ) -> DeviceManagerResult<()> {
1754         // Add a RTC device
1755         let rtc_irq = self
1756             .address_manager
1757             .allocator
1758             .lock()
1759             .unwrap()
1760             .allocate_irq()
1761             .unwrap();
1762 
1763         let interrupt_group = interrupt_manager
1764             .create_group(LegacyIrqGroupConfig {
1765                 irq: rtc_irq as InterruptIndex,
1766             })
1767             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1768 
1769         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1770 
1771         self.bus_devices
1772             .push(Arc::clone(&rtc_device) as Arc<dyn BusDeviceSync>);
1773 
1774         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1775 
1776         self.address_manager
1777             .mmio_bus
1778             .insert(rtc_device, addr.0, MMIO_LEN)
1779             .map_err(DeviceManagerError::BusError)?;
1780 
1781         self.id_to_dev_info.insert(
1782             (DeviceType::Rtc, "rtc".to_string()),
1783             MmioDeviceInfo {
1784                 addr: addr.0,
1785                 len: MMIO_LEN,
1786                 irq: rtc_irq,
1787             },
1788         );
1789 
1790         // Add a GPIO device
1791         let id = String::from(GPIO_DEVICE_NAME);
1792         let gpio_irq = self
1793             .address_manager
1794             .allocator
1795             .lock()
1796             .unwrap()
1797             .allocate_irq()
1798             .unwrap();
1799 
1800         let interrupt_group = interrupt_manager
1801             .create_group(LegacyIrqGroupConfig {
1802                 irq: gpio_irq as InterruptIndex,
1803             })
1804             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1805 
1806         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1807             id.clone(),
1808             interrupt_group,
1809             state_from_id(self.snapshot.as_ref(), id.as_str())
1810                 .map_err(DeviceManagerError::RestoreGetState)?,
1811         )));
1812 
1813         self.bus_devices
1814             .push(Arc::clone(&gpio_device) as Arc<dyn BusDeviceSync>);
1815 
1816         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1817 
1818         self.address_manager
1819             .mmio_bus
1820             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1821             .map_err(DeviceManagerError::BusError)?;
1822 
1823         self.gpio_device = Some(gpio_device.clone());
1824 
1825         self.id_to_dev_info.insert(
1826             (DeviceType::Gpio, "gpio".to_string()),
1827             MmioDeviceInfo {
1828                 addr: addr.0,
1829                 len: MMIO_LEN,
1830                 irq: gpio_irq,
1831             },
1832         );
1833 
1834         self.device_tree
1835             .lock()
1836             .unwrap()
1837             .insert(id.clone(), device_node!(id, gpio_device));
1838 
1839         Ok(())
1840     }
1841 
1842     #[cfg(target_arch = "x86_64")]
1843     fn add_debug_console_device(
1844         &mut self,
1845         debug_console_writer: Box<dyn io::Write + Send>,
1846     ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> {
1847         let id = String::from(DEBUGCON_DEVICE_NAME);
1848         let debug_console = Arc::new(Mutex::new(DebugConsole::new(
1849             id.clone(),
1850             debug_console_writer,
1851         )));
1852 
1853         let port = self
1854             .config
1855             .lock()
1856             .unwrap()
1857             .debug_console
1858             .clone()
1859             .iobase
1860             .map(|port| port as u64)
1861             .unwrap_or(debug_console::DEFAULT_PORT);
1862 
1863         self.bus_devices
1864             .push(Arc::clone(&debug_console) as Arc<dyn BusDeviceSync>);
1865 
1866         self.address_manager
1867             .allocator
1868             .lock()
1869             .unwrap()
1870             .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None)
1871             .ok_or(DeviceManagerError::AllocateIoPort)?;
1872 
1873         self.address_manager
1874             .io_bus
1875             .insert(debug_console.clone(), port, 0x1)
1876             .map_err(DeviceManagerError::BusError)?;
1877 
1878         // Fill the device tree with a new node. In case of restore, we
1879         // know there is nothing to do, so we can simply override the
1880         // existing entry.
1881         self.device_tree
1882             .lock()
1883             .unwrap()
1884             .insert(id.clone(), device_node!(id, debug_console));
1885 
1886         Ok(debug_console)
1887     }
1888 
1889     #[cfg(target_arch = "x86_64")]
1890     fn add_serial_device(
1891         &mut self,
1892         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1893         serial_writer: Option<Box<dyn io::Write + Send>>,
1894     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1895         // Serial is tied to IRQ #4
1896         let serial_irq = 4;
1897 
1898         let id = String::from(SERIAL_DEVICE_NAME);
1899 
1900         let interrupt_group = interrupt_manager
1901             .create_group(LegacyIrqGroupConfig {
1902                 irq: serial_irq as InterruptIndex,
1903             })
1904             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1905 
1906         let serial = Arc::new(Mutex::new(Serial::new(
1907             id.clone(),
1908             interrupt_group,
1909             serial_writer,
1910             state_from_id(self.snapshot.as_ref(), id.as_str())
1911                 .map_err(DeviceManagerError::RestoreGetState)?,
1912         )));
1913 
1914         self.bus_devices
1915             .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);
1916 
1917         self.address_manager
1918             .allocator
1919             .lock()
1920             .unwrap()
1921             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1922             .ok_or(DeviceManagerError::AllocateIoPort)?;
1923 
1924         self.address_manager
1925             .io_bus
1926             .insert(serial.clone(), 0x3f8, 0x8)
1927             .map_err(DeviceManagerError::BusError)?;
1928 
1929         // Fill the device tree with a new node. In case of restore, we
1930         // know there is nothing to do, so we can simply override the
1931         // existing entry.
1932         self.device_tree
1933             .lock()
1934             .unwrap()
1935             .insert(id.clone(), device_node!(id, serial));
1936 
1937         Ok(serial)
1938     }
1939 
1940     #[cfg(target_arch = "aarch64")]
1941     fn add_serial_device(
1942         &mut self,
1943         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1944         serial_writer: Option<Box<dyn io::Write + Send>>,
1945     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1946         let id = String::from(SERIAL_DEVICE_NAME);
1947 
1948         let serial_irq = self
1949             .address_manager
1950             .allocator
1951             .lock()
1952             .unwrap()
1953             .allocate_irq()
1954             .unwrap();
1955 
1956         let interrupt_group = interrupt_manager
1957             .create_group(LegacyIrqGroupConfig {
1958                 irq: serial_irq as InterruptIndex,
1959             })
1960             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1961 
1962         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1963             id.clone(),
1964             interrupt_group,
1965             serial_writer,
1966             self.timestamp,
1967             state_from_id(self.snapshot.as_ref(), id.as_str())
1968                 .map_err(DeviceManagerError::RestoreGetState)?,
1969         )));
1970 
1971         self.bus_devices
1972             .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);
1973 
1974         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
1975 
1976         self.address_manager
1977             .mmio_bus
1978             .insert(serial.clone(), addr.0, MMIO_LEN)
1979             .map_err(DeviceManagerError::BusError)?;
1980 
1981         self.id_to_dev_info.insert(
1982             (DeviceType::Serial, DeviceType::Serial.to_string()),
1983             MmioDeviceInfo {
1984                 addr: addr.0,
1985                 len: MMIO_LEN,
1986                 irq: serial_irq,
1987             },
1988         );
1989 
1990         self.cmdline_additions
1991             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1992 
1993         // Fill the device tree with a new node. In case of restore, we
1994         // know there is nothing to do, so we can simply override the
1995         // existing entry.
1996         self.device_tree
1997             .lock()
1998             .unwrap()
1999             .insert(id.clone(), device_node!(id, serial));
2000 
2001         Ok(serial)
2002     }
2003 
2004     fn add_virtio_console_device(
2005         &mut self,
2006         virtio_devices: &mut Vec<MetaVirtioDevice>,
2007         console_fd: ConsoleOutput,
2008         resize_pipe: Option<Arc<File>>,
2009     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
2010         let console_config = self.config.lock().unwrap().console.clone();
2011         let endpoint = match console_fd {
2012             ConsoleOutput::File(file) => Endpoint::File(file),
2013             ConsoleOutput::Pty(file) => {
2014                 self.console_resize_pipe = resize_pipe;
2015                 Endpoint::PtyPair(Arc::new(file.try_clone().unwrap()), file)
2016             }
2017             ConsoleOutput::Tty(stdout) => {
2018                 if stdout.is_terminal() {
2019                     self.console_resize_pipe = resize_pipe;
2020                 }
2021 
2022                 // If an interactive TTY then we can accept input
2023                 // SAFETY: FFI call. Trivially safe.
2024                 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
2025                     // SAFETY: FFI call to dup. Trivially safe.
2026                     let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
2027                     if stdin == -1 {
2028                         return vmm_sys_util::errno::errno_result()
2029                             .map_err(DeviceManagerError::DupFd);
2030                     }
2031                     // SAFETY: stdin is valid and owned solely by us.
2032                     let stdin = unsafe { File::from_raw_fd(stdin) };
2033                     Endpoint::FilePair(stdout, Arc::new(stdin))
2034                 } else {
2035                     Endpoint::File(stdout)
2036                 }
2037             }
2038             ConsoleOutput::Socket(_) => {
2039                 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
2040             }
2041             ConsoleOutput::Null => Endpoint::Null,
2042             ConsoleOutput::Off => return Ok(None),
2043         };
2044         let id = String::from(CONSOLE_DEVICE_NAME);
2045 
2046         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
2047             id.clone(),
2048             endpoint,
2049             self.console_resize_pipe
2050                 .as_ref()
2051                 .map(|p| p.try_clone().unwrap()),
2052             self.force_iommu | console_config.iommu,
2053             self.seccomp_action.clone(),
2054             self.exit_evt
2055                 .try_clone()
2056                 .map_err(DeviceManagerError::EventFd)?,
2057             state_from_id(self.snapshot.as_ref(), id.as_str())
2058                 .map_err(DeviceManagerError::RestoreGetState)?,
2059         )
2060         .map_err(DeviceManagerError::CreateVirtioConsole)?;
2061         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
2062         virtio_devices.push(MetaVirtioDevice {
2063             virtio_device: Arc::clone(&virtio_console_device)
2064                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2065             iommu: console_config.iommu,
2066             id: id.clone(),
2067             pci_segment: 0,
2068             dma_handler: None,
2069         });
2070 
2071         // Fill the device tree with a new node. In case of restore, we
2072         // know there is nothing to do, so we can simply override the
2073         // existing entry.
2074         self.device_tree
2075             .lock()
2076             .unwrap()
2077             .insert(id.clone(), device_node!(id, virtio_console_device));
2078 
2079         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
2080         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
2081             Some(console_resizer)
2082         } else {
2083             None
2084         })
2085     }
2086 
2087     /// Adds all devices that behave like a console with respect to the VM
2088     /// configuration. This includes:
2089     /// - debug-console
2090     /// - serial-console
2091     /// - virtio-console
2092     fn add_console_devices(
2093         &mut self,
2094         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
2095         virtio_devices: &mut Vec<MetaVirtioDevice>,
2096         console_info: Option<ConsoleInfo>,
2097         console_resize_pipe: Option<Arc<File>>,
2098     ) -> DeviceManagerResult<Arc<Console>> {
2099         let serial_config = self.config.lock().unwrap().serial.clone();
2100         if console_info.is_none() {
2101             return Err(DeviceManagerError::InvalidConsoleInfo);
2102         }
2103 
2104         // SAFETY: console_info is Some, so it's safe to unwrap.
2105         let console_info = console_info.unwrap();
2106 
2107         let serial_writer: Option<Box<dyn io::Write + Send>> = match console_info.serial_main_fd {
2108             ConsoleOutput::File(ref file) | ConsoleOutput::Tty(ref file) => {
2109                 Some(Box::new(Arc::clone(file)))
2110             }
2111             ConsoleOutput::Off
2112             | ConsoleOutput::Null
2113             | ConsoleOutput::Pty(_)
2114             | ConsoleOutput::Socket(_) => None,
2115         };
2116 
2117         if !matches!(console_info.serial_main_fd, ConsoleOutput::Off) {
2118             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
2119             self.serial_manager = match console_info.serial_main_fd {
2120                 ConsoleOutput::Pty(_) | ConsoleOutput::Tty(_) | ConsoleOutput::Socket(_) => {
2121                     let serial_manager = SerialManager::new(
2122                         serial,
2123                         console_info.serial_main_fd,
2124                         serial_config.socket,
2125                     )
2126                     .map_err(DeviceManagerError::CreateSerialManager)?;
2127                     if let Some(mut serial_manager) = serial_manager {
2128                         serial_manager
2129                             .start_thread(
2130                                 self.exit_evt
2131                                     .try_clone()
2132                                     .map_err(DeviceManagerError::EventFd)?,
2133                             )
2134                             .map_err(DeviceManagerError::SpawnSerialManager)?;
2135                         Some(Arc::new(serial_manager))
2136                     } else {
2137                         None
2138                     }
2139                 }
2140                 _ => None,
2141             };
2142         }
2143 
2144         #[cfg(target_arch = "x86_64")]
2145         {
2146             let debug_console_writer: Option<Box<dyn io::Write + Send>> =
2147                 match console_info.debug_main_fd {
2148                     ConsoleOutput::File(file) | ConsoleOutput::Tty(file) => Some(Box::new(file)),
2149                     ConsoleOutput::Off
2150                     | ConsoleOutput::Null
2151                     | ConsoleOutput::Pty(_)
2152                     | ConsoleOutput::Socket(_) => None,
2153                 };
2154             if let Some(writer) = debug_console_writer {
2155                 let _ = self.add_debug_console_device(writer)?;
2156             }
2157         }
2158 
2159         let console_resizer = self.add_virtio_console_device(
2160             virtio_devices,
2161             console_info.console_main_fd,
2162             console_resize_pipe,
2163         )?;
2164 
2165         Ok(Arc::new(Console { console_resizer }))
2166     }
2167 
2168     fn add_tpm_device(
2169         &mut self,
2170         tpm_path: PathBuf,
2171     ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
2172         // Create TPM Device
2173         let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
2174             DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
2175         })?;
2176         let tpm = Arc::new(Mutex::new(tpm));
2177 
2178         // Add TPM Device to mmio
2179         self.address_manager
2180             .mmio_bus
2181             .insert(
2182                 tpm.clone(),
2183                 arch::layout::TPM_START.0,
2184                 arch::layout::TPM_SIZE,
2185             )
2186             .map_err(DeviceManagerError::BusError)?;
2187 
2188         Ok(tpm)
2189     }
2190 
2191     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2192         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
2193 
2194         // Create "standard" virtio devices (net/block/rng)
2195         devices.append(&mut self.make_virtio_block_devices()?);
2196         devices.append(&mut self.make_virtio_net_devices()?);
2197         devices.append(&mut self.make_virtio_rng_devices()?);
2198 
2199         // Add virtio-fs if required
2200         devices.append(&mut self.make_virtio_fs_devices()?);
2201 
2202         // Add virtio-pmem if required
2203         devices.append(&mut self.make_virtio_pmem_devices()?);
2204 
2205         // Add virtio-vsock if required
2206         devices.append(&mut self.make_virtio_vsock_devices()?);
2207 
2208         devices.append(&mut self.make_virtio_mem_devices()?);
2209 
2210         // Add virtio-balloon if required
2211         devices.append(&mut self.make_virtio_balloon_devices()?);
2212 
2213         // Add virtio-watchdog device
2214         devices.append(&mut self.make_virtio_watchdog_devices()?);
2215 
2216         // Add vDPA devices if required
2217         devices.append(&mut self.make_vdpa_devices()?);
2218 
2219         Ok(devices)
2220     }
2221 
2222     // Cache whether aio is supported to avoid checking for very block device
2223     fn aio_is_supported(&mut self) -> bool {
2224         if let Some(supported) = self.aio_supported {
2225             return supported;
2226         }
2227 
2228         let supported = block_aio_is_supported();
2229         self.aio_supported = Some(supported);
2230         supported
2231     }
2232 
2233     // Cache whether io_uring is supported to avoid probing for very block device
2234     fn io_uring_is_supported(&mut self) -> bool {
2235         if let Some(supported) = self.io_uring_supported {
2236             return supported;
2237         }
2238 
2239         let supported = block_io_uring_is_supported();
2240         self.io_uring_supported = Some(supported);
2241         supported
2242     }
2243 
2244     fn make_virtio_block_device(
2245         &mut self,
2246         disk_cfg: &mut DiskConfig,
2247     ) -> DeviceManagerResult<MetaVirtioDevice> {
2248         let id = if let Some(id) = &disk_cfg.id {
2249             id.clone()
2250         } else {
2251             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
2252             disk_cfg.id = Some(id.clone());
2253             id
2254         };
2255 
2256         info!("Creating virtio-block device: {:?}", disk_cfg);
2257 
2258         let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
2259             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
2260             let vu_cfg = VhostUserConfig {
2261                 socket,
2262                 num_queues: disk_cfg.num_queues,
2263                 queue_size: disk_cfg.queue_size,
2264             };
2265             let vhost_user_block = Arc::new(Mutex::new(
2266                 match virtio_devices::vhost_user::Blk::new(
2267                     id.clone(),
2268                     vu_cfg,
2269                     self.seccomp_action.clone(),
2270                     self.exit_evt
2271                         .try_clone()
2272                         .map_err(DeviceManagerError::EventFd)?,
2273                     self.force_iommu,
2274                     state_from_id(self.snapshot.as_ref(), id.as_str())
2275                         .map_err(DeviceManagerError::RestoreGetState)?,
2276                 ) {
2277                     Ok(vub_device) => vub_device,
2278                     Err(e) => {
2279                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2280                     }
2281                 },
2282             ));
2283 
2284             (
2285                 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2286                 vhost_user_block as Arc<Mutex<dyn Migratable>>,
2287             )
2288         } else {
2289             let mut options = OpenOptions::new();
2290             options.read(true);
2291             options.write(!disk_cfg.readonly);
2292             if disk_cfg.direct {
2293                 options.custom_flags(libc::O_DIRECT);
2294             }
2295             // Open block device path
2296             let mut file: File = options
2297                 .open(
2298                     disk_cfg
2299                         .path
2300                         .as_ref()
2301                         .ok_or(DeviceManagerError::NoDiskPath)?
2302                         .clone(),
2303                 )
2304                 .map_err(DeviceManagerError::Disk)?;
2305             let image_type =
2306                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2307 
2308             let image = match image_type {
2309                 ImageType::FixedVhd => {
2310                     // Use asynchronous backend relying on io_uring if the
2311                     // syscalls are supported.
2312                     if cfg!(feature = "io_uring")
2313                         && !disk_cfg.disable_io_uring
2314                         && self.io_uring_is_supported()
2315                     {
2316                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2317 
2318                         #[cfg(not(feature = "io_uring"))]
2319                         unreachable!("Checked in if statement above");
2320                         #[cfg(feature = "io_uring")]
2321                         {
2322                             Box::new(
2323                                 FixedVhdDiskAsync::new(file)
2324                                     .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2325                             ) as Box<dyn DiskFile>
2326                         }
2327                     } else {
2328                         info!("Using synchronous fixed VHD disk file");
2329                         Box::new(
2330                             FixedVhdDiskSync::new(file)
2331                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2332                         ) as Box<dyn DiskFile>
2333                     }
2334                 }
2335                 ImageType::Raw => {
2336                     // Use asynchronous backend relying on io_uring if the
2337                     // syscalls are supported.
2338                     if cfg!(feature = "io_uring")
2339                         && !disk_cfg.disable_io_uring
2340                         && self.io_uring_is_supported()
2341                     {
2342                         info!("Using asynchronous RAW disk file (io_uring)");
2343 
2344                         #[cfg(not(feature = "io_uring"))]
2345                         unreachable!("Checked in if statement above");
2346                         #[cfg(feature = "io_uring")]
2347                         {
2348                             Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2349                         }
2350                     } else if !disk_cfg.disable_aio && self.aio_is_supported() {
2351                         info!("Using asynchronous RAW disk file (aio)");
2352                         Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
2353                     } else {
2354                         info!("Using synchronous RAW disk file");
2355                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2356                     }
2357                 }
2358                 ImageType::Qcow2 => {
2359                     info!("Using synchronous QCOW disk file");
2360                     Box::new(
2361                         QcowDiskSync::new(file, disk_cfg.direct)
2362                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2363                     ) as Box<dyn DiskFile>
2364                 }
2365                 ImageType::Vhdx => {
2366                     info!("Using synchronous VHDX disk file");
2367                     Box::new(
2368                         VhdxDiskSync::new(file)
2369                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2370                     ) as Box<dyn DiskFile>
2371                 }
2372             };
2373 
2374             let rate_limit_group =
2375                 if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
2376                     // Create an anonymous RateLimiterGroup that is dropped when the Disk
2377                     // is dropped.
2378                     let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
2379                     let ops = rate_limiter_cfg.ops.unwrap_or_default();
2380                     let mut rate_limit_group = RateLimiterGroup::new(
2381                         disk_cfg.id.as_ref().unwrap(),
2382                         bw.size,
2383                         bw.one_time_burst.unwrap_or(0),
2384                         bw.refill_time,
2385                         ops.size,
2386                         ops.one_time_burst.unwrap_or(0),
2387                         ops.refill_time,
2388                     )
2389                     .map_err(DeviceManagerError::RateLimiterGroupCreate)?;
2390 
2391                     rate_limit_group
2392                         .start_thread(
2393                             self.exit_evt
2394                                 .try_clone()
2395                                 .map_err(DeviceManagerError::EventFd)?,
2396                         )
2397                         .unwrap();
2398 
2399                     Some(Arc::new(rate_limit_group))
2400                 } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
2401                     self.rate_limit_groups.get(rate_limit_group).cloned()
2402                 } else {
2403                     None
2404                 };
2405 
2406             let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() {
2407                 queue_affinity
2408                     .iter()
2409                     .map(|a| (a.queue_index, a.host_cpus.clone()))
2410                     .collect()
2411             } else {
2412                 BTreeMap::new()
2413             };
2414 
2415             let virtio_block = Arc::new(Mutex::new(
2416                 virtio_devices::Block::new(
2417                     id.clone(),
2418                     image,
2419                     disk_cfg
2420                         .path
2421                         .as_ref()
2422                         .ok_or(DeviceManagerError::NoDiskPath)?
2423                         .clone(),
2424                     disk_cfg.readonly,
2425                     self.force_iommu | disk_cfg.iommu,
2426                     disk_cfg.num_queues,
2427                     disk_cfg.queue_size,
2428                     disk_cfg.serial.clone(),
2429                     self.seccomp_action.clone(),
2430                     rate_limit_group,
2431                     self.exit_evt
2432                         .try_clone()
2433                         .map_err(DeviceManagerError::EventFd)?,
2434                     state_from_id(self.snapshot.as_ref(), id.as_str())
2435                         .map_err(DeviceManagerError::RestoreGetState)?,
2436                     queue_affinity,
2437                 )
2438                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2439             ));
2440 
2441             (
2442                 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2443                 virtio_block as Arc<Mutex<dyn Migratable>>,
2444             )
2445         };
2446 
2447         // Fill the device tree with a new node. In case of restore, we
2448         // know there is nothing to do, so we can simply override the
2449         // existing entry.
2450         self.device_tree
2451             .lock()
2452             .unwrap()
2453             .insert(id.clone(), device_node!(id, migratable_device));
2454 
2455         Ok(MetaVirtioDevice {
2456             virtio_device,
2457             iommu: disk_cfg.iommu,
2458             id,
2459             pci_segment: disk_cfg.pci_segment,
2460             dma_handler: None,
2461         })
2462     }
2463 
2464     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2465         let mut devices = Vec::new();
2466 
2467         let mut block_devices = self.config.lock().unwrap().disks.clone();
2468         if let Some(disk_list_cfg) = &mut block_devices {
2469             for disk_cfg in disk_list_cfg.iter_mut() {
2470                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2471             }
2472         }
2473         self.config.lock().unwrap().disks = block_devices;
2474 
2475         Ok(devices)
2476     }
2477 
2478     fn make_virtio_net_device(
2479         &mut self,
2480         net_cfg: &mut NetConfig,
2481     ) -> DeviceManagerResult<MetaVirtioDevice> {
2482         let id = if let Some(id) = &net_cfg.id {
2483             id.clone()
2484         } else {
2485             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2486             net_cfg.id = Some(id.clone());
2487             id
2488         };
2489         info!("Creating virtio-net device: {:?}", net_cfg);
2490 
2491         let (virtio_device, migratable_device) = if net_cfg.vhost_user {
2492             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2493             let vu_cfg = VhostUserConfig {
2494                 socket,
2495                 num_queues: net_cfg.num_queues,
2496                 queue_size: net_cfg.queue_size,
2497             };
2498             let server = match net_cfg.vhost_mode {
2499                 VhostMode::Client => false,
2500                 VhostMode::Server => true,
2501             };
2502             let vhost_user_net = Arc::new(Mutex::new(
2503                 match virtio_devices::vhost_user::Net::new(
2504                     id.clone(),
2505                     net_cfg.mac,
2506                     net_cfg.mtu,
2507                     vu_cfg,
2508                     server,
2509                     self.seccomp_action.clone(),
2510                     self.exit_evt
2511                         .try_clone()
2512                         .map_err(DeviceManagerError::EventFd)?,
2513                     self.force_iommu,
2514                     state_from_id(self.snapshot.as_ref(), id.as_str())
2515                         .map_err(DeviceManagerError::RestoreGetState)?,
2516                     net_cfg.offload_tso,
2517                     net_cfg.offload_ufo,
2518                     net_cfg.offload_csum,
2519                 ) {
2520                     Ok(vun_device) => vun_device,
2521                     Err(e) => {
2522                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2523                     }
2524                 },
2525             ));
2526 
2527             (
2528                 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2529                 vhost_user_net as Arc<Mutex<dyn Migratable>>,
2530             )
2531         } else {
2532             let state = state_from_id(self.snapshot.as_ref(), id.as_str())
2533                 .map_err(DeviceManagerError::RestoreGetState)?;
2534             let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
2535                 Arc::new(Mutex::new(
2536                     virtio_devices::Net::new(
2537                         id.clone(),
2538                         Some(tap_if_name),
2539                         Some(net_cfg.ip),
2540                         Some(net_cfg.mask),
2541                         Some(net_cfg.mac),
2542                         &mut net_cfg.host_mac,
2543                         net_cfg.mtu,
2544                         self.force_iommu | net_cfg.iommu,
2545                         net_cfg.num_queues,
2546                         net_cfg.queue_size,
2547                         self.seccomp_action.clone(),
2548                         net_cfg.rate_limiter_config,
2549                         self.exit_evt
2550                             .try_clone()
2551                             .map_err(DeviceManagerError::EventFd)?,
2552                         state,
2553                         net_cfg.offload_tso,
2554                         net_cfg.offload_ufo,
2555                         net_cfg.offload_csum,
2556                     )
2557                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2558                 ))
2559             } else if let Some(fds) = &net_cfg.fds {
2560                 let net = virtio_devices::Net::from_tap_fds(
2561                     id.clone(),
2562                     fds,
2563                     Some(net_cfg.mac),
2564                     net_cfg.mtu,
2565                     self.force_iommu | net_cfg.iommu,
2566                     net_cfg.queue_size,
2567                     self.seccomp_action.clone(),
2568                     net_cfg.rate_limiter_config,
2569                     self.exit_evt
2570                         .try_clone()
2571                         .map_err(DeviceManagerError::EventFd)?,
2572                     state,
2573                     net_cfg.offload_tso,
2574                     net_cfg.offload_ufo,
2575                     net_cfg.offload_csum,
2576                 )
2577                 .map_err(DeviceManagerError::CreateVirtioNet)?;
2578 
2579                 // SAFETY: 'fds' are valid because TAP devices are created successfully
2580                 unsafe {
2581                     self.config.lock().unwrap().add_preserved_fds(fds.clone());
2582                 }
2583 
2584                 Arc::new(Mutex::new(net))
2585             } else {
2586                 Arc::new(Mutex::new(
2587                     virtio_devices::Net::new(
2588                         id.clone(),
2589                         None,
2590                         Some(net_cfg.ip),
2591                         Some(net_cfg.mask),
2592                         Some(net_cfg.mac),
2593                         &mut net_cfg.host_mac,
2594                         net_cfg.mtu,
2595                         self.force_iommu | net_cfg.iommu,
2596                         net_cfg.num_queues,
2597                         net_cfg.queue_size,
2598                         self.seccomp_action.clone(),
2599                         net_cfg.rate_limiter_config,
2600                         self.exit_evt
2601                             .try_clone()
2602                             .map_err(DeviceManagerError::EventFd)?,
2603                         state,
2604                         net_cfg.offload_tso,
2605                         net_cfg.offload_ufo,
2606                         net_cfg.offload_csum,
2607                     )
2608                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2609                 ))
2610             };
2611 
2612             (
2613                 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2614                 virtio_net as Arc<Mutex<dyn Migratable>>,
2615             )
2616         };
2617 
2618         // Fill the device tree with a new node. In case of restore, we
2619         // know there is nothing to do, so we can simply override the
2620         // existing entry.
2621         self.device_tree
2622             .lock()
2623             .unwrap()
2624             .insert(id.clone(), device_node!(id, migratable_device));
2625 
2626         Ok(MetaVirtioDevice {
2627             virtio_device,
2628             iommu: net_cfg.iommu,
2629             id,
2630             pci_segment: net_cfg.pci_segment,
2631             dma_handler: None,
2632         })
2633     }
2634 
2635     /// Add virto-net and vhost-user-net devices
2636     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2637         let mut devices = Vec::new();
2638         let mut net_devices = self.config.lock().unwrap().net.clone();
2639         if let Some(net_list_cfg) = &mut net_devices {
2640             for net_cfg in net_list_cfg.iter_mut() {
2641                 devices.push(self.make_virtio_net_device(net_cfg)?);
2642             }
2643         }
2644         self.config.lock().unwrap().net = net_devices;
2645 
2646         Ok(devices)
2647     }
2648 
2649     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2650         let mut devices = Vec::new();
2651 
2652         // Add virtio-rng if required
2653         let rng_config = self.config.lock().unwrap().rng.clone();
2654         if let Some(rng_path) = rng_config.src.to_str() {
2655             info!("Creating virtio-rng device: {:?}", rng_config);
2656             let id = String::from(RNG_DEVICE_NAME);
2657 
2658             let virtio_rng_device = Arc::new(Mutex::new(
2659                 virtio_devices::Rng::new(
2660                     id.clone(),
2661                     rng_path,
2662                     self.force_iommu | rng_config.iommu,
2663                     self.seccomp_action.clone(),
2664                     self.exit_evt
2665                         .try_clone()
2666                         .map_err(DeviceManagerError::EventFd)?,
2667                     state_from_id(self.snapshot.as_ref(), id.as_str())
2668                         .map_err(DeviceManagerError::RestoreGetState)?,
2669                 )
2670                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2671             ));
2672             devices.push(MetaVirtioDevice {
2673                 virtio_device: Arc::clone(&virtio_rng_device)
2674                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2675                 iommu: rng_config.iommu,
2676                 id: id.clone(),
2677                 pci_segment: 0,
2678                 dma_handler: None,
2679             });
2680 
2681             // Fill the device tree with a new node. In case of restore, we
2682             // know there is nothing to do, so we can simply override the
2683             // existing entry.
2684             self.device_tree
2685                 .lock()
2686                 .unwrap()
2687                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2688         }
2689 
2690         Ok(devices)
2691     }
2692 
2693     fn make_virtio_fs_device(
2694         &mut self,
2695         fs_cfg: &mut FsConfig,
2696     ) -> DeviceManagerResult<MetaVirtioDevice> {
2697         let id = if let Some(id) = &fs_cfg.id {
2698             id.clone()
2699         } else {
2700             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2701             fs_cfg.id = Some(id.clone());
2702             id
2703         };
2704 
2705         info!("Creating virtio-fs device: {:?}", fs_cfg);
2706 
2707         let mut node = device_node!(id);
2708 
2709         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2710             let virtio_fs_device = Arc::new(Mutex::new(
2711                 virtio_devices::vhost_user::Fs::new(
2712                     id.clone(),
2713                     fs_socket,
2714                     &fs_cfg.tag,
2715                     fs_cfg.num_queues,
2716                     fs_cfg.queue_size,
2717                     None,
2718                     self.seccomp_action.clone(),
2719                     self.exit_evt
2720                         .try_clone()
2721                         .map_err(DeviceManagerError::EventFd)?,
2722                     self.force_iommu,
2723                     state_from_id(self.snapshot.as_ref(), id.as_str())
2724                         .map_err(DeviceManagerError::RestoreGetState)?,
2725                 )
2726                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2727             ));
2728 
2729             // Update the device tree with the migratable device.
2730             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2731             self.device_tree.lock().unwrap().insert(id.clone(), node);
2732 
2733             Ok(MetaVirtioDevice {
2734                 virtio_device: Arc::clone(&virtio_fs_device)
2735                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2736                 iommu: false,
2737                 id,
2738                 pci_segment: fs_cfg.pci_segment,
2739                 dma_handler: None,
2740             })
2741         } else {
2742             Err(DeviceManagerError::NoVirtioFsSock)
2743         }
2744     }
2745 
2746     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2747         let mut devices = Vec::new();
2748 
2749         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2750         if let Some(fs_list_cfg) = &mut fs_devices {
2751             for fs_cfg in fs_list_cfg.iter_mut() {
2752                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2753             }
2754         }
2755         self.config.lock().unwrap().fs = fs_devices;
2756 
2757         Ok(devices)
2758     }
2759 
2760     fn make_virtio_pmem_device(
2761         &mut self,
2762         pmem_cfg: &mut PmemConfig,
2763     ) -> DeviceManagerResult<MetaVirtioDevice> {
2764         let id = if let Some(id) = &pmem_cfg.id {
2765             id.clone()
2766         } else {
2767             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2768             pmem_cfg.id = Some(id.clone());
2769             id
2770         };
2771 
2772         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2773 
2774         let mut node = device_node!(id);
2775 
2776         // Look for the id in the device tree. If it can be found, that means
2777         // the device is being restored, otherwise it's created from scratch.
2778         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2779             info!("Restoring virtio-pmem {} resources", id);
2780 
2781             let mut region_range: Option<(u64, u64)> = None;
2782             for resource in node.resources.iter() {
2783                 match resource {
2784                     Resource::MmioAddressRange { base, size } => {
2785                         if region_range.is_some() {
2786                             return Err(DeviceManagerError::ResourceAlreadyExists);
2787                         }
2788 
2789                         region_range = Some((*base, *size));
2790                     }
2791                     _ => {
2792                         error!("Unexpected resource {:?} for {}", resource, id);
2793                     }
2794                 }
2795             }
2796 
2797             if region_range.is_none() {
2798                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2799             }
2800 
2801             region_range
2802         } else {
2803             None
2804         };
2805 
2806         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2807             if pmem_cfg.size.is_none() {
2808                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2809             }
2810             (O_TMPFILE, true)
2811         } else {
2812             (0, false)
2813         };
2814 
2815         let mut file = OpenOptions::new()
2816             .read(true)
2817             .write(!pmem_cfg.discard_writes)
2818             .custom_flags(custom_flags)
2819             .open(&pmem_cfg.file)
2820             .map_err(DeviceManagerError::PmemFileOpen)?;
2821 
2822         let size = if let Some(size) = pmem_cfg.size {
2823             if set_len {
2824                 file.set_len(size)
2825                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2826             }
2827             size
2828         } else {
2829             file.seek(SeekFrom::End(0))
2830                 .map_err(DeviceManagerError::PmemFileSetLen)?
2831         };
2832 
2833         if size % 0x20_0000 != 0 {
2834             return Err(DeviceManagerError::PmemSizeNotAligned);
2835         }
2836 
2837         let (region_base, region_size) = if let Some((base, size)) = region_range {
2838             // The memory needs to be 2MiB aligned in order to support
2839             // hugepages.
2840             self.pci_segments[pmem_cfg.pci_segment as usize]
2841                 .mem64_allocator
2842                 .lock()
2843                 .unwrap()
2844                 .allocate(
2845                     Some(GuestAddress(base)),
2846                     size as GuestUsize,
2847                     Some(0x0020_0000),
2848                 )
2849                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2850 
2851             (base, size)
2852         } else {
2853             // The memory needs to be 2MiB aligned in order to support
2854             // hugepages.
2855             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2856                 .mem64_allocator
2857                 .lock()
2858                 .unwrap()
2859                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
2860                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2861 
2862             (base.raw_value(), size)
2863         };
2864 
2865         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2866         let mmap_region = MmapRegion::build(
2867             Some(FileOffset::new(cloned_file, 0)),
2868             region_size as usize,
2869             PROT_READ | PROT_WRITE,
2870             MAP_NORESERVE
2871                 | if pmem_cfg.discard_writes {
2872                     MAP_PRIVATE
2873                 } else {
2874                     MAP_SHARED
2875                 },
2876         )
2877         .map_err(DeviceManagerError::NewMmapRegion)?;
2878         let host_addr: u64 = mmap_region.as_ptr() as u64;
2879 
2880         let mem_slot = self
2881             .memory_manager
2882             .lock()
2883             .unwrap()
2884             .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
2885             .map_err(DeviceManagerError::MemoryManager)?;
2886 
2887         let mapping = virtio_devices::UserspaceMapping {
2888             host_addr,
2889             mem_slot,
2890             addr: GuestAddress(region_base),
2891             len: region_size,
2892             mergeable: false,
2893         };
2894 
2895         let virtio_pmem_device = Arc::new(Mutex::new(
2896             virtio_devices::Pmem::new(
2897                 id.clone(),
2898                 file,
2899                 GuestAddress(region_base),
2900                 mapping,
2901                 mmap_region,
2902                 self.force_iommu | pmem_cfg.iommu,
2903                 self.seccomp_action.clone(),
2904                 self.exit_evt
2905                     .try_clone()
2906                     .map_err(DeviceManagerError::EventFd)?,
2907                 state_from_id(self.snapshot.as_ref(), id.as_str())
2908                     .map_err(DeviceManagerError::RestoreGetState)?,
2909             )
2910             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2911         ));
2912 
2913         // Update the device tree with correct resource information and with
2914         // the migratable device.
2915         node.resources.push(Resource::MmioAddressRange {
2916             base: region_base,
2917             size: region_size,
2918         });
2919         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2920         self.device_tree.lock().unwrap().insert(id.clone(), node);
2921 
2922         Ok(MetaVirtioDevice {
2923             virtio_device: Arc::clone(&virtio_pmem_device)
2924                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2925             iommu: pmem_cfg.iommu,
2926             id,
2927             pci_segment: pmem_cfg.pci_segment,
2928             dma_handler: None,
2929         })
2930     }
2931 
2932     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2933         let mut devices = Vec::new();
2934         // Add virtio-pmem if required
2935         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2936         if let Some(pmem_list_cfg) = &mut pmem_devices {
2937             for pmem_cfg in pmem_list_cfg.iter_mut() {
2938                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2939             }
2940         }
2941         self.config.lock().unwrap().pmem = pmem_devices;
2942 
2943         Ok(devices)
2944     }
2945 
2946     fn make_virtio_vsock_device(
2947         &mut self,
2948         vsock_cfg: &mut VsockConfig,
2949     ) -> DeviceManagerResult<MetaVirtioDevice> {
2950         let id = if let Some(id) = &vsock_cfg.id {
2951             id.clone()
2952         } else {
2953             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2954             vsock_cfg.id = Some(id.clone());
2955             id
2956         };
2957 
2958         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2959 
2960         let socket_path = vsock_cfg
2961             .socket
2962             .to_str()
2963             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2964         let backend =
2965             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2966                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2967 
2968         let vsock_device = Arc::new(Mutex::new(
2969             virtio_devices::Vsock::new(
2970                 id.clone(),
2971                 vsock_cfg.cid,
2972                 vsock_cfg.socket.clone(),
2973                 backend,
2974                 self.force_iommu | vsock_cfg.iommu,
2975                 self.seccomp_action.clone(),
2976                 self.exit_evt
2977                     .try_clone()
2978                     .map_err(DeviceManagerError::EventFd)?,
2979                 state_from_id(self.snapshot.as_ref(), id.as_str())
2980                     .map_err(DeviceManagerError::RestoreGetState)?,
2981             )
2982             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2983         ));
2984 
2985         // Fill the device tree with a new node. In case of restore, we
2986         // know there is nothing to do, so we can simply override the
2987         // existing entry.
2988         self.device_tree
2989             .lock()
2990             .unwrap()
2991             .insert(id.clone(), device_node!(id, vsock_device));
2992 
2993         Ok(MetaVirtioDevice {
2994             virtio_device: Arc::clone(&vsock_device)
2995                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2996             iommu: vsock_cfg.iommu,
2997             id,
2998             pci_segment: vsock_cfg.pci_segment,
2999             dma_handler: None,
3000         })
3001     }
3002 
3003     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3004         let mut devices = Vec::new();
3005 
3006         let mut vsock = self.config.lock().unwrap().vsock.clone();
3007         if let Some(ref mut vsock_cfg) = &mut vsock {
3008             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
3009         }
3010         self.config.lock().unwrap().vsock = vsock;
3011 
3012         Ok(devices)
3013     }
3014 
3015     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3016         let mut devices = Vec::new();
3017 
3018         let mm = self.memory_manager.clone();
3019         let mut mm = mm.lock().unwrap();
3020         for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
3021             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
3022                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
3023 
3024                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
3025                     .map(|i| i as u16);
3026 
3027                 let virtio_mem_device = Arc::new(Mutex::new(
3028                     virtio_devices::Mem::new(
3029                         memory_zone_id.clone(),
3030                         virtio_mem_zone.region(),
3031                         self.seccomp_action.clone(),
3032                         node_id,
3033                         virtio_mem_zone.hotplugged_size(),
3034                         virtio_mem_zone.hugepages(),
3035                         self.exit_evt
3036                             .try_clone()
3037                             .map_err(DeviceManagerError::EventFd)?,
3038                         virtio_mem_zone.blocks_state().clone(),
3039                         state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
3040                             .map_err(DeviceManagerError::RestoreGetState)?,
3041                     )
3042                     .map_err(DeviceManagerError::CreateVirtioMem)?,
3043                 ));
3044 
3045                 // Update the virtio-mem zone so that it has a handle onto the
3046                 // virtio-mem device, which will be used for triggering a resize
3047                 // if needed.
3048                 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));
3049 
3050                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
3051 
3052                 devices.push(MetaVirtioDevice {
3053                     virtio_device: Arc::clone(&virtio_mem_device)
3054                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3055                     iommu: false,
3056                     id: memory_zone_id.clone(),
3057                     pci_segment: 0,
3058                     dma_handler: None,
3059                 });
3060 
3061                 // Fill the device tree with a new node. In case of restore, we
3062                 // know there is nothing to do, so we can simply override the
3063                 // existing entry.
3064                 self.device_tree.lock().unwrap().insert(
3065                     memory_zone_id.clone(),
3066                     device_node!(memory_zone_id, virtio_mem_device),
3067                 );
3068             }
3069         }
3070 
3071         Ok(devices)
3072     }
3073 
3074     #[cfg(feature = "pvmemcontrol")]
3075     fn make_pvmemcontrol_device(
3076         &mut self,
3077     ) -> DeviceManagerResult<(
3078         Arc<PvmemcontrolBusDevice>,
3079         Arc<Mutex<PvmemcontrolPciDevice>>,
3080     )> {
3081         let id = String::from(PVMEMCONTROL_DEVICE_NAME);
3082         let pci_segment_id = 0x0_u16;
3083 
3084         let (pci_segment_id, pci_device_bdf, resources) =
3085             self.pci_resources(&id, pci_segment_id)?;
3086 
3087         info!("Creating pvmemcontrol device: id = {}", id);
3088         let (pvmemcontrol_pci_device, pvmemcontrol_bus_device) =
3089             devices::pvmemcontrol::PvmemcontrolDevice::make_device(
3090                 id.clone(),
3091                 self.memory_manager.lock().unwrap().guest_memory(),
3092             );
3093 
3094         let pvmemcontrol_pci_device = Arc::new(Mutex::new(pvmemcontrol_pci_device));
3095         let pvmemcontrol_bus_device = Arc::new(pvmemcontrol_bus_device);
3096 
3097         let new_resources = self.add_pci_device(
3098             pvmemcontrol_bus_device.clone(),
3099             pvmemcontrol_pci_device.clone(),
3100             pci_segment_id,
3101             pci_device_bdf,
3102             resources,
3103         )?;
3104 
3105         let mut node = device_node!(id, pvmemcontrol_pci_device);
3106 
3107         node.resources = new_resources;
3108         node.pci_bdf = Some(pci_device_bdf);
3109         node.pci_device_handle = None;
3110 
3111         self.device_tree.lock().unwrap().insert(id, node);
3112 
3113         Ok((pvmemcontrol_bus_device, pvmemcontrol_pci_device))
3114     }
3115 
3116     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3117         let mut devices = Vec::new();
3118 
3119         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
3120             let id = String::from(BALLOON_DEVICE_NAME);
3121             info!("Creating virtio-balloon device: id = {}", id);
3122 
3123             let virtio_balloon_device = Arc::new(Mutex::new(
3124                 virtio_devices::Balloon::new(
3125                     id.clone(),
3126                     balloon_config.size,
3127                     balloon_config.deflate_on_oom,
3128                     balloon_config.free_page_reporting,
3129                     self.seccomp_action.clone(),
3130                     self.exit_evt
3131                         .try_clone()
3132                         .map_err(DeviceManagerError::EventFd)?,
3133                     state_from_id(self.snapshot.as_ref(), id.as_str())
3134                         .map_err(DeviceManagerError::RestoreGetState)?,
3135                 )
3136                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
3137             ));
3138 
3139             self.balloon = Some(virtio_balloon_device.clone());
3140 
3141             devices.push(MetaVirtioDevice {
3142                 virtio_device: Arc::clone(&virtio_balloon_device)
3143                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3144                 iommu: false,
3145                 id: id.clone(),
3146                 pci_segment: 0,
3147                 dma_handler: None,
3148             });
3149 
3150             self.device_tree
3151                 .lock()
3152                 .unwrap()
3153                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
3154         }
3155 
3156         Ok(devices)
3157     }
3158 
3159     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3160         let mut devices = Vec::new();
3161 
3162         if !self.config.lock().unwrap().watchdog {
3163             return Ok(devices);
3164         }
3165 
3166         let id = String::from(WATCHDOG_DEVICE_NAME);
3167         info!("Creating virtio-watchdog device: id = {}", id);
3168 
3169         let virtio_watchdog_device = Arc::new(Mutex::new(
3170             virtio_devices::Watchdog::new(
3171                 id.clone(),
3172                 self.reset_evt.try_clone().unwrap(),
3173                 self.seccomp_action.clone(),
3174                 self.exit_evt
3175                     .try_clone()
3176                     .map_err(DeviceManagerError::EventFd)?,
3177                 state_from_id(self.snapshot.as_ref(), id.as_str())
3178                     .map_err(DeviceManagerError::RestoreGetState)?,
3179             )
3180             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
3181         ));
3182         devices.push(MetaVirtioDevice {
3183             virtio_device: Arc::clone(&virtio_watchdog_device)
3184                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3185             iommu: false,
3186             id: id.clone(),
3187             pci_segment: 0,
3188             dma_handler: None,
3189         });
3190 
3191         self.device_tree
3192             .lock()
3193             .unwrap()
3194             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
3195 
3196         Ok(devices)
3197     }
3198 
3199     fn make_vdpa_device(
3200         &mut self,
3201         vdpa_cfg: &mut VdpaConfig,
3202     ) -> DeviceManagerResult<MetaVirtioDevice> {
3203         let id = if let Some(id) = &vdpa_cfg.id {
3204             id.clone()
3205         } else {
3206             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
3207             vdpa_cfg.id = Some(id.clone());
3208             id
3209         };
3210 
3211         info!("Creating vDPA device: {:?}", vdpa_cfg);
3212 
3213         let device_path = vdpa_cfg
3214             .path
3215             .to_str()
3216             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
3217 
3218         let vdpa_device = Arc::new(Mutex::new(
3219             virtio_devices::Vdpa::new(
3220                 id.clone(),
3221                 device_path,
3222                 self.memory_manager.lock().unwrap().guest_memory(),
3223                 vdpa_cfg.num_queues as u16,
3224                 state_from_id(self.snapshot.as_ref(), id.as_str())
3225                     .map_err(DeviceManagerError::RestoreGetState)?,
3226             )
3227             .map_err(DeviceManagerError::CreateVdpa)?,
3228         ));
3229 
3230         // Create the DMA handler that is required by the vDPA device
3231         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
3232             Arc::clone(&vdpa_device),
3233             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3234         ));
3235 
3236         self.device_tree
3237             .lock()
3238             .unwrap()
3239             .insert(id.clone(), device_node!(id, vdpa_device));
3240 
3241         Ok(MetaVirtioDevice {
3242             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3243             iommu: vdpa_cfg.iommu,
3244             id,
3245             pci_segment: vdpa_cfg.pci_segment,
3246             dma_handler: Some(vdpa_mapping),
3247         })
3248     }
3249 
3250     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3251         let mut devices = Vec::new();
3252         // Add vdpa if required
3253         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
3254         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
3255             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
3256                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
3257             }
3258         }
3259         self.config.lock().unwrap().vdpa = vdpa_devices;
3260 
3261         Ok(devices)
3262     }
3263 
3264     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
3265         let start_id = self.device_id_cnt;
3266         loop {
3267             // Generate the temporary name.
3268             let name = format!("{}{}", prefix, self.device_id_cnt);
3269             // Increment the counter.
3270             self.device_id_cnt += Wrapping(1);
3271             // Check if the name is already in use.
3272             if !self.boot_id_list.contains(&name)
3273                 && !self.device_tree.lock().unwrap().contains_key(&name)
3274             {
3275                 return Ok(name);
3276             }
3277 
3278             if self.device_id_cnt == start_id {
3279                 // We went through a full loop and there's nothing else we can
3280                 // do.
3281                 break;
3282             }
3283         }
3284         Err(DeviceManagerError::NoAvailableDeviceName)
3285     }
3286 
3287     fn add_passthrough_device(
3288         &mut self,
3289         device_cfg: &mut DeviceConfig,
3290     ) -> DeviceManagerResult<(PciBdf, String)> {
3291         // If the passthrough device has not been created yet, it is created
3292         // here and stored in the DeviceManager structure for future needs.
3293         if self.passthrough_device.is_none() {
3294             self.passthrough_device = Some(
3295                 self.address_manager
3296                     .vm
3297                     .create_passthrough_device()
3298                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
3299             );
3300         }
3301 
3302         self.add_vfio_device(device_cfg)
3303     }
3304 
3305     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
3306         let passthrough_device = self
3307             .passthrough_device
3308             .as_ref()
3309             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
3310 
3311         let dup = passthrough_device
3312             .try_clone()
3313             .map_err(DeviceManagerError::VfioCreate)?;
3314 
3315         Ok(Arc::new(
3316             VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
3317         ))
3318     }
3319 
3320     fn add_vfio_device(
3321         &mut self,
3322         device_cfg: &mut DeviceConfig,
3323     ) -> DeviceManagerResult<(PciBdf, String)> {
3324         let vfio_name = if let Some(id) = &device_cfg.id {
3325             id.clone()
3326         } else {
3327             let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
3328             device_cfg.id = Some(id.clone());
3329             id
3330         };
3331 
3332         let (pci_segment_id, pci_device_bdf, resources) =
3333             self.pci_resources(&vfio_name, device_cfg.pci_segment)?;
3334 
3335         let mut needs_dma_mapping = false;
3336 
3337         // Here we create a new VFIO container for two reasons. Either this is
3338         // the first VFIO device, meaning we need a new VFIO container, which
3339         // will be shared with other VFIO devices. Or the new VFIO device is
3340         // attached to a vIOMMU, meaning we must create a dedicated VFIO
3341         // container. In the vIOMMU use case, we can't let all devices under
3342         // the same VFIO container since we couldn't map/unmap memory for each
3343         // device. That's simply because the map/unmap operations happen at the
3344         // VFIO container level.
3345         let vfio_container = if device_cfg.iommu {
3346             let vfio_container = self.create_vfio_container()?;
3347 
3348             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3349                 Arc::clone(&vfio_container),
3350                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3351                 Arc::clone(&self.mmio_regions),
3352             ));
3353 
3354             if let Some(iommu) = &self.iommu_device {
3355                 iommu
3356                     .lock()
3357                     .unwrap()
3358                     .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
3359             } else {
3360                 return Err(DeviceManagerError::MissingVirtualIommu);
3361             }
3362 
3363             vfio_container
3364         } else if let Some(vfio_container) = &self.vfio_container {
3365             Arc::clone(vfio_container)
3366         } else {
3367             let vfio_container = self.create_vfio_container()?;
3368             needs_dma_mapping = true;
3369             self.vfio_container = Some(Arc::clone(&vfio_container));
3370 
3371             vfio_container
3372         };
3373 
3374         let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
3375             .map_err(DeviceManagerError::VfioCreate)?;
3376 
3377         if needs_dma_mapping {
3378             // Register DMA mapping in IOMMU.
3379             // Do not register virtio-mem regions, as they are handled directly by
3380             // virtio-mem device itself.
3381             for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3382                 for region in zone.regions() {
3383                     vfio_container
3384                         .vfio_dma_map(
3385                             region.start_addr().raw_value(),
3386                             region.len(),
3387                             region.as_ptr() as u64,
3388                         )
3389                         .map_err(DeviceManagerError::VfioDmaMap)?;
3390                 }
3391             }
3392 
3393             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3394                 Arc::clone(&vfio_container),
3395                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3396                 Arc::clone(&self.mmio_regions),
3397             ));
3398 
3399             for virtio_mem_device in self.virtio_mem_devices.iter() {
3400                 virtio_mem_device
3401                     .lock()
3402                     .unwrap()
3403                     .add_dma_mapping_handler(
3404                         VirtioMemMappingSource::Container,
3405                         vfio_mapping.clone(),
3406                     )
3407                     .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3408             }
3409         }
3410 
3411         let legacy_interrupt_group =
3412             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3413                 Some(
3414                     legacy_interrupt_manager
3415                         .create_group(LegacyIrqGroupConfig {
3416                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3417                                 [pci_device_bdf.device() as usize]
3418                                 as InterruptIndex,
3419                         })
3420                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3421                 )
3422             } else {
3423                 None
3424             };
3425 
3426         let memory_manager = self.memory_manager.clone();
3427 
3428         let vfio_pci_device = VfioPciDevice::new(
3429             vfio_name.clone(),
3430             &self.address_manager.vm,
3431             vfio_device,
3432             vfio_container,
3433             self.msi_interrupt_manager.clone(),
3434             legacy_interrupt_group,
3435             device_cfg.iommu,
3436             pci_device_bdf,
3437             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3438             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
3439             device_cfg.x_nv_gpudirect_clique,
3440         )
3441         .map_err(DeviceManagerError::VfioPciCreate)?;
3442 
3443         let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));
3444 
3445         let new_resources = self.add_pci_device(
3446             vfio_pci_device.clone(),
3447             vfio_pci_device.clone(),
3448             pci_segment_id,
3449             pci_device_bdf,
3450             resources,
3451         )?;
3452 
3453         vfio_pci_device
3454             .lock()
3455             .unwrap()
3456             .map_mmio_regions()
3457             .map_err(DeviceManagerError::VfioMapRegion)?;
3458 
3459         for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
3460             self.mmio_regions.lock().unwrap().push(mmio_region);
3461         }
3462 
3463         let mut node = device_node!(vfio_name, vfio_pci_device);
3464 
3465         // Update the device tree with correct resource information.
3466         node.resources = new_resources;
3467         node.pci_bdf = Some(pci_device_bdf);
3468         node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));
3469 
3470         self.device_tree
3471             .lock()
3472             .unwrap()
3473             .insert(vfio_name.clone(), node);
3474 
3475         Ok((pci_device_bdf, vfio_name))
3476     }
3477 
3478     fn add_pci_device(
3479         &mut self,
3480         bus_device: Arc<dyn BusDeviceSync>,
3481         pci_device: Arc<Mutex<dyn PciDevice>>,
3482         segment_id: u16,
3483         bdf: PciBdf,
3484         resources: Option<Vec<Resource>>,
3485     ) -> DeviceManagerResult<Vec<Resource>> {
3486         let bars = pci_device
3487             .lock()
3488             .unwrap()
3489             .allocate_bars(
3490                 &self.address_manager.allocator,
3491                 &mut self.pci_segments[segment_id as usize]
3492                     .mem32_allocator
3493                     .lock()
3494                     .unwrap(),
3495                 &mut self.pci_segments[segment_id as usize]
3496                     .mem64_allocator
3497                     .lock()
3498                     .unwrap(),
3499                 resources,
3500             )
3501             .map_err(DeviceManagerError::AllocateBars)?;
3502 
3503         let mut pci_bus = self.pci_segments[segment_id as usize]
3504             .pci_bus
3505             .lock()
3506             .unwrap();
3507 
3508         pci_bus
3509             .add_device(bdf.device() as u32, pci_device)
3510             .map_err(DeviceManagerError::AddPciDevice)?;
3511 
3512         self.bus_devices.push(Arc::clone(&bus_device));
3513 
3514         pci_bus
3515             .register_mapping(
3516                 bus_device,
3517                 #[cfg(target_arch = "x86_64")]
3518                 self.address_manager.io_bus.as_ref(),
3519                 self.address_manager.mmio_bus.as_ref(),
3520                 bars.clone(),
3521             )
3522             .map_err(DeviceManagerError::AddPciDevice)?;
3523 
3524         let mut new_resources = Vec::new();
3525         for bar in bars {
3526             new_resources.push(Resource::PciBar {
3527                 index: bar.idx(),
3528                 base: bar.addr(),
3529                 size: bar.size(),
3530                 type_: bar.region_type().into(),
3531                 prefetchable: bar.prefetchable().into(),
3532             });
3533         }
3534 
3535         Ok(new_resources)
3536     }
3537 
3538     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3539         let mut iommu_attached_device_ids = Vec::new();
3540         let mut devices = self.config.lock().unwrap().devices.clone();
3541 
3542         if let Some(device_list_cfg) = &mut devices {
3543             for device_cfg in device_list_cfg.iter_mut() {
3544                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3545                 if device_cfg.iommu && self.iommu_device.is_some() {
3546                     iommu_attached_device_ids.push(device_id);
3547                 }
3548             }
3549         }
3550 
3551         // Update the list of devices
3552         self.config.lock().unwrap().devices = devices;
3553 
3554         Ok(iommu_attached_device_ids)
3555     }
3556 
3557     fn add_vfio_user_device(
3558         &mut self,
3559         device_cfg: &mut UserDeviceConfig,
3560     ) -> DeviceManagerResult<(PciBdf, String)> {
3561         let vfio_user_name = if let Some(id) = &device_cfg.id {
3562             id.clone()
3563         } else {
3564             let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
3565             device_cfg.id = Some(id.clone());
3566             id
3567         };
3568 
3569         let (pci_segment_id, pci_device_bdf, resources) =
3570             self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;
3571 
3572         let legacy_interrupt_group =
3573             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3574                 Some(
3575                     legacy_interrupt_manager
3576                         .create_group(LegacyIrqGroupConfig {
3577                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3578                                 [pci_device_bdf.device() as usize]
3579                                 as InterruptIndex,
3580                         })
3581                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3582                 )
3583             } else {
3584                 None
3585             };
3586 
3587         let client = Arc::new(Mutex::new(
3588             vfio_user::Client::new(&device_cfg.socket)
3589                 .map_err(DeviceManagerError::VfioUserCreateClient)?,
3590         ));
3591 
3592         let memory_manager = self.memory_manager.clone();
3593 
3594         let mut vfio_user_pci_device = VfioUserPciDevice::new(
3595             vfio_user_name.clone(),
3596             &self.address_manager.vm,
3597             client.clone(),
3598             self.msi_interrupt_manager.clone(),
3599             legacy_interrupt_group,
3600             pci_device_bdf,
3601             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3602             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
3603         )
3604         .map_err(DeviceManagerError::VfioUserCreate)?;
3605 
3606         let memory = self.memory_manager.lock().unwrap().guest_memory();
3607         let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
3608         for virtio_mem_device in self.virtio_mem_devices.iter() {
3609             virtio_mem_device
3610                 .lock()
3611                 .unwrap()
3612                 .add_dma_mapping_handler(
3613                     VirtioMemMappingSource::Device(pci_device_bdf.into()),
3614                     vfio_user_mapping.clone(),
3615                 )
3616                 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3617         }
3618 
3619         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3620             for region in zone.regions() {
3621                 vfio_user_pci_device
3622                     .dma_map(region)
3623                     .map_err(DeviceManagerError::VfioUserDmaMap)?;
3624             }
3625         }
3626 
3627         let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));
3628 
3629         let new_resources = self.add_pci_device(
3630             vfio_user_pci_device.clone(),
3631             vfio_user_pci_device.clone(),
3632             pci_segment_id,
3633             pci_device_bdf,
3634             resources,
3635         )?;
3636 
3637         // Note it is required to call 'add_pci_device()' in advance to have the list of
3638         // mmio regions provisioned correctly
3639         vfio_user_pci_device
3640             .lock()
3641             .unwrap()
3642             .map_mmio_regions()
3643             .map_err(DeviceManagerError::VfioUserMapRegion)?;
3644 
3645         let mut node = device_node!(vfio_user_name, vfio_user_pci_device);
3646 
3647         // Update the device tree with correct resource information.
3648         node.resources = new_resources;
3649         node.pci_bdf = Some(pci_device_bdf);
3650         node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));
3651 
3652         self.device_tree
3653             .lock()
3654             .unwrap()
3655             .insert(vfio_user_name.clone(), node);
3656 
3657         Ok((pci_device_bdf, vfio_user_name))
3658     }
3659 
3660     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3661         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3662 
3663         if let Some(device_list_cfg) = &mut user_devices {
3664             for device_cfg in device_list_cfg.iter_mut() {
3665                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3666             }
3667         }
3668 
3669         // Update the list of devices
3670         self.config.lock().unwrap().user_devices = user_devices;
3671 
3672         Ok(vec![])
3673     }
3674 
3675     fn add_virtio_pci_device(
3676         &mut self,
3677         virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3678         iommu_mapping: &Option<Arc<IommuMapping>>,
3679         virtio_device_id: String,
3680         pci_segment_id: u16,
3681         dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
3682     ) -> DeviceManagerResult<PciBdf> {
3683         let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");
3684 
3685         // Add the new virtio-pci node to the device tree.
3686         let mut node = device_node!(id);
3687         node.children = vec![virtio_device_id.clone()];
3688 
3689         let (pci_segment_id, pci_device_bdf, resources) =
3690             self.pci_resources(&id, pci_segment_id)?;
3691 
3692         // Update the existing virtio node by setting the parent.
3693         if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
3694             node.parent = Some(id.clone());
3695         } else {
3696             return Err(DeviceManagerError::MissingNode);
3697         }
3698 
3699         // Allows support for one MSI-X vector per queue. It also adds 1
3700         // as we need to take into account the dedicated vector to notify
3701         // about a virtio config change.
3702         let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;
3703 
3704         // Create the AccessPlatform trait from the implementation IommuMapping.
3705         // This will provide address translation for any virtio device sitting
3706         // behind a vIOMMU.
3707         let mut access_platform: Option<Arc<dyn AccessPlatform>> = None;
3708 
3709         if let Some(mapping) = iommu_mapping {
3710             access_platform = Some(Arc::new(AccessPlatformMapping::new(
3711                 pci_device_bdf.into(),
3712                 mapping.clone(),
3713             )));
3714         }
3715 
3716         // If SEV-SNP is enabled create the AccessPlatform from SevSnpPageAccessProxy
3717         #[cfg(feature = "sev_snp")]
3718         if self.config.lock().unwrap().is_sev_snp_enabled() {
3719             access_platform = Some(Arc::new(SevSnpPageAccessProxy::new(
3720                 self.address_manager.vm.clone(),
3721             )));
3722         }
3723 
3724         let memory = self.memory_manager.lock().unwrap().guest_memory();
3725 
3726         // Map DMA ranges if a DMA handler is available and if the device is
3727         // not attached to a virtual IOMMU.
3728         if let Some(dma_handler) = &dma_handler {
3729             if iommu_mapping.is_some() {
3730                 if let Some(iommu) = &self.iommu_device {
3731                     iommu
3732                         .lock()
3733                         .unwrap()
3734                         .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
3735                 } else {
3736                     return Err(DeviceManagerError::MissingVirtualIommu);
3737                 }
3738             } else {
3739                 // Let every virtio-mem device handle the DMA map/unmap through the
3740                 // DMA handler provided.
3741                 for virtio_mem_device in self.virtio_mem_devices.iter() {
3742                     virtio_mem_device
3743                         .lock()
3744                         .unwrap()
3745                         .add_dma_mapping_handler(
3746                             VirtioMemMappingSource::Device(pci_device_bdf.into()),
3747                             dma_handler.clone(),
3748                         )
3749                         .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3750                 }
3751 
3752                 // Do not register virtio-mem regions, as they are handled directly by
3753                 // virtio-mem devices.
3754                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3755                     for region in zone.regions() {
3756                         let gpa = region.start_addr().0;
3757                         let size = region.len();
3758                         dma_handler
3759                             .map(gpa, gpa, size)
3760                             .map_err(DeviceManagerError::VirtioDmaMap)?;
3761                     }
3762                 }
3763             }
3764         }
3765 
3766         let device_type = virtio_device.lock().unwrap().device_type();
3767         let virtio_pci_device = Arc::new(Mutex::new(
3768             VirtioPciDevice::new(
3769                 id.clone(),
3770                 memory,
3771                 virtio_device,
3772                 msix_num,
3773                 access_platform,
3774                 &self.msi_interrupt_manager,
3775                 pci_device_bdf.into(),
3776                 self.activate_evt
3777                     .try_clone()
3778                     .map_err(DeviceManagerError::EventFd)?,
3779                 // All device types *except* virtio block devices should be allocated a 64-bit bar
3780                 // The block devices should be given a 32-bit BAR so that they are easily accessible
3781                 // to firmware without requiring excessive identity mapping.
3782                 // The exception being if not on the default PCI segment.
3783                 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
3784                 dma_handler,
3785                 self.pending_activations.clone(),
3786                 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
3787             )
3788             .map_err(DeviceManagerError::VirtioDevice)?,
3789         ));
3790 
3791         let new_resources = self.add_pci_device(
3792             virtio_pci_device.clone(),
3793             virtio_pci_device.clone(),
3794             pci_segment_id,
3795             pci_device_bdf,
3796             resources,
3797         )?;
3798 
3799         let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3800         for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3801             let io_addr = IoEventAddress::Mmio(addr);
3802             self.address_manager
3803                 .vm
3804                 .register_ioevent(event, &io_addr, None)
3805                 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
3806         }
3807 
3808         // Update the device tree with correct resource information.
3809         node.resources = new_resources;
3810         node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
3811         node.pci_bdf = Some(pci_device_bdf);
3812         node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
3813         self.device_tree.lock().unwrap().insert(id, node);
3814 
3815         Ok(pci_device_bdf)
3816     }
3817 
3818     fn add_pvpanic_device(
3819         &mut self,
3820     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
3821         let id = String::from(PVPANIC_DEVICE_NAME);
3822         let pci_segment_id = 0x0_u16;
3823 
3824         info!("Creating pvpanic device {}", id);
3825 
3826         let (pci_segment_id, pci_device_bdf, resources) =
3827             self.pci_resources(&id, pci_segment_id)?;
3828 
3829         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
3830 
3831         let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
3832             .map_err(DeviceManagerError::PvPanicCreate)?;
3833 
3834         let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));
3835 
3836         let new_resources = self.add_pci_device(
3837             pvpanic_device.clone(),
3838             pvpanic_device.clone(),
3839             pci_segment_id,
3840             pci_device_bdf,
3841             resources,
3842         )?;
3843 
3844         let mut node = device_node!(id, pvpanic_device);
3845 
3846         node.resources = new_resources;
3847         node.pci_bdf = Some(pci_device_bdf);
3848         node.pci_device_handle = None;
3849 
3850         self.device_tree.lock().unwrap().insert(id, node);
3851 
3852         Ok(Some(pvpanic_device))
3853     }
3854 
3855     fn pci_resources(
3856         &self,
3857         id: &str,
3858         pci_segment_id: u16,
3859     ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
3860         // Look for the id in the device tree. If it can be found, that means
3861         // the device is being restored, otherwise it's created from scratch.
3862         Ok(
3863             if let Some(node) = self.device_tree.lock().unwrap().get(id) {
3864                 info!("Restoring virtio-pci {} resources", id);
3865                 let pci_device_bdf: PciBdf = node
3866                     .pci_bdf
3867                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3868                 let pci_segment_id = pci_device_bdf.segment();
3869 
3870                 self.pci_segments[pci_segment_id as usize]
3871                     .pci_bus
3872                     .lock()
3873                     .unwrap()
3874                     .get_device_id(pci_device_bdf.device() as usize)
3875                     .map_err(DeviceManagerError::GetPciDeviceId)?;
3876 
3877                 (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
3878             } else {
3879                 let pci_device_bdf =
3880                     self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3881 
3882                 (pci_segment_id, pci_device_bdf, None)
3883             },
3884         )
3885     }
3886 
    /// Returns the I/O bus held by the address manager (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }
3891 
    /// Returns the MMIO bus held by the address manager.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }
3895 
    /// Returns the system allocator held by the address manager.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }
3899 
3900     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3901         self.interrupt_controller
3902             .as_ref()
3903             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3904     }
3905 
    /// Returns the PCI segments owned by the device manager.
    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }
3909 
    /// Returns the stored command line additions as a slice (aarch64 only).
    #[cfg(target_arch = "aarch64")]
    pub fn cmdline_additions(&self) -> &[String] {
        self.cmdline_additions.as_slice()
    }
3914 
3915     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3916         for handle in self.virtio_devices.iter() {
3917             handle
3918                 .virtio_device
3919                 .lock()
3920                 .unwrap()
3921                 .add_memory_region(new_region)
3922                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3923 
3924             if let Some(dma_handler) = &handle.dma_handler {
3925                 if !handle.iommu {
3926                     let gpa = new_region.start_addr().0;
3927                     let size = new_region.len();
3928                     dma_handler
3929                         .map(gpa, gpa, size)
3930                         .map_err(DeviceManagerError::VirtioDmaMap)?;
3931                 }
3932             }
3933         }
3934 
3935         // Take care of updating the memory for VFIO PCI devices.
3936         if let Some(vfio_container) = &self.vfio_container {
3937             vfio_container
3938                 .vfio_dma_map(
3939                     new_region.start_addr().raw_value(),
3940                     new_region.len(),
3941                     new_region.as_ptr() as u64,
3942                 )
3943                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3944         }
3945 
3946         // Take care of updating the memory for vfio-user devices.
3947         {
3948             let device_tree = self.device_tree.lock().unwrap();
3949             for pci_device_node in device_tree.pci_devices() {
3950                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
3951                     .pci_device_handle
3952                     .as_ref()
3953                     .ok_or(DeviceManagerError::MissingPciDevice)?
3954                 {
3955                     vfio_user_pci_device
3956                         .lock()
3957                         .unwrap()
3958                         .dma_map(new_region)
3959                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
3960                 }
3961             }
3962         }
3963 
3964         Ok(())
3965     }
3966 
3967     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3968         for mut activator in self.pending_activations.lock().unwrap().drain(..) {
3969             activator
3970                 .activate()
3971                 .map_err(DeviceManagerError::VirtioActivate)?;
3972         }
3973         Ok(())
3974     }
3975 
3976     pub fn notify_hotplug(
3977         &self,
3978         _notification_type: AcpiNotificationFlags,
3979     ) -> DeviceManagerResult<()> {
3980         return self
3981             .ged_notification_device
3982             .as_ref()
3983             .unwrap()
3984             .lock()
3985             .unwrap()
3986             .notify(_notification_type)
3987             .map_err(DeviceManagerError::HotPlugNotification);
3988     }
3989 
3990     pub fn add_device(
3991         &mut self,
3992         device_cfg: &mut DeviceConfig,
3993     ) -> DeviceManagerResult<PciDeviceInfo> {
3994         self.validate_identifier(&device_cfg.id)?;
3995 
3996         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
3997             return Err(DeviceManagerError::InvalidIommuHotplug);
3998         }
3999 
4000         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
4001 
4002         // Update the PCIU bitmap
4003         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4004 
4005         Ok(PciDeviceInfo {
4006             id: device_name,
4007             bdf,
4008         })
4009     }
4010 
4011     pub fn add_user_device(
4012         &mut self,
4013         device_cfg: &mut UserDeviceConfig,
4014     ) -> DeviceManagerResult<PciDeviceInfo> {
4015         self.validate_identifier(&device_cfg.id)?;
4016 
4017         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
4018 
4019         // Update the PCIU bitmap
4020         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4021 
4022         Ok(PciDeviceInfo {
4023             id: device_name,
4024             bdf,
4025         })
4026     }
4027 
4028     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
4029         // The node can be directly a PCI node in case the 'id' refers to a
4030         // VFIO device or a virtio-pci one.
4031         // In case the 'id' refers to a virtio device, we must find the PCI
4032         // node by looking at the parent.
4033         let device_tree = self.device_tree.lock().unwrap();
4034         let node = device_tree
4035             .get(&id)
4036             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
4037 
4038         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
4039             node
4040         } else {
4041             let parent = node
4042                 .parent
4043                 .as_ref()
4044                 .ok_or(DeviceManagerError::MissingNode)?;
4045             device_tree
4046                 .get(parent)
4047                 .ok_or(DeviceManagerError::MissingNode)?
4048         };
4049 
4050         let pci_device_bdf: PciBdf = pci_device_node
4051             .pci_bdf
4052             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
4053         let pci_segment_id = pci_device_bdf.segment();
4054 
4055         let pci_device_handle = pci_device_node
4056             .pci_device_handle
4057             .as_ref()
4058             .ok_or(DeviceManagerError::MissingPciDevice)?;
4059         #[allow(irrefutable_let_patterns)]
4060         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
4061             let device_type = VirtioDeviceType::from(
4062                 virtio_pci_device
4063                     .lock()
4064                     .unwrap()
4065                     .virtio_device()
4066                     .lock()
4067                     .unwrap()
4068                     .device_type(),
4069             );
4070             match device_type {
4071                 VirtioDeviceType::Net
4072                 | VirtioDeviceType::Block
4073                 | VirtioDeviceType::Pmem
4074                 | VirtioDeviceType::Fs
4075                 | VirtioDeviceType::Vsock => {}
4076                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
4077             }
4078         }
4079 
4080         // Update the PCID bitmap
4081         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
4082 
4083         Ok(())
4084     }
4085 
    /// Tears down the PCI device sitting in slot `device_id` of segment
    /// `pci_segment_id`.
    ///
    /// The steps, in order:
    /// 1. hand the slot ID back to the segment's PCI bus,
    /// 2. remove the device node and its children from the device tree,
    /// 3. undo per-handle-type state (VFIO MMIO regions, virtio ioeventfds
    ///    and DMA mappings, vfio-user DMA mappings),
    /// 4. remove the per-device DMA mapping handler from every virtio-mem
    ///    device when one is flagged for removal,
    /// 5. free the BARs and remove the device from the PCI, IO (x86_64 only)
    ///    and MMIO buses and from `self.bus_devices`,
    /// 6. for virtio devices, remove userspace mappings, shut the device
    ///    down and drop it from `self.virtio_devices`,
    /// 7. emit a `device-removed` event.
    ///
    /// # Errors
    ///
    /// Returns the first `DeviceManagerError` hit; steps completed before the
    /// failure are not rolled back.
    ///
    /// # Panics
    ///
    /// Panics on a poisoned mutex, if `pci_segment_id` is out of range for
    /// `self.pci_segments`, or if a virtio-pci node has more than one child.
    pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
        info!(
            "Ejecting device_id = {} on segment_id={}",
            device_id, pci_segment_id
        );

        // Convert the device ID into the corresponding b/d/f.
        let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);

        // Give the PCI device ID back to the PCI bus.
        // NOTE(review): this happens before the device tree lookup below, so
        // the ID has already been released if that lookup fails with
        // `MissingPciDevice` -- confirm this ordering is intended.
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .put_device_id(device_id as usize)
            .map_err(DeviceManagerError::PutPciDeviceId)?;

        // Remove the device from the device tree along with its children.
        let mut device_tree = self.device_tree.lock().unwrap();
        let pci_device_node = device_tree
            .remove_node_by_pci_bdf(pci_device_bdf)
            .ok_or(DeviceManagerError::MissingPciDevice)?;

        // For VFIO and vfio-user the PCI device id is the id.
        // For virtio we overwrite it later as we want the id of the
        // underlying device.
        let mut id = pci_device_node.id;
        let pci_device_handle = pci_device_node
            .pci_device_handle
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
            // The virtio-pci device has a single child
            if !pci_device_node.children.is_empty() {
                assert_eq!(pci_device_node.children.len(), 1);
                let child_id = &pci_device_node.children[0];
                id.clone_from(child_id);
            }
        }
        for child in pci_device_node.children.iter() {
            device_tree.remove(child);
        }

        // Whether this BDF is listed among the IOMMU-attached devices; used
        // below to decide if identity DMA mappings have to be undone.
        let mut iommu_attached = false;
        if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
            if iommu_attached_devices.contains(&pci_device_bdf) {
                iommu_attached = true;
            }
        }

        // Per handle type, undo the type-specific state and produce:
        // the device as a `PciDevice`, the device as a `BusDeviceSync`, the
        // underlying virtio device (virtio only), and whether the per-device
        // virtio-mem DMA mapping handler has to be removed afterwards.
        let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
            // No need to remove any virtio-mem mapping here as the container outlives all devices
            PciDeviceHandle::Vfio(vfio_pci_device) => {
                for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
                    self.mmio_regions
                        .lock()
                        .unwrap()
                        .retain(|x| x.start != mmio_region.start)
                }

                (
                    Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_pci_device) as Arc<dyn BusDeviceSync>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    false,
                )
            }
            PciDeviceHandle::Virtio(virtio_pci_device) => {
                let dev = virtio_pci_device.lock().unwrap();
                let bar_addr = dev.config_bar_addr();
                // Unregister every ioeventfd the device reports for its
                // configuration BAR.
                for (event, addr) in dev.ioeventfds(bar_addr) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.address_manager
                        .vm
                        .unregister_ioevent(event, &io_addr)
                        .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
                }

                // Unmap every region of every memory zone from the device's
                // DMA handler, unless the device is IOMMU-attached.
                if let Some(dma_handler) = dev.dma_handler() {
                    if !iommu_attached {
                        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                            for region in zone.regions() {
                                let iova = region.start_addr().0;
                                let size = region.len();
                                dma_handler
                                    .unmap(iova, size)
                                    .map_err(DeviceManagerError::VirtioDmaUnmap)?;
                            }
                        }
                    }
                }

                (
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&virtio_pci_device) as Arc<dyn BusDeviceSync>,
                    Some(dev.virtio_device()),
                    dev.dma_handler().is_some() && !iommu_attached,
                )
            }
            PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
                let mut dev = vfio_user_pci_device.lock().unwrap();
                // Unmap every region of every memory zone from the device.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        dev.dma_unmap(region)
                            .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
                    }
                }

                (
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_user_pci_device) as Arc<dyn BusDeviceSync>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    true,
                )
            }
        };

        // Drop the DMA mapping handler registered for this BDF from every
        // virtio-mem device.
        if remove_dma_handler {
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
                        pci_device_bdf.into(),
                    ))
                    .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
            }
        }

        // Free the allocated BARs
        pci_device
            .lock()
            .unwrap()
            .free_bars(
                &mut self.address_manager.allocator.lock().unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .mem32_allocator
                    .lock()
                    .unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .mem64_allocator
                    .lock()
                    .unwrap(),
            )
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the lists and
        // buses where it was stored. At the end of this function, after
        // virtio_device, bus_device and pci_device are released, the actual
        // device will be dropped.
        Ok(())
    }
4292 
4293     fn hotplug_virtio_pci_device(
4294         &mut self,
4295         handle: MetaVirtioDevice,
4296     ) -> DeviceManagerResult<PciDeviceInfo> {
4297         // Add the virtio device to the device manager list. This is important
4298         // as the list is used to notify virtio devices about memory updates
4299         // for instance.
4300         self.virtio_devices.push(handle.clone());
4301 
4302         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
4303             self.iommu_mapping.clone()
4304         } else {
4305             None
4306         };
4307 
4308         let bdf = self.add_virtio_pci_device(
4309             handle.virtio_device,
4310             &mapping,
4311             handle.id.clone(),
4312             handle.pci_segment,
4313             handle.dma_handler,
4314         )?;
4315 
4316         // Update the PCIU bitmap
4317         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4318 
4319         Ok(PciDeviceInfo { id: handle.id, bdf })
4320     }
4321 
4322     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
4323         self.config
4324             .lock()
4325             .as_ref()
4326             .unwrap()
4327             .platform
4328             .as_ref()
4329             .map(|pc| {
4330                 pc.iommu_segments
4331                     .as_ref()
4332                     .map(|v| v.contains(&pci_segment_id))
4333                     .unwrap_or_default()
4334             })
4335             .unwrap_or_default()
4336     }
4337 
4338     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
4339         self.validate_identifier(&disk_cfg.id)?;
4340 
4341         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
4342             return Err(DeviceManagerError::InvalidIommuHotplug);
4343         }
4344 
4345         let device = self.make_virtio_block_device(disk_cfg)?;
4346         self.hotplug_virtio_pci_device(device)
4347     }
4348 
4349     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
4350         self.validate_identifier(&fs_cfg.id)?;
4351 
4352         let device = self.make_virtio_fs_device(fs_cfg)?;
4353         self.hotplug_virtio_pci_device(device)
4354     }
4355 
4356     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
4357         self.validate_identifier(&pmem_cfg.id)?;
4358 
4359         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
4360             return Err(DeviceManagerError::InvalidIommuHotplug);
4361         }
4362 
4363         let device = self.make_virtio_pmem_device(pmem_cfg)?;
4364         self.hotplug_virtio_pci_device(device)
4365     }
4366 
4367     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
4368         self.validate_identifier(&net_cfg.id)?;
4369 
4370         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
4371             return Err(DeviceManagerError::InvalidIommuHotplug);
4372         }
4373 
4374         let device = self.make_virtio_net_device(net_cfg)?;
4375         self.hotplug_virtio_pci_device(device)
4376     }
4377 
4378     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
4379         self.validate_identifier(&vdpa_cfg.id)?;
4380 
4381         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
4382             return Err(DeviceManagerError::InvalidIommuHotplug);
4383         }
4384 
4385         let device = self.make_vdpa_device(vdpa_cfg)?;
4386         self.hotplug_virtio_pci_device(device)
4387     }
4388 
4389     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4390         self.validate_identifier(&vsock_cfg.id)?;
4391 
4392         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4393             return Err(DeviceManagerError::InvalidIommuHotplug);
4394         }
4395 
4396         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4397         self.hotplug_virtio_pci_device(device)
4398     }
4399 
4400     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4401         let mut counters = HashMap::new();
4402 
4403         for handle in &self.virtio_devices {
4404             let virtio_device = handle.virtio_device.lock().unwrap();
4405             if let Some(device_counters) = virtio_device.counters() {
4406                 counters.insert(handle.id.clone(), device_counters.clone());
4407             }
4408         }
4409 
4410         counters
4411     }
4412 
4413     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4414         if let Some(balloon) = &self.balloon {
4415             return balloon
4416                 .lock()
4417                 .unwrap()
4418                 .resize(size)
4419                 .map_err(DeviceManagerError::VirtioBalloonResize);
4420         }
4421 
4422         warn!("No balloon setup: Can't resize the balloon");
4423         Err(DeviceManagerError::MissingVirtioBalloon)
4424     }
4425 
4426     pub fn balloon_size(&self) -> u64 {
4427         if let Some(balloon) = &self.balloon {
4428             return balloon.lock().unwrap().get_actual();
4429         }
4430 
4431         0
4432     }
4433 
4434     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4435         self.device_tree.clone()
4436     }
4437 
4438     #[cfg(target_arch = "x86_64")]
4439     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4440         self.ged_notification_device
4441             .as_ref()
4442             .unwrap()
4443             .lock()
4444             .unwrap()
4445             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4446             .map_err(DeviceManagerError::PowerButtonNotification)
4447     }
4448 
4449     #[cfg(target_arch = "aarch64")]
4450     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4451         // There are two use cases:
4452         // 1. Users will use direct kernel boot with device tree.
4453         // 2. Users will use ACPI+UEFI boot.
4454 
4455         // Trigger a GPIO pin 3 event to satisfy use case 1.
4456         self.gpio_device
4457             .as_ref()
4458             .unwrap()
4459             .lock()
4460             .unwrap()
4461             .trigger_key(3)
4462             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4463         // Trigger a GED power button event to satisfy use case 2.
4464         return self
4465             .ged_notification_device
4466             .as_ref()
4467             .unwrap()
4468             .lock()
4469             .unwrap()
4470             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4471             .map_err(DeviceManagerError::PowerButtonNotification);
4472     }
4473 
    /// Returns a reference to the stored `iommu_attached_devices` value: an
    /// optional pair of one `PciBdf` and a list of `PciBdf`s.
    // NOTE(review): presumably the first element is the BDF of the IOMMU
    // device itself and the list holds the devices attached to it -- confirm
    // against where the field is populated.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }
4477 
4478     fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4479         if let Some(id) = id {
4480             if id.starts_with("__") {
4481                 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4482             }
4483 
4484             if self.device_tree.lock().unwrap().contains_key(id) {
4485                 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4486             }
4487         }
4488 
4489         Ok(())
4490     }
4491 
    /// Returns a reference to the stored `AcpiPlatformAddresses`.
    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
4495 }
4496 
4497 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4498     for (numa_node_id, numa_node) in numa_nodes.iter() {
4499         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4500             return Some(*numa_node_id);
4501         }
4502     }
4503 
4504     None
4505 }
4506 
4507 fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
4508     for (numa_node_id, numa_node) in numa_nodes.iter() {
4509         if numa_node.pci_segments.contains(&pci_segment_id) {
4510             return *numa_node_id;
4511         }
4512     }
4513 
4514     0
4515 }
4516 
4517 struct TpmDevice {}
4518 
4519 impl Aml for TpmDevice {
4520     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4521         aml::Device::new(
4522             "TPM2".into(),
4523             vec![
4524                 &aml::Name::new("_HID".into(), &"MSFT0101"),
4525                 &aml::Name::new("_STA".into(), &(0xF_usize)),
4526                 &aml::Name::new(
4527                     "_CRS".into(),
4528                     &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
4529                         true,
4530                         layout::TPM_START.0 as u32,
4531                         layout::TPM_SIZE as u32,
4532                     )]),
4533                 ),
4534             ],
4535         )
4536         .to_aml_bytes(sink)
4537     }
4538 }
4539 
/// ACPI (AML) description of everything the `DeviceManager` owns.
impl Aml for DeviceManager {
    /// Writes the following objects into `sink`, in this order:
    ///
    /// 1. `_SB_.PHPR`, the PCI hotplug controller, with its `PCST` operation
    ///    region and the `PCEJ` / `PSCN` methods,
    /// 2. the AML of every PCI segment,
    /// 3. `_SB_.MBRD`, holding one fixed memory range per PCI segment,
    /// 4. `_SB_.COM1`, only when the serial mode is not `Off`,
    /// 5. the `_S5_` package,
    /// 6. `_SB_.PWRB`, the power button,
    /// 7. the TPM device, only when a TPM is configured,
    /// 8. the AML of the GED notification device.
    ///
    /// # Panics
    ///
    /// Panics if the configuration mutex is poisoned, or if the GED
    /// notification device is missing or its mutex is poisoned.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // One call to `\_SB_.PCxx.PCNT` per PCI segment; these calls form the
        // body of the `PSCN` method below.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        // Same calls, as trait-object references for `aml::Method::new`.
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write a bitmask derived from the first argument to the
                        // B0EJ field of the memory-mapped PCST region.
                        // NOTE(review): presumably B0EJ = 1 << Arg0 with Arg0
                        // being the device slot -- confirm against the
                        // `aml::ShiftLeft::new` argument order.
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        // One fixed 32-bit memory descriptor per segment, built from the
        // segment's `mmio_config_address`.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        // Same descriptors, as trait-object references for the `_CRS`.
        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                // NOTE(review): `serial_irq` is only read inside the
                // `mode != Off` branch below, so this fallback value is never
                // emitted.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        // Power button device
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        // The GED notification device emits its own AML last.
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}
4716 
4717 impl Pausable for DeviceManager {
4718     fn pause(&mut self) -> result::Result<(), MigratableError> {
4719         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4720             if let Some(migratable) = &device_node.migratable {
4721                 migratable.lock().unwrap().pause()?;
4722             }
4723         }
4724         // On AArch64, the pause of device manager needs to trigger
4725         // a "pause" of GIC, which will flush the GIC pending tables
4726         // and ITS tables to guest RAM.
4727         #[cfg(target_arch = "aarch64")]
4728         {
4729             self.get_interrupt_controller()
4730                 .unwrap()
4731                 .lock()
4732                 .unwrap()
4733                 .pause()?;
4734         };
4735 
4736         Ok(())
4737     }
4738 
4739     fn resume(&mut self) -> result::Result<(), MigratableError> {
4740         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4741             if let Some(migratable) = &device_node.migratable {
4742                 migratable.lock().unwrap().resume()?;
4743             }
4744         }
4745 
4746         Ok(())
4747     }
4748 }
4749 
4750 impl Snapshottable for DeviceManager {
4751     fn id(&self) -> String {
4752         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4753     }
4754 
4755     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4756         let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);
4757 
4758         // We aggregate all devices snapshots.
4759         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4760             if let Some(migratable) = &device_node.migratable {
4761                 let mut migratable = migratable.lock().unwrap();
4762                 snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
4763             }
4764         }
4765 
4766         Ok(snapshot)
4767     }
4768 }
4769 
// `DeviceManager` overrides nothing from `Transportable`: the impl body is
// empty, so only the trait's provided (default) methods, if any, apply.
impl Transportable for DeviceManager {}
4771 
4772 impl Migratable for DeviceManager {
4773     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4774         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4775             if let Some(migratable) = &device_node.migratable {
4776                 migratable.lock().unwrap().start_dirty_log()?;
4777             }
4778         }
4779         Ok(())
4780     }
4781 
4782     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4783         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4784             if let Some(migratable) = &device_node.migratable {
4785                 migratable.lock().unwrap().stop_dirty_log()?;
4786             }
4787         }
4788         Ok(())
4789     }
4790 
4791     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4792         let mut tables = Vec::new();
4793         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4794             if let Some(migratable) = &device_node.migratable {
4795                 tables.push(migratable.lock().unwrap().dirty_log()?);
4796             }
4797         }
4798         Ok(MemoryRangeTable::new_from_tables(tables))
4799     }
4800 
4801     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4802         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4803             if let Some(migratable) = &device_node.migratable {
4804                 migratable.lock().unwrap().start_migration()?;
4805             }
4806         }
4807         Ok(())
4808     }
4809 
4810     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4811         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4812             if let Some(migratable) = &device_node.migratable {
4813                 migratable.lock().unwrap().complete_migration()?;
4814             }
4815         }
4816         Ok(())
4817     }
4818 }
4819 
// Register layout served by the `BusDevice` implementation of
// `DeviceManager`. Offsets are matched against the `offset` argument of
// `read`/`write`; sizes are the access widths (in bytes) asserted there.

// PCIU: a read returns the selected segment's `pci_devices_up` bitmap and
// clears it.
const PCIU_FIELD_OFFSET: u64 = 0;
// PCID: a read returns the selected segment's `pci_devices_down` bitmap and
// clears it.
const PCID_FIELD_OFFSET: u64 = 4;
// B0EJ: a write carries a slot bitmap and ejects the device in each set slot
// of the selected segment; a read always yields zero.
const B0EJ_FIELD_OFFSET: u64 = 8;
// PSEG: reads back / selects the index of the PCI segment the other
// registers operate on.
const PSEG_FIELD_OFFSET: u64 = 12;
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;
4828 
4829 impl BusDevice for DeviceManager {
4830     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4831         match offset {
4832             PCIU_FIELD_OFFSET => {
4833                 assert!(data.len() == PCIU_FIELD_SIZE);
4834                 data.copy_from_slice(
4835                     &self.pci_segments[self.selected_segment]
4836                         .pci_devices_up
4837                         .to_le_bytes(),
4838                 );
4839                 // Clear the PCIU bitmap
4840                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4841             }
4842             PCID_FIELD_OFFSET => {
4843                 assert!(data.len() == PCID_FIELD_SIZE);
4844                 data.copy_from_slice(
4845                     &self.pci_segments[self.selected_segment]
4846                         .pci_devices_down
4847                         .to_le_bytes(),
4848                 );
4849                 // Clear the PCID bitmap
4850                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4851             }
4852             B0EJ_FIELD_OFFSET => {
4853                 assert!(data.len() == B0EJ_FIELD_SIZE);
4854                 // Always return an empty bitmap since the eject is always
4855                 // taken care of right away during a write access.
4856                 data.fill(0);
4857             }
4858             PSEG_FIELD_OFFSET => {
4859                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4860                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4861             }
4862             _ => error!(
4863                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4864                 base, offset
4865             ),
4866         }
4867 
4868         debug!(
4869             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4870             base, offset, data
4871         )
4872     }
4873 
4874     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4875         match offset {
4876             B0EJ_FIELD_OFFSET => {
4877                 assert!(data.len() == B0EJ_FIELD_SIZE);
4878                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4879                 data_array.copy_from_slice(data);
4880                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4881 
4882                 while slot_bitmap > 0 {
4883                     let slot_id = slot_bitmap.trailing_zeros();
4884                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4885                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4886                     }
4887                     slot_bitmap &= !(1 << slot_id);
4888                 }
4889             }
4890             PSEG_FIELD_OFFSET => {
4891                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4892                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4893                 data_array.copy_from_slice(data);
4894                 let selected_segment = u32::from_le_bytes(data_array) as usize;
4895                 if selected_segment >= self.pci_segments.len() {
4896                     error!(
4897                         "Segment selection out of range: {} >= {}",
4898                         selected_segment,
4899                         self.pci_segments.len()
4900                     );
4901                     return None;
4902                 }
4903                 self.selected_segment = selected_segment;
4904             }
4905             _ => error!(
4906                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4907                 base, offset
4908             ),
4909         }
4910 
4911         debug!(
4912             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4913             base, offset, data
4914         );
4915 
4916         None
4917     }
4918 }
4919 
4920 impl Drop for DeviceManager {
4921     fn drop(&mut self) {
4922         // Wake up the DeviceManager threads (mainly virtio device workers),
4923         // to avoid deadlock on waiting for paused/parked worker threads.
4924         if let Err(e) = self.resume() {
4925             error!("Error resuming DeviceManager: {:?}", e);
4926         }
4927 
4928         for handle in self.virtio_devices.drain(..) {
4929             handle.virtio_device.lock().unwrap().shutdown();
4930         }
4931 
4932         if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
4933             // SAFETY: FFI call
4934             let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
4935         }
4936     }
4937 }
4938 
#[cfg(test)]
mod tests {
    use super::*;

    // Checks the `base()` and then the `end()` address reported by one
    // allocator, locking it separately for each query.
    macro_rules! assert_allocator_range {
        ($allocator:expr, $base:expr, $end:expr) => {{
            assert_eq!(
                $allocator.lock().unwrap().base(),
                vm_memory::GuestAddress($base)
            );
            assert_eq!(
                $allocator.lock().unwrap().end(),
                vm_memory::GuestAddress($end)
            );
        }};
    }

    #[test]
    fn test_create_mmio_allocators() {
        // One segment: it receives the whole range.
        let single = create_mmio_allocators(0x100000, 0x400000, 1, vec![1], 4 << 10);
        assert_eq!(single.len(), 1);
        assert_allocator_range!(single[0], 0x100000, 0x3fffff);

        // Two segments with equal weights: the range is split in halves.
        let even = create_mmio_allocators(0x100000, 0x400000, 2, vec![1, 1], 4 << 10);
        assert_eq!(even.len(), 2);
        assert_allocator_range!(even[0], 0x100000, 0x27ffff);
        assert_allocator_range!(even[1], 0x280000, 0x3fffff);

        // Two segments weighted 2:1: the first one gets two thirds.
        let weighted = create_mmio_allocators(0x100000, 0x400000, 2, vec![2, 1], 4 << 10);
        assert_eq!(weighted.len(), 2);
        assert_allocator_range!(weighted[0], 0x100000, 0x2fffff);
        assert_allocator_range!(weighted[1], 0x300000, 0x3fffff);
    }
}
4995