xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision 87c0791d535fd9a1a248dd1b146b65ccac106dd2)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use crate::config::{
13     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
14     VdpaConfig, VhostMode, VmConfig, VsockConfig,
15 };
16 use crate::device_tree::{DeviceNode, DeviceTree};
17 use crate::interrupt::LegacyUserspaceInterruptManager;
18 use crate::interrupt::MsiInterruptManager;
19 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
20 use crate::pci_segment::PciSegment;
21 use crate::seccomp_filters::{get_seccomp_filter, Thread};
22 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
23 use crate::sigwinch_listener::start_sigwinch_listener;
24 #[cfg(target_arch = "aarch64")]
25 use crate::GuestMemoryMmap;
26 use crate::GuestRegionMmap;
27 use crate::PciDeviceInfo;
28 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
29 use acpi_tables::sdt::GenericAddress;
30 use acpi_tables::{aml, aml::Aml};
31 use anyhow::anyhow;
32 use arch::layout;
33 #[cfg(target_arch = "x86_64")]
34 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
35 use arch::NumaNodes;
36 #[cfg(target_arch = "aarch64")]
37 use arch::{DeviceType, MmioDeviceInfo};
38 use block_util::{
39     async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
40     fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync,
41     raw_async::RawFileDisk, raw_sync::RawFileDiskSync, vhdx_sync::VhdxDiskSync, ImageType,
42 };
43 #[cfg(target_arch = "aarch64")]
44 use devices::gic;
45 #[cfg(target_arch = "x86_64")]
46 use devices::ioapic;
47 #[cfg(target_arch = "aarch64")]
48 use devices::legacy::Pl011;
49 #[cfg(target_arch = "x86_64")]
50 use devices::legacy::Serial;
51 use devices::{
52     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
53 };
54 use hypervisor::{HypervisorType, HypervisorVmError, IoEventAddress};
55 use libc::{
56     cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED,
57     O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW,
58 };
59 #[cfg(target_arch = "x86_64")]
60 use pci::PciConfigIo;
61 use pci::{
62     DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping,
63     VfioUserPciDevice, VfioUserPciDeviceError,
64 };
65 use seccompiler::SeccompAction;
66 use serde::{Deserialize, Serialize};
67 use std::collections::{BTreeSet, HashMap};
68 use std::convert::TryInto;
69 use std::fs::{read_link, File, OpenOptions};
70 use std::io::{self, stdout, Seek, SeekFrom};
71 use std::mem::zeroed;
72 use std::num::Wrapping;
73 use std::os::unix::fs::OpenOptionsExt;
74 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
75 use std::path::PathBuf;
76 use std::result;
77 use std::sync::{Arc, Mutex};
78 use std::time::Instant;
79 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd};
80 use virtio_devices::transport::VirtioTransport;
81 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator};
82 use virtio_devices::vhost_user::VhostUserConfig;
83 use virtio_devices::{
84     AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource,
85 };
86 use virtio_devices::{Endpoint, IommuMapping};
87 use vm_allocator::{AddressAllocator, SystemAllocator};
88 use vm_device::dma_mapping::vfio::VfioDmaMapping;
89 use vm_device::dma_mapping::ExternalDmaMapping;
90 use vm_device::interrupt::{
91     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
92 };
93 use vm_device::{Bus, BusDevice, Resource};
94 use vm_memory::guest_memory::FileOffset;
95 #[cfg(target_arch = "aarch64")]
96 use vm_memory::GuestMemoryAtomic;
97 use vm_memory::GuestMemoryRegion;
98 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
99 #[cfg(target_arch = "x86_64")]
100 use vm_memory::{GuestAddressSpace, GuestMemory};
101 use vm_migration::{
102     protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
103     SnapshotDataSection, Snapshottable, Transportable,
104 };
105 use vm_virtio::AccessPlatform;
106 use vm_virtio::VirtioDeviceType;
107 use vmm_sys_util::eventfd::EventFd;
108 
// NOTE(review): only compiled for aarch64; presumably the length of a single
// MMIO device window — confirm at the use sites.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name.
// All of these identifiers start with a double underscore.
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";

// Devices that the user may name and for which we generate
// identifiers if the user doesn't give one.
// The `*_PREFIX` constants start with a single underscore.
// NOTE(review): WATCHDOG_DEVICE_NAME is a fixed, double-underscore name rather
// than a prefix, like the singleton names above — confirm whether it belongs
// in this group.
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
135 
/// Errors associated with the device manager.
///
/// Most variants wrap the lower-level error that caused the failure; unit
/// variants describe conditions for which no underlying error value exists.
/// This is the error type of [`DeviceManagerResult`].
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(vmm_sys_util::errno::Error),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot generate a ResizeSender from the Resize object.
    CreateResizeSender(virtio_devices::mem::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create VhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,

    /// Failed to create UEFI flash
    CreateUefiFlash(HypervisorVmError),

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier
    InvalidIdentifier(String),

    /// Error activating virtio device
    VirtioActivate(ActivateError),
}
/// Result alias whose error type is [`DeviceManagerError`].
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;
480 
// NOTE(review): presumably the size in bytes of the device manager's ACPI
// register window — confirm at the use site, which is outside this section.
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

// ioctl request codes issued on the pty main descriptor by `create_pty`.
// TIOCSPTLCK is called there with a pointer to a zero value (named `unlock`),
// and TIOCGTPEER's return value is used as the file descriptor of the peer.
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
const TIOCGTPEER: libc::c_int = 0x5441;
485 
486 pub fn create_pty(non_blocking: bool) -> io::Result<(File, File, PathBuf)> {
487     // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
488     // This is done to try and use the devpts filesystem that
489     // could be available for use in the process's namespace first.
490     // Ideally these are all the same file though but different
491     // kernels could have things setup differently.
492     // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
493     // for further details.
494 
495     let custom_flags = libc::O_NOCTTY | if non_blocking { libc::O_NONBLOCK } else { 0 };
496     let main = match OpenOptions::new()
497         .read(true)
498         .write(true)
499         .custom_flags(custom_flags)
500         .open("/dev/pts/ptmx")
501     {
502         Ok(f) => f,
503         _ => OpenOptions::new()
504             .read(true)
505             .write(true)
506             .custom_flags(custom_flags)
507             .open("/dev/ptmx")?,
508     };
509     let mut unlock: libc::c_ulong = 0;
510     // SAFETY: FFI call into libc, trivially safe
511     unsafe {
512         libc::ioctl(
513             main.as_raw_fd(),
514             TIOCSPTLCK.try_into().unwrap(),
515             &mut unlock,
516         )
517     };
518 
519     // SAFETY: FFI call into libc, trivally safe
520     let sub_fd = unsafe {
521         libc::ioctl(
522             main.as_raw_fd(),
523             TIOCGTPEER.try_into().unwrap(),
524             libc::O_NOCTTY | libc::O_RDWR,
525         )
526     };
527     if sub_fd == -1 {
528         return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
529     }
530 
531     let proc_path = PathBuf::from(format!("/proc/self/fd/{}", sub_fd));
532     let path = read_link(proc_path)?;
533 
534     // SAFETY: sub_fd is checked to be valid before being wrapped in File
535     Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
536 }
537 
538 #[derive(Default)]
539 pub struct Console {
540     console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
541 }
542 
543 impl Console {
544     pub fn update_console_size(&self) {
545         if let Some(resizer) = self.console_resizer.as_ref() {
546             resizer.update_console_size()
547         }
548     }
549 }
550 
/// Groups the allocators, buses and hypervisor handle that must be kept in
/// sync when a device address range changes (see its `DeviceRelocation`
/// implementation).
pub(crate) struct AddressManager {
    // System-wide allocator; used for I/O port ranges and for the 32-bit
    // MMIO hole when a BAR is moved.
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    // Port I/O bus, only present on x86_64.
    #[cfg(target_arch = "x86_64")]
    pub(crate) io_bus: Arc<Bus>,
    // Memory-mapped I/O bus.
    pub(crate) mmio_bus: Arc<Bus>,
    // Hypervisor VM handle; used to (un)register ioevents and to remove and
    // create user memory regions.
    vm: Arc<dyn hypervisor::Vm>,
    // Device tree whose `PciBar` resources are updated when a BAR is moved.
    device_tree: Arc<Mutex<DeviceTree>>,
    // Allocators searched by address range when a 64-bit memory BAR is moved.
    // NOTE(review): presumably one allocator per PCI segment — confirm.
    pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}
560 
impl DeviceRelocation for AddressManager {
    /// Relocates a PCI BAR of `pci_dev` from `old_base` to `new_base`.
    ///
    /// The steps are performed in this order:
    /// 1. free the old range and allocate the new one in the allocator that
    ///    matches `region_type`;
    /// 2. move the range on the PIO bus (I/O BARs, x86_64 only) or on the
    ///    MMIO bus (memory BARs);
    /// 3. update the matching `Resource::PciBar` entry in the device tree,
    ///    if the device reports an id;
    /// 4. for `VirtioPciDevice`s, either re-register the ioeventfds or remap
    ///    the shared memory region at the new address;
    /// 5. delegate to `pci_dev.move_bar(old_base, new_base)`.
    ///
    /// # Errors
    ///
    /// Returns an `io::Error` of kind `Other` when the new range cannot be
    /// allocated, a bus range cannot be updated, the device or its BAR
    /// resource is missing from the device tree, or a hypervisor call fails.
    /// The error of the final `pci_dev.move_bar` call is returned unchanged.
    ///
    /// NOTE(review): the steps are not rolled back on failure; in particular
    /// the old range is freed before the new one is allocated, so a failed
    /// allocation returns with the old range already released.
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                // NOTE(review): on aarch64 this only logs; execution still
                // continues with the device tree update below.
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                // Update system allocator
                if region_type == PciBarRegionType::Memory32BitRegion {
                    // 32-bit BARs are tracked in the system allocator's MMIO hole.
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);

                    // The BAR length is also passed as the third argument.
                    // NOTE(review): presumably the alignment — confirm.
                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_mmio_hole_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            Some(len),
                        )
                        .ok_or_else(|| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                "failed allocating new 32 bits MMIO range",
                            )
                        })?;
                } else {
                    // Find the specific allocator that this BAR was allocated from and use it for new one
                    // NOTE(review): if no allocator contains `old_base`, the
                    // loop ends without updating any allocator and without
                    // reporting an error.
                    for allocator in &self.pci_mmio_allocators {
                        let allocator_base = allocator.lock().unwrap().base();
                        let allocator_end = allocator.lock().unwrap().end();

                        // The comparison treats `end()` as an inclusive bound.
                        if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                            allocator
                                .lock()
                                .unwrap()
                                .free(GuestAddress(old_base), len as GuestUsize);

                            allocator
                                .lock()
                                .unwrap()
                                .allocate(
                                    Some(GuestAddress(new_base)),
                                    len as GuestUsize,
                                    Some(len),
                                )
                                .ok_or_else(|| {
                                    io::Error::new(
                                        io::ErrorKind::Other,
                                        "failed allocating new 64 bits MMIO range",
                                    )
                                })?;

                            break;
                        }
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        // Devices that report no id are skipped here.
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                // Only the first BAR resource matching both the region type
                // and the old base address is rewritten.
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{:x} for device {}",
                            old_base, id
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {} from device tree", id),
                ));
            }
        }

        // Extra handling that only applies to virtio-pci devices.
        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            // NOTE(review): the config BAR address is compared against
            // `new_base`, which suggests it already reflects the relocated
            // address at this point — confirm.
            if bar_addr == new_base {
                // Drop the ioeventfds registered at the old location...
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {:?}", e),
                        )
                    })?;
                }
                // ...and register them again at the new one.
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {:?}", e),
                            )
                        })?;
                }
            } else {
                // Another BAR was moved: remap the device's shared memory
                // region if it was located at the old base address.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        // Describe and remove the mapping at the old address.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {:?}", e),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        // The same slot, length and host address are reused.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {:?}", e),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {:?}", e),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the device itself record the new BAR address.
        pci_dev.move_bar(old_base, new_base)
    }
}
770 
/// Serializable state of the device manager.
// NOTE(review): presumably the payload saved and restored through the
// snapshot/restore path — confirm at the use sites.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    // Tree describing the devices.
    device_tree: DeviceTree,
    // Wrapping counter of device identifiers.
    device_id_cnt: Wrapping<usize>,
}
776 
/// Both ends of a pseudo-terminal together with a filesystem path for it.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub sub: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    /// Duplicates both file handles and copies the path.
    ///
    /// # Panics
    ///
    /// Panics if either file handle cannot be duplicated.
    fn clone(&self) -> Self {
        let main = self.main.try_clone().unwrap();
        let sub = self.sub.try_clone().unwrap();
        let path = self.path.clone();

        Self { main, sub, path }
    }
}
793 
/// Shared handle to one of the supported PCI device implementations.
///
/// Every variant holds an `Arc<Mutex<_>>`, so cloning a handle yields another
/// reference to the same device rather than a copy of it.
#[derive(Clone)]
pub enum PciDeviceHandle {
    /// A VFIO PCI device.
    Vfio(Arc<Mutex<VfioPciDevice>>),
    /// A virtio PCI device.
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    /// A vfio-user PCI device.
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}
800 
/// A virtio device together with the metadata the device manager keeps
/// alongside it.
#[derive(Clone)]
struct MetaVirtioDevice {
    // The shared virtio device object.
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // NOTE(review): presumably whether the device is placed behind the
    // virtual IOMMU — confirm.
    iommu: bool,
    // Identifier of the device.
    id: String,
    // NOTE(review): presumably the index of the PCI segment the device is
    // attached to — confirm.
    pci_segment: u16,
    // Optional external DMA mapping handler associated with the device.
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}
809 
/// Optional ACPI generic addresses of platform registers.
///
/// Every field is `None` in the default value.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    /// Address of the PM timer, if any.
    pub pm_timer_address: Option<GenericAddress>,
    /// Address of the reset register, if any.
    pub reset_reg_address: Option<GenericAddress>,
    /// Address of the sleep control register, if any.
    pub sleep_control_reg_address: Option<GenericAddress>,
    /// Address of the sleep status register, if any.
    pub sleep_status_reg_address: Option<GenericAddress>,
}
817 
/// Central owner of a VM's devices and of the resources used to build them:
/// address allocators and buses, interrupt managers, PCI segments, the device
/// tree and handles to individual devices.
///
/// Instances are created through `DeviceManager::new()`, which returns the
/// manager wrapped in `Arc<Mutex<_>>` and registers it on the MMIO bus.
818 pub struct DeviceManager {
819     // The underlying hypervisor
820     hypervisor_type: HypervisorType,
821 
822     // Manage address space related to devices
823     address_manager: Arc<AddressManager>,
824 
825     // Console abstraction
826     console: Arc<Console>,
827 
828     // console PTY
829     console_pty: Option<Arc<Mutex<PtyPair>>>,
830 
831     // serial PTY
832     serial_pty: Option<Arc<Mutex<PtyPair>>>,
833 
834     // Serial Manager
835     serial_manager: Option<Arc<SerialManager>>,
836 
837     // Console resize pipe, exposed through `console_resize_pipe()`.
    // NOTE(review): presumably populated while the console device is added;
    // confirm against `add_console_device()`.
838     console_resize_pipe: Option<Arc<File>>,
839 
840     // Interrupt controller
841     #[cfg(target_arch = "x86_64")]
842     interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
843     #[cfg(target_arch = "aarch64")]
844     interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,
845 
846     // Things to be added to the commandline (e.g. aarch64 early console)
847     #[cfg(target_arch = "aarch64")]
848     cmdline_additions: Vec<String>,
849 
850     // ACPI GED notification device
851     ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,
852 
853     // VM configuration
854     config: Arc<Mutex<VmConfig>>,
855 
856     // Memory Manager
857     memory_manager: Arc<Mutex<MemoryManager>>,
858 
859     // The virtio devices on the system
860     virtio_devices: Vec<MetaVirtioDevice>,
861 
862     // List of bus devices
863     // Let the DeviceManager keep strong references to the BusDevice devices.
864     // This allows the IO and MMIO buses to be provided with Weak references,
865     // which prevents cyclic dependencies.
866     bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,
867 
868     // Counter to keep track of the consumed device IDs.
869     device_id_cnt: Wrapping<usize>,
870 
    // PCI segments; index 0 holds the default segment created in `new()`.
871     pci_segments: Vec<PciSegment>,
872 
873     #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
874     // MSI Interrupt Manager
875     msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
876 
877     #[cfg_attr(feature = "mshv", allow(dead_code))]
878     // Legacy Interrupt Manager
879     legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,
880 
881     // Passthrough device handle
882     passthrough_device: Option<VfioDeviceFd>,
883 
884     // VFIO container
885     // Only one container can be created, therefore it is stored as part of the
886     // DeviceManager to be reused.
887     vfio_container: Option<Arc<VfioContainer>>,
888 
889     // Paravirtualized IOMMU
890     iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
891     iommu_mapping: Option<Arc<IommuMapping>>,
892 
893     // PCI information about devices attached to the paravirtualized IOMMU
894     // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
895     // representing the devices attached to the virtual IOMMU. This is useful
896     // information for filling the ACPI VIOT table.
897     iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,
898 
899     // Tree of devices, representing the dependencies between devices.
900     // Useful for introspection, snapshot and restore.
901     device_tree: Arc<Mutex<DeviceTree>>,
902 
903     // Exit and reset events
904     exit_evt: EventFd,
905     reset_evt: EventFd,
906 
907     #[cfg(target_arch = "aarch64")]
    // MMIO address, length and IRQ of registered devices, keyed by device
    // type and identifier.
908     id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,
909 
910     // seccomp action
911     seccomp_action: SeccompAction,
912 
913     // List of guest NUMA nodes.
914     numa_nodes: NumaNodes,
915 
916     // Possible handle to the virtio-balloon device
917     balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,
918 
919     // Virtio Device activation EventFd to allow the VMM thread to trigger device
920     // activation and thus start the threads from the VMM thread
921     activate_evt: EventFd,
922 
    // Base address of the MMIO region (DEVICE_MANAGER_ACPI_SIZE bytes) at
    // which the DeviceManager itself is inserted on the MMIO bus by `new()`.
923     acpi_address: GuestAddress,
924 
    // Initialized to 0 by `new()`.
    // NOTE(review): presumably the index of the PCI segment currently
    // selected through the DeviceManager's MMIO region; confirm.
925     selected_segment: usize,
926 
927     // Handles to the virtio-mem devices
928     virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,
929 
930     #[cfg(target_arch = "aarch64")]
931     // GPIO device for AArch64
932     gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,
933 
934     #[cfg(target_arch = "aarch64")]
935     // Flash device for UEFI on AArch64
936     uefi_flash: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
937 
938     // Flag to force setting the iommu on virtio devices
939     force_iommu: bool,
940 
941     // Helps identify if the VM is currently being restored
942     restoring: bool,
943 
944     // io_uring availability if detected
945     io_uring_supported: Option<bool>,
946 
947     // List of unique identifiers provided at boot through the configuration.
948     boot_id_list: BTreeSet<String>,
949 
950     // Start time of the VM
951     timestamp: Instant,
952 
953     // Pending activations
954     pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,
955 
956     // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
957     acpi_platform_addresses: AcpiPlatformAddresses,
958 }
959 
960 impl DeviceManager {
961     #[allow(clippy::too_many_arguments)]
962     pub fn new(
963         hypervisor_type: HypervisorType,
964         vm: Arc<dyn hypervisor::Vm>,
965         config: Arc<Mutex<VmConfig>>,
966         memory_manager: Arc<Mutex<MemoryManager>>,
967         exit_evt: &EventFd,
968         reset_evt: &EventFd,
969         seccomp_action: SeccompAction,
970         numa_nodes: NumaNodes,
971         activate_evt: &EventFd,
972         force_iommu: bool,
973         restoring: bool,
974         boot_id_list: BTreeSet<String>,
975         timestamp: Instant,
976     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
977         let device_tree = Arc::new(Mutex::new(DeviceTree::new()));
978 
979         let num_pci_segments =
980             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
981                 platform_config.num_pci_segments
982             } else {
983                 1
984             };
985 
986         let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
987         let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;
988 
989         // Start each PCI segment range on a 4GiB boundary
990         let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
991             / ((4 << 30) * num_pci_segments as u64)
992             * (4 << 30);
993 
994         let mut pci_mmio_allocators = vec![];
995         for i in 0..num_pci_segments as u64 {
996             let mmio_start = start_of_device_area + i * pci_segment_size;
997             let allocator = Arc::new(Mutex::new(
998                 AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
999             ));
1000             pci_mmio_allocators.push(allocator)
1001         }
1002 
1003         let address_manager = Arc::new(AddressManager {
1004             allocator: memory_manager.lock().unwrap().allocator(),
1005             #[cfg(target_arch = "x86_64")]
1006             io_bus: Arc::new(Bus::new()),
1007             mmio_bus: Arc::new(Bus::new()),
1008             vm: vm.clone(),
1009             device_tree: Arc::clone(&device_tree),
1010             pci_mmio_allocators,
1011         });
1012 
1013         // First we create the MSI interrupt manager, the legacy one is created
1014         // later, after the IOAPIC device creation.
1015         // The reason we create the MSI one first is because the IOAPIC needs it,
1016         // and then the legacy interrupt manager needs an IOAPIC. So we're
1017         // handling a linear dependency chain:
1018         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1019         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1020             Arc::new(MsiInterruptManager::new(
1021                 Arc::clone(&address_manager.allocator),
1022                 vm,
1023             ));
1024 
1025         let acpi_address = address_manager
1026             .allocator
1027             .lock()
1028             .unwrap()
1029             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1030             .ok_or(DeviceManagerError::AllocateIoPort)?;
1031 
1032         let mut pci_irq_slots = [0; 32];
1033         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1034             &address_manager,
1035             &mut pci_irq_slots,
1036         )?;
1037 
1038         let mut pci_segments = vec![PciSegment::new_default_segment(
1039             &address_manager,
1040             Arc::clone(&address_manager.pci_mmio_allocators[0]),
1041             &pci_irq_slots,
1042         )?];
1043 
1044         for i in 1..num_pci_segments as usize {
1045             pci_segments.push(PciSegment::new(
1046                 i as u16,
1047                 &address_manager,
1048                 Arc::clone(&address_manager.pci_mmio_allocators[i]),
1049                 &pci_irq_slots,
1050             )?);
1051         }
1052 
1053         let device_manager = DeviceManager {
1054             hypervisor_type,
1055             address_manager: Arc::clone(&address_manager),
1056             console: Arc::new(Console::default()),
1057             interrupt_controller: None,
1058             #[cfg(target_arch = "aarch64")]
1059             cmdline_additions: Vec::new(),
1060             ged_notification_device: None,
1061             config,
1062             memory_manager,
1063             virtio_devices: Vec::new(),
1064             bus_devices: Vec::new(),
1065             device_id_cnt: Wrapping(0),
1066             msi_interrupt_manager,
1067             legacy_interrupt_manager: None,
1068             passthrough_device: None,
1069             vfio_container: None,
1070             iommu_device: None,
1071             iommu_mapping: None,
1072             iommu_attached_devices: None,
1073             pci_segments,
1074             device_tree,
1075             exit_evt: exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
1076             reset_evt: reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
1077             #[cfg(target_arch = "aarch64")]
1078             id_to_dev_info: HashMap::new(),
1079             seccomp_action,
1080             numa_nodes,
1081             balloon: None,
1082             activate_evt: activate_evt
1083                 .try_clone()
1084                 .map_err(DeviceManagerError::EventFd)?,
1085             acpi_address,
1086             selected_segment: 0,
1087             serial_pty: None,
1088             serial_manager: None,
1089             console_pty: None,
1090             console_resize_pipe: None,
1091             virtio_mem_devices: Vec::new(),
1092             #[cfg(target_arch = "aarch64")]
1093             gpio_device: None,
1094             #[cfg(target_arch = "aarch64")]
1095             uefi_flash: None,
1096             force_iommu,
1097             restoring,
1098             io_uring_supported: None,
1099             boot_id_list,
1100             timestamp,
1101             pending_activations: Arc::new(Mutex::new(Vec::default())),
1102             acpi_platform_addresses: AcpiPlatformAddresses::default(),
1103         };
1104 
1105         let device_manager = Arc::new(Mutex::new(device_manager));
1106 
1107         address_manager
1108             .mmio_bus
1109             .insert(
1110                 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
1111                 acpi_address.0,
1112                 DEVICE_MANAGER_ACPI_SIZE as u64,
1113             )
1114             .map_err(DeviceManagerError::BusError)?;
1115 
1116         Ok(device_manager)
1117     }
1118 
1119     pub fn serial_pty(&self) -> Option<PtyPair> {
1120         self.serial_pty
1121             .as_ref()
1122             .map(|pty| pty.lock().unwrap().clone())
1123     }
1124 
1125     pub fn console_pty(&self) -> Option<PtyPair> {
1126         self.console_pty
1127             .as_ref()
1128             .map(|pty| pty.lock().unwrap().clone())
1129     }
1130 
1131     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1132         self.console_resize_pipe.as_ref().map(Arc::clone)
1133     }
1134 
1135     pub fn create_devices(
1136         &mut self,
1137         serial_pty: Option<PtyPair>,
1138         console_pty: Option<PtyPair>,
1139         console_resize_pipe: Option<File>,
1140     ) -> DeviceManagerResult<()> {
1141         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1142 
1143         let interrupt_controller = self.add_interrupt_controller()?;
1144 
1145         // Now we can create the legacy interrupt manager, which needs the freshly
1146         // formed IOAPIC device.
1147         let legacy_interrupt_manager: Arc<
1148             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1149         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1150             &interrupt_controller,
1151         )));
1152 
1153         {
1154             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1155                 self.address_manager
1156                     .mmio_bus
1157                     .insert(
1158                         Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
1159                         acpi_address.0,
1160                         MEMORY_MANAGER_ACPI_SIZE as u64,
1161                     )
1162                     .map_err(DeviceManagerError::BusError)?;
1163             }
1164         }
1165 
1166         #[cfg(target_arch = "x86_64")]
1167         self.add_legacy_devices(
1168             self.reset_evt
1169                 .try_clone()
1170                 .map_err(DeviceManagerError::EventFd)?,
1171         )?;
1172 
1173         #[cfg(target_arch = "aarch64")]
1174         self.add_legacy_devices(&legacy_interrupt_manager)?;
1175 
1176         {
1177             self.ged_notification_device = self.add_acpi_devices(
1178                 &legacy_interrupt_manager,
1179                 self.reset_evt
1180                     .try_clone()
1181                     .map_err(DeviceManagerError::EventFd)?,
1182                 self.exit_evt
1183                     .try_clone()
1184                     .map_err(DeviceManagerError::EventFd)?,
1185             )?;
1186         }
1187 
1188         self.console = self.add_console_device(
1189             &legacy_interrupt_manager,
1190             &mut virtio_devices,
1191             serial_pty,
1192             console_pty,
1193             console_resize_pipe,
1194         )?;
1195 
1196         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1197 
1198         virtio_devices.append(&mut self.make_virtio_devices()?);
1199 
1200         self.add_pci_devices(virtio_devices.clone())?;
1201 
1202         self.virtio_devices = virtio_devices;
1203 
1204         Ok(())
1205     }
1206 
1207     fn state(&self) -> DeviceManagerState {
1208         DeviceManagerState {
1209             device_tree: self.device_tree.lock().unwrap().clone(),
1210             device_id_cnt: self.device_id_cnt,
1211         }
1212     }
1213 
1214     fn set_state(&mut self, state: &DeviceManagerState) {
1215         *self.device_tree.lock().unwrap() = state.device_tree.clone();
1216         self.device_id_cnt = state.device_id_cnt;
1217     }
1218 
1219     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1220         #[cfg(target_arch = "aarch64")]
1221         {
1222             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1223             let msi_start = arch::layout::GIC_V3_DIST_START.raw_value()
1224                 - arch::layout::GIC_V3_REDIST_SIZE * (vcpus as u64)
1225                 - arch::layout::GIC_V3_ITS_SIZE;
1226             let msi_end = msi_start + arch::layout::GIC_V3_ITS_SIZE - 1;
1227             (msi_start, msi_end)
1228         }
1229         #[cfg(target_arch = "x86_64")]
1230         (0xfee0_0000, 0xfeef_ffff)
1231     }
1232 
1233     #[cfg(target_arch = "aarch64")]
1234     /// Gets the information of the devices registered up to some point in time.
    ///
    /// The returned map is keyed by device type and identifier string; each
    /// entry holds the MMIO address, length and IRQ recorded for that device.
1235     pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
1236         &self.id_to_dev_info
1237     }
1238 
1239     #[allow(unused_variables)]
1240     fn add_pci_devices(
1241         &mut self,
1242         virtio_devices: Vec<MetaVirtioDevice>,
1243     ) -> DeviceManagerResult<()> {
1244         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1245 
1246         let iommu_device = if self.config.lock().unwrap().iommu {
1247             let (device, mapping) = virtio_devices::Iommu::new(
1248                 iommu_id.clone(),
1249                 self.seccomp_action.clone(),
1250                 self.exit_evt
1251                     .try_clone()
1252                     .map_err(DeviceManagerError::EventFd)?,
1253                 self.get_msi_iova_space(),
1254             )
1255             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1256             let device = Arc::new(Mutex::new(device));
1257             self.iommu_device = Some(Arc::clone(&device));
1258             self.iommu_mapping = Some(mapping);
1259 
1260             // Fill the device tree with a new node. In case of restore, we
1261             // know there is nothing to do, so we can simply override the
1262             // existing entry.
1263             self.device_tree
1264                 .lock()
1265                 .unwrap()
1266                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1267 
1268             Some(device)
1269         } else {
1270             None
1271         };
1272 
1273         let mut iommu_attached_devices = Vec::new();
1274         {
1275             for handle in virtio_devices {
1276                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1277                     self.iommu_mapping.clone()
1278                 } else {
1279                     None
1280                 };
1281 
1282                 let dev_id = self.add_virtio_pci_device(
1283                     handle.virtio_device,
1284                     &mapping,
1285                     handle.id,
1286                     handle.pci_segment,
1287                     handle.dma_handler,
1288                 )?;
1289 
1290                 if handle.iommu {
1291                     iommu_attached_devices.push(dev_id);
1292                 }
1293             }
1294 
1295             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1296             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1297 
1298             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1299             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1300 
1301             // Add all devices from forced iommu segments
1302             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1303                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1304                     for segment in iommu_segments {
1305                         for device in 0..32 {
1306                             let bdf = PciBdf::new(*segment, 0, device, 0);
1307                             if !iommu_attached_devices.contains(&bdf) {
1308                                 iommu_attached_devices.push(bdf);
1309                             }
1310                         }
1311                     }
1312                 }
1313             }
1314 
1315             if let Some(iommu_device) = iommu_device {
1316                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1317                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1318             }
1319         }
1320 
1321         for segment in &self.pci_segments {
1322             #[cfg(target_arch = "x86_64")]
1323             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1324                 self.bus_devices
1325                     .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
1326             }
1327 
1328             self.bus_devices
1329                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
1330         }
1331 
1332         Ok(())
1333     }
1334 
1335     #[cfg(target_arch = "aarch64")]
1336     fn add_interrupt_controller(
1337         &mut self,
1338     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1339         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1340             gic::Gic::new(
1341                 self.config.lock().unwrap().cpus.boot_vcpus,
1342                 Arc::clone(&self.msi_interrupt_manager),
1343             )
1344             .map_err(DeviceManagerError::CreateInterruptController)?,
1345         ));
1346 
1347         self.interrupt_controller = Some(interrupt_controller.clone());
1348 
1349         // Unlike x86_64, the "interrupt_controller" here for AArch64 is only
1350         // a `Gic` object that implements the `InterruptController` to provide
1351         // interrupt delivery service. This is not the real GIC device so that
1352         // we do not need to insert it to the device tree.
1353 
1354         Ok(interrupt_controller)
1355     }
1356 
    /// Returns a reference to the `Gic` interrupt controller, or `None` if
    /// none has been stored yet (the field is set by
    /// `add_interrupt_controller()`).
1357     #[cfg(target_arch = "aarch64")]
1358     pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
1359         self.interrupt_controller.as_ref()
1360     }
1361 
1362     #[cfg(target_arch = "x86_64")]
1363     fn add_interrupt_controller(
1364         &mut self,
1365     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1366         let id = String::from(IOAPIC_DEVICE_NAME);
1367 
1368         // Create IOAPIC
1369         let interrupt_controller = Arc::new(Mutex::new(
1370             ioapic::Ioapic::new(
1371                 id.clone(),
1372                 APIC_START,
1373                 Arc::clone(&self.msi_interrupt_manager),
1374             )
1375             .map_err(DeviceManagerError::CreateInterruptController)?,
1376         ));
1377 
1378         self.interrupt_controller = Some(interrupt_controller.clone());
1379 
1380         self.address_manager
1381             .mmio_bus
1382             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1383             .map_err(DeviceManagerError::BusError)?;
1384 
1385         self.bus_devices
1386             .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);
1387 
1388         // Fill the device tree with a new node. In case of restore, we
1389         // know there is nothing to do, so we can simply override the
1390         // existing entry.
1391         self.device_tree
1392             .lock()
1393             .unwrap()
1394             .insert(id.clone(), device_node!(id, interrupt_controller));
1395 
1396         Ok(interrupt_controller)
1397     }
1398 
1399     fn add_acpi_devices(
1400         &mut self,
1401         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1402         reset_evt: EventFd,
1403         exit_evt: EventFd,
1404     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1405         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1406             exit_evt, reset_evt,
1407         )));
1408 
1409         self.bus_devices
1410             .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);
1411 
1412         #[cfg(target_arch = "x86_64")]
1413         {
1414             self.address_manager
1415                 .allocator
1416                 .lock()
1417                 .unwrap()
1418                 .allocate_io_addresses(Some(GuestAddress(0x3c0)), 0x8, None)
1419                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1420 
1421             self.address_manager
1422                 .io_bus
1423                 .insert(shutdown_device, 0x3c0, 0x4)
1424                 .map_err(DeviceManagerError::BusError)?;
1425             self.acpi_platform_addresses.sleep_control_reg_address =
1426                 Some(GenericAddress::io_port_address::<u8>(0x3c0));
1427             self.acpi_platform_addresses.sleep_status_reg_address =
1428                 Some(GenericAddress::io_port_address::<u8>(0x3c0));
1429             self.acpi_platform_addresses.reset_reg_address =
1430                 Some(GenericAddress::io_port_address::<u8>(0x3c0));
1431         }
1432 
1433         let ged_irq = self
1434             .address_manager
1435             .allocator
1436             .lock()
1437             .unwrap()
1438             .allocate_irq()
1439             .unwrap();
1440         let interrupt_group = interrupt_manager
1441             .create_group(LegacyIrqGroupConfig {
1442                 irq: ged_irq as InterruptIndex,
1443             })
1444             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1445         let ged_address = self
1446             .address_manager
1447             .allocator
1448             .lock()
1449             .unwrap()
1450             .allocate_platform_mmio_addresses(
1451                 None,
1452                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1453                 None,
1454             )
1455             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1456         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1457             interrupt_group,
1458             ged_irq,
1459             ged_address,
1460         )));
1461         self.address_manager
1462             .mmio_bus
1463             .insert(
1464                 ged_device.clone(),
1465                 ged_address.0,
1466                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1467             )
1468             .map_err(DeviceManagerError::BusError)?;
1469         self.bus_devices
1470             .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);
1471 
1472         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1473 
1474         self.bus_devices
1475             .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);
1476 
1477         #[cfg(target_arch = "x86_64")]
1478         {
1479             self.address_manager
1480                 .allocator
1481                 .lock()
1482                 .unwrap()
1483                 .allocate_io_addresses(Some(GuestAddress(0xb008)), 0x4, None)
1484                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1485 
1486             self.address_manager
1487                 .io_bus
1488                 .insert(pm_timer_device, 0xb008, 0x4)
1489                 .map_err(DeviceManagerError::BusError)?;
1490 
1491             self.acpi_platform_addresses.pm_timer_address =
1492                 Some(GenericAddress::io_port_address::<u32>(0xb008));
1493         }
1494 
1495         Ok(Some(ged_device))
1496     }
1497 
1498     #[cfg(target_arch = "x86_64")]
1499     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1500         // Add a shutdown device (i8042)
1501         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1502             reset_evt.try_clone().unwrap(),
1503         )));
1504 
1505         self.bus_devices
1506             .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);
1507 
1508         self.address_manager
1509             .io_bus
1510             .insert(i8042, 0x61, 0x4)
1511             .map_err(DeviceManagerError::BusError)?;
1512         {
1513             // Add a CMOS emulated device
1514             let mem_size = self
1515                 .memory_manager
1516                 .lock()
1517                 .unwrap()
1518                 .guest_memory()
1519                 .memory()
1520                 .last_addr()
1521                 .0
1522                 + 1;
1523             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1524             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1525 
1526             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1527                 mem_below_4g,
1528                 mem_above_4g,
1529                 reset_evt,
1530             )));
1531 
1532             self.bus_devices
1533                 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);
1534 
1535             self.address_manager
1536                 .io_bus
1537                 .insert(cmos, 0x70, 0x2)
1538                 .map_err(DeviceManagerError::BusError)?;
1539         }
1540         #[cfg(feature = "fwdebug")]
1541         {
1542             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1543 
1544             self.bus_devices
1545                 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);
1546 
1547             self.address_manager
1548                 .io_bus
1549                 .insert(fwdebug, 0x402, 0x1)
1550                 .map_err(DeviceManagerError::BusError)?;
1551         }
1552 
1553         // 0x80 debug port
1554         let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
1555         self.bus_devices
1556             .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
1557         self.address_manager
1558             .io_bus
1559             .insert(debug_port, 0x80, 0x1)
1560             .map_err(DeviceManagerError::BusError)?;
1561 
1562         Ok(())
1563     }
1564 
1565     #[cfg(target_arch = "aarch64")]
1566     fn add_legacy_devices(
1567         &mut self,
1568         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1569     ) -> DeviceManagerResult<()> {
1570         // Add a RTC device
1571         let rtc_irq = self
1572             .address_manager
1573             .allocator
1574             .lock()
1575             .unwrap()
1576             .allocate_irq()
1577             .unwrap();
1578 
1579         let interrupt_group = interrupt_manager
1580             .create_group(LegacyIrqGroupConfig {
1581                 irq: rtc_irq as InterruptIndex,
1582             })
1583             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1584 
1585         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1586 
1587         self.bus_devices
1588             .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);
1589 
1590         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1591 
1592         self.address_manager
1593             .mmio_bus
1594             .insert(rtc_device, addr.0, MMIO_LEN)
1595             .map_err(DeviceManagerError::BusError)?;
1596 
1597         self.id_to_dev_info.insert(
1598             (DeviceType::Rtc, "rtc".to_string()),
1599             MmioDeviceInfo {
1600                 addr: addr.0,
1601                 len: MMIO_LEN,
1602                 irq: rtc_irq,
1603             },
1604         );
1605 
1606         // Add a GPIO device
1607         let id = String::from(GPIO_DEVICE_NAME);
1608         let gpio_irq = self
1609             .address_manager
1610             .allocator
1611             .lock()
1612             .unwrap()
1613             .allocate_irq()
1614             .unwrap();
1615 
1616         let interrupt_group = interrupt_manager
1617             .create_group(LegacyIrqGroupConfig {
1618                 irq: gpio_irq as InterruptIndex,
1619             })
1620             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1621 
1622         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1623             id.clone(),
1624             interrupt_group,
1625         )));
1626 
1627         self.bus_devices
1628             .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);
1629 
1630         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1631 
1632         self.address_manager
1633             .mmio_bus
1634             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1635             .map_err(DeviceManagerError::BusError)?;
1636 
1637         self.gpio_device = Some(gpio_device.clone());
1638 
1639         self.id_to_dev_info.insert(
1640             (DeviceType::Gpio, "gpio".to_string()),
1641             MmioDeviceInfo {
1642                 addr: addr.0,
1643                 len: MMIO_LEN,
1644                 irq: gpio_irq,
1645             },
1646         );
1647 
1648         self.device_tree
1649             .lock()
1650             .unwrap()
1651             .insert(id.clone(), device_node!(id, gpio_device));
1652 
1653         // On AArch64, the UEFI binary requires a flash device at address 0.
1654         // 4 MiB memory is mapped to simulate the flash.
1655         let uefi_mem_slot = self.memory_manager.lock().unwrap().allocate_memory_slot();
1656         let uefi_region = GuestRegionMmap::new(
1657             MmapRegion::new(arch::layout::UEFI_SIZE as usize).unwrap(),
1658             arch::layout::UEFI_START,
1659         )
1660         .unwrap();
1661         let uefi_mem_region = self
1662             .memory_manager
1663             .lock()
1664             .unwrap()
1665             .vm
1666             .make_user_memory_region(
1667                 uefi_mem_slot,
1668                 uefi_region.start_addr().raw_value(),
1669                 uefi_region.len() as u64,
1670                 uefi_region.as_ptr() as u64,
1671                 false,
1672                 false,
1673             );
1674         self.memory_manager
1675             .lock()
1676             .unwrap()
1677             .vm
1678             .create_user_memory_region(uefi_mem_region)
1679             .map_err(DeviceManagerError::CreateUefiFlash)?;
1680 
1681         let uefi_flash =
1682             GuestMemoryAtomic::new(GuestMemoryMmap::from_regions(vec![uefi_region]).unwrap());
1683         self.uefi_flash = Some(uefi_flash);
1684 
1685         Ok(())
1686     }
1687 
1688     #[cfg(target_arch = "x86_64")]
1689     fn add_serial_device(
1690         &mut self,
1691         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1692         serial_writer: Option<Box<dyn io::Write + Send>>,
1693     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1694         // Serial is tied to IRQ #4
1695         let serial_irq = 4;
1696 
1697         let id = String::from(SERIAL_DEVICE_NAME);
1698 
1699         let interrupt_group = interrupt_manager
1700             .create_group(LegacyIrqGroupConfig {
1701                 irq: serial_irq as InterruptIndex,
1702             })
1703             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1704 
1705         let serial = Arc::new(Mutex::new(Serial::new(
1706             id.clone(),
1707             interrupt_group,
1708             serial_writer,
1709         )));
1710 
1711         self.bus_devices
1712             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1713 
1714         self.address_manager
1715             .allocator
1716             .lock()
1717             .unwrap()
1718             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1719             .ok_or(DeviceManagerError::AllocateIoPort)?;
1720 
1721         self.address_manager
1722             .io_bus
1723             .insert(serial.clone(), 0x3f8, 0x8)
1724             .map_err(DeviceManagerError::BusError)?;
1725 
1726         // Fill the device tree with a new node. In case of restore, we
1727         // know there is nothing to do, so we can simply override the
1728         // existing entry.
1729         self.device_tree
1730             .lock()
1731             .unwrap()
1732             .insert(id.clone(), device_node!(id, serial));
1733 
1734         Ok(serial)
1735     }
1736 
1737     #[cfg(target_arch = "aarch64")]
1738     fn add_serial_device(
1739         &mut self,
1740         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1741         serial_writer: Option<Box<dyn io::Write + Send>>,
1742     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1743         let id = String::from(SERIAL_DEVICE_NAME);
1744 
1745         let serial_irq = self
1746             .address_manager
1747             .allocator
1748             .lock()
1749             .unwrap()
1750             .allocate_irq()
1751             .unwrap();
1752 
1753         let interrupt_group = interrupt_manager
1754             .create_group(LegacyIrqGroupConfig {
1755                 irq: serial_irq as InterruptIndex,
1756             })
1757             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1758 
1759         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1760             id.clone(),
1761             interrupt_group,
1762             serial_writer,
1763             self.timestamp,
1764         )));
1765 
1766         self.bus_devices
1767             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1768 
1769         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
1770 
1771         self.address_manager
1772             .mmio_bus
1773             .insert(serial.clone(), addr.0, MMIO_LEN)
1774             .map_err(DeviceManagerError::BusError)?;
1775 
1776         self.id_to_dev_info.insert(
1777             (DeviceType::Serial, DeviceType::Serial.to_string()),
1778             MmioDeviceInfo {
1779                 addr: addr.0,
1780                 len: MMIO_LEN,
1781                 irq: serial_irq,
1782             },
1783         );
1784 
1785         self.cmdline_additions
1786             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1787 
1788         // Fill the device tree with a new node. In case of restore, we
1789         // know there is nothing to do, so we can simply override the
1790         // existing entry.
1791         self.device_tree
1792             .lock()
1793             .unwrap()
1794             .insert(id.clone(), device_node!(id, serial));
1795 
1796         Ok(serial)
1797     }
1798 
1799     fn modify_mode<F: FnOnce(&mut termios)>(
1800         &self,
1801         fd: RawFd,
1802         f: F,
1803     ) -> vmm_sys_util::errno::Result<()> {
1804         // SAFETY: safe because we check the return value of isatty.
1805         if unsafe { isatty(fd) } != 1 {
1806             return Ok(());
1807         }
1808 
1809         // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr
1810         // and we check the return result.
1811         let mut termios: termios = unsafe { zeroed() };
1812         let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
1813         if ret < 0 {
1814             return vmm_sys_util::errno::errno_result();
1815         }
1816         f(&mut termios);
1817         // SAFETY: Safe because the syscall will only read the extent of termios and we check
1818         // the return result.
1819         let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
1820         if ret < 0 {
1821             return vmm_sys_util::errno::errno_result();
1822         }
1823 
1824         Ok(())
1825     }
1826 
1827     fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> {
1828         // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
1829         self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
1830     }
1831 
1832     fn listen_for_sigwinch_on_tty(&mut self, pty: &File) -> std::io::Result<()> {
1833         let seccomp_filter = get_seccomp_filter(
1834             &self.seccomp_action,
1835             Thread::PtyForeground,
1836             self.hypervisor_type,
1837         )
1838         .unwrap();
1839 
1840         match start_sigwinch_listener(seccomp_filter, pty) {
1841             Ok(pipe) => {
1842                 self.console_resize_pipe = Some(Arc::new(pipe));
1843             }
1844             Err(e) => {
1845                 warn!("Ignoring error from setting up SIGWINCH listener: {}", e)
1846             }
1847         }
1848 
1849         Ok(())
1850     }
1851 
1852     fn add_virtio_console_device(
1853         &mut self,
1854         virtio_devices: &mut Vec<MetaVirtioDevice>,
1855         console_pty: Option<PtyPair>,
1856         resize_pipe: Option<File>,
1857     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
1858         let console_config = self.config.lock().unwrap().console.clone();
1859         let endpoint = match console_config.mode {
1860             ConsoleOutputMode::File => {
1861                 let file = File::create(console_config.file.as_ref().unwrap())
1862                     .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
1863                 Endpoint::File(file)
1864             }
1865             ConsoleOutputMode::Pty => {
1866                 if let Some(pty) = console_pty {
1867                     self.config.lock().unwrap().console.file = Some(pty.path.clone());
1868                     let file = pty.main.try_clone().unwrap();
1869                     self.console_pty = Some(Arc::new(Mutex::new(pty)));
1870                     self.console_resize_pipe = resize_pipe.map(Arc::new);
1871                     Endpoint::FilePair(file.try_clone().unwrap(), file)
1872                 } else {
1873                     let (main, mut sub, path) =
1874                         create_pty(false).map_err(DeviceManagerError::ConsolePtyOpen)?;
1875                     self.set_raw_mode(&mut sub)
1876                         .map_err(DeviceManagerError::SetPtyRaw)?;
1877                     self.config.lock().unwrap().console.file = Some(path.clone());
1878                     let file = main.try_clone().unwrap();
1879                     assert!(resize_pipe.is_none());
1880                     self.listen_for_sigwinch_on_tty(&sub).unwrap();
1881                     self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
1882                     Endpoint::FilePair(file.try_clone().unwrap(), file)
1883                 }
1884             }
1885             ConsoleOutputMode::Tty => {
1886                 // Duplicating the file descriptors like this is needed as otherwise
1887                 // they will be closed on a reboot and the numbers reused
1888 
1889                 // SAFETY: FFI call to dup. Trivially safe.
1890                 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
1891                 if stdout == -1 {
1892                     return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
1893                 }
1894                 // SAFETY: stdout is valid and owned solely by us.
1895                 let stdout = unsafe { File::from_raw_fd(stdout) };
1896 
1897                 // If an interactive TTY then we can accept input
1898                 // SAFETY: FFI call. Trivially safe.
1899                 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
1900                     // SAFETY: FFI call to dup. Trivially safe.
1901                     let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
1902                     if stdin == -1 {
1903                         return vmm_sys_util::errno::errno_result()
1904                             .map_err(DeviceManagerError::DupFd);
1905                     }
1906                     // SAFETY: stdin is valid and owned solely by us.
1907                     let stdin = unsafe { File::from_raw_fd(stdin) };
1908 
1909                     Endpoint::FilePair(stdout, stdin)
1910                 } else {
1911                     Endpoint::File(stdout)
1912                 }
1913             }
1914             ConsoleOutputMode::Null => Endpoint::Null,
1915             ConsoleOutputMode::Off => return Ok(None),
1916         };
1917         let id = String::from(CONSOLE_DEVICE_NAME);
1918 
1919         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
1920             id.clone(),
1921             endpoint,
1922             self.console_resize_pipe
1923                 .as_ref()
1924                 .map(|p| p.try_clone().unwrap()),
1925             self.force_iommu | console_config.iommu,
1926             self.seccomp_action.clone(),
1927             self.exit_evt
1928                 .try_clone()
1929                 .map_err(DeviceManagerError::EventFd)?,
1930         )
1931         .map_err(DeviceManagerError::CreateVirtioConsole)?;
1932         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
1933         virtio_devices.push(MetaVirtioDevice {
1934             virtio_device: Arc::clone(&virtio_console_device)
1935                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
1936             iommu: console_config.iommu,
1937             id: id.clone(),
1938             pci_segment: 0,
1939             dma_handler: None,
1940         });
1941 
1942         // Fill the device tree with a new node. In case of restore, we
1943         // know there is nothing to do, so we can simply override the
1944         // existing entry.
1945         self.device_tree
1946             .lock()
1947             .unwrap()
1948             .insert(id.clone(), device_node!(id, virtio_console_device));
1949 
1950         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
1951         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
1952             Some(console_resizer)
1953         } else {
1954             None
1955         })
1956     }
1957 
1958     fn add_console_device(
1959         &mut self,
1960         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1961         virtio_devices: &mut Vec<MetaVirtioDevice>,
1962         serial_pty: Option<PtyPair>,
1963         console_pty: Option<PtyPair>,
1964         console_resize_pipe: Option<File>,
1965     ) -> DeviceManagerResult<Arc<Console>> {
1966         let serial_config = self.config.lock().unwrap().serial.clone();
1967         let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
1968             ConsoleOutputMode::File => Some(Box::new(
1969                 File::create(serial_config.file.as_ref().unwrap())
1970                     .map_err(DeviceManagerError::SerialOutputFileOpen)?,
1971             )),
1972             ConsoleOutputMode::Pty => {
1973                 if let Some(pty) = serial_pty {
1974                     self.config.lock().unwrap().serial.file = Some(pty.path.clone());
1975                     self.serial_pty = Some(Arc::new(Mutex::new(pty)));
1976                 } else {
1977                     let (main, mut sub, path) =
1978                         create_pty(true).map_err(DeviceManagerError::SerialPtyOpen)?;
1979                     self.set_raw_mode(&mut sub)
1980                         .map_err(DeviceManagerError::SetPtyRaw)?;
1981                     self.config.lock().unwrap().serial.file = Some(path.clone());
1982                     self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
1983                 }
1984                 None
1985             }
1986             ConsoleOutputMode::Tty => Some(Box::new(stdout())),
1987             ConsoleOutputMode::Off | ConsoleOutputMode::Null => None,
1988         };
1989         if serial_config.mode != ConsoleOutputMode::Off {
1990             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
1991             self.serial_manager = match serial_config.mode {
1992                 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty => {
1993                     let serial_manager =
1994                         SerialManager::new(serial, self.serial_pty.clone(), serial_config.mode)
1995                             .map_err(DeviceManagerError::CreateSerialManager)?;
1996                     if let Some(mut serial_manager) = serial_manager {
1997                         serial_manager
1998                             .start_thread(
1999                                 self.exit_evt
2000                                     .try_clone()
2001                                     .map_err(DeviceManagerError::EventFd)?,
2002                             )
2003                             .map_err(DeviceManagerError::SpawnSerialManager)?;
2004                         Some(Arc::new(serial_manager))
2005                     } else {
2006                         None
2007                     }
2008                 }
2009                 _ => None,
2010             };
2011         }
2012 
2013         let console_resizer =
2014             self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;
2015 
2016         Ok(Arc::new(Console { console_resizer }))
2017     }
2018 
2019     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2020         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
2021 
2022         // Create "standard" virtio devices (net/block/rng)
2023         devices.append(&mut self.make_virtio_block_devices()?);
2024         devices.append(&mut self.make_virtio_net_devices()?);
2025         devices.append(&mut self.make_virtio_rng_devices()?);
2026 
2027         // Add virtio-fs if required
2028         devices.append(&mut self.make_virtio_fs_devices()?);
2029 
2030         // Add virtio-pmem if required
2031         devices.append(&mut self.make_virtio_pmem_devices()?);
2032 
2033         // Add virtio-vsock if required
2034         devices.append(&mut self.make_virtio_vsock_devices()?);
2035 
2036         devices.append(&mut self.make_virtio_mem_devices()?);
2037 
2038         // Add virtio-balloon if required
2039         devices.append(&mut self.make_virtio_balloon_devices()?);
2040 
2041         // Add virtio-watchdog device
2042         devices.append(&mut self.make_virtio_watchdog_devices()?);
2043 
2044         // Add vDPA devices if required
2045         devices.append(&mut self.make_vdpa_devices()?);
2046 
2047         Ok(devices)
2048     }
2049 
2050     // Cache whether io_uring is supported to avoid probing for very block device
2051     fn io_uring_is_supported(&mut self) -> bool {
2052         if let Some(supported) = self.io_uring_supported {
2053             return supported;
2054         }
2055 
2056         let supported = block_io_uring_is_supported();
2057         self.io_uring_supported = Some(supported);
2058         supported
2059     }
2060 
2061     fn make_virtio_block_device(
2062         &mut self,
2063         disk_cfg: &mut DiskConfig,
2064     ) -> DeviceManagerResult<MetaVirtioDevice> {
2065         let id = if let Some(id) = &disk_cfg.id {
2066             id.clone()
2067         } else {
2068             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
2069             disk_cfg.id = Some(id.clone());
2070             id
2071         };
2072 
2073         info!("Creating virtio-block device: {:?}", disk_cfg);
2074 
2075         let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
2076             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
2077             let vu_cfg = VhostUserConfig {
2078                 socket,
2079                 num_queues: disk_cfg.num_queues,
2080                 queue_size: disk_cfg.queue_size,
2081             };
2082             let vhost_user_block = Arc::new(Mutex::new(
2083                 match virtio_devices::vhost_user::Blk::new(
2084                     id.clone(),
2085                     vu_cfg,
2086                     self.restoring,
2087                     self.seccomp_action.clone(),
2088                     self.exit_evt
2089                         .try_clone()
2090                         .map_err(DeviceManagerError::EventFd)?,
2091                     self.force_iommu,
2092                 ) {
2093                     Ok(vub_device) => vub_device,
2094                     Err(e) => {
2095                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2096                     }
2097                 },
2098             ));
2099 
2100             (
2101                 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2102                 vhost_user_block as Arc<Mutex<dyn Migratable>>,
2103             )
2104         } else {
2105             let mut options = OpenOptions::new();
2106             options.read(true);
2107             options.write(!disk_cfg.readonly);
2108             if disk_cfg.direct {
2109                 options.custom_flags(libc::O_DIRECT);
2110             }
2111             // Open block device path
2112             let mut file: File = options
2113                 .open(
2114                     disk_cfg
2115                         .path
2116                         .as_ref()
2117                         .ok_or(DeviceManagerError::NoDiskPath)?
2118                         .clone(),
2119                 )
2120                 .map_err(DeviceManagerError::Disk)?;
2121             let image_type =
2122                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2123 
2124             let image = match image_type {
2125                 ImageType::FixedVhd => {
2126                     // Use asynchronous backend relying on io_uring if the
2127                     // syscalls are supported.
2128                     if self.io_uring_is_supported() && !disk_cfg.disable_io_uring {
2129                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2130                         Box::new(
2131                             FixedVhdDiskAsync::new(file)
2132                                 .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2133                         ) as Box<dyn DiskFile>
2134                     } else {
2135                         info!("Using synchronous fixed VHD disk file");
2136                         Box::new(
2137                             FixedVhdDiskSync::new(file)
2138                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2139                         ) as Box<dyn DiskFile>
2140                     }
2141                 }
2142                 ImageType::Raw => {
2143                     // Use asynchronous backend relying on io_uring if the
2144                     // syscalls are supported.
2145                     if self.io_uring_is_supported() && !disk_cfg.disable_io_uring {
2146                         info!("Using asynchronous RAW disk file (io_uring)");
2147                         Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2148                     } else {
2149                         info!("Using synchronous RAW disk file");
2150                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2151                     }
2152                 }
2153                 ImageType::Qcow2 => {
2154                     info!("Using synchronous QCOW disk file");
2155                     Box::new(
2156                         QcowDiskSync::new(file, disk_cfg.direct)
2157                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2158                     ) as Box<dyn DiskFile>
2159                 }
2160                 ImageType::Vhdx => {
2161                     info!("Using synchronous VHDX disk file");
2162                     Box::new(
2163                         VhdxDiskSync::new(file)
2164                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2165                     ) as Box<dyn DiskFile>
2166                 }
2167             };
2168 
2169             let virtio_block = Arc::new(Mutex::new(
2170                 virtio_devices::Block::new(
2171                     id.clone(),
2172                     image,
2173                     disk_cfg
2174                         .path
2175                         .as_ref()
2176                         .ok_or(DeviceManagerError::NoDiskPath)?
2177                         .clone(),
2178                     disk_cfg.readonly,
2179                     self.force_iommu | disk_cfg.iommu,
2180                     disk_cfg.num_queues,
2181                     disk_cfg.queue_size,
2182                     self.seccomp_action.clone(),
2183                     disk_cfg.rate_limiter_config,
2184                     self.exit_evt
2185                         .try_clone()
2186                         .map_err(DeviceManagerError::EventFd)?,
2187                 )
2188                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2189             ));
2190 
2191             (
2192                 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2193                 virtio_block as Arc<Mutex<dyn Migratable>>,
2194             )
2195         };
2196 
2197         // Fill the device tree with a new node. In case of restore, we
2198         // know there is nothing to do, so we can simply override the
2199         // existing entry.
2200         self.device_tree
2201             .lock()
2202             .unwrap()
2203             .insert(id.clone(), device_node!(id, migratable_device));
2204 
2205         Ok(MetaVirtioDevice {
2206             virtio_device,
2207             iommu: disk_cfg.iommu,
2208             id,
2209             pci_segment: disk_cfg.pci_segment,
2210             dma_handler: None,
2211         })
2212     }
2213 
2214     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2215         let mut devices = Vec::new();
2216 
2217         let mut block_devices = self.config.lock().unwrap().disks.clone();
2218         if let Some(disk_list_cfg) = &mut block_devices {
2219             for disk_cfg in disk_list_cfg.iter_mut() {
2220                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2221             }
2222         }
2223         self.config.lock().unwrap().disks = block_devices;
2224 
2225         Ok(devices)
2226     }
2227 
2228     fn make_virtio_net_device(
2229         &mut self,
2230         net_cfg: &mut NetConfig,
2231     ) -> DeviceManagerResult<MetaVirtioDevice> {
2232         let id = if let Some(id) = &net_cfg.id {
2233             id.clone()
2234         } else {
2235             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2236             net_cfg.id = Some(id.clone());
2237             id
2238         };
2239         info!("Creating virtio-net device: {:?}", net_cfg);
2240 
2241         let (virtio_device, migratable_device) = if net_cfg.vhost_user {
2242             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2243             let vu_cfg = VhostUserConfig {
2244                 socket,
2245                 num_queues: net_cfg.num_queues,
2246                 queue_size: net_cfg.queue_size,
2247             };
2248             let server = match net_cfg.vhost_mode {
2249                 VhostMode::Client => false,
2250                 VhostMode::Server => true,
2251             };
2252             let vhost_user_net = Arc::new(Mutex::new(
2253                 match virtio_devices::vhost_user::Net::new(
2254                     id.clone(),
2255                     net_cfg.mac,
2256                     vu_cfg,
2257                     server,
2258                     self.seccomp_action.clone(),
2259                     self.restoring,
2260                     self.exit_evt
2261                         .try_clone()
2262                         .map_err(DeviceManagerError::EventFd)?,
2263                     self.force_iommu,
2264                 ) {
2265                     Ok(vun_device) => vun_device,
2266                     Err(e) => {
2267                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2268                     }
2269                 },
2270             ));
2271 
2272             (
2273                 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2274                 vhost_user_net as Arc<Mutex<dyn Migratable>>,
2275             )
2276         } else {
2277             let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
2278                 Arc::new(Mutex::new(
2279                     virtio_devices::Net::new(
2280                         id.clone(),
2281                         Some(tap_if_name),
2282                         None,
2283                         None,
2284                         Some(net_cfg.mac),
2285                         &mut net_cfg.host_mac,
2286                         self.force_iommu | net_cfg.iommu,
2287                         net_cfg.num_queues,
2288                         net_cfg.queue_size,
2289                         self.seccomp_action.clone(),
2290                         net_cfg.rate_limiter_config,
2291                         self.exit_evt
2292                             .try_clone()
2293                             .map_err(DeviceManagerError::EventFd)?,
2294                     )
2295                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2296                 ))
2297             } else if let Some(fds) = &net_cfg.fds {
2298                 Arc::new(Mutex::new(
2299                     virtio_devices::Net::from_tap_fds(
2300                         id.clone(),
2301                         fds,
2302                         Some(net_cfg.mac),
2303                         self.force_iommu | net_cfg.iommu,
2304                         net_cfg.queue_size,
2305                         self.seccomp_action.clone(),
2306                         net_cfg.rate_limiter_config,
2307                         self.exit_evt
2308                             .try_clone()
2309                             .map_err(DeviceManagerError::EventFd)?,
2310                     )
2311                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2312                 ))
2313             } else {
2314                 Arc::new(Mutex::new(
2315                     virtio_devices::Net::new(
2316                         id.clone(),
2317                         None,
2318                         Some(net_cfg.ip),
2319                         Some(net_cfg.mask),
2320                         Some(net_cfg.mac),
2321                         &mut net_cfg.host_mac,
2322                         self.force_iommu | net_cfg.iommu,
2323                         net_cfg.num_queues,
2324                         net_cfg.queue_size,
2325                         self.seccomp_action.clone(),
2326                         net_cfg.rate_limiter_config,
2327                         self.exit_evt
2328                             .try_clone()
2329                             .map_err(DeviceManagerError::EventFd)?,
2330                     )
2331                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2332                 ))
2333             };
2334 
2335             (
2336                 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2337                 virtio_net as Arc<Mutex<dyn Migratable>>,
2338             )
2339         };
2340 
2341         // Fill the device tree with a new node. In case of restore, we
2342         // know there is nothing to do, so we can simply override the
2343         // existing entry.
2344         self.device_tree
2345             .lock()
2346             .unwrap()
2347             .insert(id.clone(), device_node!(id, migratable_device));
2348 
2349         Ok(MetaVirtioDevice {
2350             virtio_device,
2351             iommu: net_cfg.iommu,
2352             id,
2353             pci_segment: net_cfg.pci_segment,
2354             dma_handler: None,
2355         })
2356     }
2357 
2358     /// Add virto-net and vhost-user-net devices
2359     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2360         let mut devices = Vec::new();
2361         let mut net_devices = self.config.lock().unwrap().net.clone();
2362         if let Some(net_list_cfg) = &mut net_devices {
2363             for net_cfg in net_list_cfg.iter_mut() {
2364                 devices.push(self.make_virtio_net_device(net_cfg)?);
2365             }
2366         }
2367         self.config.lock().unwrap().net = net_devices;
2368 
2369         Ok(devices)
2370     }
2371 
2372     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2373         let mut devices = Vec::new();
2374 
2375         // Add virtio-rng if required
2376         let rng_config = self.config.lock().unwrap().rng.clone();
2377         if let Some(rng_path) = rng_config.src.to_str() {
2378             info!("Creating virtio-rng device: {:?}", rng_config);
2379             let id = String::from(RNG_DEVICE_NAME);
2380 
2381             let virtio_rng_device = Arc::new(Mutex::new(
2382                 virtio_devices::Rng::new(
2383                     id.clone(),
2384                     rng_path,
2385                     self.force_iommu | rng_config.iommu,
2386                     self.seccomp_action.clone(),
2387                     self.exit_evt
2388                         .try_clone()
2389                         .map_err(DeviceManagerError::EventFd)?,
2390                 )
2391                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2392             ));
2393             devices.push(MetaVirtioDevice {
2394                 virtio_device: Arc::clone(&virtio_rng_device)
2395                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2396                 iommu: rng_config.iommu,
2397                 id: id.clone(),
2398                 pci_segment: 0,
2399                 dma_handler: None,
2400             });
2401 
2402             // Fill the device tree with a new node. In case of restore, we
2403             // know there is nothing to do, so we can simply override the
2404             // existing entry.
2405             self.device_tree
2406                 .lock()
2407                 .unwrap()
2408                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2409         }
2410 
2411         Ok(devices)
2412     }
2413 
2414     fn make_virtio_fs_device(
2415         &mut self,
2416         fs_cfg: &mut FsConfig,
2417     ) -> DeviceManagerResult<MetaVirtioDevice> {
2418         let id = if let Some(id) = &fs_cfg.id {
2419             id.clone()
2420         } else {
2421             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2422             fs_cfg.id = Some(id.clone());
2423             id
2424         };
2425 
2426         info!("Creating virtio-fs device: {:?}", fs_cfg);
2427 
2428         let mut node = device_node!(id);
2429 
2430         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2431             let virtio_fs_device = Arc::new(Mutex::new(
2432                 virtio_devices::vhost_user::Fs::new(
2433                     id.clone(),
2434                     fs_socket,
2435                     &fs_cfg.tag,
2436                     fs_cfg.num_queues,
2437                     fs_cfg.queue_size,
2438                     None,
2439                     self.seccomp_action.clone(),
2440                     self.restoring,
2441                     self.exit_evt
2442                         .try_clone()
2443                         .map_err(DeviceManagerError::EventFd)?,
2444                     self.force_iommu,
2445                 )
2446                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2447             ));
2448 
2449             // Update the device tree with the migratable device.
2450             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2451             self.device_tree.lock().unwrap().insert(id.clone(), node);
2452 
2453             Ok(MetaVirtioDevice {
2454                 virtio_device: Arc::clone(&virtio_fs_device)
2455                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2456                 iommu: false,
2457                 id,
2458                 pci_segment: fs_cfg.pci_segment,
2459                 dma_handler: None,
2460             })
2461         } else {
2462             Err(DeviceManagerError::NoVirtioFsSock)
2463         }
2464     }
2465 
2466     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2467         let mut devices = Vec::new();
2468 
2469         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2470         if let Some(fs_list_cfg) = &mut fs_devices {
2471             for fs_cfg in fs_list_cfg.iter_mut() {
2472                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2473             }
2474         }
2475         self.config.lock().unwrap().fs = fs_devices;
2476 
2477         Ok(devices)
2478     }
2479 
2480     fn make_virtio_pmem_device(
2481         &mut self,
2482         pmem_cfg: &mut PmemConfig,
2483     ) -> DeviceManagerResult<MetaVirtioDevice> {
2484         let id = if let Some(id) = &pmem_cfg.id {
2485             id.clone()
2486         } else {
2487             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2488             pmem_cfg.id = Some(id.clone());
2489             id
2490         };
2491 
2492         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2493 
2494         let mut node = device_node!(id);
2495 
2496         // Look for the id in the device tree. If it can be found, that means
2497         // the device is being restored, otherwise it's created from scratch.
2498         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2499             info!("Restoring virtio-pmem {} resources", id);
2500 
2501             let mut region_range: Option<(u64, u64)> = None;
2502             for resource in node.resources.iter() {
2503                 match resource {
2504                     Resource::MmioAddressRange { base, size } => {
2505                         if region_range.is_some() {
2506                             return Err(DeviceManagerError::ResourceAlreadyExists);
2507                         }
2508 
2509                         region_range = Some((*base, *size));
2510                     }
2511                     _ => {
2512                         error!("Unexpected resource {:?} for {}", resource, id);
2513                     }
2514                 }
2515             }
2516 
2517             if region_range.is_none() {
2518                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2519             }
2520 
2521             region_range
2522         } else {
2523             None
2524         };
2525 
2526         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2527             if pmem_cfg.size.is_none() {
2528                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2529             }
2530             (O_TMPFILE, true)
2531         } else {
2532             (0, false)
2533         };
2534 
2535         let mut file = OpenOptions::new()
2536             .read(true)
2537             .write(!pmem_cfg.discard_writes)
2538             .custom_flags(custom_flags)
2539             .open(&pmem_cfg.file)
2540             .map_err(DeviceManagerError::PmemFileOpen)?;
2541 
2542         let size = if let Some(size) = pmem_cfg.size {
2543             if set_len {
2544                 file.set_len(size)
2545                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2546             }
2547             size
2548         } else {
2549             file.seek(SeekFrom::End(0))
2550                 .map_err(DeviceManagerError::PmemFileSetLen)?
2551         };
2552 
2553         if size % 0x20_0000 != 0 {
2554             return Err(DeviceManagerError::PmemSizeNotAligned);
2555         }
2556 
2557         let (region_base, region_size) = if let Some((base, size)) = region_range {
2558             // The memory needs to be 2MiB aligned in order to support
2559             // hugepages.
2560             self.pci_segments[pmem_cfg.pci_segment as usize]
2561                 .allocator
2562                 .lock()
2563                 .unwrap()
2564                 .allocate(
2565                     Some(GuestAddress(base)),
2566                     size as GuestUsize,
2567                     Some(0x0020_0000),
2568                 )
2569                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2570 
2571             (base, size)
2572         } else {
2573             // The memory needs to be 2MiB aligned in order to support
2574             // hugepages.
2575             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2576                 .allocator
2577                 .lock()
2578                 .unwrap()
2579                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
2580                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2581 
2582             (base.raw_value(), size)
2583         };
2584 
2585         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2586         let mmap_region = MmapRegion::build(
2587             Some(FileOffset::new(cloned_file, 0)),
2588             region_size as usize,
2589             PROT_READ | PROT_WRITE,
2590             MAP_NORESERVE
2591                 | if pmem_cfg.discard_writes {
2592                     MAP_PRIVATE
2593                 } else {
2594                     MAP_SHARED
2595                 },
2596         )
2597         .map_err(DeviceManagerError::NewMmapRegion)?;
2598         let host_addr: u64 = mmap_region.as_ptr() as u64;
2599 
2600         let mem_slot = self
2601             .memory_manager
2602             .lock()
2603             .unwrap()
2604             .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
2605             .map_err(DeviceManagerError::MemoryManager)?;
2606 
2607         let mapping = virtio_devices::UserspaceMapping {
2608             host_addr,
2609             mem_slot,
2610             addr: GuestAddress(region_base),
2611             len: region_size,
2612             mergeable: false,
2613         };
2614 
2615         let virtio_pmem_device = Arc::new(Mutex::new(
2616             virtio_devices::Pmem::new(
2617                 id.clone(),
2618                 file,
2619                 GuestAddress(region_base),
2620                 mapping,
2621                 mmap_region,
2622                 self.force_iommu | pmem_cfg.iommu,
2623                 self.seccomp_action.clone(),
2624                 self.exit_evt
2625                     .try_clone()
2626                     .map_err(DeviceManagerError::EventFd)?,
2627             )
2628             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2629         ));
2630 
2631         // Update the device tree with correct resource information and with
2632         // the migratable device.
2633         node.resources.push(Resource::MmioAddressRange {
2634             base: region_base,
2635             size: region_size,
2636         });
2637         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2638         self.device_tree.lock().unwrap().insert(id.clone(), node);
2639 
2640         Ok(MetaVirtioDevice {
2641             virtio_device: Arc::clone(&virtio_pmem_device)
2642                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2643             iommu: pmem_cfg.iommu,
2644             id,
2645             pci_segment: pmem_cfg.pci_segment,
2646             dma_handler: None,
2647         })
2648     }
2649 
2650     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2651         let mut devices = Vec::new();
2652         // Add virtio-pmem if required
2653         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2654         if let Some(pmem_list_cfg) = &mut pmem_devices {
2655             for pmem_cfg in pmem_list_cfg.iter_mut() {
2656                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2657             }
2658         }
2659         self.config.lock().unwrap().pmem = pmem_devices;
2660 
2661         Ok(devices)
2662     }
2663 
2664     fn make_virtio_vsock_device(
2665         &mut self,
2666         vsock_cfg: &mut VsockConfig,
2667     ) -> DeviceManagerResult<MetaVirtioDevice> {
2668         let id = if let Some(id) = &vsock_cfg.id {
2669             id.clone()
2670         } else {
2671             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2672             vsock_cfg.id = Some(id.clone());
2673             id
2674         };
2675 
2676         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2677 
2678         let socket_path = vsock_cfg
2679             .socket
2680             .to_str()
2681             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2682         let backend =
2683             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2684                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2685 
2686         let vsock_device = Arc::new(Mutex::new(
2687             virtio_devices::Vsock::new(
2688                 id.clone(),
2689                 vsock_cfg.cid,
2690                 vsock_cfg.socket.clone(),
2691                 backend,
2692                 self.force_iommu | vsock_cfg.iommu,
2693                 self.seccomp_action.clone(),
2694                 self.exit_evt
2695                     .try_clone()
2696                     .map_err(DeviceManagerError::EventFd)?,
2697             )
2698             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2699         ));
2700 
2701         // Fill the device tree with a new node. In case of restore, we
2702         // know there is nothing to do, so we can simply override the
2703         // existing entry.
2704         self.device_tree
2705             .lock()
2706             .unwrap()
2707             .insert(id.clone(), device_node!(id, vsock_device));
2708 
2709         Ok(MetaVirtioDevice {
2710             virtio_device: Arc::clone(&vsock_device)
2711                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2712             iommu: vsock_cfg.iommu,
2713             id,
2714             pci_segment: vsock_cfg.pci_segment,
2715             dma_handler: None,
2716         })
2717     }
2718 
2719     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2720         let mut devices = Vec::new();
2721 
2722         let mut vsock = self.config.lock().unwrap().vsock.clone();
2723         if let Some(ref mut vsock_cfg) = &mut vsock {
2724             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
2725         }
2726         self.config.lock().unwrap().vsock = vsock;
2727 
2728         Ok(devices)
2729     }
2730 
2731     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2732         let mut devices = Vec::new();
2733 
2734         let mm = self.memory_manager.clone();
2735         let mm = mm.lock().unwrap();
2736         for (memory_zone_id, memory_zone) in mm.memory_zones().iter() {
2737             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() {
2738                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
2739 
2740                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
2741                     .map(|i| i as u16);
2742 
2743                 let virtio_mem_device = Arc::new(Mutex::new(
2744                     virtio_devices::Mem::new(
2745                         memory_zone_id.clone(),
2746                         virtio_mem_zone.region(),
2747                         virtio_mem_zone
2748                             .resize_handler()
2749                             .new_resize_sender()
2750                             .map_err(DeviceManagerError::CreateResizeSender)?,
2751                         self.seccomp_action.clone(),
2752                         node_id,
2753                         virtio_mem_zone.hotplugged_size(),
2754                         virtio_mem_zone.hugepages(),
2755                         self.exit_evt
2756                             .try_clone()
2757                             .map_err(DeviceManagerError::EventFd)?,
2758                         virtio_mem_zone.blocks_state().clone(),
2759                     )
2760                     .map_err(DeviceManagerError::CreateVirtioMem)?,
2761                 ));
2762 
2763                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
2764 
2765                 devices.push(MetaVirtioDevice {
2766                     virtio_device: Arc::clone(&virtio_mem_device)
2767                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2768                     iommu: false,
2769                     id: memory_zone_id.clone(),
2770                     pci_segment: 0,
2771                     dma_handler: None,
2772                 });
2773 
2774                 // Fill the device tree with a new node. In case of restore, we
2775                 // know there is nothing to do, so we can simply override the
2776                 // existing entry.
2777                 self.device_tree.lock().unwrap().insert(
2778                     memory_zone_id.clone(),
2779                     device_node!(memory_zone_id, virtio_mem_device),
2780                 );
2781             }
2782         }
2783 
2784         Ok(devices)
2785     }
2786 
2787     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2788         let mut devices = Vec::new();
2789 
2790         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
2791             let id = String::from(BALLOON_DEVICE_NAME);
2792             info!("Creating virtio-balloon device: id = {}", id);
2793 
2794             let virtio_balloon_device = Arc::new(Mutex::new(
2795                 virtio_devices::Balloon::new(
2796                     id.clone(),
2797                     balloon_config.size,
2798                     balloon_config.deflate_on_oom,
2799                     balloon_config.free_page_reporting,
2800                     self.seccomp_action.clone(),
2801                     self.exit_evt
2802                         .try_clone()
2803                         .map_err(DeviceManagerError::EventFd)?,
2804                 )
2805                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
2806             ));
2807 
2808             self.balloon = Some(virtio_balloon_device.clone());
2809 
2810             devices.push(MetaVirtioDevice {
2811                 virtio_device: Arc::clone(&virtio_balloon_device)
2812                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2813                 iommu: false,
2814                 id: id.clone(),
2815                 pci_segment: 0,
2816                 dma_handler: None,
2817             });
2818 
2819             self.device_tree
2820                 .lock()
2821                 .unwrap()
2822                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
2823         }
2824 
2825         Ok(devices)
2826     }
2827 
2828     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2829         let mut devices = Vec::new();
2830 
2831         if !self.config.lock().unwrap().watchdog {
2832             return Ok(devices);
2833         }
2834 
2835         let id = String::from(WATCHDOG_DEVICE_NAME);
2836         info!("Creating virtio-watchdog device: id = {}", id);
2837 
2838         let virtio_watchdog_device = Arc::new(Mutex::new(
2839             virtio_devices::Watchdog::new(
2840                 id.clone(),
2841                 self.reset_evt.try_clone().unwrap(),
2842                 self.seccomp_action.clone(),
2843                 self.exit_evt
2844                     .try_clone()
2845                     .map_err(DeviceManagerError::EventFd)?,
2846             )
2847             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
2848         ));
2849         devices.push(MetaVirtioDevice {
2850             virtio_device: Arc::clone(&virtio_watchdog_device)
2851                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2852             iommu: false,
2853             id: id.clone(),
2854             pci_segment: 0,
2855             dma_handler: None,
2856         });
2857 
2858         self.device_tree
2859             .lock()
2860             .unwrap()
2861             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
2862 
2863         Ok(devices)
2864     }
2865 
2866     fn make_vdpa_device(
2867         &mut self,
2868         vdpa_cfg: &mut VdpaConfig,
2869     ) -> DeviceManagerResult<MetaVirtioDevice> {
2870         let id = if let Some(id) = &vdpa_cfg.id {
2871             id.clone()
2872         } else {
2873             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
2874             vdpa_cfg.id = Some(id.clone());
2875             id
2876         };
2877 
2878         info!("Creating vDPA device: {:?}", vdpa_cfg);
2879 
2880         let device_path = vdpa_cfg
2881             .path
2882             .to_str()
2883             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
2884 
2885         let vdpa_device = Arc::new(Mutex::new(
2886             virtio_devices::Vdpa::new(
2887                 id.clone(),
2888                 device_path,
2889                 self.memory_manager.lock().unwrap().guest_memory(),
2890                 vdpa_cfg.num_queues as u16,
2891             )
2892             .map_err(DeviceManagerError::CreateVdpa)?,
2893         ));
2894 
2895         // Create the DMA handler that is required by the vDPA device
2896         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
2897             Arc::clone(&vdpa_device),
2898             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
2899         ));
2900 
2901         self.device_tree
2902             .lock()
2903             .unwrap()
2904             .insert(id.clone(), device_node!(id));
2905 
2906         Ok(MetaVirtioDevice {
2907             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2908             iommu: vdpa_cfg.iommu,
2909             id,
2910             pci_segment: vdpa_cfg.pci_segment,
2911             dma_handler: Some(vdpa_mapping),
2912         })
2913     }
2914 
2915     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2916         let mut devices = Vec::new();
2917         // Add vdpa if required
2918         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
2919         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
2920             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
2921                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
2922             }
2923         }
2924         self.config.lock().unwrap().vdpa = vdpa_devices;
2925 
2926         Ok(devices)
2927     }
2928 
2929     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
2930         let start_id = self.device_id_cnt;
2931         loop {
2932             // Generate the temporary name.
2933             let name = format!("{}{}", prefix, self.device_id_cnt);
2934             // Increment the counter.
2935             self.device_id_cnt += Wrapping(1);
2936             // Check if the name is already in use.
2937             if !self.boot_id_list.contains(&name)
2938                 && !self.device_tree.lock().unwrap().contains_key(&name)
2939             {
2940                 return Ok(name);
2941             }
2942 
2943             if self.device_id_cnt == start_id {
2944                 // We went through a full loop and there's nothing else we can
2945                 // do.
2946                 break;
2947             }
2948         }
2949         Err(DeviceManagerError::NoAvailableDeviceName)
2950     }
2951 
2952     fn add_passthrough_device(
2953         &mut self,
2954         device_cfg: &mut DeviceConfig,
2955     ) -> DeviceManagerResult<(PciBdf, String)> {
2956         // If the passthrough device has not been created yet, it is created
2957         // here and stored in the DeviceManager structure for future needs.
2958         if self.passthrough_device.is_none() {
2959             self.passthrough_device = Some(
2960                 self.address_manager
2961                     .vm
2962                     .create_passthrough_device()
2963                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
2964             );
2965         }
2966 
2967         self.add_vfio_device(device_cfg)
2968     }
2969 
2970     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
2971         let passthrough_device = self
2972             .passthrough_device
2973             .as_ref()
2974             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
2975 
2976         let dup = passthrough_device
2977             .try_clone()
2978             .map_err(DeviceManagerError::VfioCreate)?;
2979 
2980         Ok(Arc::new(
2981             VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
2982         ))
2983     }
2984 
2985     fn add_vfio_device(
2986         &mut self,
2987         device_cfg: &mut DeviceConfig,
2988     ) -> DeviceManagerResult<(PciBdf, String)> {
2989         let vfio_name = if let Some(id) = &device_cfg.id {
2990             id.clone()
2991         } else {
2992             let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
2993             device_cfg.id = Some(id.clone());
2994             id
2995         };
2996 
2997         let (pci_segment_id, pci_device_bdf, resources) =
2998             self.pci_resources(&vfio_name, device_cfg.pci_segment)?;
2999 
3000         let mut needs_dma_mapping = false;
3001 
3002         // Here we create a new VFIO container for two reasons. Either this is
3003         // the first VFIO device, meaning we need a new VFIO container, which
3004         // will be shared with other VFIO devices. Or the new VFIO device is
3005         // attached to a vIOMMU, meaning we must create a dedicated VFIO
3006         // container. In the vIOMMU use case, we can't let all devices under
3007         // the same VFIO container since we couldn't map/unmap memory for each
3008         // device. That's simply because the map/unmap operations happen at the
3009         // VFIO container level.
3010         let vfio_container = if device_cfg.iommu {
3011             let vfio_container = self.create_vfio_container()?;
3012 
3013             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3014                 Arc::clone(&vfio_container),
3015                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3016             ));
3017 
3018             if let Some(iommu) = &self.iommu_device {
3019                 iommu
3020                     .lock()
3021                     .unwrap()
3022                     .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
3023             } else {
3024                 return Err(DeviceManagerError::MissingVirtualIommu);
3025             }
3026 
3027             vfio_container
3028         } else if let Some(vfio_container) = &self.vfio_container {
3029             Arc::clone(vfio_container)
3030         } else {
3031             let vfio_container = self.create_vfio_container()?;
3032             needs_dma_mapping = true;
3033             self.vfio_container = Some(Arc::clone(&vfio_container));
3034 
3035             vfio_container
3036         };
3037 
3038         let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
3039             .map_err(DeviceManagerError::VfioCreate)?;
3040 
3041         if needs_dma_mapping {
3042             // Register DMA mapping in IOMMU.
3043             // Do not register virtio-mem regions, as they are handled directly by
3044             // virtio-mem device itself.
3045             for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3046                 for region in zone.regions() {
3047                     vfio_container
3048                         .vfio_dma_map(
3049                             region.start_addr().raw_value(),
3050                             region.len() as u64,
3051                             region.as_ptr() as u64,
3052                         )
3053                         .map_err(DeviceManagerError::VfioDmaMap)?;
3054                 }
3055             }
3056 
3057             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3058                 Arc::clone(&vfio_container),
3059                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3060             ));
3061 
3062             for virtio_mem_device in self.virtio_mem_devices.iter() {
3063                 virtio_mem_device
3064                     .lock()
3065                     .unwrap()
3066                     .add_dma_mapping_handler(
3067                         VirtioMemMappingSource::Container,
3068                         vfio_mapping.clone(),
3069                     )
3070                     .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3071             }
3072         }
3073 
3074         let legacy_interrupt_group =
3075             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3076                 Some(
3077                     legacy_interrupt_manager
3078                         .create_group(LegacyIrqGroupConfig {
3079                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3080                                 [pci_device_bdf.device() as usize]
3081                                 as InterruptIndex,
3082                         })
3083                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3084                 )
3085             } else {
3086                 None
3087             };
3088 
3089         let memory_manager = self.memory_manager.clone();
3090 
3091         let vfio_pci_device = VfioPciDevice::new(
3092             vfio_name.clone(),
3093             &self.address_manager.vm,
3094             vfio_device,
3095             vfio_container,
3096             self.msi_interrupt_manager.clone(),
3097             legacy_interrupt_group,
3098             device_cfg.iommu,
3099             pci_device_bdf,
3100             self.restoring,
3101             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3102         )
3103         .map_err(DeviceManagerError::VfioPciCreate)?;
3104 
3105         let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));
3106 
3107         let new_resources = self.add_pci_device(
3108             vfio_pci_device.clone(),
3109             vfio_pci_device.clone(),
3110             pci_segment_id,
3111             pci_device_bdf,
3112             resources,
3113         )?;
3114 
3115         // When restoring a VM, the restore codepath will take care of mapping
3116         // the MMIO regions based on the information from the snapshot.
3117         if !self.restoring {
3118             vfio_pci_device
3119                 .lock()
3120                 .unwrap()
3121                 .map_mmio_regions()
3122                 .map_err(DeviceManagerError::VfioMapRegion)?;
3123         }
3124 
3125         let mut node = device_node!(vfio_name, vfio_pci_device);
3126 
3127         // Update the device tree with correct resource information.
3128         node.resources = new_resources;
3129         node.pci_bdf = Some(pci_device_bdf);
3130         node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));
3131 
3132         self.device_tree
3133             .lock()
3134             .unwrap()
3135             .insert(vfio_name.clone(), node);
3136 
3137         Ok((pci_device_bdf, vfio_name))
3138     }
3139 
3140     fn add_pci_device(
3141         &mut self,
3142         bus_device: Arc<Mutex<dyn BusDevice>>,
3143         pci_device: Arc<Mutex<dyn PciDevice>>,
3144         segment_id: u16,
3145         bdf: PciBdf,
3146         resources: Option<Vec<Resource>>,
3147     ) -> DeviceManagerResult<Vec<Resource>> {
3148         let bars = pci_device
3149             .lock()
3150             .unwrap()
3151             .allocate_bars(
3152                 &self.address_manager.allocator,
3153                 &mut self.pci_segments[segment_id as usize]
3154                     .allocator
3155                     .lock()
3156                     .unwrap(),
3157                 resources,
3158             )
3159             .map_err(DeviceManagerError::AllocateBars)?;
3160 
3161         let mut pci_bus = self.pci_segments[segment_id as usize]
3162             .pci_bus
3163             .lock()
3164             .unwrap();
3165 
3166         pci_bus
3167             .add_device(bdf.device() as u32, pci_device)
3168             .map_err(DeviceManagerError::AddPciDevice)?;
3169 
3170         self.bus_devices.push(Arc::clone(&bus_device));
3171 
3172         pci_bus
3173             .register_mapping(
3174                 bus_device,
3175                 #[cfg(target_arch = "x86_64")]
3176                 self.address_manager.io_bus.as_ref(),
3177                 self.address_manager.mmio_bus.as_ref(),
3178                 bars.clone(),
3179             )
3180             .map_err(DeviceManagerError::AddPciDevice)?;
3181 
3182         let mut new_resources = Vec::new();
3183         for bar in bars {
3184             new_resources.push(Resource::PciBar {
3185                 index: bar.idx(),
3186                 base: bar.addr(),
3187                 size: bar.size(),
3188                 type_: bar.region_type().into(),
3189                 prefetchable: bar.prefetchable().into(),
3190             });
3191         }
3192 
3193         Ok(new_resources)
3194     }
3195 
3196     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3197         let mut iommu_attached_device_ids = Vec::new();
3198         let mut devices = self.config.lock().unwrap().devices.clone();
3199 
3200         if let Some(device_list_cfg) = &mut devices {
3201             for device_cfg in device_list_cfg.iter_mut() {
3202                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3203                 if device_cfg.iommu && self.iommu_device.is_some() {
3204                     iommu_attached_device_ids.push(device_id);
3205                 }
3206             }
3207         }
3208 
3209         // Update the list of devices
3210         self.config.lock().unwrap().devices = devices;
3211 
3212         Ok(iommu_attached_device_ids)
3213     }
3214 
3215     fn add_vfio_user_device(
3216         &mut self,
3217         device_cfg: &mut UserDeviceConfig,
3218     ) -> DeviceManagerResult<(PciBdf, String)> {
3219         let vfio_user_name = if let Some(id) = &device_cfg.id {
3220             id.clone()
3221         } else {
3222             let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
3223             device_cfg.id = Some(id.clone());
3224             id
3225         };
3226 
3227         let (pci_segment_id, pci_device_bdf, resources) =
3228             self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;
3229 
3230         let legacy_interrupt_group =
3231             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3232                 Some(
3233                     legacy_interrupt_manager
3234                         .create_group(LegacyIrqGroupConfig {
3235                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3236                                 [pci_device_bdf.device() as usize]
3237                                 as InterruptIndex,
3238                         })
3239                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3240                 )
3241             } else {
3242                 None
3243             };
3244 
3245         let client = Arc::new(Mutex::new(
3246             vfio_user::Client::new(&device_cfg.socket)
3247                 .map_err(DeviceManagerError::VfioUserCreateClient)?,
3248         ));
3249 
3250         let memory_manager = self.memory_manager.clone();
3251 
3252         let mut vfio_user_pci_device = VfioUserPciDevice::new(
3253             vfio_user_name.clone(),
3254             &self.address_manager.vm,
3255             client.clone(),
3256             self.msi_interrupt_manager.clone(),
3257             legacy_interrupt_group,
3258             pci_device_bdf,
3259             self.restoring,
3260             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3261         )
3262         .map_err(DeviceManagerError::VfioUserCreate)?;
3263 
3264         let memory = self.memory_manager.lock().unwrap().guest_memory();
3265         let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
3266         for virtio_mem_device in self.virtio_mem_devices.iter() {
3267             virtio_mem_device
3268                 .lock()
3269                 .unwrap()
3270                 .add_dma_mapping_handler(
3271                     VirtioMemMappingSource::Device(pci_device_bdf.into()),
3272                     vfio_user_mapping.clone(),
3273                 )
3274                 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3275         }
3276 
3277         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3278             for region in zone.regions() {
3279                 vfio_user_pci_device
3280                     .dma_map(region)
3281                     .map_err(DeviceManagerError::VfioUserDmaMap)?;
3282             }
3283         }
3284 
3285         let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));
3286 
3287         let new_resources = self.add_pci_device(
3288             vfio_user_pci_device.clone(),
3289             vfio_user_pci_device.clone(),
3290             pci_segment_id,
3291             pci_device_bdf,
3292             resources,
3293         )?;
3294 
3295         // When restoring a VM, the restore codepath will take care of mapping
3296         // the MMIO regions based on the information from the snapshot.
3297         if !self.restoring {
3298             // Note it is required to call 'add_pci_device()' in advance to have the list of
3299             // mmio regions provisioned correctly
3300             vfio_user_pci_device
3301                 .lock()
3302                 .unwrap()
3303                 .map_mmio_regions()
3304                 .map_err(DeviceManagerError::VfioUserMapRegion)?;
3305         }
3306 
3307         let mut node = device_node!(vfio_user_name, vfio_user_pci_device);
3308 
3309         // Update the device tree with correct resource information.
3310         node.resources = new_resources;
3311         node.pci_bdf = Some(pci_device_bdf);
3312         node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));
3313 
3314         self.device_tree
3315             .lock()
3316             .unwrap()
3317             .insert(vfio_user_name.clone(), node);
3318 
3319         Ok((pci_device_bdf, vfio_user_name))
3320     }
3321 
3322     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3323         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3324 
3325         if let Some(device_list_cfg) = &mut user_devices {
3326             for device_cfg in device_list_cfg.iter_mut() {
3327                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3328             }
3329         }
3330 
3331         // Update the list of devices
3332         self.config.lock().unwrap().user_devices = user_devices;
3333 
3334         Ok(vec![])
3335     }
3336 
3337     fn add_virtio_pci_device(
3338         &mut self,
3339         virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3340         iommu_mapping: &Option<Arc<IommuMapping>>,
3341         virtio_device_id: String,
3342         pci_segment_id: u16,
3343         dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
3344     ) -> DeviceManagerResult<PciBdf> {
3345         let id = format!("{}-{}", VIRTIO_PCI_DEVICE_NAME_PREFIX, virtio_device_id);
3346 
3347         // Add the new virtio-pci node to the device tree.
3348         let mut node = device_node!(id);
3349         node.children = vec![virtio_device_id.clone()];
3350 
3351         let (pci_segment_id, pci_device_bdf, resources) =
3352             self.pci_resources(&id, pci_segment_id)?;
3353 
3354         // Update the existing virtio node by setting the parent.
3355         if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
3356             node.parent = Some(id.clone());
3357         } else {
3358             return Err(DeviceManagerError::MissingNode);
3359         }
3360 
3361         // Allows support for one MSI-X vector per queue. It also adds 1
3362         // as we need to take into account the dedicated vector to notify
3363         // about a virtio config change.
3364         let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;
3365 
3366         // Create the AccessPlatform trait from the implementation IommuMapping.
3367         // This will provide address translation for any virtio device sitting
3368         // behind a vIOMMU.
3369         let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
3370         {
3371             Some(Arc::new(AccessPlatformMapping::new(
3372                 pci_device_bdf.into(),
3373                 mapping.clone(),
3374             )))
3375         } else {
3376             None
3377         };
3378 
3379         let memory = self.memory_manager.lock().unwrap().guest_memory();
3380 
3381         // Map DMA ranges if a DMA handler is available and if the device is
3382         // not attached to a virtual IOMMU.
3383         if let Some(dma_handler) = &dma_handler {
3384             if iommu_mapping.is_some() {
3385                 if let Some(iommu) = &self.iommu_device {
3386                     iommu
3387                         .lock()
3388                         .unwrap()
3389                         .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
3390                 } else {
3391                     return Err(DeviceManagerError::MissingVirtualIommu);
3392                 }
3393             } else {
3394                 // Let every virtio-mem device handle the DMA map/unmap through the
3395                 // DMA handler provided.
3396                 for virtio_mem_device in self.virtio_mem_devices.iter() {
3397                     virtio_mem_device
3398                         .lock()
3399                         .unwrap()
3400                         .add_dma_mapping_handler(
3401                             VirtioMemMappingSource::Device(pci_device_bdf.into()),
3402                             dma_handler.clone(),
3403                         )
3404                         .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3405                 }
3406 
3407                 // Do not register virtio-mem regions, as they are handled directly by
3408                 // virtio-mem devices.
3409                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3410                     for region in zone.regions() {
3411                         let gpa = region.start_addr().0;
3412                         let size = region.len();
3413                         dma_handler
3414                             .map(gpa, gpa, size)
3415                             .map_err(DeviceManagerError::VirtioDmaMap)?;
3416                     }
3417                 }
3418             }
3419         }
3420 
3421         let device_type = virtio_device.lock().unwrap().device_type();
3422         let virtio_pci_device = Arc::new(Mutex::new(
3423             VirtioPciDevice::new(
3424                 id.clone(),
3425                 memory,
3426                 virtio_device,
3427                 msix_num,
3428                 access_platform,
3429                 &self.msi_interrupt_manager,
3430                 pci_device_bdf.into(),
3431                 self.activate_evt
3432                     .try_clone()
3433                     .map_err(DeviceManagerError::EventFd)?,
3434                 // All device types *except* virtio block devices should be allocated a 64-bit bar
3435                 // The block devices should be given a 32-bit BAR so that they are easily accessible
3436                 // to firmware without requiring excessive identity mapping.
3437                 // The exception being if not on the default PCI segment.
3438                 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
3439                 dma_handler,
3440                 self.pending_activations.clone(),
3441             )
3442             .map_err(DeviceManagerError::VirtioDevice)?,
3443         ));
3444 
3445         let new_resources = self.add_pci_device(
3446             virtio_pci_device.clone(),
3447             virtio_pci_device.clone(),
3448             pci_segment_id,
3449             pci_device_bdf,
3450             resources,
3451         )?;
3452 
3453         let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3454         for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3455             let io_addr = IoEventAddress::Mmio(addr);
3456             self.address_manager
3457                 .vm
3458                 .register_ioevent(event, &io_addr, None)
3459                 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
3460         }
3461 
3462         // Update the device tree with correct resource information.
3463         node.resources = new_resources;
3464         node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
3465         node.pci_bdf = Some(pci_device_bdf);
3466         node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
3467         self.device_tree.lock().unwrap().insert(id, node);
3468 
3469         Ok(pci_device_bdf)
3470     }
3471 
3472     fn pci_resources(
3473         &self,
3474         id: &str,
3475         pci_segment_id: u16,
3476     ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
3477         // Look for the id in the device tree. If it can be found, that means
3478         // the device is being restored, otherwise it's created from scratch.
3479         Ok(
3480             if let Some(node) = self.device_tree.lock().unwrap().get(id) {
3481                 info!("Restoring virtio-pci {} resources", id);
3482                 let pci_device_bdf: PciBdf = node
3483                     .pci_bdf
3484                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3485                 let pci_segment_id = pci_device_bdf.segment();
3486 
3487                 self.pci_segments[pci_segment_id as usize]
3488                     .pci_bus
3489                     .lock()
3490                     .unwrap()
3491                     .get_device_id(pci_device_bdf.device() as usize)
3492                     .map_err(DeviceManagerError::GetPciDeviceId)?;
3493 
3494                 (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
3495             } else {
3496                 let pci_device_bdf =
3497                     self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3498 
3499                 (pci_segment_id, pci_device_bdf, None)
3500             },
3501         )
3502     }
3503 
    /// Returns the I/O bus owned by the address manager (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }
3508 
    /// Returns the MMIO bus owned by the address manager.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }
3512 
    /// Returns the system allocator owned by the address manager.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }
3516 
3517     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3518         self.interrupt_controller
3519             .as_ref()
3520             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3521     }
3522 
3523     #[cfg(target_arch = "x86_64")]
3524     // Used to provide a fast path for handling PIO exits
3525     pub fn pci_config_io(&self) -> Arc<Mutex<PciConfigIo>> {
3526         Arc::clone(self.pci_segments[0].pci_config_io.as_ref().unwrap())
3527     }
3528 
    /// Returns the list of PCI segments managed by the device manager.
    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }
3532 
    /// Returns the console held by the device manager.
    pub fn console(&self) -> &Arc<Console> {
        &self.console
    }
3536 
3537     #[cfg(target_arch = "aarch64")]
3538     pub fn cmdline_additions(&self) -> &[String] {
3539         self.cmdline_additions.as_slice()
3540     }
3541 
3542     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3543         for handle in self.virtio_devices.iter() {
3544             handle
3545                 .virtio_device
3546                 .lock()
3547                 .unwrap()
3548                 .add_memory_region(new_region)
3549                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3550 
3551             if let Some(dma_handler) = &handle.dma_handler {
3552                 if !handle.iommu {
3553                     let gpa = new_region.start_addr().0;
3554                     let size = new_region.len();
3555                     dma_handler
3556                         .map(gpa, gpa, size)
3557                         .map_err(DeviceManagerError::VirtioDmaMap)?;
3558                 }
3559             }
3560         }
3561 
3562         // Take care of updating the memory for VFIO PCI devices.
3563         if let Some(vfio_container) = &self.vfio_container {
3564             vfio_container
3565                 .vfio_dma_map(
3566                     new_region.start_addr().raw_value(),
3567                     new_region.len() as u64,
3568                     new_region.as_ptr() as u64,
3569                 )
3570                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3571         }
3572 
3573         // Take care of updating the memory for vfio-user devices.
3574         {
3575             let device_tree = self.device_tree.lock().unwrap();
3576             for pci_device_node in device_tree.pci_devices() {
3577                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
3578                     .pci_device_handle
3579                     .as_ref()
3580                     .ok_or(DeviceManagerError::MissingPciDevice)?
3581                 {
3582                     vfio_user_pci_device
3583                         .lock()
3584                         .unwrap()
3585                         .dma_map(new_region)
3586                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
3587                 }
3588             }
3589         }
3590 
3591         Ok(())
3592     }
3593 
3594     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3595         for mut activator in self.pending_activations.lock().unwrap().drain(..) {
3596             activator
3597                 .activate()
3598                 .map_err(DeviceManagerError::VirtioActivate)?;
3599         }
3600         Ok(())
3601     }
3602 
3603     pub fn notify_hotplug(
3604         &self,
3605         _notification_type: AcpiNotificationFlags,
3606     ) -> DeviceManagerResult<()> {
3607         return self
3608             .ged_notification_device
3609             .as_ref()
3610             .unwrap()
3611             .lock()
3612             .unwrap()
3613             .notify(_notification_type)
3614             .map_err(DeviceManagerError::HotPlugNotification);
3615     }
3616 
3617     pub fn add_device(
3618         &mut self,
3619         device_cfg: &mut DeviceConfig,
3620     ) -> DeviceManagerResult<PciDeviceInfo> {
3621         self.validate_identifier(&device_cfg.id)?;
3622 
3623         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
3624             return Err(DeviceManagerError::InvalidIommuHotplug);
3625         }
3626 
3627         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
3628 
3629         // Update the PCIU bitmap
3630         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3631 
3632         Ok(PciDeviceInfo {
3633             id: device_name,
3634             bdf,
3635         })
3636     }
3637 
3638     pub fn add_user_device(
3639         &mut self,
3640         device_cfg: &mut UserDeviceConfig,
3641     ) -> DeviceManagerResult<PciDeviceInfo> {
3642         self.validate_identifier(&device_cfg.id)?;
3643 
3644         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
3645 
3646         // Update the PCIU bitmap
3647         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3648 
3649         Ok(PciDeviceInfo {
3650             id: device_name,
3651             bdf,
3652         })
3653     }
3654 
3655     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
3656         // The node can be directly a PCI node in case the 'id' refers to a
3657         // VFIO device or a virtio-pci one.
3658         // In case the 'id' refers to a virtio device, we must find the PCI
3659         // node by looking at the parent.
3660         let device_tree = self.device_tree.lock().unwrap();
3661         let node = device_tree
3662             .get(&id)
3663             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
3664 
3665         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
3666             node
3667         } else {
3668             let parent = node
3669                 .parent
3670                 .as_ref()
3671                 .ok_or(DeviceManagerError::MissingNode)?;
3672             device_tree
3673                 .get(parent)
3674                 .ok_or(DeviceManagerError::MissingNode)?
3675         };
3676 
3677         let pci_device_bdf: PciBdf = pci_device_node
3678             .pci_bdf
3679             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3680         let pci_segment_id = pci_device_bdf.segment();
3681 
3682         let pci_device_handle = pci_device_node
3683             .pci_device_handle
3684             .as_ref()
3685             .ok_or(DeviceManagerError::MissingPciDevice)?;
3686         #[allow(irrefutable_let_patterns)]
3687         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
3688             let device_type = VirtioDeviceType::from(
3689                 virtio_pci_device
3690                     .lock()
3691                     .unwrap()
3692                     .virtio_device()
3693                     .lock()
3694                     .unwrap()
3695                     .device_type(),
3696             );
3697             match device_type {
3698                 VirtioDeviceType::Net
3699                 | VirtioDeviceType::Block
3700                 | VirtioDeviceType::Pmem
3701                 | VirtioDeviceType::Fs
3702                 | VirtioDeviceType::Vsock => {}
3703                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
3704             }
3705         }
3706 
3707         // Update the PCID bitmap
3708         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
3709 
3710         Ok(())
3711     }
3712 
    /// Tears down the PCI device sitting in slot `device_id` of segment
    /// `pci_segment_id`.
    ///
    /// In order, this:
    /// 1. gives the device id back to the segment's PCI bus,
    /// 2. removes the PCI node and its children from the device tree,
    /// 3. undoes the per-device-kind setup (ioeventfds and DMA mappings for
    ///    virtio, DMA mappings for vfio-user),
    /// 4. removes the DMA mapping handler from the virtio-mem devices when one
    ///    had been registered for this device,
    /// 5. frees the BARs and removes the device from the PCI bus, the I/O bus
    ///    (x86_64 only), the MMIO bus and the list of bus devices,
    /// 6. for virtio devices, removes the userspace mappings, shuts the device
    ///    down and drops it from the list of virtio devices,
    /// 7. emits a "device-removed" event.
    ///
    /// # Errors
    ///
    /// Returns the error of the first step that fails. The steps already
    /// performed at that point are not rolled back.
    ///
    /// # Panics
    ///
    /// Panics if `pci_segment_id` is out of range for `self.pci_segments`, if
    /// a mutex is poisoned, or if a virtio-pci node has more than one child.
    pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
        info!(
            "Ejecting device_id = {} on segment_id={}",
            device_id, pci_segment_id
        );

        // Convert the device ID into the corresponding b/d/f.
        let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);

        // Give the PCI device ID back to the PCI bus.
        // NOTE(review): this happens before the device tree lookup below, so
        // the id is released even when no matching node is found — confirm
        // this ordering is intended.
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .put_device_id(device_id as usize)
            .map_err(DeviceManagerError::PutPciDeviceId)?;

        // Remove the device from the device tree along with its children.
        let mut device_tree = self.device_tree.lock().unwrap();
        let pci_device_node = device_tree
            .remove_node_by_pci_bdf(pci_device_bdf)
            .ok_or(DeviceManagerError::MissingPciDevice)?;

        // For VFIO and vfio-user the PCI device id is the id.
        // For virtio we overwrite it later as we want the id of the
        // underlying device.
        let mut id = pci_device_node.id;
        let pci_device_handle = pci_device_node
            .pci_device_handle
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
            // The virtio-pci device has a single child
            if !pci_device_node.children.is_empty() {
                assert_eq!(pci_device_node.children.len(), 1);
                let child_id = &pci_device_node.children[0];
                id = child_id.clone();
            }
        }
        for child in pci_device_node.children.iter() {
            device_tree.remove(child);
        }

        // Find out whether this device is part of the IOMMU attached devices.
        let mut iommu_attached = false;
        if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
            if iommu_attached_devices.contains(&pci_device_bdf) {
                iommu_attached = true;
            }
        }

        // Per device kind: undo the DMA/ioeventfd setup and collect the
        // handles needed for the common teardown below. The last tuple element
        // tells whether a DMA mapping handler must be removed from the
        // virtio-mem devices.
        let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
            // No need to remove any virtio-mem mapping here as the container outlives all devices
            PciDeviceHandle::Vfio(vfio_pci_device) => (
                Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                false,
            ),
            PciDeviceHandle::Virtio(virtio_pci_device) => {
                let dev = virtio_pci_device.lock().unwrap();
                // Unregister the ioeventfds of the device.
                let bar_addr = dev.config_bar_addr();
                for (event, addr) in dev.ioeventfds(bar_addr) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.address_manager
                        .vm
                        .unregister_ioevent(event, &io_addr)
                        .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
                }

                // Unmap every memory zone region through the DMA handler,
                // unless the device is attached to the virtual IOMMU.
                if let Some(dma_handler) = dev.dma_handler() {
                    if !iommu_attached {
                        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                            for region in zone.regions() {
                                let iova = region.start_addr().0;
                                let size = region.len();
                                dma_handler
                                    .unmap(iova, size)
                                    .map_err(DeviceManagerError::VirtioDmaUnmap)?;
                            }
                        }
                    }
                }

                (
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    Some(dev.virtio_device()),
                    dev.dma_handler().is_some() && !iommu_attached,
                )
            }
            PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
                let mut dev = vfio_user_pci_device.lock().unwrap();
                // DMA-unmap every memory zone region from the device.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        dev.dma_unmap(region)
                            .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
                    }
                }

                (
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    true,
                )
            }
        };

        // Remove the handler registered for this device from every
        // virtio-mem device.
        if remove_dma_handler {
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
                        pci_device_bdf.into(),
                    ))
                    .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
            }
        }

        // Free the allocated BARs
        pci_device
            .lock()
            .unwrap()
            .free_bars(
                &mut self.address_manager.allocator.lock().unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .allocator
                    .lock()
                    .unwrap(),
            )
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the lists and
        // buses where it was stored. At the end of this function, after the
        // local handles (pci_device, bus_device and, if any, virtio_device)
        // are released, the actual device will be dropped.
        Ok(())
    }
3906 
3907     fn hotplug_virtio_pci_device(
3908         &mut self,
3909         handle: MetaVirtioDevice,
3910     ) -> DeviceManagerResult<PciDeviceInfo> {
3911         // Add the virtio device to the device manager list. This is important
3912         // as the list is used to notify virtio devices about memory updates
3913         // for instance.
3914         self.virtio_devices.push(handle.clone());
3915 
3916         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
3917             self.iommu_mapping.clone()
3918         } else {
3919             None
3920         };
3921 
3922         let bdf = self.add_virtio_pci_device(
3923             handle.virtio_device,
3924             &mapping,
3925             handle.id.clone(),
3926             handle.pci_segment,
3927             handle.dma_handler,
3928         )?;
3929 
3930         // Update the PCIU bitmap
3931         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3932 
3933         Ok(PciDeviceInfo { id: handle.id, bdf })
3934     }
3935 
3936     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
3937         self.config
3938             .lock()
3939             .as_ref()
3940             .unwrap()
3941             .platform
3942             .as_ref()
3943             .map(|pc| {
3944                 pc.iommu_segments
3945                     .as_ref()
3946                     .map(|v| v.contains(&pci_segment_id))
3947                     .unwrap_or_default()
3948             })
3949             .unwrap_or_default()
3950     }
3951 
3952     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
3953         self.validate_identifier(&disk_cfg.id)?;
3954 
3955         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
3956             return Err(DeviceManagerError::InvalidIommuHotplug);
3957         }
3958 
3959         let device = self.make_virtio_block_device(disk_cfg)?;
3960         self.hotplug_virtio_pci_device(device)
3961     }
3962 
3963     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
3964         self.validate_identifier(&fs_cfg.id)?;
3965 
3966         let device = self.make_virtio_fs_device(fs_cfg)?;
3967         self.hotplug_virtio_pci_device(device)
3968     }
3969 
3970     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
3971         self.validate_identifier(&pmem_cfg.id)?;
3972 
3973         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
3974             return Err(DeviceManagerError::InvalidIommuHotplug);
3975         }
3976 
3977         let device = self.make_virtio_pmem_device(pmem_cfg)?;
3978         self.hotplug_virtio_pci_device(device)
3979     }
3980 
3981     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
3982         self.validate_identifier(&net_cfg.id)?;
3983 
3984         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
3985             return Err(DeviceManagerError::InvalidIommuHotplug);
3986         }
3987 
3988         let device = self.make_virtio_net_device(net_cfg)?;
3989         self.hotplug_virtio_pci_device(device)
3990     }
3991 
3992     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
3993         self.validate_identifier(&vdpa_cfg.id)?;
3994 
3995         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
3996             return Err(DeviceManagerError::InvalidIommuHotplug);
3997         }
3998 
3999         let device = self.make_vdpa_device(vdpa_cfg)?;
4000         self.hotplug_virtio_pci_device(device)
4001     }
4002 
4003     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4004         self.validate_identifier(&vsock_cfg.id)?;
4005 
4006         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4007             return Err(DeviceManagerError::InvalidIommuHotplug);
4008         }
4009 
4010         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4011         self.hotplug_virtio_pci_device(device)
4012     }
4013 
4014     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4015         let mut counters = HashMap::new();
4016 
4017         for handle in &self.virtio_devices {
4018             let virtio_device = handle.virtio_device.lock().unwrap();
4019             if let Some(device_counters) = virtio_device.counters() {
4020                 counters.insert(handle.id.clone(), device_counters.clone());
4021             }
4022         }
4023 
4024         counters
4025     }
4026 
4027     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4028         if let Some(balloon) = &self.balloon {
4029             return balloon
4030                 .lock()
4031                 .unwrap()
4032                 .resize(size)
4033                 .map_err(DeviceManagerError::VirtioBalloonResize);
4034         }
4035 
4036         warn!("No balloon setup: Can't resize the balloon");
4037         Err(DeviceManagerError::MissingVirtioBalloon)
4038     }
4039 
4040     pub fn balloon_size(&self) -> u64 {
4041         if let Some(balloon) = &self.balloon {
4042             return balloon.lock().unwrap().get_actual();
4043         }
4044 
4045         0
4046     }
4047 
4048     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4049         self.device_tree.clone()
4050     }
4051 
4052     pub fn restore_devices(
4053         &mut self,
4054         snapshot: Snapshot,
4055     ) -> std::result::Result<(), MigratableError> {
4056         // Finally, restore all devices associated with the DeviceManager.
4057         // It's important to restore devices in the right order, that's why
4058         // the device tree is the right way to ensure we restore a child before
4059         // its parent node.
4060         for node in self
4061             .device_tree
4062             .lock()
4063             .unwrap()
4064             .breadth_first_traversal()
4065             .rev()
4066         {
4067             // Restore the node
4068             if let Some(migratable) = &node.migratable {
4069                 info!("Restoring {} from DeviceManager", node.id);
4070                 if let Some(snapshot) = snapshot.snapshots.get(&node.id) {
4071                     migratable.lock().unwrap().pause()?;
4072                     migratable.lock().unwrap().restore(*snapshot.clone())?;
4073                 } else {
4074                     return Err(MigratableError::Restore(anyhow!(
4075                         "Missing device {}",
4076                         node.id
4077                     )));
4078                 }
4079             }
4080         }
4081 
4082         // The devices have been fully restored, we can now update the
4083         // restoring state of the DeviceManager.
4084         self.restoring = false;
4085 
4086         Ok(())
4087     }
4088 
4089     #[cfg(target_arch = "x86_64")]
4090     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4091         self.ged_notification_device
4092             .as_ref()
4093             .unwrap()
4094             .lock()
4095             .unwrap()
4096             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4097             .map_err(DeviceManagerError::PowerButtonNotification)
4098     }
4099 
4100     #[cfg(target_arch = "aarch64")]
4101     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4102         // There are two use cases:
4103         // 1. Users will use direct kernel boot with device tree.
4104         // 2. Users will use ACPI+UEFI boot.
4105 
4106         // Trigger a GPIO pin 3 event to satisify use case 1.
4107         self.gpio_device
4108             .as_ref()
4109             .unwrap()
4110             .lock()
4111             .unwrap()
4112             .trigger_key(3)
4113             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4114         // Trigger a GED power button event to satisify use case 2.
4115         return self
4116             .ged_notification_device
4117             .as_ref()
4118             .unwrap()
4119             .lock()
4120             .unwrap()
4121             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4122             .map_err(DeviceManagerError::PowerButtonNotification);
4123     }
4124 
    /// Returns a reference to the `iommu_attached_devices` field: an optional
    /// pair made of one `PciBdf` and a list of `PciBdf`s.
    ///
    /// NOTE(review): presumably the BDF of the IOMMU device followed by the
    /// BDFs of the devices attached to it — confirm against where the field
    /// is populated.
    pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
        &self.iommu_attached_devices
    }
4128 
4129     #[cfg(target_arch = "aarch64")]
4130     pub fn uefi_flash(&self) -> GuestMemoryAtomic<GuestMemoryMmap> {
4131         self.uefi_flash.as_ref().unwrap().clone()
4132     }
4133 
4134     fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4135         if let Some(id) = id {
4136             if id.starts_with("__") {
4137                 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4138             }
4139 
4140             if self.device_tree.lock().unwrap().contains_key(id) {
4141                 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4142             }
4143         }
4144 
4145         Ok(())
4146     }
4147 
    /// Returns a reference to the `AcpiPlatformAddresses` stored in the
    /// DeviceManager.
    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
4151 }
4152 
4153 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4154     for (numa_node_id, numa_node) in numa_nodes.iter() {
4155         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4156             return Some(*numa_node_id);
4157         }
4158     }
4159 
4160     None
4161 }
4162 
4163 impl Aml for DeviceManager {
4164     fn append_aml_bytes(&self, bytes: &mut Vec<u8>) {
4165         #[cfg(target_arch = "aarch64")]
4166         use arch::aarch64::DeviceInfoForFdt;
4167 
4168         let mut pci_scan_methods = Vec::new();
4169         for i in 0..self.pci_segments.len() {
4170             pci_scan_methods.push(aml::MethodCall::new(
4171                 format!("\\_SB_.PCI{:X}.PCNT", i).as_str().into(),
4172                 vec![],
4173             ));
4174         }
4175         let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
4176         for method in &pci_scan_methods {
4177             pci_scan_inner.push(method)
4178         }
4179 
4180         // PCI hotplug controller
4181         aml::Device::new(
4182             "_SB_.PHPR".into(),
4183             vec![
4184                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
4185                 &aml::Name::new("_STA".into(), &0x0bu8),
4186                 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
4187                 &aml::Mutex::new("BLCK".into(), 0),
4188                 &aml::Name::new(
4189                     "_CRS".into(),
4190                     &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
4191                         aml::AddressSpaceCachable::NotCacheable,
4192                         true,
4193                         self.acpi_address.0 as u64,
4194                         self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
4195                     )]),
4196                 ),
4197                 // OpRegion and Fields map MMIO range into individual field values
4198                 &aml::OpRegion::new(
4199                     "PCST".into(),
4200                     aml::OpRegionSpace::SystemMemory,
4201                     self.acpi_address.0 as usize,
4202                     DEVICE_MANAGER_ACPI_SIZE,
4203                 ),
4204                 &aml::Field::new(
4205                     "PCST".into(),
4206                     aml::FieldAccessType::DWord,
4207                     aml::FieldUpdateRule::WriteAsZeroes,
4208                     vec![
4209                         aml::FieldEntry::Named(*b"PCIU", 32),
4210                         aml::FieldEntry::Named(*b"PCID", 32),
4211                         aml::FieldEntry::Named(*b"B0EJ", 32),
4212                         aml::FieldEntry::Named(*b"PSEG", 32),
4213                     ],
4214                 ),
4215                 &aml::Method::new(
4216                     "PCEJ".into(),
4217                     2,
4218                     true,
4219                     vec![
4220                         // Take lock defined above
4221                         &aml::Acquire::new("BLCK".into(), 0xffff),
4222                         // Choose the current segment
4223                         &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
4224                         // Write PCI bus number (in first argument) to I/O port via field
4225                         &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
4226                         // Release lock
4227                         &aml::Release::new("BLCK".into()),
4228                         // Return 0
4229                         &aml::Return::new(&aml::ZERO),
4230                     ],
4231                 ),
4232                 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
4233             ],
4234         )
4235         .append_aml_bytes(bytes);
4236 
4237         for segment in &self.pci_segments {
4238             segment.append_aml_bytes(bytes);
4239         }
4240 
4241         let mut mbrd_memory = Vec::new();
4242 
4243         for segment in &self.pci_segments {
4244             mbrd_memory.push(aml::Memory32Fixed::new(
4245                 true,
4246                 segment.mmio_config_address as u32,
4247                 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
4248             ))
4249         }
4250 
4251         let mut mbrd_memory_refs = Vec::new();
4252         for mbrd_memory_ref in &mbrd_memory {
4253             mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
4254         }
4255 
4256         aml::Device::new(
4257             "_SB_.MBRD".into(),
4258             vec![
4259                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")),
4260                 &aml::Name::new("_UID".into(), &aml::ZERO),
4261                 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
4262             ],
4263         )
4264         .append_aml_bytes(bytes);
4265 
4266         // Serial device
4267         #[cfg(target_arch = "x86_64")]
4268         let serial_irq = 4;
4269         #[cfg(target_arch = "aarch64")]
4270         let serial_irq =
4271             if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
4272                 self.get_device_info()
4273                     .clone()
4274                     .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
4275                     .unwrap()
4276                     .irq()
4277             } else {
4278                 // If serial is turned off, add a fake device with invalid irq.
4279                 31
4280             };
4281         if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
4282             aml::Device::new(
4283                 "_SB_.COM1".into(),
4284                 vec![
4285                     &aml::Name::new(
4286                         "_HID".into(),
4287                         #[cfg(target_arch = "x86_64")]
4288                         &aml::EisaName::new("PNP0501"),
4289                         #[cfg(target_arch = "aarch64")]
4290                         &"ARMH0011",
4291                     ),
4292                     &aml::Name::new("_UID".into(), &aml::ZERO),
4293                     &aml::Name::new("_DDN".into(), &"COM1"),
4294                     &aml::Name::new(
4295                         "_CRS".into(),
4296                         &aml::ResourceTemplate::new(vec![
4297                             &aml::Interrupt::new(true, true, false, false, serial_irq),
4298                             #[cfg(target_arch = "x86_64")]
4299                             &aml::Io::new(0x3f8, 0x3f8, 0, 0x8),
4300                             #[cfg(target_arch = "aarch64")]
4301                             &aml::Memory32Fixed::new(
4302                                 true,
4303                                 arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
4304                                 MMIO_LEN as u32,
4305                             ),
4306                         ]),
4307                     ),
4308                 ],
4309             )
4310             .append_aml_bytes(bytes);
4311         }
4312 
4313         aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).append_aml_bytes(bytes);
4314 
4315         aml::Device::new(
4316             "_SB_.PWRB".into(),
4317             vec![
4318                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")),
4319                 &aml::Name::new("_UID".into(), &aml::ZERO),
4320             ],
4321         )
4322         .append_aml_bytes(bytes);
4323 
4324         self.ged_notification_device
4325             .as_ref()
4326             .unwrap()
4327             .lock()
4328             .unwrap()
4329             .append_aml_bytes(bytes);
4330     }
4331 }
4332 
4333 impl Pausable for DeviceManager {
4334     fn pause(&mut self) -> result::Result<(), MigratableError> {
4335         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4336             if let Some(migratable) = &device_node.migratable {
4337                 migratable.lock().unwrap().pause()?;
4338             }
4339         }
4340         // On AArch64, the pause of device manager needs to trigger
4341         // a "pause" of GIC, which will flush the GIC pending tables
4342         // and ITS tables to guest RAM.
4343         #[cfg(target_arch = "aarch64")]
4344         {
4345             self.get_interrupt_controller()
4346                 .unwrap()
4347                 .lock()
4348                 .unwrap()
4349                 .pause()?;
4350         };
4351 
4352         Ok(())
4353     }
4354 
4355     fn resume(&mut self) -> result::Result<(), MigratableError> {
4356         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4357             if let Some(migratable) = &device_node.migratable {
4358                 migratable.lock().unwrap().resume()?;
4359             }
4360         }
4361 
4362         Ok(())
4363     }
4364 }
4365 
4366 impl Snapshottable for DeviceManager {
4367     fn id(&self) -> String {
4368         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4369     }
4370 
4371     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4372         let mut snapshot = Snapshot::new(DEVICE_MANAGER_SNAPSHOT_ID);
4373 
4374         // We aggregate all devices snapshots.
4375         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4376             if let Some(migratable) = &device_node.migratable {
4377                 let device_snapshot = migratable.lock().unwrap().snapshot()?;
4378                 snapshot.add_snapshot(device_snapshot);
4379             }
4380         }
4381 
4382         // Then we store the DeviceManager state.
4383         snapshot.add_data_section(SnapshotDataSection::new_from_state(
4384             DEVICE_MANAGER_SNAPSHOT_ID,
4385             &self.state(),
4386         )?);
4387 
4388         Ok(snapshot)
4389     }
4390 
4391     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
4392         // Let's first restore the DeviceManager.
4393 
4394         self.set_state(&snapshot.to_state(DEVICE_MANAGER_SNAPSHOT_ID)?);
4395 
4396         // Now that DeviceManager is updated with the right states, it's time
4397         // to create the devices based on the configuration.
4398         self.create_devices(None, None, None)
4399             .map_err(|e| MigratableError::Restore(anyhow!("Could not create devices {:?}", e)))?;
4400 
4401         Ok(())
4402     }
4403 }
4404 
// Empty impl: DeviceManager defines no `Transportable` methods of its own.
impl Transportable for DeviceManager {}
4406 
4407 impl Migratable for DeviceManager {
4408     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4409         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4410             if let Some(migratable) = &device_node.migratable {
4411                 migratable.lock().unwrap().start_dirty_log()?;
4412             }
4413         }
4414         Ok(())
4415     }
4416 
4417     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4418         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4419             if let Some(migratable) = &device_node.migratable {
4420                 migratable.lock().unwrap().stop_dirty_log()?;
4421             }
4422         }
4423         Ok(())
4424     }
4425 
4426     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4427         let mut tables = Vec::new();
4428         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4429             if let Some(migratable) = &device_node.migratable {
4430                 tables.push(migratable.lock().unwrap().dirty_log()?);
4431             }
4432         }
4433         Ok(MemoryRangeTable::new_from_tables(tables))
4434     }
4435 
4436     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4437         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4438             if let Some(migratable) = &device_node.migratable {
4439                 migratable.lock().unwrap().start_migration()?;
4440             }
4441         }
4442         Ok(())
4443     }
4444 
4445     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4446         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4447             if let Some(migratable) = &device_node.migratable {
4448                 migratable.lock().unwrap().complete_migration()?;
4449             }
4450         }
4451         Ok(())
4452     }
4453 }
4454 
// Layout of the PCI hotplug register block served by the `BusDevice`
// implementation of `DeviceManager`: four consecutive 4-byte registers.

// PCIU: reading returns the selected segment's `pci_devices_up` bitmap and
// clears it.
const PCIU_FIELD_OFFSET: u64 = 0;
// PCID: reading returns the selected segment's `pci_devices_down` bitmap and
// clears it.
const PCID_FIELD_OFFSET: u64 = 4;
// B0EJ: writing a slot bitmap ejects the matching devices; reads return 0.
const B0EJ_FIELD_OFFSET: u64 = 8;
// PSEG: selects the PCI segment the other registers operate on.
const PSEG_FIELD_OFFSET: u64 = 12;
// Required access size, in bytes, for each register.
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;
4463 
4464 impl BusDevice for DeviceManager {
4465     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4466         match offset {
4467             PCIU_FIELD_OFFSET => {
4468                 assert!(data.len() == PCIU_FIELD_SIZE);
4469                 data.copy_from_slice(
4470                     &self.pci_segments[self.selected_segment]
4471                         .pci_devices_up
4472                         .to_le_bytes(),
4473                 );
4474                 // Clear the PCIU bitmap
4475                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4476             }
4477             PCID_FIELD_OFFSET => {
4478                 assert!(data.len() == PCID_FIELD_SIZE);
4479                 data.copy_from_slice(
4480                     &self.pci_segments[self.selected_segment]
4481                         .pci_devices_down
4482                         .to_le_bytes(),
4483                 );
4484                 // Clear the PCID bitmap
4485                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4486             }
4487             B0EJ_FIELD_OFFSET => {
4488                 assert!(data.len() == B0EJ_FIELD_SIZE);
4489                 // Always return an empty bitmap since the eject is always
4490                 // taken care of right away during a write access.
4491                 data.fill(0);
4492             }
4493             PSEG_FIELD_OFFSET => {
4494                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4495                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4496             }
4497             _ => error!(
4498                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4499                 base, offset
4500             ),
4501         }
4502 
4503         debug!(
4504             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4505             base, offset, data
4506         )
4507     }
4508 
4509     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4510         match offset {
4511             B0EJ_FIELD_OFFSET => {
4512                 assert!(data.len() == B0EJ_FIELD_SIZE);
4513                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4514                 data_array.copy_from_slice(data);
4515                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4516 
4517                 while slot_bitmap > 0 {
4518                     let slot_id = slot_bitmap.trailing_zeros();
4519                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4520                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4521                     }
4522                     slot_bitmap &= !(1 << slot_id);
4523                 }
4524             }
4525             PSEG_FIELD_OFFSET => {
4526                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4527                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4528                 data_array.copy_from_slice(data);
4529                 let selected_segment = u32::from_le_bytes(data_array) as usize;
4530                 if selected_segment >= self.pci_segments.len() {
4531                     error!(
4532                         "Segment selection out of range: {} >= {}",
4533                         selected_segment,
4534                         self.pci_segments.len()
4535                     );
4536                     return None;
4537                 }
4538                 self.selected_segment = selected_segment;
4539             }
4540             _ => error!(
4541                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4542                 base, offset
4543             ),
4544         }
4545 
4546         debug!(
4547             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4548             base, offset, data
4549         );
4550 
4551         None
4552     }
4553 }
4554 
4555 impl Drop for DeviceManager {
4556     fn drop(&mut self) {
4557         for handle in self.virtio_devices.drain(..) {
4558             handle.virtio_device.lock().unwrap().shutdown();
4559         }
4560     }
4561 }
4562