xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision 6f8bd27cf7629733582d930519e98d19e90afb16)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use crate::config::{
13     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
14     VdpaConfig, VhostMode, VmConfig, VsockConfig,
15 };
16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE};
17 use crate::device_tree::{DeviceNode, DeviceTree};
18 use crate::interrupt::LegacyUserspaceInterruptManager;
19 use crate::interrupt::MsiInterruptManager;
20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
21 use crate::pci_segment::PciSegment;
22 use crate::seccomp_filters::{get_seccomp_filter, Thread};
23 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
24 use crate::sigwinch_listener::start_sigwinch_listener;
25 use crate::GuestRegionMmap;
26 use crate::PciDeviceInfo;
27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
28 use acpi_tables::sdt::GenericAddress;
29 use acpi_tables::{aml, aml::Aml};
30 use anyhow::anyhow;
31 use arch::layout;
32 #[cfg(target_arch = "x86_64")]
33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
34 use arch::NumaNodes;
35 #[cfg(target_arch = "aarch64")]
36 use arch::{DeviceType, MmioDeviceInfo};
37 use block_util::{
38     async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
39     fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync,
40     raw_async::RawFileDisk, raw_sync::RawFileDiskSync, vhdx_sync::VhdxDiskSync, ImageType,
41 };
42 #[cfg(target_arch = "aarch64")]
43 use devices::gic;
44 #[cfg(target_arch = "x86_64")]
45 use devices::ioapic;
46 #[cfg(target_arch = "aarch64")]
47 use devices::legacy::Pl011;
48 #[cfg(target_arch = "x86_64")]
49 use devices::legacy::Serial;
50 use devices::{
51     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
52 };
53 use hypervisor::{HypervisorType, IoEventAddress};
54 use libc::{
55     cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED,
56     O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW,
57 };
58 use pci::{
59     DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping,
60     VfioUserPciDevice, VfioUserPciDeviceError,
61 };
62 use seccompiler::SeccompAction;
63 use serde::{Deserialize, Serialize};
64 use std::collections::{BTreeSet, HashMap};
65 use std::convert::TryInto;
66 use std::fs::{read_link, File, OpenOptions};
67 use std::io::{self, stdout, Seek, SeekFrom};
68 use std::mem::zeroed;
69 use std::num::Wrapping;
70 use std::os::unix::fs::OpenOptionsExt;
71 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
72 use std::path::PathBuf;
73 use std::result;
74 use std::sync::{Arc, Mutex};
75 use std::time::Instant;
76 use tracer::trace_scoped;
77 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd};
78 use virtio_devices::transport::VirtioTransport;
79 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator};
80 use virtio_devices::vhost_user::VhostUserConfig;
81 use virtio_devices::{
82     AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource,
83 };
84 use virtio_devices::{Endpoint, IommuMapping};
85 use vm_allocator::{AddressAllocator, SystemAllocator};
86 use vm_device::dma_mapping::vfio::VfioDmaMapping;
87 use vm_device::dma_mapping::ExternalDmaMapping;
88 use vm_device::interrupt::{
89     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
90 };
91 use vm_device::{Bus, BusDevice, Resource};
92 use vm_memory::guest_memory::FileOffset;
93 use vm_memory::GuestMemoryRegion;
94 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
95 #[cfg(target_arch = "x86_64")]
96 use vm_memory::{GuestAddressSpace, GuestMemory};
97 use vm_migration::{
98     protocol::MemoryRangeTable, snapshot_from_id, versioned_state_from_id, Migratable,
99     MigratableError, Pausable, Snapshot, SnapshotDataSection, Snapshottable, Transportable,
100 };
101 use vm_virtio::AccessPlatform;
102 use vm_virtio::VirtioDeviceType;
103 use vmm_sys_util::eventfd::EventFd;
104 
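// Size of the MMIO window assigned to each AArch64 MMIO-mapped device (4 KiB).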
105 #[cfg(target_arch = "aarch64")]
106 const MMIO_LEN: u64 = 0x1000;
107 
108 // Singleton devices / devices the user cannot name
109 #[cfg(target_arch = "x86_64")]
110 const IOAPIC_DEVICE_NAME: &str = "__ioapic";
111 const SERIAL_DEVICE_NAME: &str = "__serial";
112 #[cfg(target_arch = "aarch64")]
113 const GPIO_DEVICE_NAME: &str = "__gpio";
114 const RNG_DEVICE_NAME: &str = "__rng";
115 const IOMMU_DEVICE_NAME: &str = "__iommu";
116 const BALLOON_DEVICE_NAME: &str = "__balloon";
117 const CONSOLE_DEVICE_NAME: &str = "__console";
118 
119 // Devices that the user may name and for which we generate
120 // identifiers if the user doesn't give one
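// (generated identifiers append a counter to the prefix, e.g. "_disk0", "_net1", ...)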
121 const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
122 const FS_DEVICE_NAME_PREFIX: &str = "_fs";
123 const NET_DEVICE_NAME_PREFIX: &str = "_net";
124 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
125 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
126 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
127 const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
128 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
129 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
130 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
131 
132 /// Errors associated with device manager
133 #[derive(Debug)]
134 pub enum DeviceManagerError {
135     /// Cannot create EventFd.
136     EventFd(io::Error),
137 
138     /// Cannot open disk path
139     Disk(io::Error),
140 
141     /// Cannot create vhost-user-net device
142     CreateVhostUserNet(virtio_devices::vhost_user::Error),
143 
144     /// Cannot create virtio-blk device
145     CreateVirtioBlock(io::Error),
146 
147     /// Cannot create virtio-net device
148     CreateVirtioNet(virtio_devices::net::Error),
149 
150     /// Cannot create virtio-console device
151     CreateVirtioConsole(io::Error),
152 
153     /// Cannot create virtio-rng device
154     CreateVirtioRng(io::Error),
155 
156     /// Cannot create virtio-fs device
157     CreateVirtioFs(virtio_devices::vhost_user::Error),
158 
159     /// Virtio-fs device was created without a socket.
160     NoVirtioFsSock,
161 
162     /// Cannot create vhost-user-blk device
163     CreateVhostUserBlk(virtio_devices::vhost_user::Error),
164 
165     /// Cannot create virtio-pmem device
166     CreateVirtioPmem(io::Error),
167 
168     /// Cannot create vDPA device
169     CreateVdpa(virtio_devices::vdpa::Error),
170 
171     /// Cannot create virtio-vsock device
172     CreateVirtioVsock(io::Error),
173 
174     /// Cannot create tpm device
175     CreateTpmDevice(anyhow::Error),
176 
177     /// Failed to convert Path to &str for the vDPA device.
178     CreateVdpaConvertPath,
179 
180     /// Failed to convert Path to &str for the virtio-vsock device.
181     CreateVsockConvertPath,
182 
183     /// Cannot create virtio-vsock backend
184     CreateVsockBackend(virtio_devices::vsock::VsockUnixError),
185 
186     /// Cannot create virtio-iommu device
187     CreateVirtioIommu(io::Error),
188 
189     /// Cannot create virtio-balloon device
190     CreateVirtioBalloon(io::Error),
191 
192     /// Cannot create virtio-watchdog device
193     CreateVirtioWatchdog(io::Error),
194 
195     /// Failed to parse disk image format
196     DetectImageType(io::Error),
197 
198     /// Cannot open qcow disk path
199     QcowDeviceCreate(qcow::Error),
200 
201     /// Cannot create serial manager
202     CreateSerialManager(SerialManagerError),
203 
204     /// Cannot spawn the serial manager thread
205     SpawnSerialManager(SerialManagerError),
206 
207     /// Cannot open tap interface
208     OpenTap(net_util::TapError),
209 
210     /// Cannot allocate IRQ.
211     AllocateIrq,
212 
213     /// Cannot configure the IRQ.
214     Irq(vmm_sys_util::errno::Error),
215 
216     /// Cannot allocate PCI BARs
217     AllocateBars(pci::PciDeviceError),
218 
219     /// Could not free the BARs associated with a PCI device.
220     FreePciBars(pci::PciDeviceError),
221 
222     /// Cannot register ioevent.
223     RegisterIoevent(anyhow::Error),
224 
225     /// Cannot unregister ioevent.
226     UnRegisterIoevent(anyhow::Error),
227 
228     /// Cannot create virtio device
229     VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),
230 
231     /// Cannot add PCI device
232     AddPciDevice(pci::PciRootError),
233 
234     /// Cannot open persistent memory file
235     PmemFileOpen(io::Error),
236 
237     /// Cannot set persistent memory file size
238     PmemFileSetLen(io::Error),
239 
240     /// Cannot find a memory range for persistent memory
241     PmemRangeAllocation,
242 
243     /// Cannot find a memory range for virtio-fs
244     FsRangeAllocation,
245 
246     /// Error creating serial output file
247     SerialOutputFileOpen(io::Error),
248 
249     /// Error creating console output file
250     ConsoleOutputFileOpen(io::Error),
251 
252     /// Error creating serial pty
253     SerialPtyOpen(io::Error),
254 
255     /// Error creating console pty
256     ConsolePtyOpen(io::Error),
257 
258     /// Error setting pty raw mode
259     SetPtyRaw(vmm_sys_util::errno::Error),
260 
261     /// Error getting pty peer
262     GetPtyPeer(vmm_sys_util::errno::Error),
263 
264     /// Cannot create a VFIO device
265     VfioCreate(vfio_ioctls::VfioError),
266 
267     /// Cannot create a VFIO PCI device
268     VfioPciCreate(pci::VfioPciError),
269 
270     /// Failed to map VFIO MMIO region.
271     VfioMapRegion(pci::VfioPciError),
272 
273     /// Failed to DMA map VFIO device.
274     VfioDmaMap(vfio_ioctls::VfioError),
275 
276     /// Failed to DMA unmap VFIO device.
277     VfioDmaUnmap(pci::VfioPciError),
278 
279     /// Failed to create the passthrough device.
280     CreatePassthroughDevice(anyhow::Error),
281 
282     /// Failed to memory map.
283     Mmap(io::Error),
284 
285     /// Cannot add legacy device to Bus.
286     BusError(vm_device::BusError),
287 
288     /// Failed to allocate IO port
289     AllocateIoPort,
290 
291     /// Failed to allocate MMIO address
292     AllocateMmioAddress,
293 
294     /// Failed to make hotplug notification
295     HotPlugNotification(io::Error),
296 
297     /// Error from a memory manager operation
298     MemoryManager(MemoryManagerError),
299 
300     /// Failed to create new interrupt source group.
301     CreateInterruptGroup(io::Error),
302 
303     /// Failed to update interrupt source group.
304     UpdateInterruptGroup(io::Error),
305 
306     /// Failed to create interrupt controller.
307     CreateInterruptController(interrupt_controller::Error),
308 
309     /// Failed to create a new MmapRegion instance.
310     NewMmapRegion(vm_memory::mmap::MmapRegionError),
311 
312     /// Failed to clone a File.
313     CloneFile(io::Error),
314 
315     /// Failed to create socket file
316     CreateSocketFile(io::Error),
317 
318     /// Failed to spawn the network backend
319     SpawnNetBackend(io::Error),
320 
321     /// Failed to spawn the block backend
322     SpawnBlockBackend(io::Error),
323 
324     /// Missing PCI bus.
325     NoPciBus,
326 
327     /// Could not find an available device name.
328     NoAvailableDeviceName,
329 
330     /// Missing PCI device.
331     MissingPciDevice,
332 
333     /// Failed to remove a PCI device from the PCI bus.
334     RemoveDeviceFromPciBus(pci::PciRootError),
335 
336     /// Failed to remove a bus device from the IO bus.
337     RemoveDeviceFromIoBus(vm_device::BusError),
338 
339     /// Failed to remove a bus device from the MMIO bus.
340     RemoveDeviceFromMmioBus(vm_device::BusError),
341 
342     /// Failed to find the device corresponding to a specific PCI b/d/f.
343     UnknownPciBdf(u32),
344 
345     /// Not allowed to remove this type of device from the VM.
346     RemovalNotAllowed(vm_virtio::VirtioDeviceType),
347 
348     /// Failed to find device corresponding to the given identifier.
349     UnknownDeviceId(String),
350 
351     /// Failed to find an available PCI device ID.
352     NextPciDeviceId(pci::PciRootError),
353 
354     /// Could not reserve the PCI device ID.
355     GetPciDeviceId(pci::PciRootError),
356 
357     /// Could not give the PCI device ID back.
358     PutPciDeviceId(pci::PciRootError),
359 
360     /// No disk path was specified when one was expected
361     NoDiskPath,
362 
363     /// Failed to update guest memory for virtio device.
364     UpdateMemoryForVirtioDevice(virtio_devices::Error),
365 
366     /// Cannot create virtio-mem device
367     CreateVirtioMem(io::Error),
368 
369     /// Cannot find a memory range for virtio-mem memory
370     VirtioMemRangeAllocation,
371 
372     /// Failed to update guest memory for VFIO PCI device.
373     UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),
374 
375     /// Trying to use a directory for pmem but no size specified
376     PmemWithDirectorySizeMissing,
377 
378     /// Trying to use a size that is not multiple of 2MiB
379     PmemSizeNotAligned,
380 
381     /// Could not find the node in the device tree.
382     MissingNode,
383 
384     /// Resource was already found.
385     ResourceAlreadyExists,
386 
387     /// Expected resources for virtio-pmem could not be found.
388     MissingVirtioPmemResources,
389 
390     /// Missing PCI b/d/f from the DeviceNode.
391     MissingDeviceNodePciBdf,
392 
393     /// No support for device passthrough
394     NoDevicePassthroughSupport,
395 
396     /// Failed to resize virtio-balloon
397     VirtioBalloonResize(virtio_devices::balloon::Error),
398 
399     /// Missing virtio-balloon, can't proceed as expected.
400     MissingVirtioBalloon,
401 
402     /// Missing virtual IOMMU device
403     MissingVirtualIommu,
404 
405     /// Failed to do power button notification
406     PowerButtonNotification(io::Error),
407 
408     /// Failed to do AArch64 GPIO power button notification
409     #[cfg(target_arch = "aarch64")]
410     AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),
411 
412     /// Failed to set O_DIRECT flag to file descriptor
413     SetDirectIo,
414 
415     /// Failed to create FixedVhdDiskAsync
416     CreateFixedVhdDiskAsync(io::Error),
417 
418     /// Failed to create FixedVhdDiskSync
419     CreateFixedVhdDiskSync(io::Error),
420 
421     /// Failed to create QcowDiskSync
422     CreateQcowDiskSync(qcow::Error),
423 
424     /// Failed to create FixedVhdxDiskSync
425     CreateFixedVhdxDiskSync(vhdx::vhdx::VhdxError),
426 
427     /// Failed to add DMA mapping handler to virtio-mem device.
428     AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
429 
430     /// Failed to remove DMA mapping handler from virtio-mem device.
431     RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
432 
433     /// Failed to create vfio-user client
434     VfioUserCreateClient(vfio_user::Error),
435 
436     /// Failed to create VFIO user device
437     VfioUserCreate(VfioUserPciDeviceError),
438 
439     /// Failed to map region from VFIO user device into guest
440     VfioUserMapRegion(VfioUserPciDeviceError),
441 
442     /// Failed to DMA map VFIO user device.
443     VfioUserDmaMap(VfioUserPciDeviceError),
444 
445     /// Failed to DMA unmap VFIO user device.
446     VfioUserDmaUnmap(VfioUserPciDeviceError),
447 
448     /// Failed to update memory mappings for VFIO user device
449     UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),
450 
451     /// Cannot duplicate file descriptor
452     DupFd(vmm_sys_util::errno::Error),
453 
454     /// Failed to DMA map virtio device.
455     VirtioDmaMap(std::io::Error),
456 
457     /// Failed to DMA unmap virtio device.
458     VirtioDmaUnmap(std::io::Error),
459 
460     /// Cannot hotplug device behind vIOMMU
461     InvalidIommuHotplug,
462 
463     /// Invalid identifier as it is not unique.
464     IdentifierNotUnique(String),
465 
466     /// Invalid identifier
467     InvalidIdentifier(String),
468 
469     /// Error activating virtio device
470     VirtioActivate(ActivateError),
471 
472     /// Failed retrieving device state from snapshot
473     RestoreGetState(MigratableError),
474 }
475 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;
476 
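// Size of the MMIO region through which the DeviceManager itself is exposed on
// the MMIO bus (it is inserted at `acpi_address` at the end of DeviceManager::new()).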
477 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;
478 
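// ioctl request numbers used by create_pty() below: TIOCSPTLCK locks/unlocks the
// pty (here it is used to unlock it so the peer can be opened), and TIOCGTPEER
// opens the peer (sub) side directly from the main file descriptor.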
479 const TIOCSPTLCK: libc::c_int = 0x4004_5431;
480 const TIOCGTPEER: libc::c_int = 0x5441;
481 
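/// Create a new pseudo-terminal pair.
///
/// Returns the main (controller) side, the sub (peer) side, and the
/// filesystem path of the sub side.
///
/// A minimal usage sketch (illustrative only, not taken from this crate):
///
/// ```ignore
/// let (main, sub, path) = create_pty()?;
/// // `path` can be reported to the user so a terminal emulator can be
/// // attached to the guest's serial or console output.
/// println!("pty available at {}", path.display());
/// ```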
482 pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
483     // Try to use /dev/pts/ptmx first, then fall back to /dev/ptmx.
484     // This is done to prefer the devpts filesystem that may be
485     // mounted in the process's namespace. Ideally these are all the
486     // same file, but different kernels may have things set up
487     // differently.
488     // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
489     // for further details.
490 
491     let custom_flags = libc::O_NONBLOCK;
492     let main = match OpenOptions::new()
493         .read(true)
494         .write(true)
495         .custom_flags(custom_flags)
496         .open("/dev/pts/ptmx")
497     {
498         Ok(f) => f,
499         _ => OpenOptions::new()
500             .read(true)
501             .write(true)
502             .custom_flags(custom_flags)
503             .open("/dev/ptmx")?,
504     };
505     let mut unlock: libc::c_ulong = 0;
506     // SAFETY: FFI call into libc, trivially safe
507     unsafe {
508         libc::ioctl(
509             main.as_raw_fd(),
510             TIOCSPTLCK.try_into().unwrap(),
511             &mut unlock,
512         )
513     };
514 
515     // SAFETY: FFI call into libc, trivially safe
516     let sub_fd = unsafe {
517         libc::ioctl(
518             main.as_raw_fd(),
519             TIOCGTPEER.try_into().unwrap(),
520             libc::O_NOCTTY | libc::O_RDWR,
521         )
522     };
523     if sub_fd == -1 {
524         return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
525     }
526 
527     let proc_path = PathBuf::from(format!("/proc/self/fd/{}", sub_fd));
528     let path = read_link(proc_path)?;
529 
530     // SAFETY: sub_fd is checked to be valid before being wrapped in File
531     Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
532 }
533 
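// Thin wrapper around the optional virtio-console resizer; update_console_size()
// is a no-op when no console resizer has been set up.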
534 #[derive(Default)]
535 pub struct Console {
536     console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
537 }
538 
539 impl Console {
540     pub fn update_console_size(&self) {
541         if let Some(resizer) = self.console_resizer.as_ref() {
542             resizer.update_console_size()
543         }
544     }
545 }
546 
547 pub(crate) struct AddressManager {
548     pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
549     #[cfg(target_arch = "x86_64")]
550     pub(crate) io_bus: Arc<Bus>,
551     pub(crate) mmio_bus: Arc<Bus>,
552     pub(crate) vm: Arc<dyn hypervisor::Vm>,
553     device_tree: Arc<Mutex<DeviceTree>>,
554     pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
555 }
556 
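// Relocating a PCI BAR means keeping several views of the address space in
// sync: the system/MMIO allocators, the PIO and MMIO buses, the device_tree
// resources, and, for virtio-pci devices, any ioeventfds or shared memory
// regions that the hypervisor has registered for the old base address.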
557 impl DeviceRelocation for AddressManager {
558     fn move_bar(
559         &self,
560         old_base: u64,
561         new_base: u64,
562         len: u64,
563         pci_dev: &mut dyn PciDevice,
564         region_type: PciBarRegionType,
565     ) -> std::result::Result<(), std::io::Error> {
566         match region_type {
567             PciBarRegionType::IoRegion => {
568                 #[cfg(target_arch = "x86_64")]
569                 {
570                     // Update system allocator
571                     self.allocator
572                         .lock()
573                         .unwrap()
574                         .free_io_addresses(GuestAddress(old_base), len as GuestUsize);
575 
576                     self.allocator
577                         .lock()
578                         .unwrap()
579                         .allocate_io_addresses(
580                             Some(GuestAddress(new_base)),
581                             len as GuestUsize,
582                             None,
583                         )
584                         .ok_or_else(|| {
585                             io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
586                         })?;
587 
588                     // Update PIO bus
589                     self.io_bus
590                         .update_range(old_base, len, new_base, len)
591                         .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
592                 }
593                 #[cfg(target_arch = "aarch64")]
594                 error!("I/O region is not supported");
595             }
596             PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
597                 // Update system allocator
598                 if region_type == PciBarRegionType::Memory32BitRegion {
599                     self.allocator
600                         .lock()
601                         .unwrap()
602                         .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);
603 
604                     self.allocator
605                         .lock()
606                         .unwrap()
607                         .allocate_mmio_hole_addresses(
608                             Some(GuestAddress(new_base)),
609                             len as GuestUsize,
610                             Some(len),
611                         )
612                         .ok_or_else(|| {
613                             io::Error::new(
614                                 io::ErrorKind::Other,
615                                 "failed allocating new 32-bit MMIO range",
616                             )
617                         })?;
618                 } else {
619                     // Find the specific allocator this BAR was allocated from and use it for the new one
620                     for allocator in &self.pci_mmio_allocators {
621                         let allocator_base = allocator.lock().unwrap().base();
622                         let allocator_end = allocator.lock().unwrap().end();
623 
624                         if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
625                             allocator
626                                 .lock()
627                                 .unwrap()
628                                 .free(GuestAddress(old_base), len as GuestUsize);
629 
630                             allocator
631                                 .lock()
632                                 .unwrap()
633                                 .allocate(
634                                     Some(GuestAddress(new_base)),
635                                     len as GuestUsize,
636                                     Some(len),
637                                 )
638                                 .ok_or_else(|| {
639                                     io::Error::new(
640                                         io::ErrorKind::Other,
641                                         "failed allocating new 64-bit MMIO range",
642                                     )
643                                 })?;
644 
645                             break;
646                         }
647                     }
648                 }
649 
650                 // Update MMIO bus
651                 self.mmio_bus
652                     .update_range(old_base, len, new_base, len)
653                     .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
654             }
655         }
656 
657         // Update the device_tree resources associated with the device
658         if let Some(id) = pci_dev.id() {
659             if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
660                 let mut resource_updated = false;
661                 for resource in node.resources.iter_mut() {
662                     if let Resource::PciBar { base, type_, .. } = resource {
663                         if PciBarRegionType::from(*type_) == region_type && *base == old_base {
664                             *base = new_base;
665                             resource_updated = true;
666                             break;
667                         }
668                     }
669                 }
670 
671                 if !resource_updated {
672                     return Err(io::Error::new(
673                         io::ErrorKind::Other,
674                         format!(
675                             "Couldn't find a resource with base 0x{:x} for device {}",
676                             old_base, id
677                         ),
678                     ));
679                 }
680             } else {
681                 return Err(io::Error::new(
682                     io::ErrorKind::Other,
683                     format!("Couldn't find device {} from device tree", id),
684                 ));
685             }
686         }
687 
688         let any_dev = pci_dev.as_any();
689         if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
690             let bar_addr = virtio_pci_dev.config_bar_addr();
691             if bar_addr == new_base {
692                 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
693                     let io_addr = IoEventAddress::Mmio(addr);
694                     self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
695                         io::Error::new(
696                             io::ErrorKind::Other,
697                             format!("failed to unregister ioevent: {:?}", e),
698                         )
699                     })?;
700                 }
701                 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
702                     let io_addr = IoEventAddress::Mmio(addr);
703                     self.vm
704                         .register_ioevent(event, &io_addr, None)
705                         .map_err(|e| {
706                             io::Error::new(
707                                 io::ErrorKind::Other,
708                                 format!("failed to register ioevent: {:?}", e),
709                             )
710                         })?;
711                 }
712             } else {
713                 let virtio_dev = virtio_pci_dev.virtio_device();
714                 let mut virtio_dev = virtio_dev.lock().unwrap();
715                 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
716                     if shm_regions.addr.raw_value() == old_base {
717                         let mem_region = self.vm.make_user_memory_region(
718                             shm_regions.mem_slot,
719                             old_base,
720                             shm_regions.len,
721                             shm_regions.host_addr,
722                             false,
723                             false,
724                         );
725 
726                         self.vm.remove_user_memory_region(mem_region).map_err(|e| {
727                             io::Error::new(
728                                 io::ErrorKind::Other,
729                                 format!("failed to remove user memory region: {:?}", e),
730                             )
731                         })?;
732 
733                         // Create new mapping by inserting new region to KVM.
734                         let mem_region = self.vm.make_user_memory_region(
735                             shm_regions.mem_slot,
736                             new_base,
737                             shm_regions.len,
738                             shm_regions.host_addr,
739                             false,
740                             false,
741                         );
742 
743                         self.vm.create_user_memory_region(mem_region).map_err(|e| {
744                             io::Error::new(
745                                 io::ErrorKind::Other,
746                                 format!("failed to create user memory regions: {:?}", e),
747                             )
748                         })?;
749 
750                         // Update shared memory regions to reflect the new mapping.
751                         shm_regions.addr = GuestAddress(new_base);
752                         virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
753                             io::Error::new(
754                                 io::ErrorKind::Other,
755                                 format!("failed to update shared memory regions: {:?}", e),
756                             )
757                         })?;
758                     }
759                 }
760             }
761         }
762 
763         pci_dev.move_bar(old_base, new_base)
764     }
765 }
766 
767 #[derive(Serialize, Deserialize)]
768 struct DeviceManagerState {
769     device_tree: DeviceTree,
770     device_id_cnt: Wrapping<usize>,
771 }
772 
773 #[derive(Debug)]
774 pub struct PtyPair {
775     pub main: File,
776     pub path: PathBuf,
777 }
778 
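// `File` does not implement `Clone`, so cloning a PtyPair duplicates the
// underlying file descriptor via try_clone() (and panics if that fails).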
779 impl Clone for PtyPair {
780     fn clone(&self) -> Self {
781         PtyPair {
782             main: self.main.try_clone().unwrap(),
783             path: self.path.clone(),
784         }
785     }
786 }
787 
788 #[derive(Clone)]
789 pub enum PciDeviceHandle {
790     Vfio(Arc<Mutex<VfioPciDevice>>),
791     Virtio(Arc<Mutex<VirtioPciDevice>>),
792     VfioUser(Arc<Mutex<VfioUserPciDevice>>),
793 }
794 
795 #[derive(Clone)]
796 struct MetaVirtioDevice {
797     virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
798     iommu: bool,
799     id: String,
800     pci_segment: u16,
801     dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
802 }
803 
804 #[derive(Default)]
805 pub struct AcpiPlatformAddresses {
806     pub pm_timer_address: Option<GenericAddress>,
807     pub reset_reg_address: Option<GenericAddress>,
808     pub sleep_control_reg_address: Option<GenericAddress>,
809     pub sleep_status_reg_address: Option<GenericAddress>,
810 }
811 
812 pub struct DeviceManager {
813     // The underlying hypervisor
814     hypervisor_type: HypervisorType,
815 
816     // Manage address space related to devices
817     address_manager: Arc<AddressManager>,
818 
819     // Console abstraction
820     console: Arc<Console>,
821 
822     // console PTY
823     console_pty: Option<Arc<Mutex<PtyPair>>>,
824 
825     // serial PTY
826     serial_pty: Option<Arc<Mutex<PtyPair>>>,
827 
828     // Serial Manager
829     serial_manager: Option<Arc<SerialManager>>,
830 
831     // Pipe used to be notified of pty foreground status / resize changes
832     console_resize_pipe: Option<Arc<File>>,
833 
834     // Interrupt controller
835     #[cfg(target_arch = "x86_64")]
836     interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
837     #[cfg(target_arch = "aarch64")]
838     interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,
839 
840     // Things to be added to the command line (e.g. aarch64 early console)
841     #[cfg(target_arch = "aarch64")]
842     cmdline_additions: Vec<String>,
843 
844     // ACPI GED notification device
845     ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,
846 
847     // VM configuration
848     config: Arc<Mutex<VmConfig>>,
849 
850     // Memory Manager
851     memory_manager: Arc<Mutex<MemoryManager>>,
852 
853     // CPU Manager
854     cpu_manager: Arc<Mutex<CpuManager>>,
855 
856     // The virtio devices on the system
857     virtio_devices: Vec<MetaVirtioDevice>,
858 
859     // List of bus devices
860     // Let the DeviceManager keep strong references to the BusDevice devices.
861     // This allows the IO and MMIO buses to be provided with Weak references,
862     // which prevents cyclic dependencies.
863     bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,
864 
865     // Counter to keep track of the consumed device IDs.
866     device_id_cnt: Wrapping<usize>,
867 
868     pci_segments: Vec<PciSegment>,
869 
870     #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
871     // MSI Interrupt Manager
872     msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
873 
874     #[cfg_attr(feature = "mshv", allow(dead_code))]
875     // Legacy Interrupt Manager
876     legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,
877 
878     // Passthrough device handle
879     passthrough_device: Option<VfioDeviceFd>,
880 
881     // VFIO container
882     // Only one container can be created, so it is stored as part of the
883     // DeviceManager and reused.
884     vfio_container: Option<Arc<VfioContainer>>,
885 
886     // Paravirtualized IOMMU
887     iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
888     iommu_mapping: Option<Arc<IommuMapping>>,
889 
890     // PCI information about devices attached to the paravirtualized IOMMU
891     // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
892     // representing the devices attached to the virtual IOMMU. This is useful
893     // information for filling the ACPI VIOT table.
894     iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,
895 
896     // Tree of devices, representing the dependencies between devices.
897     // Useful for introspection, snapshot and restore.
898     device_tree: Arc<Mutex<DeviceTree>>,
899 
900     // Exit event
901     exit_evt: EventFd,
902     reset_evt: EventFd,
903 
904     #[cfg(target_arch = "aarch64")]
905     id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,
906 
907     // seccomp action
908     seccomp_action: SeccompAction,
909 
910     // List of guest NUMA nodes.
911     numa_nodes: NumaNodes,
912 
913     // Possible handle to the virtio-balloon device
914     balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,
915 
916     // Virtio Device activation EventFd to allow the VMM thread to trigger device
917     // activation and thus start the threads from the VMM thread
918     activate_evt: EventFd,
919 
920     acpi_address: GuestAddress,
921 
922     selected_segment: usize,
923 
924     // Possible handles to the virtio-mem devices
925     virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,
926 
927     #[cfg(target_arch = "aarch64")]
928     // GPIO device for AArch64
929     gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,
930 
931     // Flag to force setting the iommu on virtio devices
932     force_iommu: bool,
933 
934     // Helps identify if the VM is currently being restored
935     restoring: bool,
936 
937     // io_uring availability, cached once detected
938     io_uring_supported: Option<bool>,
939 
940     // List of unique identifiers provided at boot through the configuration.
941     boot_id_list: BTreeSet<String>,
942 
943     // Start time of the VM
944     timestamp: Instant,
945 
946     // Pending activations
947     pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,
948 
949     // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
950     acpi_platform_addresses: AcpiPlatformAddresses,
951 
952     snapshot: Option<Snapshot>,
953 }
954 
955 impl DeviceManager {
956     #[allow(clippy::too_many_arguments)]
957     pub fn new(
958         #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
959         mmio_bus: Arc<Bus>,
960         hypervisor_type: HypervisorType,
961         vm: Arc<dyn hypervisor::Vm>,
962         config: Arc<Mutex<VmConfig>>,
963         memory_manager: Arc<Mutex<MemoryManager>>,
964         cpu_manager: Arc<Mutex<CpuManager>>,
965         exit_evt: EventFd,
966         reset_evt: EventFd,
967         seccomp_action: SeccompAction,
968         numa_nodes: NumaNodes,
969         activate_evt: &EventFd,
970         force_iommu: bool,
971         restoring: bool,
972         boot_id_list: BTreeSet<String>,
973         timestamp: Instant,
974         snapshot: Option<Snapshot>,
975         dynamic: bool,
976     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
977         trace_scoped!("DeviceManager::new");
978 
979         let device_tree = Arc::new(Mutex::new(DeviceTree::new()));
980 
981         let num_pci_segments =
982             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
983                 platform_config.num_pci_segments
984             } else {
985                 1
986             };
987 
988         let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
989         let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;
990 
991         // Start each PCI segment range on a 4GiB boundary
992         let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
993             / ((4 << 30) * num_pci_segments as u64)
994             * (4 << 30);
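        // For example, with a 35 GiB device area and num_pci_segments == 2:
        // (35 GiB) / (4 GiB * 2) = 4, then 4 * 4 GiB = 16 GiB per segment,
        // i.e. the per-segment size is rounded down to a multiple of 4 GiB.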
995 
996         let mut pci_mmio_allocators = vec![];
997         for i in 0..num_pci_segments as u64 {
998             let mmio_start = start_of_device_area + i * pci_segment_size;
999             let allocator = Arc::new(Mutex::new(
1000                 AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
1001             ));
1002             pci_mmio_allocators.push(allocator)
1003         }
1004 
1005         let address_manager = Arc::new(AddressManager {
1006             allocator: memory_manager.lock().unwrap().allocator(),
1007             #[cfg(target_arch = "x86_64")]
1008             io_bus,
1009             mmio_bus,
1010             vm: vm.clone(),
1011             device_tree: Arc::clone(&device_tree),
1012             pci_mmio_allocators,
1013         });
1014 
1015         // First we create the MSI interrupt manager; the legacy one is created
1016         // later, after the IOAPIC device creation.
1017         // We create the MSI one first because the IOAPIC needs it, and the
1018         // legacy interrupt manager in turn needs the IOAPIC. So we're
1019         // handling a linear dependency chain:
1020         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1021         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1022             Arc::new(MsiInterruptManager::new(
1023                 Arc::clone(&address_manager.allocator),
1024                 vm,
1025             ));
1026 
1027         let acpi_address = address_manager
1028             .allocator
1029             .lock()
1030             .unwrap()
1031             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1032             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1033 
1034         let mut pci_irq_slots = [0; 32];
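        // One legacy interrupt (INTx) slot per PCI device number (32 slots).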
1035         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1036             &address_manager,
1037             &mut pci_irq_slots,
1038         )?;
1039 
1040         let mut pci_segments = vec![PciSegment::new_default_segment(
1041             &address_manager,
1042             Arc::clone(&address_manager.pci_mmio_allocators[0]),
1043             &pci_irq_slots,
1044         )?];
1045 
1046         for i in 1..num_pci_segments as usize {
1047             pci_segments.push(PciSegment::new(
1048                 i as u16,
1049                 &address_manager,
1050                 Arc::clone(&address_manager.pci_mmio_allocators[i]),
1051                 &pci_irq_slots,
1052             )?);
1053         }
1054 
1055         if dynamic {
1056             let acpi_address = address_manager
1057                 .allocator
1058                 .lock()
1059                 .unwrap()
1060                 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
1061                 .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1062 
1063             address_manager
1064                 .mmio_bus
1065                 .insert(
1066                     cpu_manager.clone(),
1067                     acpi_address.0,
1068                     CPU_MANAGER_ACPI_SIZE as u64,
1069                 )
1070                 .map_err(DeviceManagerError::BusError)?;
1071 
1072             cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
1073         }
1074 
1075         let device_manager = DeviceManager {
1076             hypervisor_type,
1077             address_manager: Arc::clone(&address_manager),
1078             console: Arc::new(Console::default()),
1079             interrupt_controller: None,
1080             #[cfg(target_arch = "aarch64")]
1081             cmdline_additions: Vec::new(),
1082             ged_notification_device: None,
1083             config,
1084             memory_manager,
1085             cpu_manager,
1086             virtio_devices: Vec::new(),
1087             bus_devices: Vec::new(),
1088             device_id_cnt: Wrapping(0),
1089             msi_interrupt_manager,
1090             legacy_interrupt_manager: None,
1091             passthrough_device: None,
1092             vfio_container: None,
1093             iommu_device: None,
1094             iommu_mapping: None,
1095             iommu_attached_devices: None,
1096             pci_segments,
1097             device_tree,
1098             exit_evt,
1099             reset_evt,
1100             #[cfg(target_arch = "aarch64")]
1101             id_to_dev_info: HashMap::new(),
1102             seccomp_action,
1103             numa_nodes,
1104             balloon: None,
1105             activate_evt: activate_evt
1106                 .try_clone()
1107                 .map_err(DeviceManagerError::EventFd)?,
1108             acpi_address,
1109             selected_segment: 0,
1110             serial_pty: None,
1111             serial_manager: None,
1112             console_pty: None,
1113             console_resize_pipe: None,
1114             virtio_mem_devices: Vec::new(),
1115             #[cfg(target_arch = "aarch64")]
1116             gpio_device: None,
1117             force_iommu,
1118             restoring,
1119             io_uring_supported: None,
1120             boot_id_list,
1121             timestamp,
1122             pending_activations: Arc::new(Mutex::new(Vec::default())),
1123             acpi_platform_addresses: AcpiPlatformAddresses::default(),
1124             snapshot,
1125         };
1126 
1127         let device_manager = Arc::new(Mutex::new(device_manager));
1128 
1129         address_manager
1130             .mmio_bus
1131             .insert(
1132                 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
1133                 acpi_address.0,
1134                 DEVICE_MANAGER_ACPI_SIZE as u64,
1135             )
1136             .map_err(DeviceManagerError::BusError)?;
1137 
1138         Ok(device_manager)
1139     }
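
    // A typical construction sequence (sketch; the actual call sites live in
    // the Vm code elsewhere in this crate):
    //
    //   let dm = DeviceManager::new(/* buses, vm, managers, ... */)?;
    //   dm.lock().unwrap().create_devices(serial_pty, console_pty, resize_pipe)?;
    //
    // `new()` only wires up the address space and PCI segments, while
    // `create_devices()` instantiates the interrupt controller and the
    // legacy, ACPI, console and virtio devices.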
1140 
1141     pub fn serial_pty(&self) -> Option<PtyPair> {
1142         self.serial_pty
1143             .as_ref()
1144             .map(|pty| pty.lock().unwrap().clone())
1145     }
1146 
1147     pub fn console_pty(&self) -> Option<PtyPair> {
1148         self.console_pty
1149             .as_ref()
1150             .map(|pty| pty.lock().unwrap().clone())
1151     }
1152 
1153     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1154         self.console_resize_pipe.as_ref().map(Arc::clone)
1155     }
1156 
1157     pub fn create_devices(
1158         &mut self,
1159         serial_pty: Option<PtyPair>,
1160         console_pty: Option<PtyPair>,
1161         console_resize_pipe: Option<File>,
1162     ) -> DeviceManagerResult<()> {
1163         trace_scoped!("create_devices");
1164 
1165         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1166 
1167         let interrupt_controller = self.add_interrupt_controller()?;
1168 
1169         self.cpu_manager
1170             .lock()
1171             .unwrap()
1172             .set_interrupt_controller(interrupt_controller.clone());
1173 
1174         // Now we can create the legacy interrupt manager, which needs the freshly
1175         // formed IOAPIC device.
1176         let legacy_interrupt_manager: Arc<
1177             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1178         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1179             &interrupt_controller,
1180         )));
1181 
1182         {
1183             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1184                 self.address_manager
1185                     .mmio_bus
1186                     .insert(
1187                         Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
1188                         acpi_address.0,
1189                         MEMORY_MANAGER_ACPI_SIZE as u64,
1190                     )
1191                     .map_err(DeviceManagerError::BusError)?;
1192             }
1193         }
1194 
1195         #[cfg(target_arch = "x86_64")]
1196         self.add_legacy_devices(
1197             self.reset_evt
1198                 .try_clone()
1199                 .map_err(DeviceManagerError::EventFd)?,
1200         )?;
1201 
1202         #[cfg(target_arch = "aarch64")]
1203         self.add_legacy_devices(&legacy_interrupt_manager)?;
1204 
1205         {
1206             self.ged_notification_device = self.add_acpi_devices(
1207                 &legacy_interrupt_manager,
1208                 self.reset_evt
1209                     .try_clone()
1210                     .map_err(DeviceManagerError::EventFd)?,
1211                 self.exit_evt
1212                     .try_clone()
1213                     .map_err(DeviceManagerError::EventFd)?,
1214             )?;
1215         }
1216 
1217         self.console = self.add_console_device(
1218             &legacy_interrupt_manager,
1219             &mut virtio_devices,
1220             serial_pty,
1221             console_pty,
1222             console_resize_pipe,
1223         )?;
1224 
1225         if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
1226             let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
1227             self.bus_devices
1228                 .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>)
1229         }
1230         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1231 
1232         virtio_devices.append(&mut self.make_virtio_devices()?);
1233 
1234         self.add_pci_devices(virtio_devices.clone())?;
1235 
1236         self.virtio_devices = virtio_devices;
1237 
1238         Ok(())
1239     }
1240 
1241     fn state(&self) -> DeviceManagerState {
1242         DeviceManagerState {
1243             device_tree: self.device_tree.lock().unwrap().clone(),
1244             device_id_cnt: self.device_id_cnt,
1245         }
1246     }
1247 
1248     fn set_state(&mut self, state: &DeviceManagerState) {
1249         *self.device_tree.lock().unwrap() = state.device_tree.clone();
1250         self.device_id_cnt = state.device_id_cnt;
1251     }
1252 
1253     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1254         #[cfg(target_arch = "aarch64")]
1255         {
1256             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1257             let vgic_config = gic::Gic::create_default_config(vcpus.into());
1258             (
1259                 vgic_config.msi_addr,
1260                 vgic_config.msi_addr + vgic_config.msi_size - 1,
1261             )
1262         }
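        // On x86_64 the MSI doorbell window is the architectural local APIC
        // address range 0xFEE0_0000..=0xFEEF_FFFF.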
1263         #[cfg(target_arch = "x86_64")]
1264         (0xfee0_0000, 0xfeef_ffff)
1265     }
1266 
1267     #[cfg(target_arch = "aarch64")]
1268     /// Gets information about the devices registered so far.
1269     pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
1270         &self.id_to_dev_info
1271     }
1272 
1273     #[allow(unused_variables)]
1274     fn add_pci_devices(
1275         &mut self,
1276         virtio_devices: Vec<MetaVirtioDevice>,
1277     ) -> DeviceManagerResult<()> {
1278         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1279 
1280         let iommu_device = if self.config.lock().unwrap().iommu {
1281             let (device, mapping) = virtio_devices::Iommu::new(
1282                 iommu_id.clone(),
1283                 self.seccomp_action.clone(),
1284                 self.exit_evt
1285                     .try_clone()
1286                     .map_err(DeviceManagerError::EventFd)?,
1287                 self.get_msi_iova_space(),
1288                 versioned_state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
1289                     .map_err(DeviceManagerError::RestoreGetState)?,
1290             )
1291             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1292             let device = Arc::new(Mutex::new(device));
1293             self.iommu_device = Some(Arc::clone(&device));
1294             self.iommu_mapping = Some(mapping);
1295 
1296             // Fill the device tree with a new node. In case of restore, we
1297             // know there is nothing to do, so we can simply override the
1298             // existing entry.
1299             self.device_tree
1300                 .lock()
1301                 .unwrap()
1302                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1303 
1304             Some(device)
1305         } else {
1306             None
1307         };
1308 
1309         let mut iommu_attached_devices = Vec::new();
1310         {
1311             for handle in virtio_devices {
1312                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1313                     self.iommu_mapping.clone()
1314                 } else {
1315                     None
1316                 };
1317 
1318                 let dev_id = self.add_virtio_pci_device(
1319                     handle.virtio_device,
1320                     &mapping,
1321                     handle.id,
1322                     handle.pci_segment,
1323                     handle.dma_handler,
1324                 )?;
1325 
1326                 if handle.iommu {
1327                     iommu_attached_devices.push(dev_id);
1328                 }
1329             }
1330 
1331             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1332             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1333 
1334             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1335             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1336 
1337             // Add all devices from forced iommu segments
1338             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1339                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1340                     for segment in iommu_segments {
1341                         for device in 0..32 {
1342                             let bdf = PciBdf::new(*segment, 0, device, 0);
1343                             if !iommu_attached_devices.contains(&bdf) {
1344                                 iommu_attached_devices.push(bdf);
1345                             }
1346                         }
1347                     }
1348                 }
1349             }
1350 
1351             if let Some(iommu_device) = iommu_device {
1352                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1353                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1354             }
1355         }
1356 
1357         for segment in &self.pci_segments {
1358             #[cfg(target_arch = "x86_64")]
1359             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1360                 self.bus_devices
1361                     .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
1362             }
1363 
1364             self.bus_devices
1365                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
1366         }
1367 
1368         Ok(())
1369     }
1370 
1371     #[cfg(target_arch = "aarch64")]
1372     fn add_interrupt_controller(
1373         &mut self,
1374     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1375         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1376             gic::Gic::new(
1377                 self.config.lock().unwrap().cpus.boot_vcpus,
1378                 Arc::clone(&self.msi_interrupt_manager),
1379                 self.address_manager.vm.clone(),
1380             )
1381             .map_err(DeviceManagerError::CreateInterruptController)?,
1382         ));
1383 
1384         self.interrupt_controller = Some(interrupt_controller.clone());
1385 
1386         // Unlike x86_64, the "interrupt_controller" here for AArch64 is only
1387         // a `Gic` object that implements the `InterruptController` trait to
1388         // provide interrupt delivery services. It is not the real GIC device,
1389         // so we do not need to insert it into the device tree.
1390 
1391         Ok(interrupt_controller)
1392     }
1393 
1394     #[cfg(target_arch = "aarch64")]
1395     pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
1396         self.interrupt_controller.as_ref()
1397     }
1398 
1399     #[cfg(target_arch = "x86_64")]
1400     fn add_interrupt_controller(
1401         &mut self,
1402     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1403         let id = String::from(IOAPIC_DEVICE_NAME);
1404 
1405         // Create IOAPIC
1406         let interrupt_controller = Arc::new(Mutex::new(
1407             ioapic::Ioapic::new(
1408                 id.clone(),
1409                 APIC_START,
1410                 Arc::clone(&self.msi_interrupt_manager),
1411                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1412                     .map_err(DeviceManagerError::RestoreGetState)?,
1413             )
1414             .map_err(DeviceManagerError::CreateInterruptController)?,
1415         ));
1416 
1417         self.interrupt_controller = Some(interrupt_controller.clone());
1418 
1419         self.address_manager
1420             .mmio_bus
1421             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1422             .map_err(DeviceManagerError::BusError)?;
1423 
1424         self.bus_devices
1425             .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);
1426 
1427         // Fill the device tree with a new node. In case of restore, we
1428         // know there is nothing to do, so we can simply override the
1429         // existing entry.
1430         self.device_tree
1431             .lock()
1432             .unwrap()
1433             .insert(id.clone(), device_node!(id, interrupt_controller));
1434 
1435         Ok(interrupt_controller)
1436     }
1437 
1438     fn add_acpi_devices(
1439         &mut self,
1440         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1441         reset_evt: EventFd,
1442         exit_evt: EventFd,
1443     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1444         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1445             exit_evt, reset_evt,
1446         )));
1447 
1448         self.bus_devices
1449             .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);
1450 
1451         #[cfg(target_arch = "x86_64")]
1452         {
1453             let shutdown_pio_address: u16 = 0x600;
1454 
1455             self.address_manager
1456                 .allocator
1457                 .lock()
1458                 .unwrap()
1459                 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
1460                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1461 
1462             self.address_manager
1463                 .io_bus
1464                 .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
1465                 .map_err(DeviceManagerError::BusError)?;
1466 
1467             self.acpi_platform_addresses.sleep_control_reg_address =
1468                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1469             self.acpi_platform_addresses.sleep_status_reg_address =
1470                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1471             self.acpi_platform_addresses.reset_reg_address =
1472                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1473         }
1474 
1475         let ged_irq = self
1476             .address_manager
1477             .allocator
1478             .lock()
1479             .unwrap()
1480             .allocate_irq()
1481             .unwrap();
1482         let interrupt_group = interrupt_manager
1483             .create_group(LegacyIrqGroupConfig {
1484                 irq: ged_irq as InterruptIndex,
1485             })
1486             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1487         let ged_address = self
1488             .address_manager
1489             .allocator
1490             .lock()
1491             .unwrap()
1492             .allocate_platform_mmio_addresses(
1493                 None,
1494                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1495                 None,
1496             )
1497             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1498         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1499             interrupt_group,
1500             ged_irq,
1501             ged_address,
1502         )));
1503         self.address_manager
1504             .mmio_bus
1505             .insert(
1506                 ged_device.clone(),
1507                 ged_address.0,
1508                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1509             )
1510             .map_err(DeviceManagerError::BusError)?;
1511         self.bus_devices
1512             .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);
1513 
1514         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1515 
1516         self.bus_devices
1517             .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);
1518 
1519         #[cfg(target_arch = "x86_64")]
1520         {
1521             let pm_timer_pio_address: u16 = 0x608;
1522 
1523             self.address_manager
1524                 .allocator
1525                 .lock()
1526                 .unwrap()
1527                 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
1528                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1529 
1530             self.address_manager
1531                 .io_bus
1532                 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
1533                 .map_err(DeviceManagerError::BusError)?;
1534 
1535             self.acpi_platform_addresses.pm_timer_address =
1536                 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
1537         }
1538 
1539         Ok(Some(ged_device))
1540     }
1541 
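    // x86_64 legacy PIO devices: an i8042 reset device, a CMOS device sized
    // from the guest memory split around 4 GiB, a firmware debug device at
    // port 0x402 and a debug/POST port at 0x80 carrying the VM start timestamp.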
1542     #[cfg(target_arch = "x86_64")]
1543     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1544         // Add a shutdown device (i8042)
1545         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1546             reset_evt.try_clone().unwrap(),
1547         )));
1548 
1549         self.bus_devices
1550             .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);
1551 
1552         self.address_manager
1553             .io_bus
1554             .insert(i8042, 0x61, 0x4)
1555             .map_err(DeviceManagerError::BusError)?;
1556         {
1557             // Add a CMOS emulated device
1558             let mem_size = self
1559                 .memory_manager
1560                 .lock()
1561                 .unwrap()
1562                 .guest_memory()
1563                 .memory()
1564                 .last_addr()
1565                 .0
1566                 + 1;
1567             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1568             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1569 
1570             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1571                 mem_below_4g,
1572                 mem_above_4g,
1573                 reset_evt,
1574             )));
1575 
1576             self.bus_devices
1577                 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);
1578 
1579             self.address_manager
1580                 .io_bus
1581                 .insert(cmos, 0x70, 0x2)
1582                 .map_err(DeviceManagerError::BusError)?;
1583 
1584             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1585 
1586             self.bus_devices
1587                 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);
1588 
1589             self.address_manager
1590                 .io_bus
1591                 .insert(fwdebug, 0x402, 0x1)
1592                 .map_err(DeviceManagerError::BusError)?;
1593         }
1594 
1595         // 0x80 debug port
1596         let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
1597         self.bus_devices
1598             .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
1599         self.address_manager
1600             .io_bus
1601             .insert(debug_port, 0x80, 0x1)
1602             .map_err(DeviceManagerError::BusError)?;
1603 
1604         Ok(())
1605     }
1606 
1607     #[cfg(target_arch = "aarch64")]
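    // aarch64 legacy devices are memory-mapped: an RTC and a GPIO controller
    // placed at fixed layout addresses, each wired to a freshly allocated IRQ
    // and recorded in id_to_dev_info so the platform description can reference
    // them.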
1608     fn add_legacy_devices(
1609         &mut self,
1610         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1611     ) -> DeviceManagerResult<()> {
1612         // Add an RTC device
1613         let rtc_irq = self
1614             .address_manager
1615             .allocator
1616             .lock()
1617             .unwrap()
1618             .allocate_irq()
1619             .unwrap();
1620 
1621         let interrupt_group = interrupt_manager
1622             .create_group(LegacyIrqGroupConfig {
1623                 irq: rtc_irq as InterruptIndex,
1624             })
1625             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1626 
1627         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1628 
1629         self.bus_devices
1630             .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);
1631 
1632         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1633 
1634         self.address_manager
1635             .mmio_bus
1636             .insert(rtc_device, addr.0, MMIO_LEN)
1637             .map_err(DeviceManagerError::BusError)?;
1638 
1639         self.id_to_dev_info.insert(
1640             (DeviceType::Rtc, "rtc".to_string()),
1641             MmioDeviceInfo {
1642                 addr: addr.0,
1643                 len: MMIO_LEN,
1644                 irq: rtc_irq,
1645             },
1646         );
1647 
1648         // Add a GPIO device
1649         let id = String::from(GPIO_DEVICE_NAME);
1650         let gpio_irq = self
1651             .address_manager
1652             .allocator
1653             .lock()
1654             .unwrap()
1655             .allocate_irq()
1656             .unwrap();
1657 
1658         let interrupt_group = interrupt_manager
1659             .create_group(LegacyIrqGroupConfig {
1660                 irq: gpio_irq as InterruptIndex,
1661             })
1662             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1663 
1664         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1665             id.clone(),
1666             interrupt_group,
1667             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1668                 .map_err(DeviceManagerError::RestoreGetState)?,
1669         )));
1670 
1671         self.bus_devices
1672             .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);
1673 
1674         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1675 
1676         self.address_manager
1677             .mmio_bus
1678             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1679             .map_err(DeviceManagerError::BusError)?;
1680 
1681         self.gpio_device = Some(gpio_device.clone());
1682 
1683         self.id_to_dev_info.insert(
1684             (DeviceType::Gpio, "gpio".to_string()),
1685             MmioDeviceInfo {
1686                 addr: addr.0,
1687                 len: MMIO_LEN,
1688                 irq: gpio_irq,
1689             },
1690         );
1691 
1692         self.device_tree
1693             .lock()
1694             .unwrap()
1695             .insert(id.clone(), device_node!(id, gpio_device));
1696 
1697         Ok(())
1698     }
1699 
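    // Creates the 16550-compatible serial device on the standard COM1
    // resources (PIO ports 0x3f8-0x3ff, IRQ 4) and registers it on the I/O bus.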
1700     #[cfg(target_arch = "x86_64")]
1701     fn add_serial_device(
1702         &mut self,
1703         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1704         serial_writer: Option<Box<dyn io::Write + Send>>,
1705     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1706         // Serial is tied to IRQ #4
1707         let serial_irq = 4;
1708 
1709         let id = String::from(SERIAL_DEVICE_NAME);
1710 
1711         let interrupt_group = interrupt_manager
1712             .create_group(LegacyIrqGroupConfig {
1713                 irq: serial_irq as InterruptIndex,
1714             })
1715             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1716 
1717         let serial = Arc::new(Mutex::new(Serial::new(
1718             id.clone(),
1719             interrupt_group,
1720             serial_writer,
1721             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1722                 .map_err(DeviceManagerError::RestoreGetState)?,
1723         )));
1724 
1725         self.bus_devices
1726             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1727 
1728         self.address_manager
1729             .allocator
1730             .lock()
1731             .unwrap()
1732             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1733             .ok_or(DeviceManagerError::AllocateIoPort)?;
1734 
1735         self.address_manager
1736             .io_bus
1737             .insert(serial.clone(), 0x3f8, 0x8)
1738             .map_err(DeviceManagerError::BusError)?;
1739 
1740         // Fill the device tree with a new node. In case of restore, we
1741         // know there is nothing to do, so we can simply override the
1742         // existing entry.
1743         self.device_tree
1744             .lock()
1745             .unwrap()
1746             .insert(id.clone(), device_node!(id, serial));
1747 
1748         Ok(serial)
1749     }
1750 
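    // aarch64 counterpart: a PL011 UART on the MMIO bus with a dynamically
    // allocated IRQ. An "earlycon=pl011,mmio,<addr>" hint is appended to the
    // kernel command line additions so early boot output reaches this UART.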
1751     #[cfg(target_arch = "aarch64")]
1752     fn add_serial_device(
1753         &mut self,
1754         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1755         serial_writer: Option<Box<dyn io::Write + Send>>,
1756     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1757         let id = String::from(SERIAL_DEVICE_NAME);
1758 
1759         let serial_irq = self
1760             .address_manager
1761             .allocator
1762             .lock()
1763             .unwrap()
1764             .allocate_irq()
1765             .unwrap();
1766 
1767         let interrupt_group = interrupt_manager
1768             .create_group(LegacyIrqGroupConfig {
1769                 irq: serial_irq as InterruptIndex,
1770             })
1771             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1772 
1773         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1774             id.clone(),
1775             interrupt_group,
1776             serial_writer,
1777             self.timestamp,
1778             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1779                 .map_err(DeviceManagerError::RestoreGetState)?,
1780         )));
1781 
1782         self.bus_devices
1783             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1784 
1785         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
1786 
1787         self.address_manager
1788             .mmio_bus
1789             .insert(serial.clone(), addr.0, MMIO_LEN)
1790             .map_err(DeviceManagerError::BusError)?;
1791 
1792         self.id_to_dev_info.insert(
1793             (DeviceType::Serial, DeviceType::Serial.to_string()),
1794             MmioDeviceInfo {
1795                 addr: addr.0,
1796                 len: MMIO_LEN,
1797                 irq: serial_irq,
1798             },
1799         );
1800 
1801         self.cmdline_additions
1802             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1803 
1804         // Fill the device tree with a new node. In case of restore, we
1805         // know there is nothing to do, so we can simply override the
1806         // existing entry.
1807         self.device_tree
1808             .lock()
1809             .unwrap()
1810             .insert(id.clone(), device_node!(id, serial));
1811 
1812         Ok(serial)
1813     }
1814 
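    // Applies a termios tweak to the given file descriptor. Non-TTY
    // descriptors are silently ignored, so this is safe to call on redirected
    // stdio.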
1815     fn modify_mode<F: FnOnce(&mut termios)>(
1816         &self,
1817         fd: RawFd,
1818         f: F,
1819     ) -> vmm_sys_util::errno::Result<()> {
1820         // SAFETY: safe because we check the return value of isatty.
1821         if unsafe { isatty(fd) } != 1 {
1822             return Ok(());
1823         }
1824 
1825         // SAFETY: The following pair of calls is safe because the termios struct is fully
1826         // overwritten by tcgetattr and we check the return value.
1827         let mut termios: termios = unsafe { zeroed() };
1828         // SAFETY: see above
1829         let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
1830         if ret < 0 {
1831             return vmm_sys_util::errno::errno_result();
1832         }
1833         f(&mut termios);
1834         // SAFETY: Safe because the syscall will only read the extent of termios and we check
1835         // the return result.
1836         let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
1837         if ret < 0 {
1838             return vmm_sys_util::errno::errno_result();
1839         }
1840 
1841         Ok(())
1842     }
1843 
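    // Minimal usage sketch (hypothetical caller; `pty_sub` stands for any
    // terminal-backed File):
    //
    //     let mut pty_sub: File = /* e.g. the sub side of a PTY */;
    //     device_manager.set_raw_mode(&mut pty_sub)?;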
1844     fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> {
1845         // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
1846         self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
1847     }
1848 
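    // Spawns the SIGWINCH listener (with its own seccomp filter) for the given
    // PTY pair and keeps the resulting pipe so console resize events can be
    // forwarded to the virtio-console resizer. Failures are only logged; the
    // console keeps working without resize support.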
1849     fn listen_for_sigwinch_on_tty(&mut self, pty_main: File, pty_sub: File) -> std::io::Result<()> {
1850         let seccomp_filter = get_seccomp_filter(
1851             &self.seccomp_action,
1852             Thread::PtyForeground,
1853             self.hypervisor_type,
1854         )
1855         .unwrap();
1856 
1857         match start_sigwinch_listener(seccomp_filter, pty_main, pty_sub) {
1858             Ok(pipe) => {
1859                 self.console_resize_pipe = Some(Arc::new(pipe));
1860             }
1861             Err(e) => {
1862                 warn!("Ignoring error from setting up SIGWINCH listener: {}", e)
1863             }
1864         }
1865 
1866         Ok(())
1867     }
1868 
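    // Builds the virtio-console device. The backing endpoint depends on the
    // configured mode: a plain file, a PTY (reused when provided, e.g. on
    // restore, or newly created), duplicated stdout (and stdin when
    // interactive) for Tty, or a null sink. A resizer is only returned for the
    // Tty case, where SIGWINCH handling makes sense.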
1869     fn add_virtio_console_device(
1870         &mut self,
1871         virtio_devices: &mut Vec<MetaVirtioDevice>,
1872         console_pty: Option<PtyPair>,
1873         resize_pipe: Option<File>,
1874     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
1875         let console_config = self.config.lock().unwrap().console.clone();
1876         let endpoint = match console_config.mode {
1877             ConsoleOutputMode::File => {
1878                 let file = File::create(console_config.file.as_ref().unwrap())
1879                     .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
1880                 Endpoint::File(file)
1881             }
1882             ConsoleOutputMode::Pty => {
1883                 if let Some(pty) = console_pty {
1884                     self.config.lock().unwrap().console.file = Some(pty.path.clone());
1885                     let file = pty.main.try_clone().unwrap();
1886                     self.console_pty = Some(Arc::new(Mutex::new(pty)));
1887                     self.console_resize_pipe = resize_pipe.map(Arc::new);
1888                     Endpoint::PtyPair(file.try_clone().unwrap(), file)
1889                 } else {
1890                     let (main, mut sub, path) =
1891                         create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?;
1892                     self.set_raw_mode(&mut sub)
1893                         .map_err(DeviceManagerError::SetPtyRaw)?;
1894                     self.config.lock().unwrap().console.file = Some(path.clone());
1895                     let file = main.try_clone().unwrap();
1896                     assert!(resize_pipe.is_none());
1897                     self.listen_for_sigwinch_on_tty(main.try_clone().unwrap(), sub)
1898                         .unwrap();
1899                     self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
1900                     Endpoint::PtyPair(file.try_clone().unwrap(), file)
1901                 }
1902             }
1903             ConsoleOutputMode::Tty => {
1904                 // Duplicating the file descriptors like this is needed as otherwise
1905                 // they will be closed on a reboot and the numbers reused
1906 
1907                 // SAFETY: FFI call to dup. Trivially safe.
1908                 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
1909                 if stdout == -1 {
1910                     return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
1911                 }
1912                 // SAFETY: stdout is valid and owned solely by us.
1913                 let stdout = unsafe { File::from_raw_fd(stdout) };
1914 
1915                 // If an interactive TTY then we can accept input
1916                 // SAFETY: FFI call. Trivially safe.
1917                 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
1918                     // SAFETY: FFI call to dup. Trivially safe.
1919                     let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
1920                     if stdin == -1 {
1921                         return vmm_sys_util::errno::errno_result()
1922                             .map_err(DeviceManagerError::DupFd);
1923                     }
1924                     // SAFETY: stdin is valid and owned solely by us.
1925                     let stdin = unsafe { File::from_raw_fd(stdin) };
1926 
1927                     Endpoint::FilePair(stdout, stdin)
1928                 } else {
1929                     Endpoint::File(stdout)
1930                 }
1931             }
1932             ConsoleOutputMode::Null => Endpoint::Null,
1933             ConsoleOutputMode::Off => return Ok(None),
1934         };
1935         let id = String::from(CONSOLE_DEVICE_NAME);
1936 
1937         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
1938             id.clone(),
1939             endpoint,
1940             self.console_resize_pipe
1941                 .as_ref()
1942                 .map(|p| p.try_clone().unwrap()),
1943             self.force_iommu | console_config.iommu,
1944             self.seccomp_action.clone(),
1945             self.exit_evt
1946                 .try_clone()
1947                 .map_err(DeviceManagerError::EventFd)?,
1948             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1949                 .map_err(DeviceManagerError::RestoreGetState)?,
1950         )
1951         .map_err(DeviceManagerError::CreateVirtioConsole)?;
1952         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
1953         virtio_devices.push(MetaVirtioDevice {
1954             virtio_device: Arc::clone(&virtio_console_device)
1955                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
1956             iommu: console_config.iommu,
1957             id: id.clone(),
1958             pci_segment: 0,
1959             dma_handler: None,
1960         });
1961 
1962         // Fill the device tree with a new node. In case of restore, we
1963         // know there is nothing to do, so we can simply override the
1964         // existing entry.
1965         self.device_tree
1966             .lock()
1967             .unwrap()
1968             .insert(id.clone(), device_node!(id, virtio_console_device));
1969 
1970         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
1971         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
1972             Some(console_resizer)
1973         } else {
1974             None
1975         })
1976     }
1977 
1978     fn add_console_device(
1979         &mut self,
1980         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1981         virtio_devices: &mut Vec<MetaVirtioDevice>,
1982         serial_pty: Option<PtyPair>,
1983         console_pty: Option<PtyPair>,
1984         console_resize_pipe: Option<File>,
1985     ) -> DeviceManagerResult<Arc<Console>> {
1986         let serial_config = self.config.lock().unwrap().serial.clone();
1987         let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
1988             ConsoleOutputMode::File => Some(Box::new(
1989                 File::create(serial_config.file.as_ref().unwrap())
1990                     .map_err(DeviceManagerError::SerialOutputFileOpen)?,
1991             )),
1992             ConsoleOutputMode::Pty => {
1993                 if let Some(pty) = serial_pty {
1994                     self.config.lock().unwrap().serial.file = Some(pty.path.clone());
1995                     self.serial_pty = Some(Arc::new(Mutex::new(pty)));
1996                 } else {
1997                     let (main, mut sub, path) =
1998                         create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
1999                     self.set_raw_mode(&mut sub)
2000                         .map_err(DeviceManagerError::SetPtyRaw)?;
2001                     self.config.lock().unwrap().serial.file = Some(path.clone());
2002                     self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
2003                 }
2004                 None
2005             }
2006             ConsoleOutputMode::Tty => Some(Box::new(stdout())),
2007             ConsoleOutputMode::Off | ConsoleOutputMode::Null => None,
2008         };
2009         if serial_config.mode != ConsoleOutputMode::Off {
2010             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
2011             self.serial_manager = match serial_config.mode {
2012                 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty => {
2013                     let serial_manager =
2014                         SerialManager::new(serial, self.serial_pty.clone(), serial_config.mode)
2015                             .map_err(DeviceManagerError::CreateSerialManager)?;
2016                     if let Some(mut serial_manager) = serial_manager {
2017                         serial_manager
2018                             .start_thread(
2019                                 self.exit_evt
2020                                     .try_clone()
2021                                     .map_err(DeviceManagerError::EventFd)?,
2022                             )
2023                             .map_err(DeviceManagerError::SpawnSerialManager)?;
2024                         Some(Arc::new(serial_manager))
2025                     } else {
2026                         None
2027                     }
2028                 }
2029                 _ => None,
2030             };
2031         }
2032 
2033         let console_resizer =
2034             self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;
2035 
2036         Ok(Arc::new(Console { console_resizer }))
2037     }
2038 
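    // Creates the MMIO TPM device backed by the given socket path (typically a
    // swtpm control socket) and maps it at the fixed TPM_START address.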
2039     fn add_tpm_device(
2040         &mut self,
2041         tpm_path: PathBuf,
2042     ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
2043         // Create TPM Device
2044         let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
2045             DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
2046         })?;
2047         let tpm = Arc::new(Mutex::new(tpm));
2048 
2049         // Add TPM Device to mmio
2050         self.address_manager
2051             .mmio_bus
2052             .insert(
2053                 tpm.clone(),
2054                 arch::layout::TPM_START.0,
2055                 arch::layout::TPM_SIZE,
2056             )
2057             .map_err(DeviceManagerError::BusError)?;
2058 
2059         Ok(tpm)
2060     }
2061 
2062     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2063         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
2064 
2065         // Create "standard" virtio devices (net/block/rng)
2066         devices.append(&mut self.make_virtio_block_devices()?);
2067         devices.append(&mut self.make_virtio_net_devices()?);
2068         devices.append(&mut self.make_virtio_rng_devices()?);
2069 
2070         // Add virtio-fs if required
2071         devices.append(&mut self.make_virtio_fs_devices()?);
2072 
2073         // Add virtio-pmem if required
2074         devices.append(&mut self.make_virtio_pmem_devices()?);
2075 
2076         // Add virtio-vsock if required
2077         devices.append(&mut self.make_virtio_vsock_devices()?);
2078 
2079         devices.append(&mut self.make_virtio_mem_devices()?);
2080 
2081         // Add virtio-balloon if required
2082         devices.append(&mut self.make_virtio_balloon_devices()?);
2083 
2084         // Add virtio-watchdog device
2085         devices.append(&mut self.make_virtio_watchdog_devices()?);
2086 
2087         // Add vDPA devices if required
2088         devices.append(&mut self.make_vdpa_devices()?);
2089 
2090         Ok(devices)
2091     }
2092 
2093     // Cache whether io_uring is supported to avoid probing for every block device
2094     fn io_uring_is_supported(&mut self) -> bool {
2095         if let Some(supported) = self.io_uring_supported {
2096             return supported;
2097         }
2098 
2099         let supported = block_io_uring_is_supported();
2100         self.io_uring_supported = Some(supported);
2101         supported
2102     }
2103 
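    // Builds a single virtio-block device: either a vhost-user-blk frontend
    // when `vhost_user` is set, or a local disk whose backend is chosen by
    // probing the image type (fixed VHD, raw, QCOW2, VHDX), preferring
    // io_uring for raw and fixed VHD images when supported and not disabled.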
2104     fn make_virtio_block_device(
2105         &mut self,
2106         disk_cfg: &mut DiskConfig,
2107     ) -> DeviceManagerResult<MetaVirtioDevice> {
2108         let id = if let Some(id) = &disk_cfg.id {
2109             id.clone()
2110         } else {
2111             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
2112             disk_cfg.id = Some(id.clone());
2113             id
2114         };
2115 
2116         info!("Creating virtio-block device: {:?}", disk_cfg);
2117 
2118         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
2119 
2120         let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
2121             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
2122             let vu_cfg = VhostUserConfig {
2123                 socket,
2124                 num_queues: disk_cfg.num_queues,
2125                 queue_size: disk_cfg.queue_size,
2126             };
2127             let vhost_user_block = Arc::new(Mutex::new(
2128                 match virtio_devices::vhost_user::Blk::new(
2129                     id.clone(),
2130                     vu_cfg,
2131                     self.seccomp_action.clone(),
2132                     self.exit_evt
2133                         .try_clone()
2134                         .map_err(DeviceManagerError::EventFd)?,
2135                     self.force_iommu,
2136                     snapshot
2137                         .map(|s| s.to_versioned_state(&id))
2138                         .transpose()
2139                         .map_err(DeviceManagerError::RestoreGetState)?,
2140                 ) {
2141                     Ok(vub_device) => vub_device,
2142                     Err(e) => {
2143                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2144                     }
2145                 },
2146             ));
2147 
2148             (
2149                 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2150                 vhost_user_block as Arc<Mutex<dyn Migratable>>,
2151             )
2152         } else {
2153             let mut options = OpenOptions::new();
2154             options.read(true);
2155             options.write(!disk_cfg.readonly);
2156             if disk_cfg.direct {
2157                 options.custom_flags(libc::O_DIRECT);
2158             }
2159             // Open block device path
2160             let mut file: File = options
2161                 .open(
2162                     disk_cfg
2163                         .path
2164                         .as_ref()
2165                         .ok_or(DeviceManagerError::NoDiskPath)?
2166                         .clone(),
2167                 )
2168                 .map_err(DeviceManagerError::Disk)?;
2169             let image_type =
2170                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2171 
2172             let image = match image_type {
2173                 ImageType::FixedVhd => {
2174                     // Use asynchronous backend relying on io_uring if the
2175                     // syscalls are supported.
2176                     if !disk_cfg.disable_io_uring && self.io_uring_is_supported() {
2177                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2178                         Box::new(
2179                             FixedVhdDiskAsync::new(file)
2180                                 .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2181                         ) as Box<dyn DiskFile>
2182                     } else {
2183                         info!("Using synchronous fixed VHD disk file");
2184                         Box::new(
2185                             FixedVhdDiskSync::new(file)
2186                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2187                         ) as Box<dyn DiskFile>
2188                     }
2189                 }
2190                 ImageType::Raw => {
2191                     // Use asynchronous backend relying on io_uring if the
2192                     // syscalls are supported.
2193                     if !disk_cfg.disable_io_uring && self.io_uring_is_supported() {
2194                         info!("Using asynchronous RAW disk file (io_uring)");
2195                         Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2196                     } else {
2197                         info!("Using synchronous RAW disk file");
2198                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2199                     }
2200                 }
2201                 ImageType::Qcow2 => {
2202                     info!("Using synchronous QCOW disk file");
2203                     Box::new(
2204                         QcowDiskSync::new(file, disk_cfg.direct)
2205                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2206                     ) as Box<dyn DiskFile>
2207                 }
2208                 ImageType::Vhdx => {
2209                     info!("Using synchronous VHDX disk file");
2210                     Box::new(
2211                         VhdxDiskSync::new(file)
2212                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2213                     ) as Box<dyn DiskFile>
2214                 }
2215             };
2216 
2217             let virtio_block = Arc::new(Mutex::new(
2218                 virtio_devices::Block::new(
2219                     id.clone(),
2220                     image,
2221                     disk_cfg
2222                         .path
2223                         .as_ref()
2224                         .ok_or(DeviceManagerError::NoDiskPath)?
2225                         .clone(),
2226                     disk_cfg.readonly,
2227                     self.force_iommu | disk_cfg.iommu,
2228                     disk_cfg.num_queues,
2229                     disk_cfg.queue_size,
2230                     self.seccomp_action.clone(),
2231                     disk_cfg.rate_limiter_config,
2232                     self.exit_evt
2233                         .try_clone()
2234                         .map_err(DeviceManagerError::EventFd)?,
2235                     snapshot
2236                         .map(|s| s.to_versioned_state(&id))
2237                         .transpose()
2238                         .map_err(DeviceManagerError::RestoreGetState)?,
2239                 )
2240                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2241             ));
2242 
2243             (
2244                 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2245                 virtio_block as Arc<Mutex<dyn Migratable>>,
2246             )
2247         };
2248 
2249         // Fill the device tree with a new node. In case of restore, we
2250         // know there is nothing to do, so we can simply override the
2251         // existing entry.
2252         self.device_tree
2253             .lock()
2254             .unwrap()
2255             .insert(id.clone(), device_node!(id, migratable_device));
2256 
2257         Ok(MetaVirtioDevice {
2258             virtio_device,
2259             iommu: disk_cfg.iommu,
2260             id,
2261             pci_segment: disk_cfg.pci_segment,
2262             dma_handler: None,
2263         })
2264     }
2265 
2266     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2267         let mut devices = Vec::new();
2268 
2269         let mut block_devices = self.config.lock().unwrap().disks.clone();
2270         if let Some(disk_list_cfg) = &mut block_devices {
2271             for disk_cfg in disk_list_cfg.iter_mut() {
2272                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2273             }
2274         }
2275         self.config.lock().unwrap().disks = block_devices;
2276 
2277         Ok(devices)
2278     }
2279 
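    // Builds a single virtio-net device: vhost-user-net (client or server
    // mode) when requested, otherwise a TAP-backed device created from an
    // existing interface name, pre-opened TAP fds, or a new interface
    // configured from ip/mask/mac.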
2280     fn make_virtio_net_device(
2281         &mut self,
2282         net_cfg: &mut NetConfig,
2283     ) -> DeviceManagerResult<MetaVirtioDevice> {
2284         let id = if let Some(id) = &net_cfg.id {
2285             id.clone()
2286         } else {
2287             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2288             net_cfg.id = Some(id.clone());
2289             id
2290         };
2291         info!("Creating virtio-net device: {:?}", net_cfg);
2292 
2293         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
2294 
2295         let (virtio_device, migratable_device) = if net_cfg.vhost_user {
2296             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2297             let vu_cfg = VhostUserConfig {
2298                 socket,
2299                 num_queues: net_cfg.num_queues,
2300                 queue_size: net_cfg.queue_size,
2301             };
2302             let server = match net_cfg.vhost_mode {
2303                 VhostMode::Client => false,
2304                 VhostMode::Server => true,
2305             };
2306             let vhost_user_net = Arc::new(Mutex::new(
2307                 match virtio_devices::vhost_user::Net::new(
2308                     id.clone(),
2309                     net_cfg.mac,
2310                     net_cfg.mtu,
2311                     vu_cfg,
2312                     server,
2313                     self.seccomp_action.clone(),
2314                     self.exit_evt
2315                         .try_clone()
2316                         .map_err(DeviceManagerError::EventFd)?,
2317                     self.force_iommu,
2318                     snapshot
2319                         .map(|s| s.to_versioned_state(&id))
2320                         .transpose()
2321                         .map_err(DeviceManagerError::RestoreGetState)?,
2322                 ) {
2323                     Ok(vun_device) => vun_device,
2324                     Err(e) => {
2325                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2326                     }
2327                 },
2328             ));
2329 
2330             (
2331                 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2332                 vhost_user_net as Arc<Mutex<dyn Migratable>>,
2333             )
2334         } else {
2335             let state = snapshot
2336                 .map(|s| s.to_versioned_state(&id))
2337                 .transpose()
2338                 .map_err(DeviceManagerError::RestoreGetState)?;
2339 
2340             let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
2341                 Arc::new(Mutex::new(
2342                     virtio_devices::Net::new(
2343                         id.clone(),
2344                         Some(tap_if_name),
2345                         None,
2346                         None,
2347                         Some(net_cfg.mac),
2348                         &mut net_cfg.host_mac,
2349                         net_cfg.mtu,
2350                         self.force_iommu | net_cfg.iommu,
2351                         net_cfg.num_queues,
2352                         net_cfg.queue_size,
2353                         self.seccomp_action.clone(),
2354                         net_cfg.rate_limiter_config,
2355                         self.exit_evt
2356                             .try_clone()
2357                             .map_err(DeviceManagerError::EventFd)?,
2358                         state,
2359                     )
2360                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2361                 ))
2362             } else if let Some(fds) = &net_cfg.fds {
2363                 Arc::new(Mutex::new(
2364                     virtio_devices::Net::from_tap_fds(
2365                         id.clone(),
2366                         fds,
2367                         Some(net_cfg.mac),
2368                         net_cfg.mtu,
2369                         self.force_iommu | net_cfg.iommu,
2370                         net_cfg.queue_size,
2371                         self.seccomp_action.clone(),
2372                         net_cfg.rate_limiter_config,
2373                         self.exit_evt
2374                             .try_clone()
2375                             .map_err(DeviceManagerError::EventFd)?,
2376                         state,
2377                     )
2378                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2379                 ))
2380             } else {
2381                 Arc::new(Mutex::new(
2382                     virtio_devices::Net::new(
2383                         id.clone(),
2384                         None,
2385                         Some(net_cfg.ip),
2386                         Some(net_cfg.mask),
2387                         Some(net_cfg.mac),
2388                         &mut net_cfg.host_mac,
2389                         net_cfg.mtu,
2390                         self.force_iommu | net_cfg.iommu,
2391                         net_cfg.num_queues,
2392                         net_cfg.queue_size,
2393                         self.seccomp_action.clone(),
2394                         net_cfg.rate_limiter_config,
2395                         self.exit_evt
2396                             .try_clone()
2397                             .map_err(DeviceManagerError::EventFd)?,
2398                         state,
2399                     )
2400                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2401                 ))
2402             };
2403 
2404             (
2405                 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2406                 virtio_net as Arc<Mutex<dyn Migratable>>,
2407             )
2408         };
2409 
2410         // Fill the device tree with a new node. In case of restore, we
2411         // know there is nothing to do, so we can simply override the
2412         // existing entry.
2413         self.device_tree
2414             .lock()
2415             .unwrap()
2416             .insert(id.clone(), device_node!(id, migratable_device));
2417 
2418         Ok(MetaVirtioDevice {
2419             virtio_device,
2420             iommu: net_cfg.iommu,
2421             id,
2422             pci_segment: net_cfg.pci_segment,
2423             dma_handler: None,
2424         })
2425     }
2426 
2427     /// Add virtio-net and vhost-user-net devices
2428     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2429         let mut devices = Vec::new();
2430         let mut net_devices = self.config.lock().unwrap().net.clone();
2431         if let Some(net_list_cfg) = &mut net_devices {
2432             for net_cfg in net_list_cfg.iter_mut() {
2433                 devices.push(self.make_virtio_net_device(net_cfg)?);
2434             }
2435         }
2436         self.config.lock().unwrap().net = net_devices;
2437 
2438         Ok(devices)
2439     }
2440 
2441     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2442         let mut devices = Vec::new();
2443 
2444         // Add virtio-rng if required
2445         let rng_config = self.config.lock().unwrap().rng.clone();
2446         if let Some(rng_path) = rng_config.src.to_str() {
2447             info!("Creating virtio-rng device: {:?}", rng_config);
2448             let id = String::from(RNG_DEVICE_NAME);
2449 
2450             let virtio_rng_device = Arc::new(Mutex::new(
2451                 virtio_devices::Rng::new(
2452                     id.clone(),
2453                     rng_path,
2454                     self.force_iommu | rng_config.iommu,
2455                     self.seccomp_action.clone(),
2456                     self.exit_evt
2457                         .try_clone()
2458                         .map_err(DeviceManagerError::EventFd)?,
2459                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2460                         .map_err(DeviceManagerError::RestoreGetState)?,
2461                 )
2462                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2463             ));
2464             devices.push(MetaVirtioDevice {
2465                 virtio_device: Arc::clone(&virtio_rng_device)
2466                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2467                 iommu: rng_config.iommu,
2468                 id: id.clone(),
2469                 pci_segment: 0,
2470                 dma_handler: None,
2471             });
2472 
2473             // Fill the device tree with a new node. In case of restore, we
2474             // know there is nothing to do, so we can simply override the
2475             // existing entry.
2476             self.device_tree
2477                 .lock()
2478                 .unwrap()
2479                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2480         }
2481 
2482         Ok(devices)
2483     }
2484 
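    // virtio-fs is only supported through vhost-user, so a socket to an
    // external daemon (e.g. virtiofsd) is required; without one this fails
    // with NoVirtioFsSock.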
2485     fn make_virtio_fs_device(
2486         &mut self,
2487         fs_cfg: &mut FsConfig,
2488     ) -> DeviceManagerResult<MetaVirtioDevice> {
2489         let id = if let Some(id) = &fs_cfg.id {
2490             id.clone()
2491         } else {
2492             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2493             fs_cfg.id = Some(id.clone());
2494             id
2495         };
2496 
2497         info!("Creating virtio-fs device: {:?}", fs_cfg);
2498 
2499         let mut node = device_node!(id);
2500 
2501         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2502             let virtio_fs_device = Arc::new(Mutex::new(
2503                 virtio_devices::vhost_user::Fs::new(
2504                     id.clone(),
2505                     fs_socket,
2506                     &fs_cfg.tag,
2507                     fs_cfg.num_queues,
2508                     fs_cfg.queue_size,
2509                     None,
2510                     self.seccomp_action.clone(),
2511                     self.exit_evt
2512                         .try_clone()
2513                         .map_err(DeviceManagerError::EventFd)?,
2514                     self.force_iommu,
2515                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2516                         .map_err(DeviceManagerError::RestoreGetState)?,
2517                 )
2518                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2519             ));
2520 
2521             // Update the device tree with the migratable device.
2522             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2523             self.device_tree.lock().unwrap().insert(id.clone(), node);
2524 
2525             Ok(MetaVirtioDevice {
2526                 virtio_device: Arc::clone(&virtio_fs_device)
2527                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2528                 iommu: false,
2529                 id,
2530                 pci_segment: fs_cfg.pci_segment,
2531                 dma_handler: None,
2532             })
2533         } else {
2534             Err(DeviceManagerError::NoVirtioFsSock)
2535         }
2536     }
2537 
2538     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2539         let mut devices = Vec::new();
2540 
2541         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2542         if let Some(fs_list_cfg) = &mut fs_devices {
2543             for fs_cfg in fs_list_cfg.iter_mut() {
2544                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2545             }
2546         }
2547         self.config.lock().unwrap().fs = fs_devices;
2548 
2549         Ok(devices)
2550     }
2551 
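    // Builds a virtio-pmem device. The backing file may be a regular file or a
    // directory (in which case an O_TMPFILE of the configured size is used);
    // the size must be a multiple of 2 MiB. The guest-physical range is either
    // restored from the device tree or newly allocated, then exposed to the
    // guest through a userspace mapping.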
2552     fn make_virtio_pmem_device(
2553         &mut self,
2554         pmem_cfg: &mut PmemConfig,
2555     ) -> DeviceManagerResult<MetaVirtioDevice> {
2556         let id = if let Some(id) = &pmem_cfg.id {
2557             id.clone()
2558         } else {
2559             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2560             pmem_cfg.id = Some(id.clone());
2561             id
2562         };
2563 
2564         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2565 
2566         let mut node = device_node!(id);
2567 
2568         // Look for the id in the device tree. If it can be found, that means
2569         // the device is being restored, otherwise it's created from scratch.
2570         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2571             info!("Restoring virtio-pmem {} resources", id);
2572 
2573             let mut region_range: Option<(u64, u64)> = None;
2574             for resource in node.resources.iter() {
2575                 match resource {
2576                     Resource::MmioAddressRange { base, size } => {
2577                         if region_range.is_some() {
2578                             return Err(DeviceManagerError::ResourceAlreadyExists);
2579                         }
2580 
2581                         region_range = Some((*base, *size));
2582                     }
2583                     _ => {
2584                         error!("Unexpected resource {:?} for {}", resource, id);
2585                     }
2586                 }
2587             }
2588 
2589             if region_range.is_none() {
2590                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2591             }
2592 
2593             region_range
2594         } else {
2595             None
2596         };
2597 
2598         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2599             if pmem_cfg.size.is_none() {
2600                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2601             }
2602             (O_TMPFILE, true)
2603         } else {
2604             (0, false)
2605         };
2606 
2607         let mut file = OpenOptions::new()
2608             .read(true)
2609             .write(!pmem_cfg.discard_writes)
2610             .custom_flags(custom_flags)
2611             .open(&pmem_cfg.file)
2612             .map_err(DeviceManagerError::PmemFileOpen)?;
2613 
2614         let size = if let Some(size) = pmem_cfg.size {
2615             if set_len {
2616                 file.set_len(size)
2617                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2618             }
2619             size
2620         } else {
2621             file.seek(SeekFrom::End(0))
2622                 .map_err(DeviceManagerError::PmemFileSetLen)?
2623         };
2624 
2625         if size % 0x20_0000 != 0 {
2626             return Err(DeviceManagerError::PmemSizeNotAligned);
2627         }
2628 
2629         let (region_base, region_size) = if let Some((base, size)) = region_range {
2630             // The memory needs to be 2MiB aligned in order to support
2631             // hugepages.
2632             self.pci_segments[pmem_cfg.pci_segment as usize]
2633                 .allocator
2634                 .lock()
2635                 .unwrap()
2636                 .allocate(
2637                     Some(GuestAddress(base)),
2638                     size as GuestUsize,
2639                     Some(0x0020_0000),
2640                 )
2641                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2642 
2643             (base, size)
2644         } else {
2645             // The memory needs to be 2MiB aligned in order to support
2646             // hugepages.
2647             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2648                 .allocator
2649                 .lock()
2650                 .unwrap()
2651                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
2652                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2653 
2654             (base.raw_value(), size)
2655         };
2656 
2657         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2658         let mmap_region = MmapRegion::build(
2659             Some(FileOffset::new(cloned_file, 0)),
2660             region_size as usize,
2661             PROT_READ | PROT_WRITE,
2662             MAP_NORESERVE
2663                 | if pmem_cfg.discard_writes {
2664                     MAP_PRIVATE
2665                 } else {
2666                     MAP_SHARED
2667                 },
2668         )
2669         .map_err(DeviceManagerError::NewMmapRegion)?;
2670         let host_addr: u64 = mmap_region.as_ptr() as u64;
2671 
2672         let mem_slot = self
2673             .memory_manager
2674             .lock()
2675             .unwrap()
2676             .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
2677             .map_err(DeviceManagerError::MemoryManager)?;
2678 
2679         let mapping = virtio_devices::UserspaceMapping {
2680             host_addr,
2681             mem_slot,
2682             addr: GuestAddress(region_base),
2683             len: region_size,
2684             mergeable: false,
2685         };
2686 
2687         let virtio_pmem_device = Arc::new(Mutex::new(
2688             virtio_devices::Pmem::new(
2689                 id.clone(),
2690                 file,
2691                 GuestAddress(region_base),
2692                 mapping,
2693                 mmap_region,
2694                 self.force_iommu | pmem_cfg.iommu,
2695                 self.seccomp_action.clone(),
2696                 self.exit_evt
2697                     .try_clone()
2698                     .map_err(DeviceManagerError::EventFd)?,
2699                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2700                     .map_err(DeviceManagerError::RestoreGetState)?,
2701             )
2702             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2703         ));
2704 
2705         // Update the device tree with correct resource information and with
2706         // the migratable device.
2707         node.resources.push(Resource::MmioAddressRange {
2708             base: region_base,
2709             size: region_size,
2710         });
2711         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2712         self.device_tree.lock().unwrap().insert(id.clone(), node);
2713 
2714         Ok(MetaVirtioDevice {
2715             virtio_device: Arc::clone(&virtio_pmem_device)
2716                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2717             iommu: pmem_cfg.iommu,
2718             id,
2719             pci_segment: pmem_cfg.pci_segment,
2720             dma_handler: None,
2721         })
2722     }
2723 
2724     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2725         let mut devices = Vec::new();
2726         // Add virtio-pmem if required
2727         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2728         if let Some(pmem_list_cfg) = &mut pmem_devices {
2729             for pmem_cfg in pmem_list_cfg.iter_mut() {
2730                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2731             }
2732         }
2733         self.config.lock().unwrap().pmem = pmem_devices;
2734 
2735         Ok(devices)
2736     }
2737 
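    // Builds the virtio-vsock device, backed by a Unix-socket backend bound to
    // the configured guest CID.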
2738     fn make_virtio_vsock_device(
2739         &mut self,
2740         vsock_cfg: &mut VsockConfig,
2741     ) -> DeviceManagerResult<MetaVirtioDevice> {
2742         let id = if let Some(id) = &vsock_cfg.id {
2743             id.clone()
2744         } else {
2745             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2746             vsock_cfg.id = Some(id.clone());
2747             id
2748         };
2749 
2750         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2751 
2752         let socket_path = vsock_cfg
2753             .socket
2754             .to_str()
2755             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2756         let backend =
2757             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2758                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2759 
2760         let vsock_device = Arc::new(Mutex::new(
2761             virtio_devices::Vsock::new(
2762                 id.clone(),
2763                 vsock_cfg.cid,
2764                 vsock_cfg.socket.clone(),
2765                 backend,
2766                 self.force_iommu | vsock_cfg.iommu,
2767                 self.seccomp_action.clone(),
2768                 self.exit_evt
2769                     .try_clone()
2770                     .map_err(DeviceManagerError::EventFd)?,
2771                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2772                     .map_err(DeviceManagerError::RestoreGetState)?,
2773             )
2774             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2775         ));
2776 
2777         // Fill the device tree with a new node. In case of restore, we
2778         // know there is nothing to do, so we can simply override the
2779         // existing entry.
2780         self.device_tree
2781             .lock()
2782             .unwrap()
2783             .insert(id.clone(), device_node!(id, vsock_device));
2784 
2785         Ok(MetaVirtioDevice {
2786             virtio_device: Arc::clone(&vsock_device)
2787                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2788             iommu: vsock_cfg.iommu,
2789             id,
2790             pci_segment: vsock_cfg.pci_segment,
2791             dma_handler: None,
2792         })
2793     }
2794 
2795     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2796         let mut devices = Vec::new();
2797 
2798         let mut vsock = self.config.lock().unwrap().vsock.clone();
2799         if let Some(ref mut vsock_cfg) = &mut vsock {
2800             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
2801         }
2802         self.config.lock().unwrap().vsock = vsock;
2803 
2804         Ok(devices)
2805     }
2806 
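    // Creates one virtio-mem device per memory zone that has a virtio-mem
    // region, tagging it with the NUMA node it belongs to and handing the
    // device back to the zone so later resize requests can reach it.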
2807     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2808         let mut devices = Vec::new();
2809 
2810         let mm = self.memory_manager.clone();
2811         let mut mm = mm.lock().unwrap();
2812         for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
2813             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
2814                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
2815 
2816                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
2817                     .map(|i| i as u16);
2818 
2819                 let virtio_mem_device = Arc::new(Mutex::new(
2820                     virtio_devices::Mem::new(
2821                         memory_zone_id.clone(),
2822                         virtio_mem_zone.region(),
2823                         self.seccomp_action.clone(),
2824                         node_id,
2825                         virtio_mem_zone.hotplugged_size(),
2826                         virtio_mem_zone.hugepages(),
2827                         self.exit_evt
2828                             .try_clone()
2829                             .map_err(DeviceManagerError::EventFd)?,
2830                         virtio_mem_zone.blocks_state().clone(),
2831                         versioned_state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
2832                             .map_err(DeviceManagerError::RestoreGetState)?,
2833                     )
2834                     .map_err(DeviceManagerError::CreateVirtioMem)?,
2835                 ));
2836 
2837                 // Update the virtio-mem zone so that it has a handle onto the
2838                 // virtio-mem device, which will be used for triggering a resize
2839                 // if needed.
2840                 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));
2841 
2842                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
2843 
2844                 devices.push(MetaVirtioDevice {
2845                     virtio_device: Arc::clone(&virtio_mem_device)
2846                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2847                     iommu: false,
2848                     id: memory_zone_id.clone(),
2849                     pci_segment: 0,
2850                     dma_handler: None,
2851                 });
2852 
2853                 // Fill the device tree with a new node. In case of restore, we
2854                 // know there is nothing to do, so we can simply override the
2855                 // existing entry.
2856                 self.device_tree.lock().unwrap().insert(
2857                     memory_zone_id.clone(),
2858                     device_node!(memory_zone_id, virtio_mem_device),
2859                 );
2860             }
2861         }
2862 
2863         Ok(devices)
2864     }
2865 
2866     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2867         let mut devices = Vec::new();
2868 
2869         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
2870             let id = String::from(BALLOON_DEVICE_NAME);
2871             info!("Creating virtio-balloon device: id = {}", id);
2872 
2873             let virtio_balloon_device = Arc::new(Mutex::new(
2874                 virtio_devices::Balloon::new(
2875                     id.clone(),
2876                     balloon_config.size,
2877                     balloon_config.deflate_on_oom,
2878                     balloon_config.free_page_reporting,
2879                     self.seccomp_action.clone(),
2880                     self.exit_evt
2881                         .try_clone()
2882                         .map_err(DeviceManagerError::EventFd)?,
2883                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2884                         .map_err(DeviceManagerError::RestoreGetState)?,
2885                 )
2886                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
2887             ));
2888 
2889             self.balloon = Some(virtio_balloon_device.clone());
2890 
2891             devices.push(MetaVirtioDevice {
2892                 virtio_device: Arc::clone(&virtio_balloon_device)
2893                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2894                 iommu: false,
2895                 id: id.clone(),
2896                 pci_segment: 0,
2897                 dma_handler: None,
2898             });
2899 
2900             self.device_tree
2901                 .lock()
2902                 .unwrap()
2903                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
2904         }
2905 
2906         Ok(devices)
2907     }
2908 
2909     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2910         let mut devices = Vec::new();
2911 
2912         if !self.config.lock().unwrap().watchdog {
2913             return Ok(devices);
2914         }
2915 
2916         let id = String::from(WATCHDOG_DEVICE_NAME);
2917         info!("Creating virtio-watchdog device: id = {}", id);
2918 
2919         let virtio_watchdog_device = Arc::new(Mutex::new(
2920             virtio_devices::Watchdog::new(
2921                 id.clone(),
2922                 self.reset_evt.try_clone().unwrap(),
2923                 self.seccomp_action.clone(),
2924                 self.exit_evt
2925                     .try_clone()
2926                     .map_err(DeviceManagerError::EventFd)?,
2927                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2928                     .map_err(DeviceManagerError::RestoreGetState)?,
2929             )
2930             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
2931         ));
2932         devices.push(MetaVirtioDevice {
2933             virtio_device: Arc::clone(&virtio_watchdog_device)
2934                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2935             iommu: false,
2936             id: id.clone(),
2937             pci_segment: 0,
2938             dma_handler: None,
2939         });
2940 
2941         self.device_tree
2942             .lock()
2943             .unwrap()
2944             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
2945 
2946         Ok(devices)
2947     }
2948 
2949     fn make_vdpa_device(
2950         &mut self,
2951         vdpa_cfg: &mut VdpaConfig,
2952     ) -> DeviceManagerResult<MetaVirtioDevice> {
2953         let id = if let Some(id) = &vdpa_cfg.id {
2954             id.clone()
2955         } else {
2956             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
2957             vdpa_cfg.id = Some(id.clone());
2958             id
2959         };
2960 
2961         info!("Creating vDPA device: {:?}", vdpa_cfg);
2962 
2963         let device_path = vdpa_cfg
2964             .path
2965             .to_str()
2966             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
2967 
2968         let vdpa_device = Arc::new(Mutex::new(
2969             virtio_devices::Vdpa::new(
2970                 id.clone(),
2971                 device_path,
2972                 self.memory_manager.lock().unwrap().guest_memory(),
2973                 vdpa_cfg.num_queues as u16,
2974                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2975                     .map_err(DeviceManagerError::RestoreGetState)?,
2976             )
2977             .map_err(DeviceManagerError::CreateVdpa)?,
2978         ));
2979 
2980         // Create the DMA handler that is required by the vDPA device
2981         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
2982             Arc::clone(&vdpa_device),
2983             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
2984         ));
2985 
2986         self.device_tree
2987             .lock()
2988             .unwrap()
2989             .insert(id.clone(), device_node!(id, vdpa_device));
2990 
2991         Ok(MetaVirtioDevice {
2992             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2993             iommu: vdpa_cfg.iommu,
2994             id,
2995             pci_segment: vdpa_cfg.pci_segment,
2996             dma_handler: Some(vdpa_mapping),
2997         })
2998     }
2999 
3000     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3001         let mut devices = Vec::new();
3002         // Add vdpa if required
3003         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
3004         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
3005             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
3006                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
3007             }
3008         }
3009         self.config.lock().unwrap().vdpa = vdpa_devices;
3010 
3011         Ok(devices)
3012     }
3013 
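         // Generates a unique device name by appending the current device
         // counter value to the given prefix, skipping names already present
         // in the boot id list or the device tree. An error is returned once
         // the counter wraps back to its starting value without finding a free
         // name. For example, a prefix such as "_disk" might yield "_disk0",
         // "_disk1", and so on (the prefix value here is illustrative; the
         // actual constants are defined elsewhere in this file).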
3014     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
3015         let start_id = self.device_id_cnt;
3016         loop {
3017             // Generate the temporary name.
3018             let name = format!("{}{}", prefix, self.device_id_cnt);
3019             // Increment the counter.
3020             self.device_id_cnt += Wrapping(1);
3021             // Check if the name is already in use.
3022             if !self.boot_id_list.contains(&name)
3023                 && !self.device_tree.lock().unwrap().contains_key(&name)
3024             {
3025                 return Ok(name);
3026             }
3027 
3028             if self.device_id_cnt == start_id {
3029                 // We went through a full loop and there's nothing else we can
3030                 // do.
3031                 break;
3032             }
3033         }
3034         Err(DeviceManagerError::NoAvailableDeviceName)
3035     }
3036 
3037     fn add_passthrough_device(
3038         &mut self,
3039         device_cfg: &mut DeviceConfig,
3040     ) -> DeviceManagerResult<(PciBdf, String)> {
3041         // If the passthrough device has not been created yet, it is created
3042         // here and stored in the DeviceManager structure for future needs.
3043         if self.passthrough_device.is_none() {
3044             self.passthrough_device = Some(
3045                 self.address_manager
3046                     .vm
3047                     .create_passthrough_device()
3048                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
3049             );
3050         }
3051 
3052         self.add_vfio_device(device_cfg)
3053     }
3054 
3055     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
3056         let passthrough_device = self
3057             .passthrough_device
3058             .as_ref()
3059             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
3060 
3061         let dup = passthrough_device
3062             .try_clone()
3063             .map_err(DeviceManagerError::VfioCreate)?;
3064 
3065         Ok(Arc::new(
3066             VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
3067         ))
3068     }
3069 
3070     fn add_vfio_device(
3071         &mut self,
3072         device_cfg: &mut DeviceConfig,
3073     ) -> DeviceManagerResult<(PciBdf, String)> {
3074         let vfio_name = if let Some(id) = &device_cfg.id {
3075             id.clone()
3076         } else {
3077             let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
3078             device_cfg.id = Some(id.clone());
3079             id
3080         };
3081 
3082         let (pci_segment_id, pci_device_bdf, resources) =
3083             self.pci_resources(&vfio_name, device_cfg.pci_segment)?;
3084 
3085         let mut needs_dma_mapping = false;
3086 
3087         // Here we create a new VFIO container in one of two cases. Either this
3088         // is the first VFIO device, meaning we need a new VFIO container, which
3089         // will be shared with other VFIO devices. Or the new VFIO device is
3090         // attached to a vIOMMU, meaning we must create a dedicated VFIO
3091         // container. In the vIOMMU use case, we can't put all the devices under
3092         // the same VFIO container since we couldn't map/unmap memory for each
3093         // device. That's simply because the map/unmap operations happen at the
3094         // VFIO container level.
3095         let vfio_container = if device_cfg.iommu {
3096             let vfio_container = self.create_vfio_container()?;
3097 
3098             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3099                 Arc::clone(&vfio_container),
3100                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3101             ));
3102 
3103             if let Some(iommu) = &self.iommu_device {
3104                 iommu
3105                     .lock()
3106                     .unwrap()
3107                     .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
3108             } else {
3109                 return Err(DeviceManagerError::MissingVirtualIommu);
3110             }
3111 
3112             vfio_container
3113         } else if let Some(vfio_container) = &self.vfio_container {
3114             Arc::clone(vfio_container)
3115         } else {
3116             let vfio_container = self.create_vfio_container()?;
3117             needs_dma_mapping = true;
3118             self.vfio_container = Some(Arc::clone(&vfio_container));
3119 
3120             vfio_container
3121         };
3122 
3123         let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
3124             .map_err(DeviceManagerError::VfioCreate)?;
3125 
3126         if needs_dma_mapping {
3127             // Register DMA mapping in IOMMU.
3128             // Do not register virtio-mem regions, as they are handled directly by
3129             // the virtio-mem device itself.
3130             for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3131                 for region in zone.regions() {
3132                     vfio_container
3133                         .vfio_dma_map(
3134                             region.start_addr().raw_value(),
3135                             region.len(),
3136                             region.as_ptr() as u64,
3137                         )
3138                         .map_err(DeviceManagerError::VfioDmaMap)?;
3139                 }
3140             }
3141 
3142             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3143                 Arc::clone(&vfio_container),
3144                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3145             ));
3146 
3147             for virtio_mem_device in self.virtio_mem_devices.iter() {
3148                 virtio_mem_device
3149                     .lock()
3150                     .unwrap()
3151                     .add_dma_mapping_handler(
3152                         VirtioMemMappingSource::Container,
3153                         vfio_mapping.clone(),
3154                     )
3155                     .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3156             }
3157         }
3158 
3159         let legacy_interrupt_group =
3160             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3161                 Some(
3162                     legacy_interrupt_manager
3163                         .create_group(LegacyIrqGroupConfig {
3164                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3165                                 [pci_device_bdf.device() as usize]
3166                                 as InterruptIndex,
3167                         })
3168                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3169                 )
3170             } else {
3171                 None
3172             };
3173 
3174         let memory_manager = self.memory_manager.clone();
3175 
3176         let vfio_pci_device = VfioPciDevice::new(
3177             vfio_name.clone(),
3178             &self.address_manager.vm,
3179             vfio_device,
3180             vfio_container,
3181             self.msi_interrupt_manager.clone(),
3182             legacy_interrupt_group,
3183             device_cfg.iommu,
3184             pci_device_bdf,
3185             self.restoring,
3186             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3187         )
3188         .map_err(DeviceManagerError::VfioPciCreate)?;
3189 
3190         let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));
3191 
3192         let new_resources = self.add_pci_device(
3193             vfio_pci_device.clone(),
3194             vfio_pci_device.clone(),
3195             pci_segment_id,
3196             pci_device_bdf,
3197             resources,
3198         )?;
3199 
3200         // When restoring a VM, the restore codepath will take care of mapping
3201         // the MMIO regions based on the information from the snapshot.
3202         if !self.restoring {
3203             vfio_pci_device
3204                 .lock()
3205                 .unwrap()
3206                 .map_mmio_regions()
3207                 .map_err(DeviceManagerError::VfioMapRegion)?;
3208         }
3209 
3210         let mut node = device_node!(vfio_name, vfio_pci_device);
3211 
3212         // Update the device tree with correct resource information.
3213         node.resources = new_resources;
3214         node.pci_bdf = Some(pci_device_bdf);
3215         node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));
3216 
3217         self.device_tree
3218             .lock()
3219             .unwrap()
3220             .insert(vfio_name.clone(), node);
3221 
3222         Ok((pci_device_bdf, vfio_name))
3223     }
3224 
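         // Common plumbing for exposing a PCI device to the guest: allocates
         // the device's BARs (reusing restored resources when provided), adds
         // the device to the PCI bus of the requested segment, registers its
         // BAR mappings on the MMIO bus (and, on x86_64, the I/O bus), and
         // returns the list of allocated BAR resources so callers can record
         // them in the device tree.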
3225     fn add_pci_device(
3226         &mut self,
3227         bus_device: Arc<Mutex<dyn BusDevice>>,
3228         pci_device: Arc<Mutex<dyn PciDevice>>,
3229         segment_id: u16,
3230         bdf: PciBdf,
3231         resources: Option<Vec<Resource>>,
3232     ) -> DeviceManagerResult<Vec<Resource>> {
3233         let bars = pci_device
3234             .lock()
3235             .unwrap()
3236             .allocate_bars(
3237                 &self.address_manager.allocator,
3238                 &mut self.pci_segments[segment_id as usize]
3239                     .allocator
3240                     .lock()
3241                     .unwrap(),
3242                 resources,
3243             )
3244             .map_err(DeviceManagerError::AllocateBars)?;
3245 
3246         let mut pci_bus = self.pci_segments[segment_id as usize]
3247             .pci_bus
3248             .lock()
3249             .unwrap();
3250 
3251         pci_bus
3252             .add_device(bdf.device() as u32, pci_device)
3253             .map_err(DeviceManagerError::AddPciDevice)?;
3254 
3255         self.bus_devices.push(Arc::clone(&bus_device));
3256 
3257         pci_bus
3258             .register_mapping(
3259                 bus_device,
3260                 #[cfg(target_arch = "x86_64")]
3261                 self.address_manager.io_bus.as_ref(),
3262                 self.address_manager.mmio_bus.as_ref(),
3263                 bars.clone(),
3264             )
3265             .map_err(DeviceManagerError::AddPciDevice)?;
3266 
3267         let mut new_resources = Vec::new();
3268         for bar in bars {
3269             new_resources.push(Resource::PciBar {
3270                 index: bar.idx(),
3271                 base: bar.addr(),
3272                 size: bar.size(),
3273                 type_: bar.region_type().into(),
3274                 prefetchable: bar.prefetchable().into(),
3275             });
3276         }
3277 
3278         Ok(new_resources)
3279     }
3280 
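         // Creates a passthrough (VFIO) device for each entry in the VM
         // configuration and returns the BDFs of the devices that were
         // attached to the virtual IOMMU.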
3281     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3282         let mut iommu_attached_device_ids = Vec::new();
3283         let mut devices = self.config.lock().unwrap().devices.clone();
3284 
3285         if let Some(device_list_cfg) = &mut devices {
3286             for device_cfg in device_list_cfg.iter_mut() {
3287                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3288                 if device_cfg.iommu && self.iommu_device.is_some() {
3289                     iommu_attached_device_ids.push(device_id);
3290                 }
3291             }
3292         }
3293 
3294         // Update the list of devices
3295         self.config.lock().unwrap().devices = devices;
3296 
3297         Ok(iommu_attached_device_ids)
3298     }
3299 
3300     fn add_vfio_user_device(
3301         &mut self,
3302         device_cfg: &mut UserDeviceConfig,
3303     ) -> DeviceManagerResult<(PciBdf, String)> {
3304         let vfio_user_name = if let Some(id) = &device_cfg.id {
3305             id.clone()
3306         } else {
3307             let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
3308             device_cfg.id = Some(id.clone());
3309             id
3310         };
3311 
3312         let (pci_segment_id, pci_device_bdf, resources) =
3313             self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;
3314 
3315         let legacy_interrupt_group =
3316             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3317                 Some(
3318                     legacy_interrupt_manager
3319                         .create_group(LegacyIrqGroupConfig {
3320                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3321                                 [pci_device_bdf.device() as usize]
3322                                 as InterruptIndex,
3323                         })
3324                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3325                 )
3326             } else {
3327                 None
3328             };
3329 
3330         let client = Arc::new(Mutex::new(
3331             vfio_user::Client::new(&device_cfg.socket)
3332                 .map_err(DeviceManagerError::VfioUserCreateClient)?,
3333         ));
3334 
3335         let memory_manager = self.memory_manager.clone();
3336 
3337         let mut vfio_user_pci_device = VfioUserPciDevice::new(
3338             vfio_user_name.clone(),
3339             &self.address_manager.vm,
3340             client.clone(),
3341             self.msi_interrupt_manager.clone(),
3342             legacy_interrupt_group,
3343             pci_device_bdf,
3344             self.restoring,
3345             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3346         )
3347         .map_err(DeviceManagerError::VfioUserCreate)?;
3348 
3349         let memory = self.memory_manager.lock().unwrap().guest_memory();
3350         let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
3351         for virtio_mem_device in self.virtio_mem_devices.iter() {
3352             virtio_mem_device
3353                 .lock()
3354                 .unwrap()
3355                 .add_dma_mapping_handler(
3356                     VirtioMemMappingSource::Device(pci_device_bdf.into()),
3357                     vfio_user_mapping.clone(),
3358                 )
3359                 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3360         }
3361 
3362         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3363             for region in zone.regions() {
3364                 vfio_user_pci_device
3365                     .dma_map(region)
3366                     .map_err(DeviceManagerError::VfioUserDmaMap)?;
3367             }
3368         }
3369 
3370         let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));
3371 
3372         let new_resources = self.add_pci_device(
3373             vfio_user_pci_device.clone(),
3374             vfio_user_pci_device.clone(),
3375             pci_segment_id,
3376             pci_device_bdf,
3377             resources,
3378         )?;
3379 
3380         // When restoring a VM, the restore codepath will take care of mapping
3381         // the MMIO regions based on the information from the snapshot.
3382         if !self.restoring {
3383             // Note it is required to call 'add_pci_device()' in advance to have the list of
3384             // MMIO regions provisioned correctly.
3385             vfio_user_pci_device
3386                 .lock()
3387                 .unwrap()
3388                 .map_mmio_regions()
3389                 .map_err(DeviceManagerError::VfioUserMapRegion)?;
3390         }
3391 
3392         let mut node = device_node!(vfio_user_name, vfio_user_pci_device);
3393 
3394         // Update the device tree with correct resource information.
3395         node.resources = new_resources;
3396         node.pci_bdf = Some(pci_device_bdf);
3397         node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));
3398 
3399         self.device_tree
3400             .lock()
3401             .unwrap()
3402             .insert(vfio_user_name.clone(), node);
3403 
3404         Ok((pci_device_bdf, vfio_user_name))
3405     }
3406 
3407     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3408         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3409 
3410         if let Some(device_list_cfg) = &mut user_devices {
3411             for device_cfg in device_list_cfg.iter_mut() {
3412                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3413             }
3414         }
3415 
3416         // Update the list of devices
3417         self.config.lock().unwrap().user_devices = user_devices;
3418 
3419         Ok(vec![])
3420     }
3421 
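         // Wraps a virtio device into a virtio-pci transport device: reserves
         // a BDF on the requested segment (or reuses the one recorded in the
         // device tree when restoring), sets up optional vIOMMU address
         // translation and DMA mappings, registers the ioeventfds backing the
         // device queues, and records the resulting virtio-pci node in the
         // device tree as the parent of the underlying virtio device node.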
3422     fn add_virtio_pci_device(
3423         &mut self,
3424         virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3425         iommu_mapping: &Option<Arc<IommuMapping>>,
3426         virtio_device_id: String,
3427         pci_segment_id: u16,
3428         dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
3429     ) -> DeviceManagerResult<PciBdf> {
3430         let id = format!("{}-{}", VIRTIO_PCI_DEVICE_NAME_PREFIX, virtio_device_id);
3431 
3432         // Add the new virtio-pci node to the device tree.
3433         let mut node = device_node!(id);
3434         node.children = vec![virtio_device_id.clone()];
3435 
3436         let (pci_segment_id, pci_device_bdf, resources) =
3437             self.pci_resources(&id, pci_segment_id)?;
3438 
3439         // Update the existing virtio node by setting the parent.
3440         if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
3441             node.parent = Some(id.clone());
3442         } else {
3443             return Err(DeviceManagerError::MissingNode);
3444         }
3445 
3446         // Allow one MSI-X vector per queue. One extra vector is added to
3447         // account for the dedicated vector used to notify the guest about a
3448         // virtio config change.
3449         let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;
3450 
3451         // Create the AccessPlatform trait from the implementation IommuMapping.
3452         // This will provide address translation for any virtio device sitting
3453         // behind a vIOMMU.
3454         let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
3455         {
3456             Some(Arc::new(AccessPlatformMapping::new(
3457                 pci_device_bdf.into(),
3458                 mapping.clone(),
3459             )))
3460         } else {
3461             None
3462         };
3463 
3464         let memory = self.memory_manager.lock().unwrap().guest_memory();
3465 
3466         // Map DMA ranges if a DMA handler is available and if the device is
3467         // not attached to a virtual IOMMU.
3468         if let Some(dma_handler) = &dma_handler {
3469             if iommu_mapping.is_some() {
3470                 if let Some(iommu) = &self.iommu_device {
3471                     iommu
3472                         .lock()
3473                         .unwrap()
3474                         .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
3475                 } else {
3476                     return Err(DeviceManagerError::MissingVirtualIommu);
3477                 }
3478             } else {
3479                 // Let every virtio-mem device handle the DMA map/unmap through the
3480                 // DMA handler provided.
3481                 for virtio_mem_device in self.virtio_mem_devices.iter() {
3482                     virtio_mem_device
3483                         .lock()
3484                         .unwrap()
3485                         .add_dma_mapping_handler(
3486                             VirtioMemMappingSource::Device(pci_device_bdf.into()),
3487                             dma_handler.clone(),
3488                         )
3489                         .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3490                 }
3491 
3492                 // Do not register virtio-mem regions, as they are handled directly by
3493                 // virtio-mem devices.
3494                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3495                     for region in zone.regions() {
3496                         let gpa = region.start_addr().0;
3497                         let size = region.len();
3498                         dma_handler
3499                             .map(gpa, gpa, size)
3500                             .map_err(DeviceManagerError::VirtioDmaMap)?;
3501                     }
3502                 }
3503             }
3504         }
3505 
3506         let device_type = virtio_device.lock().unwrap().device_type();
3507         let virtio_pci_device = Arc::new(Mutex::new(
3508             VirtioPciDevice::new(
3509                 id.clone(),
3510                 memory,
3511                 virtio_device,
3512                 msix_num,
3513                 access_platform,
3514                 &self.msi_interrupt_manager,
3515                 pci_device_bdf.into(),
3516                 self.activate_evt
3517                     .try_clone()
3518                     .map_err(DeviceManagerError::EventFd)?,
3519                 // All device types *except* virtio block devices should be allocated a 64-bit BAR.
3520                 // Block devices should be given a 32-bit BAR so that they are easily accessible
3521                 // to firmware without requiring excessive identity mapping. The exception is
3522                 // when the device is not placed on the default PCI segment.
3523                 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
3524                 dma_handler,
3525                 self.pending_activations.clone(),
3526                 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
3527             )
3528             .map_err(DeviceManagerError::VirtioDevice)?,
3529         ));
3530 
3531         let new_resources = self.add_pci_device(
3532             virtio_pci_device.clone(),
3533             virtio_pci_device.clone(),
3534             pci_segment_id,
3535             pci_device_bdf,
3536             resources,
3537         )?;
3538 
3539         let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3540         for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3541             let io_addr = IoEventAddress::Mmio(addr);
3542             self.address_manager
3543                 .vm
3544                 .register_ioevent(event, &io_addr, None)
3545                 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
3546         }
3547 
3548         // Update the device tree with correct resource information.
3549         node.resources = new_resources;
3550         node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
3551         node.pci_bdf = Some(pci_device_bdf);
3552         node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
3553         self.device_tree.lock().unwrap().insert(id, node);
3554 
3555         Ok(pci_device_bdf)
3556     }
3557 
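         // Returns the PCI segment, BDF and any previously allocated resources
         // for the given device id. If the id is already present in the device
         // tree the device is being restored, so its recorded BDF and resources
         // are reused; otherwise a fresh BDF is allocated on the requested
         // segment.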
3558     fn pci_resources(
3559         &self,
3560         id: &str,
3561         pci_segment_id: u16,
3562     ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
3563         // Look for the id in the device tree. If it can be found, that means
3564         // the device is being restored, otherwise it's created from scratch.
3565         Ok(
3566             if let Some(node) = self.device_tree.lock().unwrap().get(id) {
3567                 info!("Restoring virtio-pci {} resources", id);
3568                 let pci_device_bdf: PciBdf = node
3569                     .pci_bdf
3570                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3571                 let pci_segment_id = pci_device_bdf.segment();
3572 
3573                 self.pci_segments[pci_segment_id as usize]
3574                     .pci_bus
3575                     .lock()
3576                     .unwrap()
3577                     .get_device_id(pci_device_bdf.device() as usize)
3578                     .map_err(DeviceManagerError::GetPciDeviceId)?;
3579 
3580                 (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
3581             } else {
3582                 let pci_device_bdf =
3583                     self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3584 
3585                 (pci_segment_id, pci_device_bdf, None)
3586             },
3587         )
3588     }
3589 
3590     #[cfg(target_arch = "x86_64")]
3591     pub fn io_bus(&self) -> &Arc<Bus> {
3592         &self.address_manager.io_bus
3593     }
3594 
3595     pub fn mmio_bus(&self) -> &Arc<Bus> {
3596         &self.address_manager.mmio_bus
3597     }
3598 
3599     pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
3600         &self.address_manager.allocator
3601     }
3602 
3603     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3604         self.interrupt_controller
3605             .as_ref()
3606             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3607     }
3608 
3609     pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
3610         &self.pci_segments
3611     }
3612 
3613     pub fn console(&self) -> &Arc<Console> {
3614         &self.console
3615     }
3616 
3617     #[cfg(target_arch = "aarch64")]
3618     pub fn cmdline_additions(&self) -> &[String] {
3619         self.cmdline_additions.as_slice()
3620     }
3621 
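         // Propagates a newly added guest memory region to every component
         // that needs to know about it: the virtio devices (and their DMA
         // handlers when not behind the vIOMMU), the shared VFIO container,
         // and any vfio-user devices found in the device tree.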
3622     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3623         for handle in self.virtio_devices.iter() {
3624             handle
3625                 .virtio_device
3626                 .lock()
3627                 .unwrap()
3628                 .add_memory_region(new_region)
3629                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3630 
3631             if let Some(dma_handler) = &handle.dma_handler {
3632                 if !handle.iommu {
3633                     let gpa = new_region.start_addr().0;
3634                     let size = new_region.len();
3635                     dma_handler
3636                         .map(gpa, gpa, size)
3637                         .map_err(DeviceManagerError::VirtioDmaMap)?;
3638                 }
3639             }
3640         }
3641 
3642         // Take care of updating the memory for VFIO PCI devices.
3643         if let Some(vfio_container) = &self.vfio_container {
3644             vfio_container
3645                 .vfio_dma_map(
3646                     new_region.start_addr().raw_value(),
3647                     new_region.len(),
3648                     new_region.as_ptr() as u64,
3649                 )
3650                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3651         }
3652 
3653         // Take care of updating the memory for vfio-user devices.
3654         {
3655             let device_tree = self.device_tree.lock().unwrap();
3656             for pci_device_node in device_tree.pci_devices() {
3657                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
3658                     .pci_device_handle
3659                     .as_ref()
3660                     .ok_or(DeviceManagerError::MissingPciDevice)?
3661                 {
3662                     vfio_user_pci_device
3663                         .lock()
3664                         .unwrap()
3665                         .dma_map(new_region)
3666                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
3667                 }
3668             }
3669         }
3670 
3671         Ok(())
3672     }
3673 
3674     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3675         for mut activator in self.pending_activations.lock().unwrap().drain(..) {
3676             activator
3677                 .activate()
3678                 .map_err(DeviceManagerError::VirtioActivate)?;
3679         }
3680         Ok(())
3681     }
3682 
3683     pub fn notify_hotplug(
3684         &self,
3685         _notification_type: AcpiNotificationFlags,
3686     ) -> DeviceManagerResult<()> {
3687         return self
3688             .ged_notification_device
3689             .as_ref()
3690             .unwrap()
3691             .lock()
3692             .unwrap()
3693             .notify(_notification_type)
3694             .map_err(DeviceManagerError::HotPlugNotification);
3695     }
3696 
3697     pub fn add_device(
3698         &mut self,
3699         device_cfg: &mut DeviceConfig,
3700     ) -> DeviceManagerResult<PciDeviceInfo> {
3701         self.validate_identifier(&device_cfg.id)?;
3702 
3703         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
3704             return Err(DeviceManagerError::InvalidIommuHotplug);
3705         }
3706 
3707         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
3708 
3709         // Update the PCIU bitmap
3710         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3711 
3712         Ok(PciDeviceInfo {
3713             id: device_name,
3714             bdf,
3715         })
3716     }
3717 
3718     pub fn add_user_device(
3719         &mut self,
3720         device_cfg: &mut UserDeviceConfig,
3721     ) -> DeviceManagerResult<PciDeviceInfo> {
3722         self.validate_identifier(&device_cfg.id)?;
3723 
3724         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
3725 
3726         // Update the PCIU bitmap
3727         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3728 
3729         Ok(PciDeviceInfo {
3730             id: device_name,
3731             bdf,
3732         })
3733     }
3734 
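         // Flags a device for removal: resolves the PCI node for the given id,
         // checks that the device type supports removal, and sets the
         // corresponding bit in the PCID bitmap so the guest can be notified
         // of the pending removal. The actual teardown is performed later by
         // eject_device().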
3735     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
3736         // The node can be directly a PCI node in case the 'id' refers to a
3737         // VFIO device or a virtio-pci one.
3738         // In case the 'id' refers to a virtio device, we must find the PCI
3739         // node by looking at the parent.
3740         let device_tree = self.device_tree.lock().unwrap();
3741         let node = device_tree
3742             .get(&id)
3743             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
3744 
3745         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
3746             node
3747         } else {
3748             let parent = node
3749                 .parent
3750                 .as_ref()
3751                 .ok_or(DeviceManagerError::MissingNode)?;
3752             device_tree
3753                 .get(parent)
3754                 .ok_or(DeviceManagerError::MissingNode)?
3755         };
3756 
3757         let pci_device_bdf: PciBdf = pci_device_node
3758             .pci_bdf
3759             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3760         let pci_segment_id = pci_device_bdf.segment();
3761 
3762         let pci_device_handle = pci_device_node
3763             .pci_device_handle
3764             .as_ref()
3765             .ok_or(DeviceManagerError::MissingPciDevice)?;
3766         #[allow(irrefutable_let_patterns)]
3767         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
3768             let device_type = VirtioDeviceType::from(
3769                 virtio_pci_device
3770                     .lock()
3771                     .unwrap()
3772                     .virtio_device()
3773                     .lock()
3774                     .unwrap()
3775                     .device_type(),
3776             );
3777             match device_type {
3778                 VirtioDeviceType::Net
3779                 | VirtioDeviceType::Block
3780                 | VirtioDeviceType::Pmem
3781                 | VirtioDeviceType::Fs
3782                 | VirtioDeviceType::Vsock => {}
3783                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
3784             }
3785         }
3786 
3787         // Update the PCID bitmap
3788         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
3789 
3790         Ok(())
3791     }
3792 
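         // Tears down a PCI device: gives its device id back to the PCI bus,
         // removes the node (and its children) from the device tree,
         // unregisters ioeventfds and DMA mappings where needed, frees the
         // allocated BARs, and detaches the device from the PCI, I/O and MMIO
         // buses before it is finally dropped.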
3793     pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
3794         info!(
3795             "Ejecting device_id = {} on segment_id={}",
3796             device_id, pci_segment_id
3797         );
3798 
3799         // Convert the device ID into the corresponding b/d/f.
3800         let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);
3801 
3802         // Give the PCI device ID back to the PCI bus.
3803         self.pci_segments[pci_segment_id as usize]
3804             .pci_bus
3805             .lock()
3806             .unwrap()
3807             .put_device_id(device_id as usize)
3808             .map_err(DeviceManagerError::PutPciDeviceId)?;
3809 
3810         // Remove the device from the device tree along with its children.
3811         let mut device_tree = self.device_tree.lock().unwrap();
3812         let pci_device_node = device_tree
3813             .remove_node_by_pci_bdf(pci_device_bdf)
3814             .ok_or(DeviceManagerError::MissingPciDevice)?;
3815 
3816         // For VFIO and vfio-user devices the PCI device id is the id.
3817         // For virtio devices we overwrite it later, as we want the id of the
3818         // underlying virtio device.
3819         let mut id = pci_device_node.id;
3820         let pci_device_handle = pci_device_node
3821             .pci_device_handle
3822             .ok_or(DeviceManagerError::MissingPciDevice)?;
3823         if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
3824             // The virtio-pci device has a single child
3825             if !pci_device_node.children.is_empty() {
3826                 assert_eq!(pci_device_node.children.len(), 1);
3827                 let child_id = &pci_device_node.children[0];
3828                 id = child_id.clone();
3829             }
3830         }
3831         for child in pci_device_node.children.iter() {
3832             device_tree.remove(child);
3833         }
3834 
3835         let mut iommu_attached = false;
3836         if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
3837             if iommu_attached_devices.contains(&pci_device_bdf) {
3838                 iommu_attached = true;
3839             }
3840         }
3841 
3842         let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
3843             // No need to remove any virtio-mem mapping here as the container outlives all devices
3844             PciDeviceHandle::Vfio(vfio_pci_device) => (
3845                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
3846                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
3847                 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
3848                 false,
3849             ),
3850             PciDeviceHandle::Virtio(virtio_pci_device) => {
3851                 let dev = virtio_pci_device.lock().unwrap();
3852                 let bar_addr = dev.config_bar_addr();
3853                 for (event, addr) in dev.ioeventfds(bar_addr) {
3854                     let io_addr = IoEventAddress::Mmio(addr);
3855                     self.address_manager
3856                         .vm
3857                         .unregister_ioevent(event, &io_addr)
3858                         .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
3859                 }
3860 
3861                 if let Some(dma_handler) = dev.dma_handler() {
3862                     if !iommu_attached {
3863                         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3864                             for region in zone.regions() {
3865                                 let iova = region.start_addr().0;
3866                                 let size = region.len();
3867                                 dma_handler
3868                                     .unmap(iova, size)
3869                                     .map_err(DeviceManagerError::VirtioDmaUnmap)?;
3870                             }
3871                         }
3872                     }
3873                 }
3874 
3875                 (
3876                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
3877                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
3878                     Some(dev.virtio_device()),
3879                     dev.dma_handler().is_some() && !iommu_attached,
3880                 )
3881             }
3882             PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
3883                 let mut dev = vfio_user_pci_device.lock().unwrap();
3884                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3885                     for region in zone.regions() {
3886                         dev.dma_unmap(region)
3887                             .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
3888                     }
3889                 }
3890 
3891                 (
3892                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
3893                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
3894                     None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
3895                     true,
3896                 )
3897             }
3898         };
3899 
3900         if remove_dma_handler {
3901             for virtio_mem_device in self.virtio_mem_devices.iter() {
3902                 virtio_mem_device
3903                     .lock()
3904                     .unwrap()
3905                     .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
3906                         pci_device_bdf.into(),
3907                     ))
3908                     .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
3909             }
3910         }
3911 
3912         // Free the allocated BARs
3913         pci_device
3914             .lock()
3915             .unwrap()
3916             .free_bars(
3917                 &mut self.address_manager.allocator.lock().unwrap(),
3918                 &mut self.pci_segments[pci_segment_id as usize]
3919                     .allocator
3920                     .lock()
3921                     .unwrap(),
3922             )
3923             .map_err(DeviceManagerError::FreePciBars)?;
3924 
3925         // Remove the device from the PCI bus
3926         self.pci_segments[pci_segment_id as usize]
3927             .pci_bus
3928             .lock()
3929             .unwrap()
3930             .remove_by_device(&pci_device)
3931             .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;
3932 
3933         #[cfg(target_arch = "x86_64")]
3934         // Remove the device from the IO bus
3935         self.io_bus()
3936             .remove_by_device(&bus_device)
3937             .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;
3938 
3939         // Remove the device from the MMIO bus
3940         self.mmio_bus()
3941             .remove_by_device(&bus_device)
3942             .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;
3943 
3944         // Remove the device from the list of BusDevice held by the
3945         // DeviceManager.
3946         self.bus_devices
3947             .retain(|dev| !Arc::ptr_eq(dev, &bus_device));
3948 
3949         // Shutdown and remove the underlying virtio-device if present
3950         if let Some(virtio_device) = virtio_device {
3951             for mapping in virtio_device.lock().unwrap().userspace_mappings() {
3952                 self.memory_manager
3953                     .lock()
3954                     .unwrap()
3955                     .remove_userspace_mapping(
3956                         mapping.addr.raw_value(),
3957                         mapping.len,
3958                         mapping.host_addr,
3959                         mapping.mergeable,
3960                         mapping.mem_slot,
3961                     )
3962                     .map_err(DeviceManagerError::MemoryManager)?;
3963             }
3964 
3965             virtio_device.lock().unwrap().shutdown();
3966 
3967             self.virtio_devices
3968                 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
3969         }
3970 
3971         event!(
3972             "vm",
3973             "device-removed",
3974             "id",
3975             &id,
3976             "bdf",
3977             pci_device_bdf.to_string()
3978         );
3979 
3980         // At this point, the device has been removed from all the lists and
3981         // buses where it was stored. At the end of this function, after
3982         // virtio_device, bus_device and pci_device are released, the actual
3983         // device will be dropped.
3984         Ok(())
3985     }
3986 
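         // Plugs an already created virtio device into the PCI topology at
         // runtime: registers it with the device manager, attaches it behind
         // the vIOMMU when requested, and flags the new slot in the PCIU
         // bitmap so the guest can be notified of the hotplug.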
3987     fn hotplug_virtio_pci_device(
3988         &mut self,
3989         handle: MetaVirtioDevice,
3990     ) -> DeviceManagerResult<PciDeviceInfo> {
3991         // Add the virtio device to the device manager list. This is important
3992         // as the list is used to notify virtio devices about memory updates
3993         // for instance.
3994         self.virtio_devices.push(handle.clone());
3995 
3996         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
3997             self.iommu_mapping.clone()
3998         } else {
3999             None
4000         };
4001 
4002         let bdf = self.add_virtio_pci_device(
4003             handle.virtio_device,
4004             &mapping,
4005             handle.id.clone(),
4006             handle.pci_segment,
4007             handle.dma_handler,
4008         )?;
4009 
4010         // Update the PCIU bitmap
4011         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4012 
4013         Ok(PciDeviceInfo { id: handle.id, bdf })
4014     }
4015 
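         // Returns true if the given PCI segment is listed in the platform
         // configuration as one of the segments placed behind the virtual
         // IOMMU.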
4016     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
4017         self.config
4018             .lock()
4019             .as_ref()
4020             .unwrap()
4021             .platform
4022             .as_ref()
4023             .map(|pc| {
4024                 pc.iommu_segments
4025                     .as_ref()
4026                     .map(|v| v.contains(&pci_segment_id))
4027                     .unwrap_or_default()
4028             })
4029             .unwrap_or_default()
4030     }
4031 
4032     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
4033         self.validate_identifier(&disk_cfg.id)?;
4034 
4035         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
4036             return Err(DeviceManagerError::InvalidIommuHotplug);
4037         }
4038 
4039         let device = self.make_virtio_block_device(disk_cfg)?;
4040         self.hotplug_virtio_pci_device(device)
4041     }
4042 
4043     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
4044         self.validate_identifier(&fs_cfg.id)?;
4045 
4046         let device = self.make_virtio_fs_device(fs_cfg)?;
4047         self.hotplug_virtio_pci_device(device)
4048     }
4049 
4050     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
4051         self.validate_identifier(&pmem_cfg.id)?;
4052 
4053         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
4054             return Err(DeviceManagerError::InvalidIommuHotplug);
4055         }
4056 
4057         let device = self.make_virtio_pmem_device(pmem_cfg)?;
4058         self.hotplug_virtio_pci_device(device)
4059     }
4060 
4061     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
4062         self.validate_identifier(&net_cfg.id)?;
4063 
4064         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
4065             return Err(DeviceManagerError::InvalidIommuHotplug);
4066         }
4067 
4068         let device = self.make_virtio_net_device(net_cfg)?;
4069         self.hotplug_virtio_pci_device(device)
4070     }
4071 
4072     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
4073         self.validate_identifier(&vdpa_cfg.id)?;
4074 
4075         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
4076             return Err(DeviceManagerError::InvalidIommuHotplug);
4077         }
4078 
4079         let device = self.make_vdpa_device(vdpa_cfg)?;
4080         self.hotplug_virtio_pci_device(device)
4081     }
4082 
4083     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4084         self.validate_identifier(&vsock_cfg.id)?;
4085 
4086         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4087             return Err(DeviceManagerError::InvalidIommuHotplug);
4088         }
4089 
4090         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4091         self.hotplug_virtio_pci_device(device)
4092     }
4093 
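         // Collects the per-device counters exposed by every virtio device,
         // keyed by device id.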
4094     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4095         let mut counters = HashMap::new();
4096 
4097         for handle in &self.virtio_devices {
4098             let virtio_device = handle.virtio_device.lock().unwrap();
4099             if let Some(device_counters) = virtio_device.counters() {
4100                 counters.insert(handle.id.clone(), device_counters.clone());
4101             }
4102         }
4103 
4104         counters
4105     }
4106 
4107     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4108         if let Some(balloon) = &self.balloon {
4109             return balloon
4110                 .lock()
4111                 .unwrap()
4112                 .resize(size)
4113                 .map_err(DeviceManagerError::VirtioBalloonResize);
4114         }
4115 
4116         warn!("No balloon setup: Can't resize the balloon");
4117         Err(DeviceManagerError::MissingVirtioBalloon)
4118     }
4119 
4120     pub fn balloon_size(&self) -> u64 {
4121         if let Some(balloon) = &self.balloon {
4122             return balloon.lock().unwrap().get_actual();
4123         }
4124 
4125         0
4126     }
4127 
4128     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4129         self.device_tree.clone()
4130     }
4131 
4132     pub fn restore_devices(
4133         &mut self,
4134         snapshot: Snapshot,
4135     ) -> std::result::Result<(), MigratableError> {
4136         // Finally, restore all devices associated with the DeviceManager.
4137         // It's important to restore devices in the right order; the device
4138         // tree is traversed in reverse breadth-first order so that a child is
4139         // restored before its parent node.
4140         for node in self
4141             .device_tree
4142             .lock()
4143             .unwrap()
4144             .breadth_first_traversal()
4145             .rev()
4146         {
4147             // Restore the node
4148             if let Some(migratable) = &node.migratable {
4149                 info!("Restoring {} from DeviceManager", node.id);
4150                 if let Some(snapshot) = snapshot.snapshots.get(&node.id) {
4151                     migratable.lock().unwrap().pause()?;
4152                     migratable.lock().unwrap().restore(*snapshot.clone())?;
4153                 } else {
4154                     return Err(MigratableError::Restore(anyhow!(
4155                         "Missing device {}",
4156                         node.id
4157                     )));
4158                 }
4159             }
4160         }
4161 
4162         // The devices have been fully restored, we can now update the
4163         // restoring state of the DeviceManager.
4164         self.restoring = false;
4165 
4166         Ok(())
4167     }
4168 
4169     #[cfg(target_arch = "x86_64")]
4170     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4171         self.ged_notification_device
4172             .as_ref()
4173             .unwrap()
4174             .lock()
4175             .unwrap()
4176             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4177             .map_err(DeviceManagerError::PowerButtonNotification)
4178     }
4179 
4180     #[cfg(target_arch = "aarch64")]
4181     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4182         // There are two use cases:
4183         // 1. Direct kernel boot with a device tree.
4184         // 2. ACPI + UEFI boot.
4185 
4186         // Trigger a GPIO pin 3 event to satisfy use case 1.
4187         self.gpio_device
4188             .as_ref()
4189             .unwrap()
4190             .lock()
4191             .unwrap()
4192             .trigger_key(3)
4193             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4194         // Trigger a GED power button event to satisfy use case 2.
4195         self.ged_notification_device
4196             .as_ref()
4197             .unwrap()
4198             .lock()
4199             .unwrap()
4200             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4201             .map_err(DeviceManagerError::PowerButtonNotification)
4203     }
4204 
4205     pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
4206         &self.iommu_attached_devices
4207     }
4208 
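         /// Reject identifiers that start with "__" or that already exist in
         /// the device tree.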
4209     fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4210         if let Some(id) = id {
4211             if id.starts_with("__") {
4212                 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4213             }
4214 
4215             if self.device_tree.lock().unwrap().contains_key(id) {
4216                 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4217             }
4218         }
4219 
4220         Ok(())
4221     }
4222 
4223     pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
4224         &self.acpi_platform_addresses
4225     }
4226 }
4227 
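     // Find the NUMA node whose memory zones contain the given zone id, if any.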
4228 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4229     for (numa_node_id, numa_node) in numa_nodes.iter() {
4230         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4231             return Some(*numa_node_id);
4232         }
4233     }
4234 
4235     None
4236 }
4237 
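     // Helper whose Aml implementation describes the TPM2 device and its fixed
     // MMIO region in the DSDT.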
4238 struct TpmDevice {}
4239 
4240 impl Aml for TpmDevice {
4241     fn to_aml_bytes(&self) -> Vec<u8> {
4242         aml::Device::new(
4243             "TPM2".into(),
4244             vec![
4245                 &aml::Name::new("_HID".into(), &"MSFT0101"),
4246                 &aml::Name::new("_STA".into(), &(0xF_usize)),
4247                 &aml::Name::new(
4248                     "_CRS".into(),
4249                     &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
4250                         true,
4251                         layout::TPM_START.0 as u32,
4252                         layout::TPM_SIZE as u32,
4253                     )]),
4254                 ),
4255             ],
4256         )
4257         .to_aml_bytes()
4258     }
4259 }
4260 
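     // DSDT content generated by the DeviceManager: the PCI hotplug controller
     // (PHPR), the PCI segments, the motherboard resources, the serial port
     // (when enabled), the S5 sleep state, the power button, the optional TPM
     // device and the GED notification device.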
4261 impl Aml for DeviceManager {
4262     fn append_aml_bytes(&self, bytes: &mut Vec<u8>) {
4263         #[cfg(target_arch = "aarch64")]
4264         use arch::aarch64::DeviceInfoForFdt;
4265 
4266         let mut pci_scan_methods = Vec::new();
4267         for i in 0..self.pci_segments.len() {
4268             pci_scan_methods.push(aml::MethodCall::new(
4269                 format!("\\_SB_.PCI{:X}.PCNT", i).as_str().into(),
4270                 vec![],
4271             ));
4272         }
4273         let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
4274         for method in &pci_scan_methods {
4275             pci_scan_inner.push(method)
4276         }
4277 
4278         // PCI hotplug controller
4279         aml::Device::new(
4280             "_SB_.PHPR".into(),
4281             vec![
4282                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
4283                 &aml::Name::new("_STA".into(), &0x0bu8),
4284                 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
4285                 &aml::Mutex::new("BLCK".into(), 0),
4286                 &aml::Name::new(
4287                     "_CRS".into(),
4288                     &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
4289                         aml::AddressSpaceCachable::NotCacheable,
4290                         true,
4291                         self.acpi_address.0,
4292                         self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
4293                     )]),
4294                 ),
4295                 // OpRegion and Fields map MMIO range into individual field values
4296                 &aml::OpRegion::new(
4297                     "PCST".into(),
4298                     aml::OpRegionSpace::SystemMemory,
4299                     self.acpi_address.0 as usize,
4300                     DEVICE_MANAGER_ACPI_SIZE,
4301                 ),
4302                 &aml::Field::new(
4303                     "PCST".into(),
4304                     aml::FieldAccessType::DWord,
4305                     aml::FieldUpdateRule::WriteAsZeroes,
4306                     vec![
4307                         aml::FieldEntry::Named(*b"PCIU", 32),
4308                         aml::FieldEntry::Named(*b"PCID", 32),
4309                         aml::FieldEntry::Named(*b"B0EJ", 32),
4310                         aml::FieldEntry::Named(*b"PSEG", 32),
4311                     ],
4312                 ),
4313                 &aml::Method::new(
4314                     "PCEJ".into(),
4315                     2,
4316                     true,
4317                     vec![
4318                         // Take lock defined above
4319                         &aml::Acquire::new("BLCK".into(), 0xffff),
4320                         // Choose the current segment
4321                         // Select the PCI segment (second argument)
4322                         // Write PCI bus number (in first argument) to I/O port via field
4323                         // Set the bit for the device slot (first argument) in the B0EJ field to request the eject
4324                         // Release lock
4325                         &aml::Release::new("BLCK".into()),
4326                         // Return 0
4327                         &aml::Return::new(&aml::ZERO),
4328                     ],
4329                 ),
4330                 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
4331             ],
4332         )
4333         .append_aml_bytes(bytes);
4334 
4335         for segment in &self.pci_segments {
4336             segment.append_aml_bytes(bytes);
4337         }
4338 
4339         let mut mbrd_memory = Vec::new();
4340 
4341         for segment in &self.pci_segments {
4342             mbrd_memory.push(aml::Memory32Fixed::new(
4343                 true,
4344                 segment.mmio_config_address as u32,
4345                 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
4346             ))
4347         }
4348 
4349         let mut mbrd_memory_refs = Vec::new();
4350         for mbrd_memory_ref in &mbrd_memory {
4351             mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
4352         }
4353 
4354         aml::Device::new(
4355             "_SB_.MBRD".into(),
4356             vec![
4357                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")),
4358                 &aml::Name::new("_UID".into(), &aml::ZERO),
4359                 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
4360             ],
4361         )
4362         .append_aml_bytes(bytes);
4363 
4364         // Serial device
4365         #[cfg(target_arch = "x86_64")]
4366         let serial_irq = 4;
4367         #[cfg(target_arch = "aarch64")]
4368         let serial_irq =
4369             if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
4370                 self.get_device_info()
4371                     .clone()
4372                     .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
4373                     .unwrap()
4374                     .irq()
4375             } else {
4376                 // If serial is turned off, add a fake device with invalid irq.
4377                 // If serial is disabled, use a placeholder IRQ; no COM1 node is generated below.
4378             };
4379         if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
4380             aml::Device::new(
4381                 "_SB_.COM1".into(),
4382                 vec![
4383                     &aml::Name::new(
4384                         "_HID".into(),
4385                         #[cfg(target_arch = "x86_64")]
4386                         &aml::EisaName::new("PNP0501"),
4387                         #[cfg(target_arch = "aarch64")]
4388                         &"ARMH0011",
4389                     ),
4390                     &aml::Name::new("_UID".into(), &aml::ZERO),
4391                     &aml::Name::new("_DDN".into(), &"COM1"),
4392                     &aml::Name::new(
4393                         "_CRS".into(),
4394                         &aml::ResourceTemplate::new(vec![
4395                             &aml::Interrupt::new(true, true, false, false, serial_irq),
4396                             #[cfg(target_arch = "x86_64")]
4397                             &aml::Io::new(0x3f8, 0x3f8, 0, 0x8),
4398                             #[cfg(target_arch = "aarch64")]
4399                             &aml::Memory32Fixed::new(
4400                                 true,
4401                                 arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
4402                                 MMIO_LEN as u32,
4403                             ),
4404                         ]),
4405                     ),
4406                 ],
4407             )
4408             .append_aml_bytes(bytes);
4409         }
4410 
4411         aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).append_aml_bytes(bytes);
4412 
4413         aml::Device::new(
4414             "_SB_.PWRB".into(),
4415             vec![
4416                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")),
4417                 &aml::Name::new("_UID".into(), &aml::ZERO),
4418             ],
4419         )
4420         .append_aml_bytes(bytes);
4421 
4422         if self.config.lock().unwrap().tpm.is_some() {
4423             // Add tpm device
4424             let tpm_acpi = TpmDevice {};
4425             let tpm_dsdt_data = tpm_acpi.to_aml_bytes();
4426             bytes.extend_from_slice(tpm_dsdt_data.as_slice());
4427         }
4428 
4429         self.ged_notification_device
4430             .as_ref()
4431             .unwrap()
4432             .lock()
4433             .unwrap()
4434             .append_aml_bytes(bytes);
4435     }
4436 }
4437 
4438 impl Pausable for DeviceManager {
4439     fn pause(&mut self) -> result::Result<(), MigratableError> {
4440         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4441             if let Some(migratable) = &device_node.migratable {
4442                 migratable.lock().unwrap().pause()?;
4443             }
4444         }
4445         // On AArch64, pausing the DeviceManager must also "pause" the
4446         // GIC, which flushes the GIC pending tables and ITS tables to
4447         // guest RAM.
4448         #[cfg(target_arch = "aarch64")]
4449         {
4450             self.get_interrupt_controller()
4451                 .unwrap()
4452                 .lock()
4453                 .unwrap()
4454                 .pause()?;
4455         };
4456 
4457         Ok(())
4458     }
4459 
4460     fn resume(&mut self) -> result::Result<(), MigratableError> {
4461         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4462             if let Some(migratable) = &device_node.migratable {
4463                 migratable.lock().unwrap().resume()?;
4464             }
4465         }
4466 
4467         Ok(())
4468     }
4469 }
4470 
4471 impl Snapshottable for DeviceManager {
4472     fn id(&self) -> String {
4473         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4474     }
4475 
4476     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4477         let mut snapshot = Snapshot::new(DEVICE_MANAGER_SNAPSHOT_ID);
4478 
4479         // We aggregate all device snapshots.
4480         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4481             if let Some(migratable) = &device_node.migratable {
4482                 let device_snapshot = migratable.lock().unwrap().snapshot()?;
4483                 snapshot.add_snapshot(device_snapshot);
4484             }
4485         }
4486 
4487         // Then we store the DeviceManager state.
4488         snapshot.add_data_section(SnapshotDataSection::new_from_state(
4489             DEVICE_MANAGER_SNAPSHOT_ID,
4490             &self.state(),
4491         )?);
4492 
4493         Ok(snapshot)
4494     }
4495 
4496     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
4497         // Let's first restore the DeviceManager.
4498 
4499         self.set_state(&snapshot.to_state(DEVICE_MANAGER_SNAPSHOT_ID)?);
4500 
4501         // Now that DeviceManager is updated with the right states, it's time
4502         // to create the devices based on the configuration.
4503         self.create_devices(None, None, None)
4504             .map_err(|e| MigratableError::Restore(anyhow!("Could not create devices {:?}", e)))?;
4505 
4506         Ok(())
4507     }
4508 }
4509 
4510 impl Transportable for DeviceManager {}
4511 
4512 impl Migratable for DeviceManager {
4513     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4514         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4515             if let Some(migratable) = &device_node.migratable {
4516                 migratable.lock().unwrap().start_dirty_log()?;
4517             }
4518         }
4519         Ok(())
4520     }
4521 
4522     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4523         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4524             if let Some(migratable) = &device_node.migratable {
4525                 migratable.lock().unwrap().stop_dirty_log()?;
4526             }
4527         }
4528         Ok(())
4529     }
4530 
4531     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4532         let mut tables = Vec::new();
4533         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4534             if let Some(migratable) = &device_node.migratable {
4535                 tables.push(migratable.lock().unwrap().dirty_log()?);
4536             }
4537         }
4538         Ok(MemoryRangeTable::new_from_tables(tables))
4539     }
4540 
4541     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4542         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4543             if let Some(migratable) = &device_node.migratable {
4544                 migratable.lock().unwrap().start_migration()?;
4545             }
4546         }
4547         Ok(())
4548     }
4549 
4550     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4551         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4552             if let Some(migratable) = &device_node.migratable {
4553                 migratable.lock().unwrap().complete_migration()?;
4554             }
4555         }
4556         Ok(())
4557     }
4558 }
4559 
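     // Layout of the PCI hotplug register window exposed through the PHPR ACPI
     // device: four little-endian 32-bit fields (PCIU, PCID, B0EJ, PSEG) at the
     // offsets below, matching the AML Field definition above.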
4560 const PCIU_FIELD_OFFSET: u64 = 0;
4561 const PCID_FIELD_OFFSET: u64 = 4;
4562 const B0EJ_FIELD_OFFSET: u64 = 8;
4563 const PSEG_FIELD_OFFSET: u64 = 12;
4564 const PCIU_FIELD_SIZE: usize = 4;
4565 const PCID_FIELD_SIZE: usize = 4;
4566 const B0EJ_FIELD_SIZE: usize = 4;
4567 const PSEG_FIELD_SIZE: usize = 4;
4568 
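     // Guest accesses to the PCI hotplug register window described by the PHPR
     // device above are handled here.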
4569 impl BusDevice for DeviceManager {
4570     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4571         match offset {
4572             PCIU_FIELD_OFFSET => {
4573                 assert!(data.len() == PCIU_FIELD_SIZE);
4574                 data.copy_from_slice(
4575                     &self.pci_segments[self.selected_segment]
4576                         .pci_devices_up
4577                         .to_le_bytes(),
4578                 );
4579                 // Clear the PCIU bitmap
4580                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4581             }
4582             PCID_FIELD_OFFSET => {
4583                 assert!(data.len() == PCID_FIELD_SIZE);
4584                 data.copy_from_slice(
4585                     &self.pci_segments[self.selected_segment]
4586                         .pci_devices_down
4587                         .to_le_bytes(),
4588                 );
4589                 // Clear the PCID bitmap
4590                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4591             }
4592             B0EJ_FIELD_OFFSET => {
4593                 assert!(data.len() == B0EJ_FIELD_SIZE);
4594                 // Always return an empty bitmap since ejects are handled
4595                 // immediately during the write access.
4596                 data.fill(0);
4597             }
4598             PSEG_FIELD_OFFSET => {
4599                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4600                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4601             }
4602             _ => error!(
4603                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4604                 base, offset
4605             ),
4606         }
4607 
4608         debug!(
4609             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4610             base, offset, data
4611         )
4612     }
4613 
4614     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4615         match offset {
4616             B0EJ_FIELD_OFFSET => {
4617                 assert!(data.len() == B0EJ_FIELD_SIZE);
4618                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4619                 data_array.copy_from_slice(data);
4620                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4621 
4622                 while slot_bitmap > 0 {
4623                     let slot_id = slot_bitmap.trailing_zeros();
4624                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4625                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4626                     }
4627                     slot_bitmap &= !(1 << slot_id);
4628                 }
4629             }
4630             PSEG_FIELD_OFFSET => {
4631                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4632                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4633                 data_array.copy_from_slice(data);
4634                 let selected_segment = u32::from_le_bytes(data_array) as usize;
4635                 if selected_segment >= self.pci_segments.len() {
4636                     error!(
4637                         "Segment selection out of range: {} >= {}",
4638                         selected_segment,
4639                         self.pci_segments.len()
4640                     );
4641                     return None;
4642                 }
4643                 self.selected_segment = selected_segment;
4644             }
4645             _ => error!(
4646                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4647                 base, offset
4648             ),
4649         }
4650 
4651         debug!(
4652             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4653             base, offset, data
4654         );
4655 
4656         None
4657     }
4658 }
4659 
4660 impl Drop for DeviceManager {
4661     fn drop(&mut self) {
4662         for handle in self.virtio_devices.drain(..) {
4663             handle.virtio_device.lock().unwrap().shutdown();
4664         }
4665     }
4666 }
4667