xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision f67b3f79ea19c9a66e04074cbbf5d292f6529e43)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use crate::config::{
13     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
14     VhostMode, VmConfig, VsockConfig,
15 };
16 use crate::device_tree::{DeviceNode, DeviceTree};
17 #[cfg(feature = "kvm")]
18 use crate::interrupt::kvm::KvmMsiInterruptManager as MsiInterruptManager;
19 #[cfg(feature = "mshv")]
20 use crate::interrupt::mshv::MshvMsiInterruptManager as MsiInterruptManager;
21 use crate::interrupt::LegacyUserspaceInterruptManager;
22 #[cfg(feature = "acpi")]
23 use crate::memory_manager::MEMORY_MANAGER_ACPI_SIZE;
24 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager};
25 use crate::seccomp_filters::{get_seccomp_filter, Thread};
26 use crate::serial_buffer::SerialBuffer;
27 use crate::sigwinch_listener::start_sigwinch_listener;
28 use crate::GuestRegionMmap;
29 use crate::PciDeviceInfo;
30 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
31 #[cfg(feature = "acpi")]
32 use acpi_tables::{aml, aml::Aml};
33 use anyhow::anyhow;
34 #[cfg(target_arch = "aarch64")]
35 use arch::aarch64::gic::gicv3_its::kvm::KvmGicV3Its;
36 #[cfg(feature = "acpi")]
37 use arch::layout;
38 #[cfg(target_arch = "x86_64")]
39 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
40 #[cfg(any(target_arch = "aarch64", feature = "acpi"))]
41 use arch::NumaNodes;
42 #[cfg(target_arch = "aarch64")]
43 use arch::{DeviceType, MmioDeviceInfo};
44 use block_util::{
45     async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
46     fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync,
47     raw_async::RawFileDisk, raw_sync::RawFileDiskSync, vhdx_sync::VhdxDiskSync, ImageType,
48 };
49 #[cfg(target_arch = "aarch64")]
50 use devices::gic;
51 #[cfg(target_arch = "x86_64")]
52 use devices::ioapic;
53 #[cfg(target_arch = "aarch64")]
54 use devices::legacy::Pl011;
55 #[cfg(target_arch = "x86_64")]
56 use devices::legacy::Serial;
57 use devices::{
58     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
59 };
60 #[cfg(feature = "kvm")]
61 use hypervisor::kvm_ioctls::*;
62 use hypervisor::DeviceFd;
63 #[cfg(feature = "mshv")]
64 use hypervisor::IoEventAddress;
65 use libc::{
66     isatty, tcgetattr, tcsetattr, termios, ECHO, ICANON, ISIG, MAP_NORESERVE, MAP_PRIVATE,
67     MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW,
68 };
69 use pci::VfioPciDevice;
70 use pci::{
71     DeviceRelocation, PciBarRegionType, PciBus, PciConfigIo, PciConfigMmio, PciDevice, PciRoot,
72     VfioUserPciDevice, VfioUserPciDeviceError,
73 };
74 use seccompiler::SeccompAction;
75 use std::collections::HashMap;
76 use std::convert::TryInto;
77 use std::fs::{read_link, File, OpenOptions};
78 use std::io::{self, stdout, Seek, SeekFrom};
79 use std::mem::zeroed;
80 use std::num::Wrapping;
81 use std::os::unix::fs::OpenOptionsExt;
82 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
83 use std::path::PathBuf;
84 use std::result;
85 use std::sync::{Arc, Barrier, Mutex};
86 #[cfg(feature = "acpi")]
87 use uuid::Uuid;
88 use vfio_ioctls::{VfioContainer, VfioDevice};
89 use virtio_devices::transport::VirtioPciDevice;
90 use virtio_devices::transport::VirtioTransport;
91 use virtio_devices::vhost_user::VhostUserConfig;
92 use virtio_devices::{DmaRemapping, Endpoint, IommuMapping};
93 use virtio_devices::{VirtioSharedMemory, VirtioSharedMemoryList};
94 use vm_allocator::SystemAllocator;
95 use vm_device::dma_mapping::vfio::VfioDmaMapping;
96 use vm_device::interrupt::{
97     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
98 };
99 use vm_device::{Bus, BusDevice, Resource};
100 use vm_memory::guest_memory::FileOffset;
101 use vm_memory::GuestMemoryRegion;
102 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
103 #[cfg(all(target_arch = "x86_64", feature = "cmos"))]
104 use vm_memory::{GuestAddressSpace, GuestMemory};
105 use vm_migration::{
106     protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
107     SnapshotDataSection, Snapshottable, Transportable,
108 };
109 use vm_virtio::{VirtioDeviceType, VirtioIommuRemapping};
110 use vmm_sys_util::eventfd::EventFd;
111 
// MMIO length constant (0x1000, i.e. 4096). Only compiled on aarch64.
// NOTE(review): presumably the size of a per-device MMIO window — confirm at
// the use sites, which are outside this part of the file.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Device name strings.
//
// Constants ending in `_NAME_PREFIX` are prefixes, constants ending in `_NAME`
// are complete names. All of them start with an underscore.
// NOTE(review): presumably the prefixes are combined with a counter to build
// unique device identifiers — confirm against the code generating the names.

// NOTE(review): "_vfio" is itself a string prefix of "_vfio_user" below, so a
// plain `starts_with(VFIO_DEVICE_NAME_PREFIX)` check would also match VFIO
// user device names.
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";

const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";

// Name of the IOAPIC device (x86_64 only).
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "_ioapic";

const SERIAL_DEVICE_NAME_PREFIX: &str = "_serial";
// GPIO device name prefix (aarch64 only).
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME_PREFIX: &str = "_gpio";

// Names and name prefixes for the virtio devices.
const CONSOLE_DEVICE_NAME: &str = "_console";
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const MEM_DEVICE_NAME_PREFIX: &str = "_mem";
const BALLOON_DEVICE_NAME: &str = "_balloon";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const RNG_DEVICE_NAME: &str = "_rng";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "_watchdog";

// Name of the paravirtualized IOMMU device.
const IOMMU_DEVICE_NAME: &str = "_iommu";

// Name prefix for the virtio-pci transport devices.
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
140 
/// Errors associated with device manager.
///
/// Variants that carry a payload wrap the underlying error reported by the
/// failing operation; unit variants describe a condition detected by the
/// device manager itself.
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Failed converting Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed parsing disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(vmm_sys_util::errno::Error),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed creating interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed creating a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed cloning a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed removing a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed removing a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed removing a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// Incorrect device ID as it is already used by another device.
    DeviceIdAlreadyInUse,

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed updating guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot generate a ResizeSender from the Resize object.
    CreateResizeSender(virtio_devices::mem::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed updating guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pci could not be found.
    MissingVirtioPciResources,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::vhdx::VhdxError),

    /// Failed adding DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed removing DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),
}
/// Result type whose error variant is a [`DeviceManagerError`].
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// Shared, mutex-protected handle to a virtio device trait object.
type VirtioDeviceArc = Arc<Mutex<dyn virtio_devices::VirtioDevice>>;

// ACPI-related size constant (0x10), only compiled with the "acpi" feature.
// NOTE(review): presumably the length of the device manager's ACPI MMIO
// region — confirm at the use sites.
#[cfg(feature = "acpi")]
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

// ioctl request codes passed to `libc::ioctl` on the pty main fd by
// `create_pty`.
// TIOCSPTLCK: used with a pointer to a zero value to unlock the pty.
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
// TIOCGTPEER: used with open flags; the ioctl's return value is used as the
// fd of the peer (sub) side.
// NOTE(review): the value 0x5441 corresponds to what Linux names
// TIOCGPTPEER; the constant name here drops the 'P' — confirm before renaming.
const TIOCGTPEER: libc::c_int = 0x5441;
460 
461 pub fn create_pty(non_blocking: bool) -> io::Result<(File, File, PathBuf)> {
462     // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
463     // This is done to try and use the devpts filesystem that
464     // could be available for use in the process's namespace first.
465     // Ideally these are all the same file though but different
466     // kernels could have things setup differently.
467     // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
468     // for further details.
469 
470     let custom_flags = libc::O_NOCTTY | if non_blocking { libc::O_NONBLOCK } else { 0 };
471     let main = match OpenOptions::new()
472         .read(true)
473         .write(true)
474         .custom_flags(custom_flags)
475         .open("/dev/pts/ptmx")
476     {
477         Ok(f) => f,
478         _ => OpenOptions::new()
479             .read(true)
480             .write(true)
481             .custom_flags(custom_flags)
482             .open("/dev/ptmx")?,
483     };
484     let mut unlock: libc::c_ulong = 0;
485     unsafe {
486         libc::ioctl(
487             main.as_raw_fd(),
488             TIOCSPTLCK.try_into().unwrap(),
489             &mut unlock,
490         )
491     };
492 
493     let sub_fd = unsafe {
494         libc::ioctl(
495             main.as_raw_fd(),
496             TIOCGTPEER.try_into().unwrap(),
497             libc::O_NOCTTY | libc::O_RDWR,
498         )
499     };
500     if sub_fd == -1 {
501         return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
502     }
503 
504     let proc_path = PathBuf::from(format!("/proc/self/fd/{}", sub_fd));
505     let path = read_link(proc_path)?;
506 
507     Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
508 }
509 
/// Handles to the console-related devices.
///
/// Both fields are optional; the derived `Default` leaves them as `None`.
#[derive(Default)]
pub struct Console {
    #[cfg(target_arch = "x86_64")]
    // Serial port on 0x3f8
    serial: Option<Arc<Mutex<Serial>>>,
    // On aarch64 the serial device is a PL011 instead.
    #[cfg(target_arch = "aarch64")]
    serial: Option<Arc<Mutex<Pl011>>>,
    // Resizer whose `update_console_size()` is invoked by
    // `Console::update_console_size`.
    console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
}
519 
520 impl Console {
521     pub fn queue_input_bytes_serial(&self, out: &[u8]) -> vmm_sys_util::errno::Result<()> {
522         if self.serial.is_some() {
523             self.serial
524                 .as_ref()
525                 .unwrap()
526                 .lock()
527                 .unwrap()
528                 .queue_input_bytes(out)?;
529         }
530         Ok(())
531     }
532 
533     pub fn update_console_size(&self) {
534         if let Some(resizer) = self.console_resizer.as_ref() {
535             resizer.update_console_size()
536         }
537     }
538 }
539 
// Bundles the objects that `DeviceRelocation::move_bar` has to keep in sync
// when a PCI BAR moves: the system allocator, the buses, the hypervisor VM
// handle and the device tree.
struct AddressManager {
    // Allocator for IO and MMIO address ranges.
    allocator: Arc<Mutex<SystemAllocator>>,
    // Port IO bus (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    io_bus: Arc<Bus>,
    // MMIO bus.
    mmio_bus: Arc<Bus>,
    // Hypervisor VM handle, used for ioevent registration and user memory
    // regions.
    vm: Arc<dyn hypervisor::Vm>,
    // Device tree holding the resources recorded for each device.
    device_tree: Arc<Mutex<DeviceTree>>,
}
548 
impl DeviceRelocation for AddressManager {
    /// Relocates a PCI BAR of `len` bytes from `old_base` to `new_base`.
    ///
    /// Steps, in order:
    /// 1. Free the old range in the system allocator and reserve the new one
    ///    (IO, 32-bit MMIO hole or 64-bit MMIO depending on `region_type`).
    /// 2. Move the range on the matching bus (PIO or MMIO).
    /// 3. If `pci_dev` is a `VirtioPciDevice`, update the matching
    ///    `MmioAddressRange` resource in the device tree, then either
    ///    re-register the ioeventfds or remap the shared memory region.
    /// 4. Forward to `pci_dev.move_bar(old_base, new_base)`.
    ///
    /// Every failure is reported as an `io::Error` of kind `Other`.
    ///
    /// NOTE(review): the steps are not rolled back on failure; e.g. if
    /// reserving the new range fails, the old range has already been freed.
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    // The allocator lock is taken separately for the free and
                    // for the allocation.
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                // On aarch64 this only logs; execution continues with the
                // code following the match.
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                // Update system allocator
                // 32-bit BARs come from the MMIO hole allocator, 64-bit BARs
                // from the general MMIO allocator.
                if region_type == PciBarRegionType::Memory32BitRegion {
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_mmio_hole_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                "failed allocating new 32 bits MMIO range",
                            )
                        })?;
                } else {
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_mmio_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_mmio_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                "failed allocating new 64 bits MMIO range",
                            )
                        })?;
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // The remaining bookkeeping only applies to virtio-pci devices; any
        // other device type goes straight to the final `move_bar` call.
        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            // Update the device_tree resources associated with the device
            if let Some(node) = self
                .device_tree
                .lock()
                .unwrap()
                .get_mut(&virtio_pci_dev.id())
            {
                // Only the first MMIO range whose base matches `old_base` is
                // rewritten.
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    if let Resource::MmioAddressRange { base, .. } = resource {
                        if *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{:x} for device {}",
                            old_base,
                            virtio_pci_dev.id()
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!(
                        "Couldn't find device {} from device tree",
                        virtio_pci_dev.id()
                    ),
                ));
            }

            // When the device reports `new_base` as its configuration BAR
            // address, the ioeventfds are moved; otherwise the BAR is checked
            // against the device's shared memory region.
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // Drop the ioevents registered at the old addresses first,
                // then register them again at the new ones.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {:?}", e),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {:?}", e),
                            )
                        })?;
                }
            } else {
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    // Nothing is remapped unless the shared memory region
                    // starts exactly at `old_base`.
                    if shm_regions.addr.raw_value() == old_base {
                        // Remove the mapping at the old guest address; the
                        // same slot, length and host address are reused below.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {:?}", e),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {:?}", e),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {:?}", e),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the device itself record the new BAR address.
        pci_dev.move_bar(old_base, new_base)
    }
}
755 
// Serializable subset of the device manager: the device tree and the device
// ID counter.
// NOTE(review): presumably the payload stored in and restored from a
// snapshot — confirm against the Snapshottable implementation.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}
761 
/// Both ends of a pseudo-terminal together with the path of the sub side.
#[derive(Debug)]
pub struct PtyPair {
    /// File for the main side of the pty.
    pub main: File,
    /// File for the sub side of the pty.
    pub sub: File,
    /// Filesystem path associated with the pty.
    pub path: PathBuf,
}

impl PtyPair {
    /// Returns a new pair whose files are duplicates of this pair's files and
    /// whose path is a copy of this pair's path.
    ///
    /// Panics if either file cannot be duplicated.
    fn clone(&self) -> Self {
        let Self { main, sub, path } = self;
        let main = main.try_clone().unwrap();
        let sub = sub.try_clone().unwrap();
        let path = path.clone();
        Self { main, sub, path }
    }
}
778 
/// Shared handle to a PCI device, one variant per supported device kind.
///
/// Cloning only clones the inner `Arc`, so all clones refer to the same
/// device.
#[derive(Clone)]
pub enum PciDeviceHandle {
    /// VFIO PCI device.
    Vfio(Arc<Mutex<VfioPciDevice>>),
    /// Virtio PCI device.
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    /// VFIO user PCI device.
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}
785 
786 pub struct DeviceManager {
787     // Manage address space related to devices
788     address_manager: Arc<AddressManager>,
789 
790     // Console abstraction
791     console: Arc<Console>,
792 
793     // console PTY
794     console_pty: Option<Arc<Mutex<PtyPair>>>,
795 
796     // serial PTY
797     serial_pty: Option<Arc<Mutex<PtyPair>>>,
798 
799     // pty foreground status,
800     console_resize_pipe: Option<Arc<File>>,
801 
802     // Interrupt controller
803     #[cfg(target_arch = "x86_64")]
804     interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
805     #[cfg(target_arch = "aarch64")]
806     interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,
807 
808     // Things to be added to the commandline (i.e. for virtio-mmio)
809     cmdline_additions: Vec<String>,
810 
811     // ACPI GED notification device
812     #[cfg(feature = "acpi")]
813     ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,
814 
815     // VM configuration
816     config: Arc<Mutex<VmConfig>>,
817 
818     // Memory Manager
819     memory_manager: Arc<Mutex<MemoryManager>>,
820 
821     // The virtio devices on the system
822     virtio_devices: Vec<(VirtioDeviceArc, bool, String)>,
823 
824     // List of bus devices
825     // Let the DeviceManager keep strong references to the BusDevice devices.
826     // This allows the IO and MMIO buses to be provided with Weak references,
827     // which prevents cyclic dependencies.
828     bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,
829 
830     // Counter to keep track of the consumed device IDs.
831     device_id_cnt: Wrapping<usize>,
832 
833     // Keep a reference to the PCI bus
834     pci_bus: Option<Arc<Mutex<PciBus>>>,
835 
836     #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
837     // MSI Interrupt Manager
838     msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
839 
840     #[cfg_attr(feature = "mshv", allow(dead_code))]
841     // Legacy Interrupt Manager
842     legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,
843 
844     // Passthrough device handle
845     passthrough_device: Option<Arc<dyn hypervisor::Device>>,
846 
847     // VFIO container
848     // Only one container can be created, therefore it is stored as part of the
849     // DeviceManager to be reused.
850     vfio_container: Option<Arc<VfioContainer>>,
851 
852     // Paravirtualized IOMMU
853     iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
854 
855     // PCI information about devices attached to the paravirtualized IOMMU
856     // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
857     // representing the devices attached to the virtual IOMMU. This is useful
858     // information for filling the ACPI VIOT table.
859     iommu_attached_devices: Option<(u32, Vec<u32>)>,
860 
861     // Bitmap of PCI devices to hotplug.
862     pci_devices_up: u32,
863 
864     // Bitmap of PCI devices to hotunplug.
865     pci_devices_down: u32,
866 
867     // List of allocated IRQs for each PCI slot.
868     pci_irq_slots: [u8; 32],
869 
870     // Tree of devices, representing the dependencies between devices.
871     // Useful for introspection, snapshot and restore.
872     device_tree: Arc<Mutex<DeviceTree>>,
873 
874     // Exit event
875     exit_evt: EventFd,
876     reset_evt: EventFd,
877 
878     #[cfg(target_arch = "aarch64")]
879     id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,
880 
881     // seccomp action
882     seccomp_action: SeccompAction,
883 
884     // List of guest NUMA nodes.
885     #[cfg(any(target_arch = "aarch64", feature = "acpi"))]
886     numa_nodes: NumaNodes,
887 
888     // Possible handle to the virtio-balloon device
889     balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,
890 
891     // Virtio Device activation EventFd to allow the VMM thread to trigger device
892     // activation and thus start the threads from the VMM thread
893     activate_evt: EventFd,
894 
895     #[cfg(feature = "acpi")]
896     acpi_address: GuestAddress,
897 
    // List of virtio-mem devices
899     virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,
900 
901     #[cfg(target_arch = "aarch64")]
902     // GPIO device for AArch64
903     gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,
904 
905     // Flag to force setting the iommu on virtio devices
906     force_iommu: bool,
907 
908     // Helps identify if the VM is currently being restored
909     restoring: bool,
910 }
911 
912 impl DeviceManager {
913     #[allow(clippy::too_many_arguments)]
914     pub fn new(
915         vm: Arc<dyn hypervisor::Vm>,
916         config: Arc<Mutex<VmConfig>>,
917         memory_manager: Arc<Mutex<MemoryManager>>,
918         exit_evt: &EventFd,
919         reset_evt: &EventFd,
920         seccomp_action: SeccompAction,
921         #[cfg(any(target_arch = "aarch64", feature = "acpi"))] numa_nodes: NumaNodes,
922         activate_evt: &EventFd,
923         force_iommu: bool,
924         restoring: bool,
925     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
926         let device_tree = Arc::new(Mutex::new(DeviceTree::new()));
927 
928         let address_manager = Arc::new(AddressManager {
929             allocator: memory_manager.lock().unwrap().allocator(),
930             #[cfg(target_arch = "x86_64")]
931             io_bus: Arc::new(Bus::new()),
932             mmio_bus: Arc::new(Bus::new()),
933             vm: vm.clone(),
934             device_tree: Arc::clone(&device_tree),
935         });
936 
937         // First we create the MSI interrupt manager, the legacy one is created
938         // later, after the IOAPIC device creation.
939         // The reason we create the MSI one first is because the IOAPIC needs it,
940         // and then the legacy interrupt manager needs an IOAPIC. So we're
941         // handling a linear dependency chain:
942         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
943         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
944             Arc::new(MsiInterruptManager::new(
945                 Arc::clone(&address_manager.allocator),
946                 vm,
947             ));
948 
949         #[cfg(feature = "acpi")]
950         let acpi_address = address_manager
951             .allocator
952             .lock()
953             .unwrap()
954             .allocate_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
955             .ok_or(DeviceManagerError::AllocateIoPort)?;
956         let device_manager = DeviceManager {
957             address_manager: Arc::clone(&address_manager),
958             console: Arc::new(Console::default()),
959             interrupt_controller: None,
960             cmdline_additions: Vec::new(),
961             #[cfg(feature = "acpi")]
962             ged_notification_device: None,
963             config,
964             memory_manager,
965             virtio_devices: Vec::new(),
966             bus_devices: Vec::new(),
967             device_id_cnt: Wrapping(0),
968             pci_bus: None,
969             msi_interrupt_manager,
970             legacy_interrupt_manager: None,
971             passthrough_device: None,
972             vfio_container: None,
973             iommu_device: None,
974             iommu_attached_devices: None,
975             pci_devices_up: 0,
976             pci_devices_down: 0,
977             pci_irq_slots: [0; 32],
978             device_tree,
979             exit_evt: exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
980             reset_evt: reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
981             #[cfg(target_arch = "aarch64")]
982             id_to_dev_info: HashMap::new(),
983             seccomp_action,
984             #[cfg(any(target_arch = "aarch64", feature = "acpi"))]
985             numa_nodes,
986             balloon: None,
987             activate_evt: activate_evt
988                 .try_clone()
989                 .map_err(DeviceManagerError::EventFd)?,
990             #[cfg(feature = "acpi")]
991             acpi_address,
992             serial_pty: None,
993             console_pty: None,
994             console_resize_pipe: None,
995             virtio_mem_devices: Vec::new(),
996             #[cfg(target_arch = "aarch64")]
997             gpio_device: None,
998             force_iommu,
999             restoring,
1000         };
1001 
1002         let device_manager = Arc::new(Mutex::new(device_manager));
1003 
1004         #[cfg(feature = "acpi")]
1005         address_manager
1006             .mmio_bus
1007             .insert(
1008                 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
1009                 acpi_address.0,
1010                 DEVICE_MANAGER_ACPI_SIZE as u64,
1011             )
1012             .map_err(DeviceManagerError::BusError)?;
1013 
1014         Ok(device_manager)
1015     }
1016 
1017     pub fn serial_pty(&self) -> Option<PtyPair> {
1018         self.serial_pty
1019             .as_ref()
1020             .map(|pty| pty.lock().unwrap().clone())
1021     }
1022 
1023     pub fn console_pty(&self) -> Option<PtyPair> {
1024         self.console_pty
1025             .as_ref()
1026             .map(|pty| pty.lock().unwrap().clone())
1027     }
1028 
1029     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1030         self.console_resize_pipe.as_ref().map(Arc::clone)
1031     }
1032 
1033     pub fn create_devices(
1034         &mut self,
1035         serial_pty: Option<PtyPair>,
1036         console_pty: Option<PtyPair>,
1037         console_resize_pipe: Option<File>,
1038     ) -> DeviceManagerResult<()> {
1039         let mut virtio_devices: Vec<(VirtioDeviceArc, bool, String)> = Vec::new();
1040 
1041         let interrupt_controller = self.add_interrupt_controller()?;
1042 
1043         // Now we can create the legacy interrupt manager, which needs the freshly
1044         // formed IOAPIC device.
1045         let legacy_interrupt_manager: Arc<
1046             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1047         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1048             &interrupt_controller,
1049         )));
1050 
1051         #[cfg(feature = "acpi")]
1052         {
1053             let memory_manager_acpi_address = self.memory_manager.lock().unwrap().acpi_address;
1054             self.address_manager
1055                 .mmio_bus
1056                 .insert(
1057                     Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
1058                     memory_manager_acpi_address.0,
1059                     MEMORY_MANAGER_ACPI_SIZE as u64,
1060                 )
1061                 .map_err(DeviceManagerError::BusError)?;
1062         }
1063 
1064         #[cfg(target_arch = "x86_64")]
1065         self.add_legacy_devices(
1066             self.reset_evt
1067                 .try_clone()
1068                 .map_err(DeviceManagerError::EventFd)?,
1069         )?;
1070 
1071         #[cfg(target_arch = "aarch64")]
1072         self.add_legacy_devices(&legacy_interrupt_manager)?;
1073 
1074         #[cfg(feature = "acpi")]
1075         {
1076             self.ged_notification_device = self.add_acpi_devices(
1077                 &legacy_interrupt_manager,
1078                 self.reset_evt
1079                     .try_clone()
1080                     .map_err(DeviceManagerError::EventFd)?,
1081                 self.exit_evt
1082                     .try_clone()
1083                     .map_err(DeviceManagerError::EventFd)?,
1084             )?;
1085         }
1086 
1087         self.console = self.add_console_device(
1088             &legacy_interrupt_manager,
1089             &mut virtio_devices,
1090             serial_pty,
1091             console_pty,
1092             console_resize_pipe,
1093         )?;
1094 
1095         // Reserve some IRQs for PCI devices in case they need to support INTx.
1096         self.reserve_legacy_interrupts_for_pci_devices()?;
1097 
1098         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1099 
1100         virtio_devices.append(&mut self.make_virtio_devices()?);
1101 
1102         self.add_pci_devices(virtio_devices.clone())?;
1103 
1104         self.virtio_devices = virtio_devices;
1105 
1106         Ok(())
1107     }
1108 
1109     fn reserve_legacy_interrupts_for_pci_devices(&mut self) -> DeviceManagerResult<()> {
1110         // Reserve 8 IRQs which will be shared across all PCI devices.
1111         let num_irqs = 8;
1112         let mut irqs: Vec<u8> = Vec::new();
1113         for _ in 0..num_irqs {
1114             irqs.push(
1115                 self.address_manager
1116                     .allocator
1117                     .lock()
1118                     .unwrap()
1119                     .allocate_irq()
1120                     .ok_or(DeviceManagerError::AllocateIrq)? as u8,
1121             );
1122         }
1123 
1124         // There are 32 devices on the PCI bus, let's assign them an IRQ.
1125         for i in 0..32 {
1126             self.pci_irq_slots[i] = irqs[(i % num_irqs) as usize];
1127         }
1128 
1129         Ok(())
1130     }
1131 
1132     fn state(&self) -> DeviceManagerState {
1133         DeviceManagerState {
1134             device_tree: self.device_tree.lock().unwrap().clone(),
1135             device_id_cnt: self.device_id_cnt,
1136         }
1137     }
1138 
1139     fn set_state(&mut self, state: &DeviceManagerState) {
1140         self.device_tree = Arc::new(Mutex::new(state.device_tree.clone()));
1141         self.device_id_cnt = state.device_id_cnt;
1142     }
1143 
1144     #[cfg(target_arch = "aarch64")]
1145     /// Gets the information of the devices registered up to some point in time.
1146     pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
1147         &self.id_to_dev_info
1148     }
1149 
1150     #[allow(unused_variables)]
1151     fn add_pci_devices(
1152         &mut self,
1153         virtio_devices: Vec<(VirtioDeviceArc, bool, String)>,
1154     ) -> DeviceManagerResult<()> {
1155         let pci_root = PciRoot::new(None);
1156         let mut pci_bus = PciBus::new(
1157             pci_root,
1158             Arc::clone(&self.address_manager) as Arc<dyn DeviceRelocation>,
1159         );
1160 
1161         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1162 
1163         let (iommu_device, iommu_mapping) = if self.config.lock().unwrap().iommu {
1164             let (device, mapping) = virtio_devices::Iommu::new(
1165                 iommu_id.clone(),
1166                 self.seccomp_action.clone(),
1167                 self.exit_evt
1168                     .try_clone()
1169                     .map_err(DeviceManagerError::EventFd)?,
1170             )
1171             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1172             let device = Arc::new(Mutex::new(device));
1173             self.iommu_device = Some(Arc::clone(&device));
1174 
1175             // Fill the device tree with a new node. In case of restore, we
1176             // know there is nothing to do, so we can simply override the
1177             // existing entry.
1178             self.device_tree
1179                 .lock()
1180                 .unwrap()
1181                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1182 
1183             (Some(device), Some(mapping))
1184         } else {
1185             (None, None)
1186         };
1187 
1188         let mut iommu_attached_devices = Vec::new();
1189 
1190         for (device, iommu_attached, id) in virtio_devices {
1191             let mapping: &Option<Arc<IommuMapping>> = if iommu_attached {
1192                 &iommu_mapping
1193             } else {
1194                 &None
1195             };
1196 
1197             let dev_id = self.add_virtio_pci_device(device, &mut pci_bus, mapping, id)?;
1198 
1199             if iommu_attached {
1200                 iommu_attached_devices.push(dev_id);
1201             }
1202         }
1203 
1204         let mut vfio_iommu_device_ids = self.add_vfio_devices(&mut pci_bus)?;
1205         iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1206 
1207         let mut vfio_user_iommu_device_ids = self.add_user_devices(&mut pci_bus)?;
1208         iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1209 
1210         if let Some(iommu_device) = iommu_device {
1211             let dev_id = self.add_virtio_pci_device(iommu_device, &mut pci_bus, &None, iommu_id)?;
1212             self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1213         }
1214 
1215         let pci_bus = Arc::new(Mutex::new(pci_bus));
1216         let pci_config_io = Arc::new(Mutex::new(PciConfigIo::new(Arc::clone(&pci_bus))));
1217         self.bus_devices
1218             .push(Arc::clone(&pci_config_io) as Arc<Mutex<dyn BusDevice>>);
1219         #[cfg(target_arch = "x86_64")]
1220         self.address_manager
1221             .io_bus
1222             .insert(pci_config_io, 0xcf8, 0x8)
1223             .map_err(DeviceManagerError::BusError)?;
1224         let pci_config_mmio = Arc::new(Mutex::new(PciConfigMmio::new(Arc::clone(&pci_bus))));
1225         self.bus_devices
1226             .push(Arc::clone(&pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
1227         self.address_manager
1228             .mmio_bus
1229             .insert(
1230                 pci_config_mmio,
1231                 arch::layout::PCI_MMCONFIG_START.0,
1232                 arch::layout::PCI_MMCONFIG_SIZE,
1233             )
1234             .map_err(DeviceManagerError::BusError)?;
1235 
1236         self.pci_bus = Some(pci_bus);
1237 
1238         Ok(())
1239     }
1240 
1241     #[cfg(target_arch = "aarch64")]
1242     fn add_interrupt_controller(
1243         &mut self,
1244     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1245         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1246             gic::Gic::new(
1247                 self.config.lock().unwrap().cpus.boot_vcpus,
1248                 Arc::clone(&self.msi_interrupt_manager),
1249             )
1250             .map_err(DeviceManagerError::CreateInterruptController)?,
1251         ));
1252 
1253         self.interrupt_controller = Some(interrupt_controller.clone());
1254 
1255         // Unlike x86_64, the "interrupt_controller" here for AArch64 is only
1256         // a `Gic` object that implements the `InterruptController` to provide
1257         // interrupt delivery service. This is not the real GIC device so that
1258         // we do not need to insert it to the device tree.
1259 
1260         Ok(interrupt_controller)
1261     }
1262 
1263     #[cfg(target_arch = "aarch64")]
1264     pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
1265         self.interrupt_controller.as_ref()
1266     }
1267 
1268     #[cfg(target_arch = "x86_64")]
1269     fn add_interrupt_controller(
1270         &mut self,
1271     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1272         let id = String::from(IOAPIC_DEVICE_NAME);
1273 
1274         // Create IOAPIC
1275         let interrupt_controller = Arc::new(Mutex::new(
1276             ioapic::Ioapic::new(
1277                 id.clone(),
1278                 APIC_START,
1279                 Arc::clone(&self.msi_interrupt_manager),
1280             )
1281             .map_err(DeviceManagerError::CreateInterruptController)?,
1282         ));
1283 
1284         self.interrupt_controller = Some(interrupt_controller.clone());
1285 
1286         self.address_manager
1287             .mmio_bus
1288             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1289             .map_err(DeviceManagerError::BusError)?;
1290 
1291         self.bus_devices
1292             .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);
1293 
1294         // Fill the device tree with a new node. In case of restore, we
1295         // know there is nothing to do, so we can simply override the
1296         // existing entry.
1297         self.device_tree
1298             .lock()
1299             .unwrap()
1300             .insert(id.clone(), device_node!(id, interrupt_controller));
1301 
1302         Ok(interrupt_controller)
1303     }
1304 
1305     #[cfg(feature = "acpi")]
1306     fn add_acpi_devices(
1307         &mut self,
1308         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1309         reset_evt: EventFd,
1310         exit_evt: EventFd,
1311     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1312         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1313             exit_evt, reset_evt,
1314         )));
1315 
1316         self.bus_devices
1317             .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);
1318 
1319         #[cfg(target_arch = "x86_64")]
1320         {
1321             self.address_manager
1322                 .allocator
1323                 .lock()
1324                 .unwrap()
1325                 .allocate_io_addresses(Some(GuestAddress(0x3c0)), 0x8, None)
1326                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1327 
1328             self.address_manager
1329                 .io_bus
1330                 .insert(shutdown_device, 0x3c0, 0x4)
1331                 .map_err(DeviceManagerError::BusError)?;
1332         }
1333 
1334         let ged_irq = self
1335             .address_manager
1336             .allocator
1337             .lock()
1338             .unwrap()
1339             .allocate_irq()
1340             .unwrap();
1341         let interrupt_group = interrupt_manager
1342             .create_group(LegacyIrqGroupConfig {
1343                 irq: ged_irq as InterruptIndex,
1344             })
1345             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1346         let ged_address = self
1347             .address_manager
1348             .allocator
1349             .lock()
1350             .unwrap()
1351             .allocate_mmio_addresses(None, devices::acpi::GED_DEVICE_ACPI_SIZE as u64, None)
1352             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1353         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1354             interrupt_group,
1355             ged_irq,
1356             ged_address,
1357         )));
1358         self.address_manager
1359             .mmio_bus
1360             .insert(
1361                 ged_device.clone(),
1362                 ged_address.0,
1363                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1364             )
1365             .map_err(DeviceManagerError::BusError)?;
1366         self.bus_devices
1367             .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);
1368 
1369         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1370 
1371         self.bus_devices
1372             .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);
1373 
1374         #[cfg(target_arch = "x86_64")]
1375         {
1376             self.address_manager
1377                 .allocator
1378                 .lock()
1379                 .unwrap()
1380                 .allocate_io_addresses(Some(GuestAddress(0xb008)), 0x4, None)
1381                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1382 
1383             self.address_manager
1384                 .io_bus
1385                 .insert(pm_timer_device, 0xb008, 0x4)
1386                 .map_err(DeviceManagerError::BusError)?;
1387         }
1388 
1389         Ok(Some(ged_device))
1390     }
1391 
1392     #[cfg(target_arch = "x86_64")]
1393     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1394         // Add a shutdown device (i8042)
1395         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(reset_evt)));
1396 
1397         self.bus_devices
1398             .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);
1399 
1400         self.address_manager
1401             .io_bus
1402             .insert(i8042, 0x61, 0x4)
1403             .map_err(DeviceManagerError::BusError)?;
1404         #[cfg(feature = "cmos")]
1405         {
1406             // Add a CMOS emulated device
1407             let mem_size = self
1408                 .memory_manager
1409                 .lock()
1410                 .unwrap()
1411                 .guest_memory()
1412                 .memory()
1413                 .last_addr()
1414                 .0
1415                 + 1;
1416             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1417             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1418 
1419             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1420                 mem_below_4g,
1421                 mem_above_4g,
1422             )));
1423 
1424             self.bus_devices
1425                 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);
1426 
1427             self.address_manager
1428                 .io_bus
1429                 .insert(cmos, 0x70, 0x2)
1430                 .map_err(DeviceManagerError::BusError)?;
1431         }
1432         #[cfg(feature = "fwdebug")]
1433         {
1434             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1435 
1436             self.bus_devices
1437                 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);
1438 
1439             self.address_manager
1440                 .io_bus
1441                 .insert(fwdebug, 0x402, 0x1)
1442                 .map_err(DeviceManagerError::BusError)?;
1443         }
1444 
1445         Ok(())
1446     }
1447 
1448     #[cfg(target_arch = "aarch64")]
1449     fn add_legacy_devices(
1450         &mut self,
1451         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1452     ) -> DeviceManagerResult<()> {
1453         // Add a RTC device
1454         let rtc_irq = self
1455             .address_manager
1456             .allocator
1457             .lock()
1458             .unwrap()
1459             .allocate_irq()
1460             .unwrap();
1461 
1462         let interrupt_group = interrupt_manager
1463             .create_group(LegacyIrqGroupConfig {
1464                 irq: rtc_irq as InterruptIndex,
1465             })
1466             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1467 
1468         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1469 
1470         self.bus_devices
1471             .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);
1472 
1473         let addr = GuestAddress(arch::layout::LEGACY_RTC_MAPPED_IO_START);
1474 
1475         self.address_manager
1476             .mmio_bus
1477             .insert(rtc_device, addr.0, MMIO_LEN)
1478             .map_err(DeviceManagerError::BusError)?;
1479 
1480         self.id_to_dev_info.insert(
1481             (DeviceType::Rtc, "rtc".to_string()),
1482             MmioDeviceInfo {
1483                 addr: addr.0,
1484                 irq: rtc_irq,
1485             },
1486         );
1487 
1488         // Add a GPIO device
1489         let id = String::from(GPIO_DEVICE_NAME_PREFIX);
1490         let gpio_irq = self
1491             .address_manager
1492             .allocator
1493             .lock()
1494             .unwrap()
1495             .allocate_irq()
1496             .unwrap();
1497 
1498         let interrupt_group = interrupt_manager
1499             .create_group(LegacyIrqGroupConfig {
1500                 irq: gpio_irq as InterruptIndex,
1501             })
1502             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1503 
1504         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1505             id.clone(),
1506             interrupt_group,
1507         )));
1508 
1509         self.bus_devices
1510             .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);
1511 
1512         let addr = GuestAddress(arch::layout::LEGACY_GPIO_MAPPED_IO_START);
1513 
1514         self.address_manager
1515             .mmio_bus
1516             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1517             .map_err(DeviceManagerError::BusError)?;
1518 
1519         self.gpio_device = Some(gpio_device.clone());
1520 
1521         self.id_to_dev_info.insert(
1522             (DeviceType::Gpio, "gpio".to_string()),
1523             MmioDeviceInfo {
1524                 addr: addr.0,
1525                 irq: gpio_irq,
1526             },
1527         );
1528 
1529         self.device_tree
1530             .lock()
1531             .unwrap()
1532             .insert(id.clone(), device_node!(id, gpio_device));
1533 
1534         Ok(())
1535     }
1536 
1537     #[cfg(target_arch = "x86_64")]
1538     fn add_serial_device(
1539         &mut self,
1540         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1541         serial_writer: Option<Box<dyn io::Write + Send>>,
1542     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1543         // Serial is tied to IRQ #4
1544         let serial_irq = 4;
1545 
1546         let id = String::from(SERIAL_DEVICE_NAME_PREFIX);
1547 
1548         let interrupt_group = interrupt_manager
1549             .create_group(LegacyIrqGroupConfig {
1550                 irq: serial_irq as InterruptIndex,
1551             })
1552             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1553 
1554         let serial = Arc::new(Mutex::new(Serial::new(
1555             id.clone(),
1556             interrupt_group,
1557             serial_writer,
1558         )));
1559 
1560         self.bus_devices
1561             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1562 
1563         self.address_manager
1564             .allocator
1565             .lock()
1566             .unwrap()
1567             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1568             .ok_or(DeviceManagerError::AllocateIoPort)?;
1569 
1570         self.address_manager
1571             .io_bus
1572             .insert(serial.clone(), 0x3f8, 0x8)
1573             .map_err(DeviceManagerError::BusError)?;
1574 
1575         // Fill the device tree with a new node. In case of restore, we
1576         // know there is nothing to do, so we can simply override the
1577         // existing entry.
1578         self.device_tree
1579             .lock()
1580             .unwrap()
1581             .insert(id.clone(), device_node!(id, serial));
1582 
1583         Ok(serial)
1584     }
1585 
1586     #[cfg(target_arch = "aarch64")]
1587     fn add_serial_device(
1588         &mut self,
1589         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1590         serial_writer: Option<Box<dyn io::Write + Send>>,
1591     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1592         let id = String::from(SERIAL_DEVICE_NAME_PREFIX);
1593 
1594         let serial_irq = self
1595             .address_manager
1596             .allocator
1597             .lock()
1598             .unwrap()
1599             .allocate_irq()
1600             .unwrap();
1601 
1602         let interrupt_group = interrupt_manager
1603             .create_group(LegacyIrqGroupConfig {
1604                 irq: serial_irq as InterruptIndex,
1605             })
1606             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1607 
1608         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1609             id.clone(),
1610             interrupt_group,
1611             serial_writer,
1612         )));
1613 
1614         self.bus_devices
1615             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1616 
1617         let addr = GuestAddress(arch::layout::LEGACY_SERIAL_MAPPED_IO_START);
1618 
1619         self.address_manager
1620             .mmio_bus
1621             .insert(serial.clone(), addr.0, MMIO_LEN)
1622             .map_err(DeviceManagerError::BusError)?;
1623 
1624         self.id_to_dev_info.insert(
1625             (DeviceType::Serial, DeviceType::Serial.to_string()),
1626             MmioDeviceInfo {
1627                 addr: addr.0,
1628                 irq: serial_irq,
1629             },
1630         );
1631 
1632         self.cmdline_additions
1633             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1634 
1635         // Fill the device tree with a new node. In case of restore, we
1636         // know there is nothing to do, so we can simply override the
1637         // existing entry.
1638         self.device_tree
1639             .lock()
1640             .unwrap()
1641             .insert(id.clone(), device_node!(id, serial));
1642 
1643         Ok(serial)
1644     }
1645 
1646     fn modify_mode<F: FnOnce(&mut termios)>(
1647         &self,
1648         fd: RawFd,
1649         f: F,
1650     ) -> vmm_sys_util::errno::Result<()> {
1651         // Safe because we check the return value of isatty.
1652         if unsafe { isatty(fd) } != 1 {
1653             return Ok(());
1654         }
1655 
1656         // The following pair are safe because termios gets totally overwritten by tcgetattr and we
1657         // check the return result.
1658         let mut termios: termios = unsafe { zeroed() };
1659         let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
1660         if ret < 0 {
1661             return vmm_sys_util::errno::errno_result();
1662         }
1663         f(&mut termios);
1664         // Safe because the syscall will only read the extent of termios and we check the return result.
1665         let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
1666         if ret < 0 {
1667             return vmm_sys_util::errno::errno_result();
1668         }
1669 
1670         Ok(())
1671     }
1672 
1673     fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> {
1674         self.modify_mode(f.as_raw_fd(), |t| t.c_lflag &= !(ICANON | ECHO | ISIG))
1675     }
1676 
1677     fn listen_for_sigwinch_on_tty(&mut self, pty: &File) -> std::io::Result<()> {
1678         let seccomp_filter =
1679             get_seccomp_filter(&self.seccomp_action, Thread::PtyForeground).unwrap();
1680 
1681         let pipe = start_sigwinch_listener(seccomp_filter, pty)?;
1682 
1683         self.console_resize_pipe = Some(Arc::new(pipe));
1684 
1685         Ok(())
1686     }
1687 
1688     fn add_virtio_console_device(
1689         &mut self,
1690         virtio_devices: &mut Vec<(VirtioDeviceArc, bool, String)>,
1691         console_pty: Option<PtyPair>,
1692         resize_pipe: Option<File>,
1693     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
1694         let console_config = self.config.lock().unwrap().console.clone();
1695         let endpoint = match console_config.mode {
1696             ConsoleOutputMode::File => {
1697                 let file = File::create(console_config.file.as_ref().unwrap())
1698                     .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
1699                 Endpoint::File(file)
1700             }
1701             ConsoleOutputMode::Pty => {
1702                 if let Some(pty) = console_pty {
1703                     self.config.lock().unwrap().console.file = Some(pty.path.clone());
1704                     let file = pty.main.try_clone().unwrap();
1705                     self.console_pty = Some(Arc::new(Mutex::new(pty)));
1706                     self.console_resize_pipe = Some(Arc::new(resize_pipe.unwrap()));
1707                     Endpoint::FilePair(file.try_clone().unwrap(), file)
1708                 } else {
1709                     let (main, mut sub, path) =
1710                         create_pty(false).map_err(DeviceManagerError::ConsolePtyOpen)?;
1711                     self.set_raw_mode(&mut sub)
1712                         .map_err(DeviceManagerError::SetPtyRaw)?;
1713                     self.config.lock().unwrap().console.file = Some(path.clone());
1714                     let file = main.try_clone().unwrap();
1715                     assert!(resize_pipe.is_none());
1716                     self.listen_for_sigwinch_on_tty(&sub).unwrap();
1717                     self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
1718                     Endpoint::FilePair(file.try_clone().unwrap(), file)
1719                 }
1720             }
1721             ConsoleOutputMode::Tty => {
1722                 // If an interactive TTY then we can accept input
1723                 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
1724                     Endpoint::FilePair(
1725                         // Duplicating the file descriptors like this is needed as otherwise
1726                         // they will be closed on a reboot and the numbers reused
1727                         unsafe { File::from_raw_fd(libc::dup(libc::STDOUT_FILENO)) },
1728                         unsafe { File::from_raw_fd(libc::dup(libc::STDIN_FILENO)) },
1729                     )
1730                 } else {
1731                     Endpoint::File(unsafe { File::from_raw_fd(libc::dup(libc::STDOUT_FILENO)) })
1732                 }
1733             }
1734             ConsoleOutputMode::Null => Endpoint::Null,
1735             ConsoleOutputMode::Off => return Ok(None),
1736         };
1737         let id = String::from(CONSOLE_DEVICE_NAME);
1738 
1739         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
1740             id.clone(),
1741             endpoint,
1742             self.console_resize_pipe
1743                 .as_ref()
1744                 .map(|p| p.try_clone().unwrap()),
1745             self.force_iommu | console_config.iommu,
1746             self.seccomp_action.clone(),
1747             self.exit_evt
1748                 .try_clone()
1749                 .map_err(DeviceManagerError::EventFd)?,
1750         )
1751         .map_err(DeviceManagerError::CreateVirtioConsole)?;
1752         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
1753         virtio_devices.push((
1754             Arc::clone(&virtio_console_device) as VirtioDeviceArc,
1755             console_config.iommu,
1756             id.clone(),
1757         ));
1758 
1759         // Fill the device tree with a new node. In case of restore, we
1760         // know there is nothing to do, so we can simply override the
1761         // existing entry.
1762         self.device_tree
1763             .lock()
1764             .unwrap()
1765             .insert(id.clone(), device_node!(id, virtio_console_device));
1766 
1767         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
1768         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
1769             Some(console_resizer)
1770         } else {
1771             None
1772         })
1773     }
1774 
1775     fn add_console_device(
1776         &mut self,
1777         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1778         virtio_devices: &mut Vec<(VirtioDeviceArc, bool, String)>,
1779         serial_pty: Option<PtyPair>,
1780         console_pty: Option<PtyPair>,
1781         console_resize_pipe: Option<File>,
1782     ) -> DeviceManagerResult<Arc<Console>> {
1783         let serial_config = self.config.lock().unwrap().serial.clone();
1784         let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
1785             ConsoleOutputMode::File => Some(Box::new(
1786                 File::create(serial_config.file.as_ref().unwrap())
1787                     .map_err(DeviceManagerError::SerialOutputFileOpen)?,
1788             )),
1789             ConsoleOutputMode::Pty => {
1790                 if let Some(pty) = serial_pty {
1791                     self.config.lock().unwrap().serial.file = Some(pty.path.clone());
1792                     let writer = pty.main.try_clone().unwrap();
1793                     let buffer = SerialBuffer::new(Box::new(writer));
1794                     self.serial_pty = Some(Arc::new(Mutex::new(pty)));
1795                     Some(Box::new(buffer))
1796                 } else {
1797                     let (main, mut sub, path) =
1798                         create_pty(true).map_err(DeviceManagerError::SerialPtyOpen)?;
1799                     self.set_raw_mode(&mut sub)
1800                         .map_err(DeviceManagerError::SetPtyRaw)?;
1801                     self.config.lock().unwrap().serial.file = Some(path.clone());
1802                     let writer = main.try_clone().unwrap();
1803                     let buffer = SerialBuffer::new(Box::new(writer));
1804                     self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
1805                     Some(Box::new(buffer))
1806                 }
1807             }
1808             ConsoleOutputMode::Tty => Some(Box::new(stdout())),
1809             ConsoleOutputMode::Off | ConsoleOutputMode::Null => None,
1810         };
1811         let serial = if serial_config.mode != ConsoleOutputMode::Off {
1812             Some(self.add_serial_device(interrupt_manager, serial_writer)?)
1813         } else {
1814             None
1815         };
1816 
1817         let console_resizer =
1818             self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;
1819 
1820         Ok(Arc::new(Console {
1821             serial,
1822             console_resizer,
1823         }))
1824     }
1825 
1826     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
1827         let mut devices: Vec<(VirtioDeviceArc, bool, String)> = Vec::new();
1828 
1829         // Create "standard" virtio devices (net/block/rng)
1830         devices.append(&mut self.make_virtio_block_devices()?);
1831         devices.append(&mut self.make_virtio_net_devices()?);
1832         devices.append(&mut self.make_virtio_rng_devices()?);
1833 
1834         // Add virtio-fs if required
1835         devices.append(&mut self.make_virtio_fs_devices()?);
1836 
1837         // Add virtio-pmem if required
1838         devices.append(&mut self.make_virtio_pmem_devices()?);
1839 
1840         // Add virtio-vsock if required
1841         devices.append(&mut self.make_virtio_vsock_devices()?);
1842 
1843         devices.append(&mut self.make_virtio_mem_devices()?);
1844 
1845         // Add virtio-balloon if required
1846         devices.append(&mut self.make_virtio_balloon_devices()?);
1847 
1848         // Add virtio-watchdog device
1849         devices.append(&mut self.make_virtio_watchdog_devices()?);
1850 
1851         Ok(devices)
1852     }
1853 
1854     fn make_virtio_block_device(
1855         &mut self,
1856         disk_cfg: &mut DiskConfig,
1857     ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> {
1858         let id = if let Some(id) = &disk_cfg.id {
1859             id.clone()
1860         } else {
1861             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
1862             disk_cfg.id = Some(id.clone());
1863             id
1864         };
1865 
1866         info!("Creating virtio-block device: {:?}", disk_cfg);
1867 
1868         if disk_cfg.vhost_user {
1869             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
1870             let vu_cfg = VhostUserConfig {
1871                 socket,
1872                 num_queues: disk_cfg.num_queues,
1873                 queue_size: disk_cfg.queue_size,
1874             };
1875             let vhost_user_block_device = Arc::new(Mutex::new(
1876                 match virtio_devices::vhost_user::Blk::new(
1877                     id.clone(),
1878                     vu_cfg,
1879                     self.restoring,
1880                     self.seccomp_action.clone(),
1881                     self.exit_evt
1882                         .try_clone()
1883                         .map_err(DeviceManagerError::EventFd)?,
1884                 ) {
1885                     Ok(vub_device) => vub_device,
1886                     Err(e) => {
1887                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
1888                     }
1889                 },
1890             ));
1891 
1892             // Fill the device tree with a new node. In case of restore, we
1893             // know there is nothing to do, so we can simply override the
1894             // existing entry.
1895             self.device_tree
1896                 .lock()
1897                 .unwrap()
1898                 .insert(id.clone(), device_node!(id, vhost_user_block_device));
1899 
1900             Ok((
1901                 Arc::clone(&vhost_user_block_device) as VirtioDeviceArc,
1902                 false,
1903                 id,
1904             ))
1905         } else {
1906             let mut options = OpenOptions::new();
1907             options.read(true);
1908             options.write(!disk_cfg.readonly);
1909             if disk_cfg.direct {
1910                 options.custom_flags(libc::O_DIRECT);
1911             }
1912             // Open block device path
1913             let mut file: File = options
1914                 .open(
1915                     disk_cfg
1916                         .path
1917                         .as_ref()
1918                         .ok_or(DeviceManagerError::NoDiskPath)?
1919                         .clone(),
1920                 )
1921                 .map_err(DeviceManagerError::Disk)?;
1922             let image_type =
1923                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
1924 
1925             let image = match image_type {
1926                 ImageType::FixedVhd => {
1927                     // Use asynchronous backend relying on io_uring if the
1928                     // syscalls are supported.
1929                     if block_io_uring_is_supported() && !disk_cfg.disable_io_uring {
1930                         info!("Using asynchronous fixed VHD disk file (io_uring)");
1931                         Box::new(
1932                             FixedVhdDiskAsync::new(file)
1933                                 .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
1934                         ) as Box<dyn DiskFile>
1935                     } else {
1936                         info!("Using synchronous fixed VHD disk file");
1937                         Box::new(
1938                             FixedVhdDiskSync::new(file)
1939                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
1940                         ) as Box<dyn DiskFile>
1941                     }
1942                 }
1943                 ImageType::Raw => {
1944                     // Use asynchronous backend relying on io_uring if the
1945                     // syscalls are supported.
1946                     if block_io_uring_is_supported() && !disk_cfg.disable_io_uring {
1947                         info!("Using asynchronous RAW disk file (io_uring)");
1948                         Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
1949                     } else {
1950                         info!("Using synchronous RAW disk file");
1951                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
1952                     }
1953                 }
1954                 ImageType::Qcow2 => {
1955                     info!("Using synchronous QCOW disk file");
1956                     Box::new(
1957                         QcowDiskSync::new(file, disk_cfg.direct)
1958                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
1959                     ) as Box<dyn DiskFile>
1960                 }
1961                 ImageType::Vhdx => {
1962                     info!("Using synchronous VHDX disk file");
1963                     Box::new(
1964                         VhdxDiskSync::new(file)
1965                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
1966                     ) as Box<dyn DiskFile>
1967                 }
1968             };
1969 
1970             let dev = Arc::new(Mutex::new(
1971                 virtio_devices::Block::new(
1972                     id.clone(),
1973                     image,
1974                     disk_cfg
1975                         .path
1976                         .as_ref()
1977                         .ok_or(DeviceManagerError::NoDiskPath)?
1978                         .clone(),
1979                     disk_cfg.readonly,
1980                     self.force_iommu | disk_cfg.iommu,
1981                     disk_cfg.num_queues,
1982                     disk_cfg.queue_size,
1983                     self.seccomp_action.clone(),
1984                     disk_cfg.rate_limiter_config,
1985                     self.exit_evt
1986                         .try_clone()
1987                         .map_err(DeviceManagerError::EventFd)?,
1988                 )
1989                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
1990             ));
1991 
1992             let virtio_device = Arc::clone(&dev) as VirtioDeviceArc;
1993             let migratable_device = dev as Arc<Mutex<dyn Migratable>>;
1994 
1995             // Fill the device tree with a new node. In case of restore, we
1996             // know there is nothing to do, so we can simply override the
1997             // existing entry.
1998             self.device_tree
1999                 .lock()
2000                 .unwrap()
2001                 .insert(id.clone(), device_node!(id, migratable_device));
2002 
2003             Ok((virtio_device, disk_cfg.iommu, id))
2004         }
2005     }
2006 
2007     fn make_virtio_block_devices(
2008         &mut self,
2009     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2010         let mut devices = Vec::new();
2011 
2012         let mut block_devices = self.config.lock().unwrap().disks.clone();
2013         if let Some(disk_list_cfg) = &mut block_devices {
2014             for disk_cfg in disk_list_cfg.iter_mut() {
2015                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2016             }
2017         }
2018         self.config.lock().unwrap().disks = block_devices;
2019 
2020         Ok(devices)
2021     }
2022 
2023     fn make_virtio_net_device(
2024         &mut self,
2025         net_cfg: &mut NetConfig,
2026     ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> {
2027         let id = if let Some(id) = &net_cfg.id {
2028             id.clone()
2029         } else {
2030             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2031             net_cfg.id = Some(id.clone());
2032             id
2033         };
2034         info!("Creating virtio-net device: {:?}", net_cfg);
2035 
2036         if net_cfg.vhost_user {
2037             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2038             let vu_cfg = VhostUserConfig {
2039                 socket,
2040                 num_queues: net_cfg.num_queues,
2041                 queue_size: net_cfg.queue_size,
2042             };
2043             let server = match net_cfg.vhost_mode {
2044                 VhostMode::Client => false,
2045                 VhostMode::Server => true,
2046             };
2047             let vhost_user_net_device = Arc::new(Mutex::new(
2048                 match virtio_devices::vhost_user::Net::new(
2049                     id.clone(),
2050                     net_cfg.mac,
2051                     vu_cfg,
2052                     server,
2053                     self.seccomp_action.clone(),
2054                     self.restoring,
2055                     self.exit_evt
2056                         .try_clone()
2057                         .map_err(DeviceManagerError::EventFd)?,
2058                 ) {
2059                     Ok(vun_device) => vun_device,
2060                     Err(e) => {
2061                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2062                     }
2063                 },
2064             ));
2065 
2066             // Fill the device tree with a new node. In case of restore, we
2067             // know there is nothing to do, so we can simply override the
2068             // existing entry.
2069             self.device_tree
2070                 .lock()
2071                 .unwrap()
2072                 .insert(id.clone(), device_node!(id, vhost_user_net_device));
2073 
2074             Ok((
2075                 Arc::clone(&vhost_user_net_device) as VirtioDeviceArc,
2076                 net_cfg.iommu,
2077                 id,
2078             ))
2079         } else {
2080             let virtio_net_device = if let Some(ref tap_if_name) = net_cfg.tap {
2081                 Arc::new(Mutex::new(
2082                     virtio_devices::Net::new(
2083                         id.clone(),
2084                         Some(tap_if_name),
2085                         None,
2086                         None,
2087                         Some(net_cfg.mac),
2088                         &mut net_cfg.host_mac,
2089                         self.force_iommu | net_cfg.iommu,
2090                         net_cfg.num_queues,
2091                         net_cfg.queue_size,
2092                         self.seccomp_action.clone(),
2093                         net_cfg.rate_limiter_config,
2094                         self.exit_evt
2095                             .try_clone()
2096                             .map_err(DeviceManagerError::EventFd)?,
2097                     )
2098                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2099                 ))
2100             } else if let Some(fds) = &net_cfg.fds {
2101                 Arc::new(Mutex::new(
2102                     virtio_devices::Net::from_tap_fds(
2103                         id.clone(),
2104                         fds,
2105                         Some(net_cfg.mac),
2106                         self.force_iommu | net_cfg.iommu,
2107                         net_cfg.queue_size,
2108                         self.seccomp_action.clone(),
2109                         net_cfg.rate_limiter_config,
2110                         self.exit_evt
2111                             .try_clone()
2112                             .map_err(DeviceManagerError::EventFd)?,
2113                     )
2114                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2115                 ))
2116             } else {
2117                 Arc::new(Mutex::new(
2118                     virtio_devices::Net::new(
2119                         id.clone(),
2120                         None,
2121                         Some(net_cfg.ip),
2122                         Some(net_cfg.mask),
2123                         Some(net_cfg.mac),
2124                         &mut net_cfg.host_mac,
2125                         self.force_iommu | net_cfg.iommu,
2126                         net_cfg.num_queues,
2127                         net_cfg.queue_size,
2128                         self.seccomp_action.clone(),
2129                         net_cfg.rate_limiter_config,
2130                         self.exit_evt
2131                             .try_clone()
2132                             .map_err(DeviceManagerError::EventFd)?,
2133                     )
2134                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2135                 ))
2136             };
2137 
2138             // Fill the device tree with a new node. In case of restore, we
2139             // know there is nothing to do, so we can simply override the
2140             // existing entry.
2141             self.device_tree
2142                 .lock()
2143                 .unwrap()
2144                 .insert(id.clone(), device_node!(id, virtio_net_device));
2145 
2146             Ok((
2147                 Arc::clone(&virtio_net_device) as VirtioDeviceArc,
2148                 net_cfg.iommu,
2149                 id,
2150             ))
2151         }
2152     }
2153 
2154     /// Add virto-net and vhost-user-net devices
2155     fn make_virtio_net_devices(
2156         &mut self,
2157     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2158         let mut devices = Vec::new();
2159         let mut net_devices = self.config.lock().unwrap().net.clone();
2160         if let Some(net_list_cfg) = &mut net_devices {
2161             for net_cfg in net_list_cfg.iter_mut() {
2162                 devices.push(self.make_virtio_net_device(net_cfg)?);
2163             }
2164         }
2165         self.config.lock().unwrap().net = net_devices;
2166 
2167         Ok(devices)
2168     }
2169 
2170     fn make_virtio_rng_devices(
2171         &mut self,
2172     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2173         let mut devices = Vec::new();
2174 
2175         // Add virtio-rng if required
2176         let rng_config = self.config.lock().unwrap().rng.clone();
2177         if let Some(rng_path) = rng_config.src.to_str() {
2178             info!("Creating virtio-rng device: {:?}", rng_config);
2179             let id = String::from(RNG_DEVICE_NAME);
2180 
2181             let virtio_rng_device = Arc::new(Mutex::new(
2182                 virtio_devices::Rng::new(
2183                     id.clone(),
2184                     rng_path,
2185                     self.force_iommu | rng_config.iommu,
2186                     self.seccomp_action.clone(),
2187                     self.exit_evt
2188                         .try_clone()
2189                         .map_err(DeviceManagerError::EventFd)?,
2190                 )
2191                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2192             ));
2193             devices.push((
2194                 Arc::clone(&virtio_rng_device) as VirtioDeviceArc,
2195                 rng_config.iommu,
2196                 id.clone(),
2197             ));
2198 
2199             // Fill the device tree with a new node. In case of restore, we
2200             // know there is nothing to do, so we can simply override the
2201             // existing entry.
2202             self.device_tree
2203                 .lock()
2204                 .unwrap()
2205                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2206         }
2207 
2208         Ok(devices)
2209     }
2210 
2211     fn make_virtio_fs_device(
2212         &mut self,
2213         fs_cfg: &mut FsConfig,
2214     ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> {
2215         let id = if let Some(id) = &fs_cfg.id {
2216             id.clone()
2217         } else {
2218             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2219             fs_cfg.id = Some(id.clone());
2220             id
2221         };
2222 
2223         info!("Creating virtio-fs device: {:?}", fs_cfg);
2224 
2225         let mut node = device_node!(id);
2226 
2227         // Look for the id in the device tree. If it can be found, that means
2228         // the device is being restored, otherwise it's created from scratch.
2229         let cache_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2230             info!("Restoring virtio-fs {} resources", id);
2231 
2232             let mut cache_range: Option<(u64, u64)> = None;
2233             for resource in node.resources.iter() {
2234                 match resource {
2235                     Resource::MmioAddressRange { base, size } => {
2236                         if cache_range.is_some() {
2237                             return Err(DeviceManagerError::ResourceAlreadyExists);
2238                         }
2239 
2240                         cache_range = Some((*base, *size));
2241                     }
2242                     _ => {
2243                         error!("Unexpected resource {:?} for {}", resource, id);
2244                     }
2245                 }
2246             }
2247 
2248             cache_range
2249         } else {
2250             None
2251         };
2252 
2253         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2254             let cache = if fs_cfg.dax {
2255                 let (cache_base, cache_size) = if let Some((base, size)) = cache_range {
2256                     // The memory needs to be 2MiB aligned in order to support
2257                     // hugepages.
2258                     self.address_manager
2259                         .allocator
2260                         .lock()
2261                         .unwrap()
2262                         .allocate_mmio_addresses(
2263                             Some(GuestAddress(base)),
2264                             size as GuestUsize,
2265                             Some(0x0020_0000),
2266                         )
2267                         .ok_or(DeviceManagerError::FsRangeAllocation)?;
2268 
2269                     (base, size)
2270                 } else {
2271                     let size = fs_cfg.cache_size;
2272                     // The memory needs to be 2MiB aligned in order to support
2273                     // hugepages.
2274                     let base = self
2275                         .address_manager
2276                         .allocator
2277                         .lock()
2278                         .unwrap()
2279                         .allocate_mmio_addresses(None, size as GuestUsize, Some(0x0020_0000))
2280                         .ok_or(DeviceManagerError::FsRangeAllocation)?;
2281 
2282                     (base.raw_value(), size)
2283                 };
2284 
2285                 // Update the node with correct resource information.
2286                 node.resources.push(Resource::MmioAddressRange {
2287                     base: cache_base,
2288                     size: cache_size,
2289                 });
2290 
2291                 let mmap_region = MmapRegion::build(
2292                     None,
2293                     cache_size as usize,
2294                     libc::PROT_NONE,
2295                     libc::MAP_ANONYMOUS | libc::MAP_PRIVATE,
2296                 )
2297                 .map_err(DeviceManagerError::NewMmapRegion)?;
2298                 let host_addr: u64 = mmap_region.as_ptr() as u64;
2299 
2300                 let mem_slot = self
2301                     .memory_manager
2302                     .lock()
2303                     .unwrap()
2304                     .create_userspace_mapping(
2305                         cache_base, cache_size, host_addr, false, false, false,
2306                     )
2307                     .map_err(DeviceManagerError::MemoryManager)?;
2308 
2309                 let region_list = vec![VirtioSharedMemory {
2310                     offset: 0,
2311                     len: cache_size,
2312                 }];
2313 
2314                 Some((
2315                     VirtioSharedMemoryList {
2316                         host_addr,
2317                         mem_slot,
2318                         addr: GuestAddress(cache_base),
2319                         len: cache_size as GuestUsize,
2320                         region_list,
2321                     },
2322                     mmap_region,
2323                 ))
2324             } else {
2325                 None
2326             };
2327 
2328             let virtio_fs_device = Arc::new(Mutex::new(
2329                 virtio_devices::vhost_user::Fs::new(
2330                     id.clone(),
2331                     fs_socket,
2332                     &fs_cfg.tag,
2333                     fs_cfg.num_queues,
2334                     fs_cfg.queue_size,
2335                     cache,
2336                     self.seccomp_action.clone(),
2337                     self.restoring,
2338                     self.exit_evt
2339                         .try_clone()
2340                         .map_err(DeviceManagerError::EventFd)?,
2341                 )
2342                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2343             ));
2344 
2345             // Update the device tree with the migratable device.
2346             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2347             self.device_tree.lock().unwrap().insert(id.clone(), node);
2348 
2349             Ok((Arc::clone(&virtio_fs_device) as VirtioDeviceArc, false, id))
2350         } else {
2351             Err(DeviceManagerError::NoVirtioFsSock)
2352         }
2353     }
2354 
2355     fn make_virtio_fs_devices(
2356         &mut self,
2357     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2358         let mut devices = Vec::new();
2359 
2360         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2361         if let Some(fs_list_cfg) = &mut fs_devices {
2362             for fs_cfg in fs_list_cfg.iter_mut() {
2363                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2364             }
2365         }
2366         self.config.lock().unwrap().fs = fs_devices;
2367 
2368         Ok(devices)
2369     }
2370 
2371     fn make_virtio_pmem_device(
2372         &mut self,
2373         pmem_cfg: &mut PmemConfig,
2374     ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> {
2375         let id = if let Some(id) = &pmem_cfg.id {
2376             id.clone()
2377         } else {
2378             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2379             pmem_cfg.id = Some(id.clone());
2380             id
2381         };
2382 
2383         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2384 
2385         let mut node = device_node!(id);
2386 
2387         // Look for the id in the device tree. If it can be found, that means
2388         // the device is being restored, otherwise it's created from scratch.
2389         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2390             info!("Restoring virtio-pmem {} resources", id);
2391 
2392             let mut region_range: Option<(u64, u64)> = None;
2393             for resource in node.resources.iter() {
2394                 match resource {
2395                     Resource::MmioAddressRange { base, size } => {
2396                         if region_range.is_some() {
2397                             return Err(DeviceManagerError::ResourceAlreadyExists);
2398                         }
2399 
2400                         region_range = Some((*base, *size));
2401                     }
2402                     _ => {
2403                         error!("Unexpected resource {:?} for {}", resource, id);
2404                     }
2405                 }
2406             }
2407 
2408             if region_range.is_none() {
2409                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2410             }
2411 
2412             region_range
2413         } else {
2414             None
2415         };
2416 
2417         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2418             if pmem_cfg.size.is_none() {
2419                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2420             }
2421             (O_TMPFILE, true)
2422         } else {
2423             (0, false)
2424         };
2425 
2426         let mut file = OpenOptions::new()
2427             .read(true)
2428             .write(!pmem_cfg.discard_writes)
2429             .custom_flags(custom_flags)
2430             .open(&pmem_cfg.file)
2431             .map_err(DeviceManagerError::PmemFileOpen)?;
2432 
2433         let size = if let Some(size) = pmem_cfg.size {
2434             if set_len {
2435                 file.set_len(size)
2436                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2437             }
2438             size
2439         } else {
2440             file.seek(SeekFrom::End(0))
2441                 .map_err(DeviceManagerError::PmemFileSetLen)?
2442         };
2443 
2444         if size % 0x20_0000 != 0 {
2445             return Err(DeviceManagerError::PmemSizeNotAligned);
2446         }
2447 
2448         let (region_base, region_size) = if let Some((base, size)) = region_range {
2449             // The memory needs to be 2MiB aligned in order to support
2450             // hugepages.
2451             self.address_manager
2452                 .allocator
2453                 .lock()
2454                 .unwrap()
2455                 .allocate_mmio_addresses(
2456                     Some(GuestAddress(base)),
2457                     size as GuestUsize,
2458                     Some(0x0020_0000),
2459                 )
2460                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2461 
2462             (base, size)
2463         } else {
2464             // The memory needs to be 2MiB aligned in order to support
2465             // hugepages.
2466             let base = self
2467                 .address_manager
2468                 .allocator
2469                 .lock()
2470                 .unwrap()
2471                 .allocate_mmio_addresses(None, size as GuestUsize, Some(0x0020_0000))
2472                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2473 
2474             (base.raw_value(), size)
2475         };
2476 
2477         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2478         let mmap_region = MmapRegion::build(
2479             Some(FileOffset::new(cloned_file, 0)),
2480             region_size as usize,
2481             PROT_READ | PROT_WRITE,
2482             MAP_NORESERVE
2483                 | if pmem_cfg.discard_writes {
2484                     MAP_PRIVATE
2485                 } else {
2486                     MAP_SHARED
2487                 },
2488         )
2489         .map_err(DeviceManagerError::NewMmapRegion)?;
2490         let host_addr: u64 = mmap_region.as_ptr() as u64;
2491 
2492         let mem_slot = self
2493             .memory_manager
2494             .lock()
2495             .unwrap()
2496             .create_userspace_mapping(
2497                 region_base,
2498                 region_size,
2499                 host_addr,
2500                 pmem_cfg.mergeable,
2501                 false,
2502                 false,
2503             )
2504             .map_err(DeviceManagerError::MemoryManager)?;
2505 
2506         let mapping = virtio_devices::UserspaceMapping {
2507             host_addr,
2508             mem_slot,
2509             addr: GuestAddress(region_base),
2510             len: region_size,
2511             mergeable: pmem_cfg.mergeable,
2512         };
2513 
2514         let virtio_pmem_device = Arc::new(Mutex::new(
2515             virtio_devices::Pmem::new(
2516                 id.clone(),
2517                 file,
2518                 GuestAddress(region_base),
2519                 mapping,
2520                 mmap_region,
2521                 self.force_iommu | pmem_cfg.iommu,
2522                 self.seccomp_action.clone(),
2523                 self.exit_evt
2524                     .try_clone()
2525                     .map_err(DeviceManagerError::EventFd)?,
2526             )
2527             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2528         ));
2529 
2530         // Update the device tree with correct resource information and with
2531         // the migratable device.
2532         node.resources.push(Resource::MmioAddressRange {
2533             base: region_base,
2534             size: region_size,
2535         });
2536         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2537         self.device_tree.lock().unwrap().insert(id.clone(), node);
2538 
2539         Ok((
2540             Arc::clone(&virtio_pmem_device) as VirtioDeviceArc,
2541             pmem_cfg.iommu,
2542             id,
2543         ))
2544     }
2545 
2546     fn make_virtio_pmem_devices(
2547         &mut self,
2548     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2549         let mut devices = Vec::new();
2550         // Add virtio-pmem if required
2551         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2552         if let Some(pmem_list_cfg) = &mut pmem_devices {
2553             for pmem_cfg in pmem_list_cfg.iter_mut() {
2554                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2555             }
2556         }
2557         self.config.lock().unwrap().pmem = pmem_devices;
2558 
2559         Ok(devices)
2560     }
2561 
2562     fn make_virtio_vsock_device(
2563         &mut self,
2564         vsock_cfg: &mut VsockConfig,
2565     ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> {
2566         let id = if let Some(id) = &vsock_cfg.id {
2567             id.clone()
2568         } else {
2569             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2570             vsock_cfg.id = Some(id.clone());
2571             id
2572         };
2573 
2574         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2575 
2576         let socket_path = vsock_cfg
2577             .socket
2578             .to_str()
2579             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2580         let backend =
2581             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2582                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2583 
2584         let vsock_device = Arc::new(Mutex::new(
2585             virtio_devices::Vsock::new(
2586                 id.clone(),
2587                 vsock_cfg.cid,
2588                 vsock_cfg.socket.clone(),
2589                 backend,
2590                 self.force_iommu | vsock_cfg.iommu,
2591                 self.seccomp_action.clone(),
2592                 self.exit_evt
2593                     .try_clone()
2594                     .map_err(DeviceManagerError::EventFd)?,
2595             )
2596             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2597         ));
2598 
2599         // Fill the device tree with a new node. In case of restore, we
2600         // know there is nothing to do, so we can simply override the
2601         // existing entry.
2602         self.device_tree
2603             .lock()
2604             .unwrap()
2605             .insert(id.clone(), device_node!(id, vsock_device));
2606 
2607         Ok((
2608             Arc::clone(&vsock_device) as VirtioDeviceArc,
2609             vsock_cfg.iommu,
2610             id,
2611         ))
2612     }
2613 
2614     fn make_virtio_vsock_devices(
2615         &mut self,
2616     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2617         let mut devices = Vec::new();
2618 
2619         let mut vsock = self.config.lock().unwrap().vsock.clone();
2620         if let Some(ref mut vsock_cfg) = &mut vsock {
2621             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
2622         }
2623         self.config.lock().unwrap().vsock = vsock;
2624 
2625         Ok(devices)
2626     }
2627 
2628     fn make_virtio_mem_devices(
2629         &mut self,
2630     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2631         let mut devices = Vec::new();
2632 
2633         let mm = self.memory_manager.clone();
2634         let mm = mm.lock().unwrap();
2635         for (_memory_zone_id, memory_zone) in mm.memory_zones().iter() {
2636             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() {
2637                 let id = self.next_device_name(MEM_DEVICE_NAME_PREFIX)?;
2638                 info!("Creating virtio-mem device: id = {}", id);
2639 
2640                 #[cfg(all(target_arch = "x86_64", not(feature = "acpi")))]
2641                 let node_id: Option<u16> = None;
2642                 #[cfg(any(target_arch = "aarch64", feature = "acpi"))]
2643                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, _memory_zone_id)
2644                     .map(|i| i as u16);
2645 
2646                 let virtio_mem_device = Arc::new(Mutex::new(
2647                     virtio_devices::Mem::new(
2648                         id.clone(),
2649                         virtio_mem_zone.region(),
2650                         virtio_mem_zone
2651                             .resize_handler()
2652                             .new_resize_sender()
2653                             .map_err(DeviceManagerError::CreateResizeSender)?,
2654                         self.seccomp_action.clone(),
2655                         node_id,
2656                         virtio_mem_zone.hotplugged_size(),
2657                         virtio_mem_zone.hugepages(),
2658                         self.exit_evt
2659                             .try_clone()
2660                             .map_err(DeviceManagerError::EventFd)?,
2661                     )
2662                     .map_err(DeviceManagerError::CreateVirtioMem)?,
2663                 ));
2664 
2665                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
2666 
2667                 devices.push((
2668                     Arc::clone(&virtio_mem_device) as VirtioDeviceArc,
2669                     false,
2670                     id.clone(),
2671                 ));
2672 
2673                 // Fill the device tree with a new node. In case of restore, we
2674                 // know there is nothing to do, so we can simply override the
2675                 // existing entry.
2676                 self.device_tree
2677                     .lock()
2678                     .unwrap()
2679                     .insert(id.clone(), device_node!(id, virtio_mem_device));
2680             }
2681         }
2682 
2683         Ok(devices)
2684     }
2685 
2686     fn make_virtio_balloon_devices(
2687         &mut self,
2688     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2689         let mut devices = Vec::new();
2690 
2691         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
2692             let id = String::from(BALLOON_DEVICE_NAME);
2693             info!("Creating virtio-balloon device: id = {}", id);
2694 
2695             let virtio_balloon_device = Arc::new(Mutex::new(
2696                 virtio_devices::Balloon::new(
2697                     id.clone(),
2698                     balloon_config.size,
2699                     balloon_config.deflate_on_oom,
2700                     self.seccomp_action.clone(),
2701                     self.exit_evt
2702                         .try_clone()
2703                         .map_err(DeviceManagerError::EventFd)?,
2704                 )
2705                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
2706             ));
2707 
2708             self.balloon = Some(virtio_balloon_device.clone());
2709 
2710             devices.push((
2711                 Arc::clone(&virtio_balloon_device) as VirtioDeviceArc,
2712                 false,
2713                 id.clone(),
2714             ));
2715 
2716             self.device_tree
2717                 .lock()
2718                 .unwrap()
2719                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
2720         }
2721 
2722         Ok(devices)
2723     }
2724 
2725     fn make_virtio_watchdog_devices(
2726         &mut self,
2727     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2728         let mut devices = Vec::new();
2729 
2730         if !self.config.lock().unwrap().watchdog {
2731             return Ok(devices);
2732         }
2733 
2734         let id = String::from(WATCHDOG_DEVICE_NAME);
2735         info!("Creating virtio-watchdog device: id = {}", id);
2736 
2737         let virtio_watchdog_device = Arc::new(Mutex::new(
2738             virtio_devices::Watchdog::new(
2739                 id.clone(),
2740                 self.reset_evt.try_clone().unwrap(),
2741                 self.seccomp_action.clone(),
2742                 self.exit_evt
2743                     .try_clone()
2744                     .map_err(DeviceManagerError::EventFd)?,
2745             )
2746             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
2747         ));
2748         devices.push((
2749             Arc::clone(&virtio_watchdog_device) as VirtioDeviceArc,
2750             false,
2751             id.clone(),
2752         ));
2753 
2754         self.device_tree
2755             .lock()
2756             .unwrap()
2757             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
2758 
2759         Ok(devices)
2760     }
2761 
2762     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
2763         let start_id = self.device_id_cnt;
2764         loop {
2765             // Generate the temporary name.
2766             let name = format!("{}{}", prefix, self.device_id_cnt);
2767             // Increment the counter.
2768             self.device_id_cnt += Wrapping(1);
2769             // Check if the name is already in use.
2770             if !self.device_tree.lock().unwrap().contains_key(&name) {
2771                 return Ok(name);
2772             }
2773 
2774             if self.device_id_cnt == start_id {
2775                 // We went through a full loop and there's nothing else we can
2776                 // do.
2777                 break;
2778             }
2779         }
2780         Err(DeviceManagerError::NoAvailableDeviceName)
2781     }
2782 
2783     fn add_passthrough_device(
2784         &mut self,
2785         pci: &mut PciBus,
2786         device_cfg: &mut DeviceConfig,
2787     ) -> DeviceManagerResult<(u32, String)> {
2788         // If the passthrough device has not been created yet, it is created
2789         // here and stored in the DeviceManager structure for future needs.
2790         if self.passthrough_device.is_none() {
2791             self.passthrough_device = Some(
2792                 self.address_manager
2793                     .vm
2794                     .create_passthrough_device()
2795                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
2796             );
2797         }
2798 
2799         self.add_vfio_device(pci, device_cfg)
2800     }
2801 
2802     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
2803         let passthrough_device = self
2804             .passthrough_device
2805             .as_ref()
2806             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
2807 
2808         // Safe because we know the RawFd is valid.
2809         //
2810         // This dup() is mandatory to be able to give full ownership of the
2811         // file descriptor to the DeviceFd::from_raw_fd() function later in
2812         // the code.
2813         //
2814         // This is particularly needed so that VfioContainer will still have
2815         // a valid file descriptor even if DeviceManager, and therefore the
2816         // passthrough_device are dropped. In case of Drop, the file descriptor
2817         // would be closed, but Linux would still have the duplicated file
2818         // descriptor opened from DeviceFd, preventing from unexpected behavior
2819         // where the VfioContainer would try to use a closed file descriptor.
2820         let dup_device_fd = unsafe { libc::dup(passthrough_device.as_raw_fd()) };
2821 
2822         // SAFETY the raw fd conversion here is safe because:
2823         //   1. When running on KVM or MSHV, passthrough_device wraps around DeviceFd.
2824         //   2. The conversion here extracts the raw fd and then turns the raw fd into a DeviceFd
2825         //      of the same (correct) type.
2826         Ok(Arc::new(
2827             VfioContainer::new(Arc::new(unsafe { DeviceFd::from_raw_fd(dup_device_fd) }))
2828                 .map_err(DeviceManagerError::VfioCreate)?,
2829         ))
2830     }
2831 
2832     fn add_vfio_device(
2833         &mut self,
2834         pci: &mut PciBus,
2835         device_cfg: &mut DeviceConfig,
2836     ) -> DeviceManagerResult<(u32, String)> {
2837         // We need to shift the device id since the 3 first bits
2838         // are dedicated to the PCI function, and we know we don't
2839         // do multifunction. Also, because we only support one PCI
2840         // bus, the bus 0, we don't need to add anything to the
2841         // global device ID.
2842         let pci_device_bdf = pci
2843             .next_device_id()
2844             .map_err(DeviceManagerError::NextPciDeviceId)?
2845             << 3;
2846 
2847         let mut needs_dma_mapping = false;
2848 
2849         // Here we create a new VFIO container for two reasons. Either this is
2850         // the first VFIO device, meaning we need a new VFIO container, which
2851         // will be shared with other VFIO devices. Or the new VFIO device is
2852         // attached to a vIOMMU, meaning we must create a dedicated VFIO
2853         // container. In the vIOMMU use case, we can't let all devices under
2854         // the same VFIO container since we couldn't map/unmap memory for each
2855         // device. That's simply because the map/unmap operations happen at the
2856         // VFIO container level.
2857         let vfio_container = if device_cfg.iommu {
2858             let vfio_container = self.create_vfio_container()?;
2859 
2860             let vfio_mapping = Arc::new(VfioDmaMapping::new(
2861                 Arc::clone(&vfio_container),
2862                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
2863             ));
2864 
2865             if let Some(iommu) = &self.iommu_device {
2866                 iommu
2867                     .lock()
2868                     .unwrap()
2869                     .add_external_mapping(pci_device_bdf, vfio_mapping);
2870             } else {
2871                 return Err(DeviceManagerError::MissingVirtualIommu);
2872             }
2873 
2874             vfio_container
2875         } else if let Some(vfio_container) = &self.vfio_container {
2876             Arc::clone(vfio_container)
2877         } else {
2878             let vfio_container = self.create_vfio_container()?;
2879             needs_dma_mapping = true;
2880             self.vfio_container = Some(Arc::clone(&vfio_container));
2881 
2882             vfio_container
2883         };
2884 
2885         let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
2886             .map_err(DeviceManagerError::VfioCreate)?;
2887 
2888         if needs_dma_mapping {
2889             // Register DMA mapping in IOMMU.
2890             // Do not register virtio-mem regions, as they are handled directly by
2891             // virtio-mem device itself.
2892             for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
2893                 for region in zone.regions() {
2894                     vfio_container
2895                         .vfio_dma_map(
2896                             region.start_addr().raw_value(),
2897                             region.len() as u64,
2898                             region.as_ptr() as u64,
2899                         )
2900                         .map_err(DeviceManagerError::VfioDmaMap)?;
2901                 }
2902             }
2903 
2904             let vfio_mapping = Arc::new(VfioDmaMapping::new(
2905                 Arc::clone(&vfio_container),
2906                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
2907             ));
2908 
2909             for virtio_mem_device in self.virtio_mem_devices.iter() {
2910                 virtio_mem_device
2911                     .lock()
2912                     .unwrap()
2913                     .add_dma_mapping_handler(vfio_mapping.clone())
2914                     .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
2915             }
2916         }
2917 
2918         let legacy_interrupt_group = if let Some(legacy_interrupt_manager) =
2919             &self.legacy_interrupt_manager
2920         {
2921             Some(
2922                 legacy_interrupt_manager
2923                     .create_group(LegacyIrqGroupConfig {
2924                         irq: self.pci_irq_slots[(pci_device_bdf >> 3) as usize] as InterruptIndex,
2925                     })
2926                     .map_err(DeviceManagerError::CreateInterruptGroup)?,
2927             )
2928         } else {
2929             None
2930         };
2931 
2932         let mut vfio_pci_device = VfioPciDevice::new(
2933             &self.address_manager.vm,
2934             vfio_device,
2935             vfio_container,
2936             &self.msi_interrupt_manager,
2937             legacy_interrupt_group,
2938             device_cfg.iommu,
2939         )
2940         .map_err(DeviceManagerError::VfioPciCreate)?;
2941 
2942         let vfio_name = if let Some(id) = &device_cfg.id {
2943             if self.device_tree.lock().unwrap().contains_key(id) {
2944                 return Err(DeviceManagerError::DeviceIdAlreadyInUse);
2945             }
2946 
2947             id.clone()
2948         } else {
2949             let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
2950             device_cfg.id = Some(id.clone());
2951             id
2952         };
2953 
2954         vfio_pci_device
2955             .map_mmio_regions(&self.address_manager.vm, || {
2956                 self.memory_manager.lock().unwrap().allocate_memory_slot()
2957             })
2958             .map_err(DeviceManagerError::VfioMapRegion)?;
2959 
2960         let mut node = device_node!(vfio_name);
2961 
2962         for region in vfio_pci_device.mmio_regions() {
2963             node.resources.push(Resource::MmioAddressRange {
2964                 base: region.start.0,
2965                 size: region.length as u64,
2966             });
2967         }
2968 
2969         let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));
2970 
2971         self.add_pci_device(
2972             pci,
2973             vfio_pci_device.clone(),
2974             vfio_pci_device.clone(),
2975             pci_device_bdf,
2976         )?;
2977 
2978         node.pci_bdf = Some(pci_device_bdf);
2979         node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));
2980 
2981         self.device_tree
2982             .lock()
2983             .unwrap()
2984             .insert(vfio_name.clone(), node);
2985 
2986         Ok((pci_device_bdf, vfio_name))
2987     }
2988 
2989     fn add_pci_device(
2990         &mut self,
2991         pci_bus: &mut PciBus,
2992         bus_device: Arc<Mutex<dyn BusDevice>>,
2993         pci_device: Arc<Mutex<dyn PciDevice>>,
2994         bdf: u32,
2995     ) -> DeviceManagerResult<Vec<(GuestAddress, GuestUsize, PciBarRegionType)>> {
2996         let bars = pci_device
2997             .lock()
2998             .unwrap()
2999             .allocate_bars(&mut self.address_manager.allocator.lock().unwrap())
3000             .map_err(DeviceManagerError::AllocateBars)?;
3001 
3002         pci_bus
3003             .add_device(bdf, pci_device)
3004             .map_err(DeviceManagerError::AddPciDevice)?;
3005 
3006         self.bus_devices.push(Arc::clone(&bus_device));
3007 
3008         pci_bus
3009             .register_mapping(
3010                 bus_device,
3011                 #[cfg(target_arch = "x86_64")]
3012                 self.address_manager.io_bus.as_ref(),
3013                 self.address_manager.mmio_bus.as_ref(),
3014                 bars.clone(),
3015             )
3016             .map_err(DeviceManagerError::AddPciDevice)?;
3017 
3018         Ok(bars)
3019     }
3020 
3021     fn add_vfio_devices(&mut self, pci: &mut PciBus) -> DeviceManagerResult<Vec<u32>> {
3022         let mut iommu_attached_device_ids = Vec::new();
3023         let mut devices = self.config.lock().unwrap().devices.clone();
3024 
3025         if let Some(device_list_cfg) = &mut devices {
3026             for device_cfg in device_list_cfg.iter_mut() {
3027                 let (device_id, _) = self.add_passthrough_device(pci, device_cfg)?;
3028                 if device_cfg.iommu && self.iommu_device.is_some() {
3029                     iommu_attached_device_ids.push(device_id);
3030                 }
3031             }
3032         }
3033 
3034         // Update the list of devices
3035         self.config.lock().unwrap().devices = devices;
3036 
3037         Ok(iommu_attached_device_ids)
3038     }
3039 
3040     fn add_vfio_user_device(
3041         &mut self,
3042         pci: &mut PciBus,
3043         device_cfg: &mut UserDeviceConfig,
3044     ) -> DeviceManagerResult<(u32, String)> {
3045         let pci_device_bdf = pci
3046             .next_device_id()
3047             .map_err(DeviceManagerError::NextPciDeviceId)?
3048             << 3;
3049 
3050         let legacy_interrupt_group = if let Some(legacy_interrupt_manager) =
3051             &self.legacy_interrupt_manager
3052         {
3053             Some(
3054                 legacy_interrupt_manager
3055                     .create_group(LegacyIrqGroupConfig {
3056                         irq: self.pci_irq_slots[(pci_device_bdf >> 3) as usize] as InterruptIndex,
3057                     })
3058                     .map_err(DeviceManagerError::CreateInterruptGroup)?,
3059             )
3060         } else {
3061             None
3062         };
3063 
3064         let mut vfio_user_pci_device = VfioUserPciDevice::new(
3065             &self.address_manager.vm,
3066             &device_cfg.socket,
3067             &self.msi_interrupt_manager,
3068             legacy_interrupt_group,
3069         )
3070         .map_err(DeviceManagerError::VfioUserCreate)?;
3071 
3072         vfio_user_pci_device
3073             .map_mmio_regions(&self.address_manager.vm, || {
3074                 self.memory_manager.lock().unwrap().allocate_memory_slot()
3075             })
3076             .map_err(DeviceManagerError::VfioUserMapRegion)?;
3077 
3078         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3079             for region in zone.regions() {
3080                 vfio_user_pci_device
3081                     .dma_map(region)
3082                     .map_err(DeviceManagerError::VfioUserDmaMap)?;
3083             }
3084         }
3085 
3086         let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));
3087 
3088         let vfio_user_name = if let Some(id) = &device_cfg.id {
3089             if self.device_tree.lock().unwrap().contains_key(id) {
3090                 return Err(DeviceManagerError::DeviceIdAlreadyInUse);
3091             }
3092 
3093             id.clone()
3094         } else {
3095             let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
3096             device_cfg.id = Some(id.clone());
3097             id
3098         };
3099 
3100         self.add_pci_device(
3101             pci,
3102             vfio_user_pci_device.clone(),
3103             vfio_user_pci_device.clone(),
3104             pci_device_bdf,
3105         )?;
3106 
3107         let mut node = device_node!(vfio_user_name);
3108 
3109         node.pci_bdf = Some(pci_device_bdf);
3110         node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));
3111 
3112         self.device_tree
3113             .lock()
3114             .unwrap()
3115             .insert(vfio_user_name.clone(), node);
3116 
3117         Ok((pci_device_bdf, vfio_user_name))
3118     }
3119 
3120     fn add_user_devices(&mut self, pci: &mut PciBus) -> DeviceManagerResult<Vec<u32>> {
3121         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3122 
3123         if let Some(device_list_cfg) = &mut user_devices {
3124             for device_cfg in device_list_cfg.iter_mut() {
3125                 let (_device_id, _id) = self.add_vfio_user_device(pci, device_cfg)?;
3126             }
3127         }
3128 
3129         // Update the list of devices
3130         self.config.lock().unwrap().user_devices = user_devices;
3131 
3132         Ok(vec![])
3133     }
3134 
3135     fn add_virtio_pci_device(
3136         &mut self,
3137         virtio_device: VirtioDeviceArc,
3138         pci: &mut PciBus,
3139         iommu_mapping: &Option<Arc<IommuMapping>>,
3140         virtio_device_id: String,
3141     ) -> DeviceManagerResult<u32> {
3142         let id = format!("{}-{}", VIRTIO_PCI_DEVICE_NAME_PREFIX, virtio_device_id);
3143 
3144         // Add the new virtio-pci node to the device tree.
3145         let mut node = device_node!(id);
3146         node.children = vec![virtio_device_id.clone()];
3147 
3148         // Look for the id in the device tree. If it can be found, that means
3149         // the device is being restored, otherwise it's created from scratch.
3150         let (pci_device_bdf, config_bar_addr) =
3151             if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
3152                 info!("Restoring virtio-pci {} resources", id);
3153                 let pci_device_bdf = node
3154                     .pci_bdf
3155                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3156 
3157                 pci.get_device_id((pci_device_bdf >> 3) as usize)
3158                     .map_err(DeviceManagerError::GetPciDeviceId)?;
3159 
3160                 if node.resources.is_empty() {
3161                     return Err(DeviceManagerError::MissingVirtioPciResources);
3162                 }
3163 
3164                 // We know the configuration BAR address is stored on the first
3165                 // resource in the list.
3166                 let config_bar_addr = match node.resources[0] {
3167                     Resource::MmioAddressRange { base, .. } => Some(base),
3168                     _ => {
3169                         error!("Unexpected resource {:?} for {}", node.resources[0], id);
3170                         return Err(DeviceManagerError::MissingVirtioPciResources);
3171                     }
3172                 };
3173 
3174                 (pci_device_bdf, config_bar_addr)
3175             } else {
3176                 // We need to shift the device id since the 3 first bits are dedicated
3177                 // to the PCI function, and we know we don't do multifunction.
3178                 // Also, because we only support one PCI bus, the bus 0, we don't need
3179                 // to add anything to the global device ID.
3180                 let pci_device_bdf = pci
3181                     .next_device_id()
3182                     .map_err(DeviceManagerError::NextPciDeviceId)?
3183                     << 3;
3184 
3185                 (pci_device_bdf, None)
3186             };
3187 
3188         // Update the existing virtio node by setting the parent.
3189         if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
3190             node.parent = Some(id.clone());
3191         } else {
3192             return Err(DeviceManagerError::MissingNode);
3193         }
3194 
3195         // Allows support for one MSI-X vector per queue. It also adds 1
3196         // as we need to take into account the dedicated vector to notify
3197         // about a virtio config change.
3198         let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;
3199 
3200         // Create the callback from the implementation of the DmaRemapping
3201         // trait. The point with the callback is to simplify the code as we
3202         // know about the device ID from this point.
3203         let iommu_mapping_cb: Option<Arc<VirtioIommuRemapping>> =
3204             if let Some(mapping) = iommu_mapping {
3205                 let mapping_clone = mapping.clone();
3206                 Some(Arc::new(Box::new(move |addr: u64| {
3207                     mapping_clone.translate(pci_device_bdf, addr).map_err(|e| {
3208                         std::io::Error::new(
3209                             std::io::ErrorKind::Other,
3210                             format!(
3211                                 "failed to translate addr 0x{:x} for device 00:{:02x}.0 {}",
3212                                 addr, pci_device_bdf, e
3213                             ),
3214                         )
3215                     })
3216                 }) as VirtioIommuRemapping))
3217             } else {
3218                 None
3219             };
3220 
3221         let memory = self.memory_manager.lock().unwrap().guest_memory();
3222         let mut virtio_pci_device = VirtioPciDevice::new(
3223             id.clone(),
3224             memory,
3225             virtio_device,
3226             msix_num,
3227             iommu_mapping_cb,
3228             &self.msi_interrupt_manager,
3229             pci_device_bdf,
3230             self.activate_evt
3231                 .try_clone()
3232                 .map_err(DeviceManagerError::EventFd)?,
3233         )
3234         .map_err(DeviceManagerError::VirtioDevice)?;
3235 
3236         // This is important as this will set the BAR address if it exists,
3237         // which is mandatory on the restore path.
3238         if let Some(addr) = config_bar_addr {
3239             virtio_pci_device.set_config_bar_addr(addr);
3240         }
3241 
3242         let virtio_pci_device = Arc::new(Mutex::new(virtio_pci_device));
3243         let bars = self.add_pci_device(
3244             pci,
3245             virtio_pci_device.clone(),
3246             virtio_pci_device.clone(),
3247             pci_device_bdf,
3248         )?;
3249 
3250         let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3251         for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3252             let io_addr = IoEventAddress::Mmio(addr);
3253             self.address_manager
3254                 .vm
3255                 .register_ioevent(event, &io_addr, None)
3256                 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
3257         }
3258 
3259         // Update the device tree with correct resource information.
3260         for pci_bar in bars.iter() {
3261             node.resources.push(Resource::MmioAddressRange {
3262                 base: pci_bar.0.raw_value(),
3263                 size: pci_bar.1 as u64,
3264             });
3265         }
3266         node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
3267         node.pci_bdf = Some(pci_device_bdf);
3268         node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
3269         self.device_tree.lock().unwrap().insert(id, node);
3270 
3271         Ok(pci_device_bdf)
3272     }
3273 
3274     #[cfg(target_arch = "x86_64")]
3275     pub fn io_bus(&self) -> &Arc<Bus> {
3276         &self.address_manager.io_bus
3277     }
3278 
3279     pub fn mmio_bus(&self) -> &Arc<Bus> {
3280         &self.address_manager.mmio_bus
3281     }
3282 
3283     pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
3284         &self.address_manager.allocator
3285     }
3286 
3287     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3288         self.interrupt_controller
3289             .as_ref()
3290             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3291     }
3292 
3293     pub fn console(&self) -> &Arc<Console> {
3294         &self.console
3295     }
3296 
3297     pub fn cmdline_additions(&self) -> &[String] {
3298         self.cmdline_additions.as_slice()
3299     }
3300 
3301     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3302         for (virtio_device, _, _) in self.virtio_devices.iter() {
3303             virtio_device
3304                 .lock()
3305                 .unwrap()
3306                 .add_memory_region(new_region)
3307                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3308         }
3309 
3310         // Take care of updating the memory for VFIO PCI devices.
3311         if let Some(vfio_container) = &self.vfio_container {
3312             vfio_container
3313                 .vfio_dma_map(
3314                     new_region.start_addr().raw_value(),
3315                     new_region.len() as u64,
3316                     new_region.as_ptr() as u64,
3317                 )
3318                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3319         }
3320 
3321         #[allow(clippy::single_match)]
3322         // Take care of updating the memory for vfio-user devices.
3323         {
3324             let device_tree = self.device_tree.lock().unwrap();
3325             for pci_device_node in device_tree.pci_devices() {
3326                 match pci_device_node
3327                     .pci_device_handle
3328                     .as_ref()
3329                     .ok_or(DeviceManagerError::MissingPciDevice)?
3330                 {
3331                     PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
3332                         vfio_user_pci_device
3333                             .lock()
3334                             .unwrap()
3335                             .dma_map(new_region)
3336                             .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
3337                     }
3338                     _ => {}
3339                 }
3340             }
3341         }
3342 
3343         Ok(())
3344     }
3345 
3346     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3347         // Find virtio pci devices and activate any pending ones
3348         let device_tree = self.device_tree.lock().unwrap();
3349         for pci_device_node in device_tree.pci_devices() {
3350             #[allow(irrefutable_let_patterns)]
3351             if let PciDeviceHandle::Virtio(virtio_pci_device) = &pci_device_node
3352                 .pci_device_handle
3353                 .as_ref()
3354                 .ok_or(DeviceManagerError::MissingPciDevice)?
3355             {
3356                 virtio_pci_device.lock().unwrap().maybe_activate();
3357             }
3358         }
3359         Ok(())
3360     }
3361 
3362     pub fn notify_hotplug(
3363         &self,
3364         _notification_type: AcpiNotificationFlags,
3365     ) -> DeviceManagerResult<()> {
3366         #[cfg(feature = "acpi")]
3367         return self
3368             .ged_notification_device
3369             .as_ref()
3370             .unwrap()
3371             .lock()
3372             .unwrap()
3373             .notify(_notification_type)
3374             .map_err(DeviceManagerError::HotPlugNotification);
3375         #[cfg(not(feature = "acpi"))]
3376         return Ok(());
3377     }
3378 
3379     pub fn add_device(
3380         &mut self,
3381         device_cfg: &mut DeviceConfig,
3382     ) -> DeviceManagerResult<PciDeviceInfo> {
3383         let pci = if let Some(pci_bus) = &self.pci_bus {
3384             Arc::clone(pci_bus)
3385         } else {
3386             return Err(DeviceManagerError::NoPciBus);
3387         };
3388 
3389         let (device_id, device_name) =
3390             self.add_passthrough_device(&mut pci.lock().unwrap(), device_cfg)?;
3391 
3392         // Update the PCIU bitmap
3393         self.pci_devices_up |= 1 << (device_id >> 3);
3394 
3395         Ok(PciDeviceInfo {
3396             id: device_name,
3397             bdf: device_id,
3398         })
3399     }
3400 
3401     pub fn add_user_device(
3402         &mut self,
3403         device_cfg: &mut UserDeviceConfig,
3404     ) -> DeviceManagerResult<PciDeviceInfo> {
3405         let pci = if let Some(pci_bus) = &self.pci_bus {
3406             Arc::clone(pci_bus)
3407         } else {
3408             return Err(DeviceManagerError::NoPciBus);
3409         };
3410 
3411         let (device_id, device_name) =
3412             self.add_vfio_user_device(&mut pci.lock().unwrap(), device_cfg)?;
3413 
3414         // Update the PCIU bitmap
3415         self.pci_devices_up |= 1 << (device_id >> 3);
3416 
3417         Ok(PciDeviceInfo {
3418             id: device_name,
3419             bdf: device_id,
3420         })
3421     }
3422 
3423     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
3424         // The node can be directly a PCI node in case the 'id' refers to a
3425         // VFIO device or a virtio-pci one.
3426         // In case the 'id' refers to a virtio device, we must find the PCI
3427         // node by looking at the parent.
3428         let device_tree = self.device_tree.lock().unwrap();
3429         let node = device_tree
3430             .get(&id)
3431             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
3432 
3433         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
3434             node
3435         } else {
3436             let parent = node
3437                 .parent
3438                 .as_ref()
3439                 .ok_or(DeviceManagerError::MissingNode)?;
3440             device_tree
3441                 .get(parent)
3442                 .ok_or(DeviceManagerError::MissingNode)?
3443         };
3444 
3445         let pci_device_bdf = pci_device_node
3446             .pci_bdf
3447             .ok_or(DeviceManagerError::MissingPciDevice)?;
3448         let pci_device_handle = pci_device_node
3449             .pci_device_handle
3450             .as_ref()
3451             .ok_or(DeviceManagerError::MissingPciDevice)?;
3452         #[allow(irrefutable_let_patterns)]
3453         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
3454             let device_type = VirtioDeviceType::from(
3455                 virtio_pci_device
3456                     .lock()
3457                     .unwrap()
3458                     .virtio_device()
3459                     .lock()
3460                     .unwrap()
3461                     .device_type(),
3462             );
3463             match device_type {
3464                 VirtioDeviceType::Net
3465                 | VirtioDeviceType::Block
3466                 | VirtioDeviceType::Pmem
3467                 | VirtioDeviceType::Fs
3468                 | VirtioDeviceType::Vsock => {}
3469                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
3470             }
3471         }
3472 
3473         // Update the PCID bitmap
3474         self.pci_devices_down |= 1 << (pci_device_bdf >> 3);
3475 
3476         Ok(())
3477     }
3478 
3479     pub fn eject_device(&mut self, device_id: u8) -> DeviceManagerResult<()> {
3480         // Retrieve the PCI bus.
3481         let pci = if let Some(pci_bus) = &self.pci_bus {
3482             Arc::clone(pci_bus)
3483         } else {
3484             return Err(DeviceManagerError::NoPciBus);
3485         };
3486 
3487         // Convert the device ID into the corresponding b/d/f.
3488         let pci_device_bdf = (device_id as u32) << 3;
3489 
3490         // Give the PCI device ID back to the PCI bus.
3491         pci.lock()
3492             .unwrap()
3493             .put_device_id(device_id as usize)
3494             .map_err(DeviceManagerError::PutPciDeviceId)?;
3495 
3496         // Remove the device from the device tree along with its children.
3497         let mut device_tree = self.device_tree.lock().unwrap();
3498         let pci_device_node = device_tree
3499             .remove_node_by_pci_bdf(pci_device_bdf)
3500             .ok_or(DeviceManagerError::MissingPciDevice)?;
3501         for child in pci_device_node.children.iter() {
3502             device_tree.remove(child);
3503         }
3504 
3505         let pci_device_handle = pci_device_node
3506             .pci_device_handle
3507             .ok_or(DeviceManagerError::MissingPciDevice)?;
3508         let (pci_device, bus_device, virtio_device) = match pci_device_handle {
3509             PciDeviceHandle::Vfio(vfio_pci_device) => (
3510                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
3511                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
3512                 None as Option<VirtioDeviceArc>,
3513             ),
3514             PciDeviceHandle::Virtio(virtio_pci_device) => {
3515                 let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3516                 for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3517                     let io_addr = IoEventAddress::Mmio(addr);
3518                     self.address_manager
3519                         .vm
3520                         .unregister_ioevent(event, &io_addr)
3521                         .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
3522                 }
3523 
3524                 (
3525                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
3526                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
3527                     Some(virtio_pci_device.lock().unwrap().virtio_device()),
3528                 )
3529             }
3530             PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
3531                 let mut dev = vfio_user_pci_device.lock().unwrap();
3532                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3533                     for region in zone.regions() {
3534                         dev.dma_unmap(region)
3535                             .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
3536                     }
3537                 }
3538 
3539                 (
3540                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
3541                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
3542                     None as Option<VirtioDeviceArc>,
3543                 )
3544             }
3545         };
3546 
3547         // Free the allocated BARs
3548         pci_device
3549             .lock()
3550             .unwrap()
3551             .free_bars(&mut self.address_manager.allocator.lock().unwrap())
3552             .map_err(DeviceManagerError::FreePciBars)?;
3553 
3554         // Remove the device from the PCI bus
3555         pci.lock()
3556             .unwrap()
3557             .remove_by_device(&pci_device)
3558             .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;
3559 
3560         #[cfg(target_arch = "x86_64")]
3561         // Remove the device from the IO bus
3562         self.io_bus()
3563             .remove_by_device(&bus_device)
3564             .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;
3565 
3566         // Remove the device from the MMIO bus
3567         self.mmio_bus()
3568             .remove_by_device(&bus_device)
3569             .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;
3570 
3571         // Remove the device from the list of BusDevice held by the
3572         // DeviceManager.
3573         self.bus_devices
3574             .retain(|dev| !Arc::ptr_eq(dev, &bus_device));
3575 
3576         // Shutdown and remove the underlying virtio-device if present
3577         if let Some(virtio_device) = virtio_device {
3578             for mapping in virtio_device.lock().unwrap().userspace_mappings() {
3579                 self.memory_manager
3580                     .lock()
3581                     .unwrap()
3582                     .remove_userspace_mapping(
3583                         mapping.addr.raw_value(),
3584                         mapping.len,
3585                         mapping.host_addr,
3586                         mapping.mergeable,
3587                         mapping.mem_slot,
3588                     )
3589                     .map_err(DeviceManagerError::MemoryManager)?;
3590             }
3591 
3592             virtio_device.lock().unwrap().shutdown();
3593 
3594             self.virtio_devices
3595                 .retain(|(d, _, _)| !Arc::ptr_eq(d, &virtio_device));
3596         }
3597 
3598         // At this point, the device has been removed from all the list and
3599         // buses where it was stored. At the end of this function, after
3600         // any_device, bus_device and pci_device are released, the actual
3601         // device will be dropped.
3602         Ok(())
3603     }
3604 
3605     fn hotplug_virtio_pci_device(
3606         &mut self,
3607         device: VirtioDeviceArc,
3608         iommu_attached: bool,
3609         id: String,
3610     ) -> DeviceManagerResult<PciDeviceInfo> {
3611         if iommu_attached {
3612             warn!("Placing device behind vIOMMU is not available for hotplugged devices");
3613         }
3614 
3615         let pci = if let Some(pci_bus) = &self.pci_bus {
3616             Arc::clone(pci_bus)
3617         } else {
3618             return Err(DeviceManagerError::NoPciBus);
3619         };
3620 
3621         // Add the virtio device to the device manager list. This is important
3622         // as the list is used to notify virtio devices about memory updates
3623         // for instance.
3624         self.virtio_devices
3625             .push((device.clone(), iommu_attached, id.clone()));
3626 
3627         let device_id =
3628             self.add_virtio_pci_device(device, &mut pci.lock().unwrap(), &None, id.clone())?;
3629 
3630         // Update the PCIU bitmap
3631         self.pci_devices_up |= 1 << (device_id >> 3);
3632 
3633         Ok(PciDeviceInfo { id, bdf: device_id })
3634     }
3635 
3636     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
3637         let (device, iommu_attached, id) = self.make_virtio_block_device(disk_cfg)?;
3638         self.hotplug_virtio_pci_device(device, iommu_attached, id)
3639     }
3640 
3641     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
3642         let (device, iommu_attached, id) = self.make_virtio_fs_device(fs_cfg)?;
3643         self.hotplug_virtio_pci_device(device, iommu_attached, id)
3644     }
3645 
3646     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
3647         let (device, iommu_attached, id) = self.make_virtio_pmem_device(pmem_cfg)?;
3648         self.hotplug_virtio_pci_device(device, iommu_attached, id)
3649     }
3650 
3651     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
3652         let (device, iommu_attached, id) = self.make_virtio_net_device(net_cfg)?;
3653         self.hotplug_virtio_pci_device(device, iommu_attached, id)
3654     }
3655 
3656     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
3657         let (device, iommu_attached, id) = self.make_virtio_vsock_device(vsock_cfg)?;
3658         self.hotplug_virtio_pci_device(device, iommu_attached, id)
3659     }
3660 
3661     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
3662         let mut counters = HashMap::new();
3663 
3664         for (virtio_device, _, id) in &self.virtio_devices {
3665             let virtio_device = virtio_device.lock().unwrap();
3666             if let Some(device_counters) = virtio_device.counters() {
3667                 counters.insert(id.clone(), device_counters.clone());
3668             }
3669         }
3670 
3671         counters
3672     }
3673 
3674     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
3675         if let Some(balloon) = &self.balloon {
3676             return balloon
3677                 .lock()
3678                 .unwrap()
3679                 .resize(size)
3680                 .map_err(DeviceManagerError::VirtioBalloonResize);
3681         }
3682 
3683         warn!("No balloon setup: Can't resize the balloon");
3684         Err(DeviceManagerError::MissingVirtioBalloon)
3685     }
3686 
3687     pub fn balloon_size(&self) -> u64 {
3688         if let Some(balloon) = &self.balloon {
3689             return balloon.lock().unwrap().get_actual();
3690         }
3691 
3692         0
3693     }
3694 
3695     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
3696         self.device_tree.clone()
3697     }
3698 
3699     pub fn restore_devices(
3700         &mut self,
3701         snapshot: Snapshot,
3702     ) -> std::result::Result<(), MigratableError> {
3703         // Finally, restore all devices associated with the DeviceManager.
3704         // It's important to restore devices in the right order, that's why
3705         // the device tree is the right way to ensure we restore a child before
3706         // its parent node.
3707         for node in self
3708             .device_tree
3709             .lock()
3710             .unwrap()
3711             .breadth_first_traversal()
3712             .rev()
3713         {
3714             // Restore the node
3715             if let Some(migratable) = &node.migratable {
3716                 info!("Restoring {} from DeviceManager", node.id);
3717                 if let Some(snapshot) = snapshot.snapshots.get(&node.id) {
3718                     migratable.lock().unwrap().pause()?;
3719                     migratable.lock().unwrap().restore(*snapshot.clone())?;
3720                 } else {
3721                     return Err(MigratableError::Restore(anyhow!(
3722                         "Missing device {}",
3723                         node.id
3724                     )));
3725                 }
3726             }
3727         }
3728 
3729         // The devices have been fully restored, we can now update the
3730         // restoring state of the DeviceManager.
3731         self.restoring = false;
3732 
3733         Ok(())
3734     }
3735 
3736     #[cfg(feature = "acpi")]
3737     #[cfg(target_arch = "x86_64")]
3738     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
3739         self.ged_notification_device
3740             .as_ref()
3741             .unwrap()
3742             .lock()
3743             .unwrap()
3744             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
3745             .map_err(DeviceManagerError::PowerButtonNotification)
3746     }
3747 
3748     #[cfg(target_arch = "aarch64")]
3749     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
3750         // There are three use cases:
3751         // 1. The Cloud Hypervisor is built without feature acpi.
3752         // 2. The Cloud Hypervisor is built with feature acpi, but users will
3753         // use direct kernel boot with device tree.
3754         // 3. The Cloud Hypervisor is built with feature acpi, and users will
3755         // use ACPI+UEFI boot.
3756         #[cfg(not(feature = "acpi"))]
3757         // The `return` here will trigger a GPIO pin 3 event, which will trigger
3758         // a power button event for use case 1.
3759         return self
3760             .gpio_device
3761             .as_ref()
3762             .unwrap()
3763             .lock()
3764             .unwrap()
3765             .trigger_key(3)
3766             .map_err(DeviceManagerError::AArch64PowerButtonNotification);
3767         #[cfg(feature = "acpi")]
3768         {
3769             // Trigger a GPIO pin 3 event to satisify use case 2.
3770             self.gpio_device
3771                 .as_ref()
3772                 .unwrap()
3773                 .lock()
3774                 .unwrap()
3775                 .trigger_key(3)
3776                 .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
3777             // Trigger a GED power button event to satisify use case 3.
3778             return self
3779                 .ged_notification_device
3780                 .as_ref()
3781                 .unwrap()
3782                 .lock()
3783                 .unwrap()
3784                 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
3785                 .map_err(DeviceManagerError::PowerButtonNotification);
3786         }
3787     }
3788 
3789     pub fn iommu_attached_devices(&self) -> &Option<(u32, Vec<u32>)> {
3790         &self.iommu_attached_devices
3791     }
3792 }
3793 
/// Returns the id of the first NUMA node (in iteration order) whose
/// `memory_zones` list contains `memory_zone_id`, or `None` when no node
/// references that zone.
#[cfg(any(target_arch = "aarch64", feature = "acpi"))]
fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
    numa_nodes
        .iter()
        .find(|(_, numa_node)| {
            // Compare borrowed strings rather than allocating an owned copy
            // of `memory_zone_id` for every node.
            numa_node
                .memory_zones
                .iter()
                .any(|zone| zone.as_str() == memory_zone_id)
        })
        .map(|(numa_node_id, _)| *numa_node_id)
}
3804 
/// ACPI description of a single PCI device slot.
#[cfg(feature = "acpi")]
struct PciDevSlot {
    // Slot (device) number on the PCI bus.
    device_id: u8,
}

#[cfg(feature = "acpi")]
impl Aml for PciDevSlot {
    /// Serializes a `Sxxx` AML device (xxx being the zero-padded slot
    /// number) holding:
    /// * `_SUN`, set to the slot number,
    /// * `_ADR`, set to the slot number shifted left by 16 bits,
    /// * an `_EJ0` method calling `\_SB_.PHPR.PCEJ` with `_SUN`.
    fn to_aml_bytes(&self) -> Vec<u8> {
        let slot_number = self.device_id;
        let address = u32::from(self.device_id) << 16;

        let sun_name = aml::Name::new("_SUN".into(), &slot_number);
        let adr_name = aml::Name::new("_ADR".into(), &address);

        let sun_path = aml::Path::new("_SUN");
        let eject_call = aml::MethodCall::new("\\_SB_.PHPR.PCEJ".into(), vec![&sun_path]);
        let eject_method = aml::Method::new("_EJ0".into(), 1, true, vec![&eject_call]);

        aml::Device::new(
            format!("S{:03}", self.device_id).as_str().into(),
            vec![&sun_name, &adr_name, &eject_method],
        )
        .to_aml_bytes()
    }
}
3834 
/// AML fragment notifying one PCI slot when its bit is set in a bitmap.
#[cfg(feature = "acpi")]
struct PciDevSlotNotify {
    // Slot (device) number on the PCI bus; also the bit index in the bitmap.
    device_id: u8,
}

#[cfg(feature = "acpi")]
impl Aml for PciDevSlotNotify {
    /// Serializes an `And` operation over `Local0`, `Arg0` and the slot
    /// mask, followed by an `If (Local0 == mask)` block that notifies the
    /// `Sxxx` slot object with `Arg1`.
    // NOTE(review): presumably the `And` stores `Arg0 & mask` into `Local0`;
    // confirm the operand order against the `aml::And` definition.
    fn to_aml_bytes(&self) -> Vec<u8> {
        let slot_mask: u32 = 1 << self.device_id;
        let slot_object = aml::Path::new(&format!("S{:03}", self.device_id));

        let local0 = aml::Local(0);
        let arg0 = aml::Arg(0);
        let arg1 = aml::Arg(1);

        let mut bytes = aml::And::new(&local0, &arg0, &slot_mask).to_aml_bytes();

        let slot_selected = aml::Equal::new(&local0, &slot_mask);
        let notify_slot = aml::Notify::new(&slot_object, &arg1);
        bytes.extend(aml::If::new(&slot_selected, vec![&notify_slot]).to_aml_bytes());

        bytes
    }
}
3856 
/// AML methods dispatching hotplug notifications to the PCI slots.
#[cfg(feature = "acpi")]
struct PciDevSlotMethods {}

#[cfg(feature = "acpi")]
impl Aml for PciDevSlotMethods {
    /// Serializes two AML methods:
    /// * `DVNT` (2 arguments), made of one `PciDevSlotNotify` fragment for
    ///   each of the 32 slots,
    /// * `PCNT` (no argument), which calls `DVNT` with the `PCIU` field and
    ///   the value 1, then with the `PCID` field and the value 3.
    // NOTE(review): 1 and 3 are the ACPI "Device Check" and "Eject Request"
    // notification values per the ACPI specification.
    fn to_aml_bytes(&self) -> Vec<u8> {
        let slot_notifies: Vec<PciDevSlotNotify> = (0..32)
            .map(|device_id| PciDevSlotNotify { device_id })
            .collect();
        let slot_notify_refs: Vec<&dyn aml::Aml> = slot_notifies
            .iter()
            .map(|slot_notify| slot_notify as &dyn aml::Aml)
            .collect();

        let mut bytes = aml::Method::new("DVNT".into(), 2, true, slot_notify_refs).to_aml_bytes();

        let devices_up = aml::Path::new("\\_SB_.PHPR.PCIU");
        let devices_down = aml::Path::new("\\_SB_.PHPR.PCID");
        let notify_up = aml::MethodCall::new("DVNT".into(), vec![&devices_up, &aml::ONE]);
        let notify_down = aml::MethodCall::new("DVNT".into(), vec![&devices_down, &3usize]);

        bytes.extend(
            aml::Method::new("PCNT".into(), 0, true, vec![&notify_up, &notify_down])
                .to_aml_bytes(),
        );

        bytes
    }
}
3897 
/// AML `_DSM` (Device Specific Method) for the PCI device labeling
/// interface.
#[cfg(feature = "acpi")]
struct PciDsmMethod {}

#[cfg(feature = "acpi")]
impl Aml for PciDsmMethod {
    /// Serializes the `_DSM` method.
    ///
    /// Refer to ACPI spec v6.3 Ch 9.1.1 and PCI Firmware spec v3.3 Ch 4.6.1.
    /// The generated method is equivalent to the following ASL:
    ///
    /// ```text
    /// Method (_DSM, 4, NotSerialized)  // _DSM: Device-Specific Method
    /// {
    ///     // Device Labeling Interface
    ///     If ((Arg0 == ToUUID ("e5c937d0-3553-4d7a-9117-ea4d19c3434d")))
    ///     {
    ///         If ((Arg2 == Zero))
    ///         {
    ///             Return (Buffer (One) { 0x21 })
    ///         }
    ///         If ((Arg2 == 0x05))
    ///         {
    ///             Return (Zero)
    ///         }
    ///     }
    ///
    ///     Return (Buffer (One) { 0x00 })
    /// }
    /// ```
    fn to_aml_bytes(&self) -> Vec<u8> {
        // As per ACPI v6.3 Ch 19.6.142, the UUID is required to be in mixed
        // endian. Among the fields of a UUID:
        //   {d1 (8 digits)} - {d2 (4 digits)} - {d3 (4 digits)} - {d4 (16 digits)}
        // d1 ~ d3 need to be little endian, d4 be big endian.
        // See https://en.wikipedia.org/wiki/Universally_unique_identifier#Encoding .
        let uuid = Uuid::parse_str("E5C937D0-3553-4D7A-9117-EA4D19C3434D").unwrap();
        let (d1, d2, d3, d4) = uuid.as_fields();
        let mut uuid_bytes = Vec::with_capacity(16);
        uuid_bytes.extend_from_slice(&d1.to_le_bytes());
        uuid_bytes.extend_from_slice(&d2.to_le_bytes());
        uuid_bytes.extend_from_slice(&d3.to_le_bytes());
        uuid_bytes.extend_from_slice(d4);

        let arg0 = aml::Arg(0);
        let arg2 = aml::Arg(2);

        // If (Arg2 == Zero) { Return (Buffer { 0x21 }) }
        let is_function_zero = aml::Equal::new(&arg2, &aml::ZERO);
        let function_zero_buffer = aml::Buffer::new(vec![0x21]);
        let return_function_zero = aml::Return::new(&function_zero_buffer);
        let on_function_zero = aml::If::new(&is_function_zero, vec![&return_function_zero]);

        // If (Arg2 == 0x05) { Return (Zero) }
        let is_function_five = aml::Equal::new(&arg2, &0x05u8);
        let return_zero = aml::Return::new(&aml::ZERO);
        let on_function_five = aml::If::new(&is_function_five, vec![&return_zero]);

        // If (Arg0 == <UUID>) { ... }
        let uuid_buffer = aml::Buffer::new(uuid_bytes);
        let is_labeling_uuid = aml::Equal::new(&arg0, &uuid_buffer);
        let on_labeling_uuid = aml::If::new(
            &is_labeling_uuid,
            vec![&on_function_zero, &on_function_five],
        );

        // Return (Buffer { 0x00 })
        let fallback_buffer = aml::Buffer::new(vec![0]);
        let return_fallback = aml::Return::new(&fallback_buffer);

        aml::Method::new(
            "_DSM".into(),
            4,
            false,
            vec![&on_labeling_uuid, &return_fallback],
        )
        .to_aml_bytes()
    }
}
3962 
#[cfg(feature = "acpi")]
impl Aml for DeviceManager {
    /// Serialises the ACPI objects owned by the `DeviceManager` into AML bytes.
    ///
    /// The objects are appended in this order:
    /// 1. `_SB_.PHPR`: PCI hotplug controller (MMIO window at
    ///    `self.acpi_address`, the `PCIU`/`PCID`/`B0EJ` fields and the `PCEJ`
    ///    method),
    /// 2. `_SB_.PCI0`: PCI root device with its resources, 32 device slots
    ///    and the `_PRT` routing table,
    /// 3. `_SB_.MBRD`: fixed memory resource covering the PCI MMCONFIG window,
    /// 4. `_SB_.COM1`: serial port, only when the configured serial mode is
    ///    not `ConsoleOutputMode::Off`,
    /// 5. `_S5_`,
    /// 6. `_SB_.PWRB`: power button,
    /// 7. the AML generated by the GED notification device.
    ///
    /// # Panics
    ///
    /// Panics if one of the mutexes taken here is poisoned, if
    /// `ged_notification_device` is `None`, or (aarch64 only) if serial is
    /// enabled but no serial entry exists in the device info map.
    fn to_aml_bytes(&self) -> Vec<u8> {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        let mut bytes = Vec::new();

        // PCI hotplug controller
        bytes.extend_from_slice(
            &aml::Device::new(
                "_SB_.PHPR".into(),
                vec![
                    &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
                    &aml::Name::new("_STA".into(), &0x0bu8),
                    &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                    &aml::Mutex::new("BLCK".into(), 0),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                            aml::AddressSpaceCachable::NotCacheable,
                            true,
                            self.acpi_address.0 as u64,
                            self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        )]),
                    ),
                    // OpRegion and Fields map MMIO range into individual field values
                    &aml::OpRegion::new(
                        "PCST".into(),
                        aml::OpRegionSpace::SystemMemory,
                        self.acpi_address.0 as usize,
                        DEVICE_MANAGER_ACPI_SIZE,
                    ),
                    &aml::Field::new(
                        "PCST".into(),
                        aml::FieldAccessType::DWord,
                        aml::FieldUpdateRule::WriteAsZeroes,
                        vec![
                            aml::FieldEntry::Named(*b"PCIU", 32),
                            aml::FieldEntry::Named(*b"PCID", 32),
                            aml::FieldEntry::Named(*b"B0EJ", 32),
                        ],
                    ),
                    &aml::Method::new(
                        "PCEJ".into(),
                        1,
                        true,
                        vec![
                            // Take lock defined above
                            &aml::Acquire::new("BLCK".into(), 0xffff),
                            // Store `1 << Arg0` into the B0EJ MMIO field: the
                            // first argument is the number of the slot to
                            // eject, and the VMM decodes B0EJ writes as a
                            // per-slot bitmap.
                            &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                            // Release lock
                            &aml::Release::new("BLCK".into()),
                            // Return 0
                            &aml::Return::new(&aml::ZERO),
                        ],
                    ),
                ],
            )
            .to_aml_bytes(),
        );

        // Read both bounds of the device area under a single lock
        // acquisition so they always describe the same range.
        let (start_of_device_area, end_of_device_area) = {
            let memory_manager = self.memory_manager.lock().unwrap();
            (
                memory_manager.start_of_device_area().0,
                memory_manager.end_of_device_area().0,
            )
        };

        // Identification objects of the PCI root device.
        let hid = aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A08"));
        let cid = aml::Name::new("_CID".into(), &aml::EisaName::new("PNP0A03"));
        let adr = aml::Name::new("_ADR".into(), &aml::ZERO);
        let seg = aml::Name::new("_SEG".into(), &aml::ZERO);
        let uid = aml::Name::new("_UID".into(), &aml::ZERO);
        let supp = aml::Name::new("SUPP".into(), &aml::ZERO);

        // Since Cloud Hypervisor supports only one PCI bus, it can be tied
        // to the NUMA node 0. It's up to the user to organize the NUMA nodes
        // so that the PCI bus relates to the expected vCPUs and guest RAM.
        let proximity_domain = 0u32;
        let pxm_return = aml::Return::new(&proximity_domain);
        let pxm = aml::Method::new("_PXM".into(), 0, false, vec![&pxm_return]);

        let pci_dsm = PciDsmMethod {};

        let crs = aml::Name::new(
            "_CRS".into(),
            &aml::ResourceTemplate::new(vec![
                &aml::AddressSpace::new_bus_number(0x0u16, 0xffu16),
                #[cfg(target_arch = "x86_64")]
                &aml::Io::new(0xcf8, 0xcf8, 1, 0x8),
                #[cfg(target_arch = "aarch64")]
                &aml::Memory32Fixed::new(
                    true,
                    layout::PCI_MMCONFIG_START.0 as u32,
                    layout::PCI_MMCONFIG_SIZE as u32,
                ),
                &aml::AddressSpace::new_memory(
                    aml::AddressSpaceCachable::NotCacheable,
                    true,
                    layout::MEM_32BIT_DEVICES_START.0 as u32,
                    (layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE - 1) as u32,
                ),
                &aml::AddressSpace::new_memory(
                    aml::AddressSpaceCachable::NotCacheable,
                    true,
                    start_of_device_area,
                    end_of_device_area,
                ),
                #[cfg(target_arch = "x86_64")]
                &aml::AddressSpace::new_io(0u16, 0x0cf7u16),
                #[cfg(target_arch = "x86_64")]
                &aml::AddressSpace::new_io(0x0d00u16, 0xffffu16),
            ]),
        );

        // One AML object per possible device slot on the bus.
        let pci_devices: Vec<PciDevSlot> = (0..32)
            .map(|device_id| PciDevSlot { device_id })
            .collect();

        let pci_device_methods = PciDevSlotMethods {};

        // Build PCI routing table, listing IRQs assigned to PCI devices.
        // Each entry pairs an address (slot index in the upper 16 bits,
        // 0xffff in the lower 16 bits) with the IRQ recorded for that slot.
        let prt_entries: Vec<(u32, u32)> = self
            .pci_irq_slots
            .iter()
            .enumerate()
            .map(|(i, irq)| (((((i as u32) & 0x1fu32) << 16) | 0xffffu32), *irq as u32))
            .collect();
        let prt_packages: Vec<aml::Package> = prt_entries
            .iter()
            .map(|(bdf, irq)| aml::Package::new(vec![bdf, &0u8, &0u8, irq]))
            .collect();
        let prt_package_refs: Vec<&dyn Aml> = prt_packages
            .iter()
            .map(|item| item as &dyn Aml)
            .collect();
        let prt = aml::Name::new("_PRT".into(), &aml::Package::new(prt_package_refs));

        // Assemble the children of PCI0 in their final order.
        let mut pci_dsdt_inner_data: Vec<&dyn aml::Aml> = vec![
            &hid, &cid, &adr, &seg, &uid, &supp, &pxm, &pci_dsm, &crs,
        ];
        pci_dsdt_inner_data.extend(pci_devices.iter().map(|slot| slot as &dyn aml::Aml));
        pci_dsdt_inner_data.push(&pci_device_methods);
        pci_dsdt_inner_data.push(&prt);

        bytes.extend_from_slice(
            &aml::Device::new("_SB_.PCI0".into(), pci_dsdt_inner_data).to_aml_bytes(),
        );

        // Fixed memory resource covering the PCI MMCONFIG window.
        bytes.extend_from_slice(
            &aml::Device::new(
                "_SB_.MBRD".into(),
                vec![
                    &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                            true,
                            layout::PCI_MMCONFIG_START.0 as u32,
                            layout::PCI_MMCONFIG_SIZE as u32,
                        )]),
                    ),
                ],
            )
            .to_aml_bytes(),
        );

        // Serial device: only described (and only built) when serial is
        // enabled, so no placeholder IRQ is needed for the disabled case.
        let serial_enabled = self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off;
        if serial_enabled {
            #[cfg(target_arch = "x86_64")]
            let serial_irq = 4;
            #[cfg(target_arch = "aarch64")]
            let serial_irq = self
                .get_device_info()
                .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                .unwrap()
                .irq();

            bytes.extend_from_slice(
                &aml::Device::new(
                    "_SB_.COM1".into(),
                    vec![
                        &aml::Name::new(
                            "_HID".into(),
                            #[cfg(target_arch = "x86_64")]
                            &aml::EisaName::new("PNP0501"),
                            #[cfg(target_arch = "aarch64")]
                            &"ARMH0011",
                        ),
                        &aml::Name::new("_UID".into(), &aml::ZERO),
                        &aml::Name::new(
                            "_CRS".into(),
                            &aml::ResourceTemplate::new(vec![
                                &aml::Interrupt::new(true, true, false, false, serial_irq),
                                #[cfg(target_arch = "x86_64")]
                                &aml::Io::new(0x3f8, 0x3f8, 0, 0x8),
                                #[cfg(target_arch = "aarch64")]
                                &aml::Memory32Fixed::new(
                                    true,
                                    arch::layout::LEGACY_SERIAL_MAPPED_IO_START as u32,
                                    MMIO_LEN as u32,
                                ),
                            ]),
                        ),
                    ],
                )
                .to_aml_bytes(),
            );
        }

        // _S5_ object
        bytes.extend_from_slice(
            &aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(),
        );

        // Power button
        bytes.extend_from_slice(
            &aml::Device::new(
                "_SB_.PWRB".into(),
                vec![
                    &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                ],
            )
            .to_aml_bytes(),
        );

        // GED notification device
        bytes.extend_from_slice(
            &self
                .ged_notification_device
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .to_aml_bytes(),
        );

        bytes
    }
}
4210 
4211 impl Pausable for DeviceManager {
4212     fn pause(&mut self) -> result::Result<(), MigratableError> {
4213         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4214             if let Some(migratable) = &device_node.migratable {
4215                 migratable.lock().unwrap().pause()?;
4216             }
4217         }
4218         // On AArch64, the pause of device manager needs to trigger
4219         // a "pause" of GIC, which will flush the GIC pending tables
4220         // and ITS tables to guest RAM.
4221         #[cfg(target_arch = "aarch64")]
4222         {
4223             let gic_device = Arc::clone(
4224                 self.get_interrupt_controller()
4225                     .unwrap()
4226                     .lock()
4227                     .unwrap()
4228                     .get_gic_device()
4229                     .unwrap(),
4230             );
4231             if let Some(gicv3_its) = gic_device
4232                 .lock()
4233                 .unwrap()
4234                 .as_any_concrete_mut()
4235                 .downcast_mut::<KvmGicV3Its>()
4236             {
4237                 gicv3_its.pause()?;
4238             } else {
4239                 return Err(MigratableError::Pause(anyhow!(
4240                     "GicDevice downcast to KvmGicV3Its failed when pausing device manager!"
4241                 )));
4242             };
4243         };
4244 
4245         Ok(())
4246     }
4247 
4248     fn resume(&mut self) -> result::Result<(), MigratableError> {
4249         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4250             if let Some(migratable) = &device_node.migratable {
4251                 migratable.lock().unwrap().resume()?;
4252             }
4253         }
4254 
4255         Ok(())
4256     }
4257 }
4258 
4259 impl Snapshottable for DeviceManager {
4260     fn id(&self) -> String {
4261         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4262     }
4263 
4264     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4265         let mut snapshot = Snapshot::new(DEVICE_MANAGER_SNAPSHOT_ID);
4266 
4267         // We aggregate all devices snapshots.
4268         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4269             if let Some(migratable) = &device_node.migratable {
4270                 let device_snapshot = migratable.lock().unwrap().snapshot()?;
4271                 snapshot.add_snapshot(device_snapshot);
4272             }
4273         }
4274 
4275         // Then we store the DeviceManager state.
4276         snapshot.add_data_section(SnapshotDataSection::new_from_state(
4277             DEVICE_MANAGER_SNAPSHOT_ID,
4278             &self.state(),
4279         )?);
4280 
4281         Ok(snapshot)
4282     }
4283 
4284     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
4285         // Let's first restore the DeviceManager.
4286 
4287         self.set_state(&snapshot.to_state(DEVICE_MANAGER_SNAPSHOT_ID)?);
4288 
4289         // Now that DeviceManager is updated with the right states, it's time
4290         // to create the devices based on the configuration.
4291         self.create_devices(None, None, None)
4292             .map_err(|e| MigratableError::Restore(anyhow!("Could not create devices {:?}", e)))?;
4293 
4294         Ok(())
4295     }
4296 }
4297 
// `DeviceManager` overrides nothing from `Transportable`: the impl body is
// empty, so only what the trait itself provides is used.
impl Transportable for DeviceManager {}
4299 
4300 impl Migratable for DeviceManager {
4301     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4302         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4303             if let Some(migratable) = &device_node.migratable {
4304                 migratable.lock().unwrap().start_dirty_log()?;
4305             }
4306         }
4307         Ok(())
4308     }
4309 
4310     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4311         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4312             if let Some(migratable) = &device_node.migratable {
4313                 migratable.lock().unwrap().stop_dirty_log()?;
4314             }
4315         }
4316         Ok(())
4317     }
4318 
4319     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4320         let mut tables = Vec::new();
4321         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4322             if let Some(migratable) = &device_node.migratable {
4323                 tables.push(migratable.lock().unwrap().dirty_log()?);
4324             }
4325         }
4326         Ok(MemoryRangeTable::new_from_tables(tables))
4327     }
4328 
4329     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4330         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4331             if let Some(migratable) = &device_node.migratable {
4332                 migratable.lock().unwrap().complete_migration()?;
4333             }
4334         }
4335         Ok(())
4336     }
4337 }
4338 
// Byte offsets of the PCI hotplug registers, as matched against the `offset`
// argument of the `BusDevice` read/write handlers of `DeviceManager`. They
// follow the order of the three 32-bit fields (PCIU, PCID, B0EJ) declared in
// the "PCST" AML field list.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = 4;
const B0EJ_FIELD_OFFSET: u64 = 8;

// Width in bytes of each register; the handlers check the access length
// against these values.
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
4346 
4347 impl BusDevice for DeviceManager {
4348     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4349         match offset {
4350             PCIU_FIELD_OFFSET => {
4351                 assert!(data.len() == PCIU_FIELD_SIZE);
4352                 data.copy_from_slice(&self.pci_devices_up.to_le_bytes());
4353                 // Clear the PCIU bitmap
4354                 self.pci_devices_up = 0;
4355             }
4356             PCID_FIELD_OFFSET => {
4357                 assert!(data.len() == PCID_FIELD_SIZE);
4358                 data.copy_from_slice(&self.pci_devices_down.to_le_bytes());
4359                 // Clear the PCID bitmap
4360                 self.pci_devices_down = 0;
4361             }
4362             B0EJ_FIELD_OFFSET => {
4363                 assert!(data.len() == B0EJ_FIELD_SIZE);
4364                 // Always return an empty bitmap since the eject is always
4365                 // taken care of right away during a write access.
4366                 data.copy_from_slice(&[0, 0, 0, 0]);
4367             }
4368             _ => error!(
4369                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4370                 base, offset
4371             ),
4372         }
4373 
4374         debug!(
4375             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4376             base, offset, data
4377         )
4378     }
4379 
4380     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
4381         match offset {
4382             B0EJ_FIELD_OFFSET => {
4383                 assert!(data.len() == B0EJ_FIELD_SIZE);
4384                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4385                 data_array.copy_from_slice(data);
4386                 let device_bitmap = u32::from_le_bytes(data_array);
4387 
4388                 for device_id in 0..32 {
4389                     let mask = 1u32 << device_id;
4390                     if (device_bitmap & mask) == mask {
4391                         if let Err(e) = self.eject_device(device_id as u8) {
4392                             error!("Failed ejecting device {}: {:?}", device_id, e);
4393                         }
4394                     }
4395                 }
4396             }
4397             _ => error!(
4398                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4399                 base, offset
4400             ),
4401         }
4402 
4403         debug!(
4404             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4405             base, offset, data
4406         );
4407 
4408         None
4409     }
4410 }
4411 
4412 impl Drop for DeviceManager {
4413     fn drop(&mut self) {
4414         for (device, _, _) in self.virtio_devices.drain(..) {
4415             device.lock().unwrap().shutdown();
4416         }
4417     }
4418 }
4419