xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision f7f2f25a574b1b2dba22c094fc8226d404157d15)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use crate::config::{
13     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, VhostMode,
14     VmConfig, VsockConfig,
15 };
16 use crate::device_tree::{DeviceNode, DeviceTree};
17 #[cfg(feature = "kvm")]
18 use crate::interrupt::kvm::KvmMsiInterruptManager as MsiInterruptManager;
19 #[cfg(feature = "mshv")]
20 use crate::interrupt::mshv::MshvMsiInterruptManager as MsiInterruptManager;
21 use crate::interrupt::LegacyUserspaceInterruptManager;
22 #[cfg(feature = "acpi")]
23 use crate::memory_manager::MEMORY_MANAGER_ACPI_SIZE;
24 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager};
25 #[cfg(feature = "acpi")]
26 use crate::vm::NumaNodes;
27 use crate::GuestRegionMmap;
28 use crate::PciDeviceInfo;
29 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
30 #[cfg(feature = "acpi")]
31 use acpi_tables::{aml, aml::Aml};
32 use anyhow::anyhow;
33 #[cfg(feature = "acpi")]
34 use arch::layout;
35 #[cfg(target_arch = "x86_64")]
36 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
37 #[cfg(target_arch = "aarch64")]
38 use arch::{DeviceType, MmioDeviceInfo};
39 use block_util::{
40     async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
41     fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync,
42     raw_async::RawFileDisk, raw_sync::RawFileDiskSync, ImageType,
43 };
44 #[cfg(target_arch = "aarch64")]
45 use devices::gic;
46 #[cfg(target_arch = "x86_64")]
47 use devices::ioapic;
48 #[cfg(target_arch = "aarch64")]
49 use devices::legacy::Pl011;
50 #[cfg(target_arch = "x86_64")]
51 use devices::legacy::Serial;
52 use devices::{
53     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
54 };
55 #[cfg(feature = "kvm")]
56 use hypervisor::kvm_ioctls::*;
57 #[cfg(feature = "mshv")]
58 use hypervisor::IoEventAddress;
59 use libc::{
60     isatty, tcgetattr, tcsetattr, termios, ECHO, ICANON, ISIG, MAP_NORESERVE, MAP_PRIVATE,
61     MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW, TIOCGWINSZ,
62 };
63 #[cfg(feature = "kvm")]
64 use pci::VfioPciDevice;
65 use pci::{
66     DeviceRelocation, PciBarRegionType, PciBus, PciConfigIo, PciConfigMmio, PciDevice, PciRoot,
67 };
68 use seccomp::SeccompAction;
69 use std::collections::HashMap;
70 use std::convert::TryInto;
71 use std::fs::{read_link, File, OpenOptions};
72 use std::io::{self, sink, stdout, Seek, SeekFrom};
73 use std::mem::zeroed;
74 use std::num::Wrapping;
75 use std::os::unix::fs::OpenOptionsExt;
76 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
77 use std::path::PathBuf;
78 use std::result;
79 use std::sync::{Arc, Barrier, Mutex};
80 #[cfg(feature = "acpi")]
81 use uuid::Uuid;
82 #[cfg(feature = "kvm")]
83 use vfio_ioctls::{VfioContainer, VfioDevice};
84 use virtio_devices::transport::VirtioPciDevice;
85 use virtio_devices::transport::VirtioTransport;
86 use virtio_devices::vhost_user::VhostUserConfig;
87 use virtio_devices::{DmaRemapping, IommuMapping};
88 use virtio_devices::{VirtioSharedMemory, VirtioSharedMemoryList};
89 use vm_allocator::SystemAllocator;
90 #[cfg(feature = "kvm")]
91 use vm_device::dma_mapping::vfio::VfioDmaMapping;
92 use vm_device::interrupt::{
93     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
94 };
95 use vm_device::{Bus, BusDevice, Resource};
96 use vm_memory::guest_memory::FileOffset;
97 #[cfg(feature = "kvm")]
98 use vm_memory::GuestMemoryRegion;
99 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
100 #[cfg(all(target_arch = "x86_64", feature = "cmos"))]
101 use vm_memory::{GuestAddressSpace, GuestMemory};
102 use vm_migration::{
103     protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
104     SnapshotDataSection, Snapshottable, Transportable,
105 };
106 use vm_virtio::{VirtioDeviceType, VirtioIommuRemapping};
107 use vmm_sys_util::eventfd::EventFd;
108 
// Length of an MMIO window on AArch64.
// NOTE(review): presumably the size reserved per MMIO device — confirm
// against the users of this constant outside this view.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Device names and device-name prefixes used by the device manager.
// All of them start with an underscore.
// NOTE(review): the `*_PREFIX` values are presumably combined with a counter
// to build unique identifiers — confirm against the naming code, which is not
// visible in this part of the file.
#[cfg(feature = "kvm")]
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";

#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "_ioapic";

const SERIAL_DEVICE_NAME_PREFIX: &str = "_serial";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME_PREFIX: &str = "_gpio";

// virtio devices.
const CONSOLE_DEVICE_NAME: &str = "_console";
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const MEM_DEVICE_NAME_PREFIX: &str = "_mem";
const BALLOON_DEVICE_NAME: &str = "_balloon";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const RNG_DEVICE_NAME: &str = "_rng";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "_watchdog";

const IOMMU_DEVICE_NAME: &str = "_iommu";

// Prefix for the virtio-pci transport devices.
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
136 
/// Errors associated with device manager.
///
/// Each variant names one failing operation and, where available, wraps the
/// underlying error that caused it.
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Failed converting Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed parsing disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(vmm_sys_util::errno::Error),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(pci::VfioPciError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed creating interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed creating a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed cloning a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed removing a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed removing a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed removing a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// Incorrect device ID as it is already used by another device.
    DeviceIdAlreadyInUse,

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed updating guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot generate a ResizeSender from the Resize object.
    CreateResizeSender(virtio_devices::mem::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed updating guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(pci::VfioPciError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pci could not be found.
    MissingVirtioPciResources,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed adding DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed removing DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
}
/// Result type whose error is always a [`DeviceManagerError`].
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// Shared, lockable handle to any virtio device, erased to the `VirtioDevice`
// trait object.
type VirtioDeviceArc = Arc<Mutex<dyn virtio_devices::VirtioDevice>>;

// NOTE(review): presumably the size, in bytes, of the device manager's ACPI
// MMIO window (see the `acpi_address` field of `DeviceManager`) — confirm
// against the allocation site, which is not visible here.
#[cfg(feature = "acpi")]
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;
429 
430 pub fn get_win_size() -> (u16, u16) {
431     #[repr(C)]
432     #[derive(Default)]
433     struct WindowSize {
434         rows: u16,
435         cols: u16,
436         xpixel: u16,
437         ypixel: u16,
438     }
439     let ws: WindowSize = WindowSize::default();
440 
441     unsafe {
442         libc::ioctl(0, TIOCGWINSZ, &ws);
443     }
444 
445     (ws.cols, ws.rows)
446 }
447 
// Linux ioctl request numbers used by `create_pty()`.
// TIOCSPTLCK sets (non-zero argument) or clears (zero argument) the lock on
// the pseudoterminal peer.
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
// TIOCGTPEER opens the peer end of a pseudoterminal with the given
// open(2)-style flags and returns its file descriptor.
const TIOCGTPEER: libc::c_int = 0x5441;
450 
451 pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
452     // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
453     // This is done to try and use the devpts filesystem that
454     // could be available for use in the process's namespace first.
455     // Ideally these are all the same file though but different
456     // kernels could have things setup differently.
457     // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
458     // for further details.
459     let main = match OpenOptions::new()
460         .read(true)
461         .write(true)
462         .custom_flags(libc::O_NOCTTY)
463         .open("/dev/pts/ptmx")
464     {
465         Ok(f) => f,
466         _ => OpenOptions::new()
467             .read(true)
468             .write(true)
469             .custom_flags(libc::O_NOCTTY)
470             .open("/dev/ptmx")?,
471     };
472     let mut unlock: libc::c_ulong = 0;
473     unsafe {
474         libc::ioctl(
475             main.as_raw_fd(),
476             TIOCSPTLCK.try_into().unwrap(),
477             &mut unlock,
478         )
479     };
480 
481     let sub_fd = unsafe {
482         libc::ioctl(
483             main.as_raw_fd(),
484             TIOCGTPEER.try_into().unwrap(),
485             libc::O_NOCTTY | libc::O_RDWR,
486         )
487     };
488     if sub_fd == -1 {
489         return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
490     }
491 
492     let proc_path = PathBuf::from(format!("/proc/self/fd/{}", sub_fd));
493     let path = read_link(proc_path)?;
494 
495     Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
496 }
497 
// Selects which device `Console::queue_input_bytes()` forwards input to.
enum ConsoleInput {
    // Input goes to the serial device.
    Serial,
    // Input goes to the virtio-console device.
    VirtioConsole,
}
/// Console abstraction bundling the optional serial device, the optional
/// virtio-console input handle and the selection of which one receives input.
///
/// The derived `Default` leaves every field as `None`, in which case all the
/// input methods are no-ops.
#[derive(Default)]
pub struct Console {
    #[cfg(target_arch = "x86_64")]
    // Serial port on 0x3f8
    serial: Option<Arc<Mutex<Serial>>>,
    // On AArch64 the serial device is a PL011.
    #[cfg(target_arch = "aarch64")]
    serial: Option<Arc<Mutex<Pl011>>>,
    // Input side of the virtio-console device, if any.
    virtio_console_input: Option<Arc<virtio_devices::ConsoleInput>>,
    // Device that receives console input; `None` means input is disabled.
    input: Option<ConsoleInput>,
}
512 
513 impl Console {
514     pub fn queue_input_bytes(&self, out: &[u8]) -> vmm_sys_util::errno::Result<()> {
515         match self.input {
516             Some(ConsoleInput::Serial) => {
517                 self.queue_input_bytes_serial(out)?;
518             }
519 
520             Some(ConsoleInput::VirtioConsole) => {
521                 self.queue_input_bytes_console(out);
522             }
523             None => {}
524         }
525 
526         Ok(())
527     }
528 
529     pub fn queue_input_bytes_serial(&self, out: &[u8]) -> vmm_sys_util::errno::Result<()> {
530         if self.serial.is_some() {
531             self.serial
532                 .as_ref()
533                 .unwrap()
534                 .lock()
535                 .unwrap()
536                 .queue_input_bytes(out)?;
537         }
538         Ok(())
539     }
540 
541     pub fn queue_input_bytes_console(&self, out: &[u8]) {
542         if self.virtio_console_input.is_some() {
543             self.virtio_console_input
544                 .as_ref()
545                 .unwrap()
546                 .queue_input_bytes(out);
547         }
548     }
549 
550     pub fn update_console_size(&self, cols: u16, rows: u16) {
551         if self.virtio_console_input.is_some() {
552             self.virtio_console_input
553                 .as_ref()
554                 .unwrap()
555                 .update_console_size(cols, rows)
556         }
557     }
558 
559     pub fn input_enabled(&self) -> bool {
560         self.input.is_some()
561     }
562 }
563 
// Groups the handles needed to relocate a device in the guest address space:
// the system allocator, the buses, the hypervisor VM and the device tree.
// It implements `DeviceRelocation` to handle PCI BAR moves.
struct AddressManager {
    // Allocator for IO ports and MMIO addresses.
    allocator: Arc<Mutex<SystemAllocator>>,
    // Port IO bus (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    io_bus: Arc<Bus>,
    // MMIO bus.
    mmio_bus: Arc<Bus>,
    // Hypervisor VM handle, used for ioevents and user memory regions.
    vm: Arc<dyn hypervisor::Vm>,
    // Device tree whose per-device resources are kept in sync with BAR moves.
    device_tree: Arc<Mutex<DeviceTree>>,
}
572 
573 impl DeviceRelocation for AddressManager {
574     fn move_bar(
575         &self,
576         old_base: u64,
577         new_base: u64,
578         len: u64,
579         pci_dev: &mut dyn PciDevice,
580         region_type: PciBarRegionType,
581     ) -> std::result::Result<(), std::io::Error> {
582         match region_type {
583             PciBarRegionType::IoRegion => {
584                 #[cfg(target_arch = "x86_64")]
585                 {
586                     // Update system allocator
587                     self.allocator
588                         .lock()
589                         .unwrap()
590                         .free_io_addresses(GuestAddress(old_base), len as GuestUsize);
591 
592                     self.allocator
593                         .lock()
594                         .unwrap()
595                         .allocate_io_addresses(
596                             Some(GuestAddress(new_base)),
597                             len as GuestUsize,
598                             None,
599                         )
600                         .ok_or_else(|| {
601                             io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
602                         })?;
603 
604                     // Update PIO bus
605                     self.io_bus
606                         .update_range(old_base, len, new_base, len)
607                         .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
608                 }
609                 #[cfg(target_arch = "aarch64")]
610                 error!("I/O region is not supported");
611             }
612             PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
613                 // Update system allocator
614                 if region_type == PciBarRegionType::Memory32BitRegion {
615                     self.allocator
616                         .lock()
617                         .unwrap()
618                         .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);
619 
620                     self.allocator
621                         .lock()
622                         .unwrap()
623                         .allocate_mmio_hole_addresses(
624                             Some(GuestAddress(new_base)),
625                             len as GuestUsize,
626                             None,
627                         )
628                         .ok_or_else(|| {
629                             io::Error::new(
630                                 io::ErrorKind::Other,
631                                 "failed allocating new 32 bits MMIO range",
632                             )
633                         })?;
634                 } else {
635                     self.allocator
636                         .lock()
637                         .unwrap()
638                         .free_mmio_addresses(GuestAddress(old_base), len as GuestUsize);
639 
640                     self.allocator
641                         .lock()
642                         .unwrap()
643                         .allocate_mmio_addresses(
644                             Some(GuestAddress(new_base)),
645                             len as GuestUsize,
646                             None,
647                         )
648                         .ok_or_else(|| {
649                             io::Error::new(
650                                 io::ErrorKind::Other,
651                                 "failed allocating new 64 bits MMIO range",
652                             )
653                         })?;
654                 }
655 
656                 // Update MMIO bus
657                 self.mmio_bus
658                     .update_range(old_base, len, new_base, len)
659                     .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
660             }
661         }
662 
663         let any_dev = pci_dev.as_any();
664         if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
665             // Update the device_tree resources associated with the device
666             if let Some(node) = self
667                 .device_tree
668                 .lock()
669                 .unwrap()
670                 .get_mut(&virtio_pci_dev.id())
671             {
672                 let mut resource_updated = false;
673                 for resource in node.resources.iter_mut() {
674                     if let Resource::MmioAddressRange { base, .. } = resource {
675                         if *base == old_base {
676                             *base = new_base;
677                             resource_updated = true;
678                             break;
679                         }
680                     }
681                 }
682 
683                 if !resource_updated {
684                     return Err(io::Error::new(
685                         io::ErrorKind::Other,
686                         format!(
687                             "Couldn't find a resource with base 0x{:x} for device {}",
688                             old_base,
689                             virtio_pci_dev.id()
690                         ),
691                     ));
692                 }
693             } else {
694                 return Err(io::Error::new(
695                     io::ErrorKind::Other,
696                     format!(
697                         "Couldn't find device {} from device tree",
698                         virtio_pci_dev.id()
699                     ),
700                 ));
701             }
702 
703             let bar_addr = virtio_pci_dev.config_bar_addr();
704             if bar_addr == new_base {
705                 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
706                     let io_addr = IoEventAddress::Mmio(addr);
707                     self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
708                         io::Error::new(
709                             io::ErrorKind::Other,
710                             format!("failed to unregister ioevent: {:?}", e),
711                         )
712                     })?;
713                 }
714                 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
715                     let io_addr = IoEventAddress::Mmio(addr);
716                     self.vm
717                         .register_ioevent(event, &io_addr, None)
718                         .map_err(|e| {
719                             io::Error::new(
720                                 io::ErrorKind::Other,
721                                 format!("failed to register ioevent: {:?}", e),
722                             )
723                         })?;
724                 }
725             } else {
726                 let virtio_dev = virtio_pci_dev.virtio_device();
727                 let mut virtio_dev = virtio_dev.lock().unwrap();
728                 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
729                     if shm_regions.addr.raw_value() == old_base {
730                         let mem_region = self.vm.make_user_memory_region(
731                             shm_regions.mem_slot,
732                             old_base,
733                             shm_regions.len,
734                             shm_regions.host_addr,
735                             false,
736                             false,
737                         );
738 
739                         self.vm.remove_user_memory_region(mem_region).map_err(|e| {
740                             io::Error::new(
741                                 io::ErrorKind::Other,
742                                 format!("failed to remove user memory region: {:?}", e),
743                             )
744                         })?;
745 
746                         // Create new mapping by inserting new region to KVM.
747                         let mem_region = self.vm.make_user_memory_region(
748                             shm_regions.mem_slot,
749                             new_base,
750                             shm_regions.len,
751                             shm_regions.host_addr,
752                             false,
753                             false,
754                         );
755 
756                         self.vm.create_user_memory_region(mem_region).map_err(|e| {
757                             io::Error::new(
758                                 io::ErrorKind::Other,
759                                 format!("failed to create user memory regions: {:?}", e),
760                             )
761                         })?;
762 
763                         // Update shared memory regions to reflect the new mapping.
764                         shm_regions.addr = GuestAddress(new_base);
765                         virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
766                             io::Error::new(
767                                 io::ErrorKind::Other,
768                                 format!("failed to update shared memory regions: {:?}", e),
769                             )
770                         })?;
771                     }
772                 }
773             }
774         }
775 
776         pci_dev.move_bar(old_base, new_base)
777     }
778 }
779 
// Serializable subset of the device manager: the device tree and the device
// identifier counter.
// NOTE(review): presumably this is the payload stored in and restored from
// snapshots — confirm against the Snapshottable implementation, which is not
// visible here.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}
785 
/// Both ends of a pseudoterminal together with the filesystem path of the
/// `sub` end.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub sub: File,
    pub path: PathBuf,
}

impl PtyPair {
    // Duplicates both file handles and copies the path.
    // Panics if either handle cannot be duplicated.
    fn clone(&self) -> Self {
        let main = self.main.try_clone().unwrap();
        let sub = self.sub.try_clone().unwrap();
        let path = self.path.clone();

        Self { main, sub, path }
    }
}
802 
/// Shared handle to a PCI device, either a VFIO passthrough device (only
/// with the `kvm` feature) or a virtio-pci device.
#[derive(Clone)]
pub enum PciDeviceHandle {
    #[cfg(feature = "kvm")]
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
}
809 
810 pub struct DeviceManager {
811     // Manage address space related to devices
812     address_manager: Arc<AddressManager>,
813 
814     // Console abstraction
815     console: Arc<Console>,
816 
817     // console PTY
818     console_pty: Option<Arc<Mutex<PtyPair>>>,
819 
820     // serial PTY
821     serial_pty: Option<Arc<Mutex<PtyPair>>>,
822 
823     // Interrupt controller
824     #[cfg(target_arch = "x86_64")]
825     interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
826     #[cfg(target_arch = "aarch64")]
827     interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,
828 
829     // Things to be added to the commandline (i.e. for virtio-mmio)
830     cmdline_additions: Vec<String>,
831 
832     // ACPI GED notification device
833     #[cfg(feature = "acpi")]
834     ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,
835 
836     // VM configuration
837     config: Arc<Mutex<VmConfig>>,
838 
839     // Memory Manager
840     memory_manager: Arc<Mutex<MemoryManager>>,
841 
842     // The virtio devices on the system
843     virtio_devices: Vec<(VirtioDeviceArc, bool, String)>,
844 
845     // List of bus devices
846     // Let the DeviceManager keep strong references to the BusDevice devices.
847     // This allows the IO and MMIO buses to be provided with Weak references,
848     // which prevents cyclic dependencies.
849     bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,
850 
851     // Counter to keep track of the consumed device IDs.
852     device_id_cnt: Wrapping<usize>,
853 
854     // Keep a reference to the PCI bus
855     pci_bus: Option<Arc<Mutex<PciBus>>>,
856 
857     #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
858     // MSI Interrupt Manager
859     msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
860 
861     #[cfg_attr(feature = "mshv", allow(dead_code))]
862     // Legacy Interrupt Manager
863     legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,
864 
865     // Passthrough device handle
866     passthrough_device: Option<Arc<dyn hypervisor::Device>>,
867 
868     // Paravirtualized IOMMU
869     iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
870 
871     // PCI information about devices attached to the paravirtualized IOMMU
872     // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
873     // representing the devices attached to the virtual IOMMU. This is useful
874     // information for filling the ACPI VIOT table.
875     iommu_attached_devices: Option<(u32, Vec<u32>)>,
876 
877     // Bitmap of PCI devices to hotplug.
878     pci_devices_up: u32,
879 
880     // Bitmap of PCI devices to hotunplug.
881     pci_devices_down: u32,
882 
883     // List of allocated IRQs for each PCI slot.
884     pci_irq_slots: [u8; 32],
885 
886     // Tree of devices, representing the dependencies between devices.
887     // Useful for introspection, snapshot and restore.
888     device_tree: Arc<Mutex<DeviceTree>>,
889 
890     // Exit event
891     #[cfg(feature = "acpi")]
892     exit_evt: EventFd,
893 
894     reset_evt: EventFd,
895 
896     #[cfg(target_arch = "aarch64")]
897     id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,
898 
899     // seccomp action
900     seccomp_action: SeccompAction,
901 
902     // List of guest NUMA nodes.
903     #[cfg(feature = "acpi")]
904     numa_nodes: NumaNodes,
905 
906     // Possible handle to the virtio-balloon device
907     balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,
908 
909     // Virtio Device activation EventFd to allow the VMM thread to trigger device
910     // activation and thus start the threads from the VMM thread
911     activate_evt: EventFd,
912 
913     #[cfg(feature = "acpi")]
914     acpi_address: GuestAddress,
915 
    // List of handles to the virtio-mem devices
917     virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,
918 
919     #[cfg(target_arch = "aarch64")]
920     // GPIO device for AArch64
921     gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,
922 
923     // Flag to force setting the iommu on virtio devices
924     force_iommu: bool,
925 }
926 
927 impl DeviceManager {
928     #[allow(clippy::too_many_arguments)]
929     pub fn new(
930         vm: Arc<dyn hypervisor::Vm>,
931         config: Arc<Mutex<VmConfig>>,
932         memory_manager: Arc<Mutex<MemoryManager>>,
933         _exit_evt: &EventFd,
934         reset_evt: &EventFd,
935         seccomp_action: SeccompAction,
936         #[cfg(feature = "acpi")] numa_nodes: NumaNodes,
937         activate_evt: &EventFd,
938         force_iommu: bool,
939     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
940         let device_tree = Arc::new(Mutex::new(DeviceTree::new()));
941 
942         let address_manager = Arc::new(AddressManager {
943             allocator: memory_manager.lock().unwrap().allocator(),
944             #[cfg(target_arch = "x86_64")]
945             io_bus: Arc::new(Bus::new()),
946             mmio_bus: Arc::new(Bus::new()),
947             vm: vm.clone(),
948             device_tree: Arc::clone(&device_tree),
949         });
950 
951         // First we create the MSI interrupt manager, the legacy one is created
952         // later, after the IOAPIC device creation.
953         // The reason we create the MSI one first is because the IOAPIC needs it,
954         // and then the legacy interrupt manager needs an IOAPIC. So we're
955         // handling a linear dependency chain:
956         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
957         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
958             Arc::new(MsiInterruptManager::new(
959                 Arc::clone(&address_manager.allocator),
960                 vm,
961             ));
962 
963         #[cfg(feature = "acpi")]
964         let acpi_address = address_manager
965             .allocator
966             .lock()
967             .unwrap()
968             .allocate_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
969             .ok_or(DeviceManagerError::AllocateIoPort)?;
970         let device_manager = DeviceManager {
971             address_manager: Arc::clone(&address_manager),
972             console: Arc::new(Console::default()),
973             interrupt_controller: None,
974             cmdline_additions: Vec::new(),
975             #[cfg(feature = "acpi")]
976             ged_notification_device: None,
977             config,
978             memory_manager,
979             virtio_devices: Vec::new(),
980             bus_devices: Vec::new(),
981             device_id_cnt: Wrapping(0),
982             pci_bus: None,
983             msi_interrupt_manager,
984             legacy_interrupt_manager: None,
985             passthrough_device: None,
986             iommu_device: None,
987             iommu_attached_devices: None,
988             pci_devices_up: 0,
989             pci_devices_down: 0,
990             pci_irq_slots: [0; 32],
991             device_tree,
992             #[cfg(feature = "acpi")]
993             exit_evt: _exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
994             reset_evt: reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
995             #[cfg(target_arch = "aarch64")]
996             id_to_dev_info: HashMap::new(),
997             seccomp_action,
998             #[cfg(feature = "acpi")]
999             numa_nodes,
1000             balloon: None,
1001             activate_evt: activate_evt
1002                 .try_clone()
1003                 .map_err(DeviceManagerError::EventFd)?,
1004             #[cfg(feature = "acpi")]
1005             acpi_address,
1006             serial_pty: None,
1007             console_pty: None,
1008             virtio_mem_devices: Vec::new(),
1009             #[cfg(target_arch = "aarch64")]
1010             gpio_device: None,
1011             force_iommu,
1012         };
1013 
1014         let device_manager = Arc::new(Mutex::new(device_manager));
1015 
1016         #[cfg(feature = "acpi")]
1017         address_manager
1018             .mmio_bus
1019             .insert(
1020                 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
1021                 acpi_address.0,
1022                 DEVICE_MANAGER_ACPI_SIZE as u64,
1023             )
1024             .map_err(DeviceManagerError::BusError)?;
1025 
1026         Ok(device_manager)
1027     }
1028 
1029     pub fn serial_pty(&self) -> Option<PtyPair> {
1030         self.serial_pty
1031             .as_ref()
1032             .map(|pty| pty.lock().unwrap().clone())
1033     }
1034 
1035     pub fn console_pty(&self) -> Option<PtyPair> {
1036         self.console_pty
1037             .as_ref()
1038             .map(|pty| pty.lock().unwrap().clone())
1039     }
1040 
1041     pub fn create_devices(
1042         &mut self,
1043         serial_pty: Option<PtyPair>,
1044         console_pty: Option<PtyPair>,
1045     ) -> DeviceManagerResult<()> {
1046         let mut virtio_devices: Vec<(VirtioDeviceArc, bool, String)> = Vec::new();
1047 
1048         let interrupt_controller = self.add_interrupt_controller()?;
1049 
1050         // Now we can create the legacy interrupt manager, which needs the freshly
1051         // formed IOAPIC device.
1052         let legacy_interrupt_manager: Arc<
1053             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1054         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1055             &interrupt_controller,
1056         )));
1057 
1058         #[cfg(feature = "acpi")]
1059         {
1060             let memory_manager_acpi_address = self.memory_manager.lock().unwrap().acpi_address;
1061             self.address_manager
1062                 .mmio_bus
1063                 .insert(
1064                     Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
1065                     memory_manager_acpi_address.0,
1066                     MEMORY_MANAGER_ACPI_SIZE as u64,
1067                 )
1068                 .map_err(DeviceManagerError::BusError)?;
1069         }
1070 
1071         #[cfg(target_arch = "x86_64")]
1072         self.add_legacy_devices(
1073             self.reset_evt
1074                 .try_clone()
1075                 .map_err(DeviceManagerError::EventFd)?,
1076         )?;
1077 
1078         #[cfg(target_arch = "aarch64")]
1079         self.add_legacy_devices(&legacy_interrupt_manager)?;
1080 
1081         #[cfg(feature = "acpi")]
1082         {
1083             self.ged_notification_device = self.add_acpi_devices(
1084                 &legacy_interrupt_manager,
1085                 self.reset_evt
1086                     .try_clone()
1087                     .map_err(DeviceManagerError::EventFd)?,
1088                 self.exit_evt
1089                     .try_clone()
1090                     .map_err(DeviceManagerError::EventFd)?,
1091             )?;
1092         }
1093 
1094         self.console = self.add_console_device(
1095             &legacy_interrupt_manager,
1096             &mut virtio_devices,
1097             serial_pty,
1098             console_pty,
1099         )?;
1100 
1101         // Reserve some IRQs for PCI devices in case they need to support INTx.
1102         self.reserve_legacy_interrupts_for_pci_devices()?;
1103 
1104         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1105 
1106         virtio_devices.append(&mut self.make_virtio_devices()?);
1107 
1108         self.add_pci_devices(virtio_devices.clone())?;
1109 
1110         self.virtio_devices = virtio_devices;
1111 
1112         Ok(())
1113     }
1114 
1115     fn reserve_legacy_interrupts_for_pci_devices(&mut self) -> DeviceManagerResult<()> {
1116         // Reserve 8 IRQs which will be shared across all PCI devices.
1117         let num_irqs = 8;
1118         let mut irqs: Vec<u8> = Vec::new();
1119         for _ in 0..num_irqs {
1120             irqs.push(
1121                 self.address_manager
1122                     .allocator
1123                     .lock()
1124                     .unwrap()
1125                     .allocate_irq()
1126                     .ok_or(DeviceManagerError::AllocateIrq)? as u8,
1127             );
1128         }
1129 
1130         // There are 32 devices on the PCI bus, let's assign them an IRQ.
1131         for i in 0..32 {
1132             self.pci_irq_slots[i] = irqs[(i % num_irqs) as usize];
1133         }
1134 
1135         Ok(())
1136     }
1137 
1138     fn state(&self) -> DeviceManagerState {
1139         DeviceManagerState {
1140             device_tree: self.device_tree.lock().unwrap().clone(),
1141             device_id_cnt: self.device_id_cnt,
1142         }
1143     }
1144 
1145     fn set_state(&mut self, state: &DeviceManagerState) {
1146         self.device_tree = Arc::new(Mutex::new(state.device_tree.clone()));
1147         self.device_id_cnt = state.device_id_cnt;
1148     }
1149 
1150     #[cfg(target_arch = "aarch64")]
1151     /// Gets the information of the devices registered up to some point in time.
1152     pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
1153         &self.id_to_dev_info
1154     }
1155 
1156     #[allow(unused_variables)]
1157     fn add_pci_devices(
1158         &mut self,
1159         virtio_devices: Vec<(VirtioDeviceArc, bool, String)>,
1160     ) -> DeviceManagerResult<()> {
1161         let pci_root = PciRoot::new(None);
1162         let mut pci_bus = PciBus::new(
1163             pci_root,
1164             Arc::clone(&self.address_manager) as Arc<dyn DeviceRelocation>,
1165         );
1166 
1167         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1168 
1169         let (iommu_device, iommu_mapping) = if self.config.lock().unwrap().iommu {
1170             let (device, mapping) =
1171                 virtio_devices::Iommu::new(iommu_id.clone(), self.seccomp_action.clone())
1172                     .map_err(DeviceManagerError::CreateVirtioIommu)?;
1173             let device = Arc::new(Mutex::new(device));
1174             self.iommu_device = Some(Arc::clone(&device));
1175 
1176             // Fill the device tree with a new node. In case of restore, we
1177             // know there is nothing to do, so we can simply override the
1178             // existing entry.
1179             self.device_tree
1180                 .lock()
1181                 .unwrap()
1182                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1183 
1184             (Some(device), Some(mapping))
1185         } else {
1186             (None, None)
1187         };
1188 
1189         let mut iommu_attached_devices = Vec::new();
1190 
1191         for (device, iommu_attached, id) in virtio_devices {
1192             let mapping: &Option<Arc<IommuMapping>> = if iommu_attached {
1193                 &iommu_mapping
1194             } else {
1195                 &None
1196             };
1197 
1198             let dev_id = self.add_virtio_pci_device(device, &mut pci_bus, mapping, id)?;
1199 
1200             if iommu_attached {
1201                 iommu_attached_devices.push(dev_id);
1202             }
1203         }
1204 
1205         let mut vfio_iommu_device_ids = self.add_vfio_devices(&mut pci_bus)?;
1206 
1207         iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1208 
1209         if let Some(iommu_device) = iommu_device {
1210             let dev_id = self.add_virtio_pci_device(iommu_device, &mut pci_bus, &None, iommu_id)?;
1211             self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1212         }
1213 
1214         let pci_bus = Arc::new(Mutex::new(pci_bus));
1215         let pci_config_io = Arc::new(Mutex::new(PciConfigIo::new(Arc::clone(&pci_bus))));
1216         self.bus_devices
1217             .push(Arc::clone(&pci_config_io) as Arc<Mutex<dyn BusDevice>>);
1218         #[cfg(target_arch = "x86_64")]
1219         self.address_manager
1220             .io_bus
1221             .insert(pci_config_io, 0xcf8, 0x8)
1222             .map_err(DeviceManagerError::BusError)?;
1223         let pci_config_mmio = Arc::new(Mutex::new(PciConfigMmio::new(Arc::clone(&pci_bus))));
1224         self.bus_devices
1225             .push(Arc::clone(&pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
1226         self.address_manager
1227             .mmio_bus
1228             .insert(
1229                 pci_config_mmio,
1230                 arch::layout::PCI_MMCONFIG_START.0,
1231                 arch::layout::PCI_MMCONFIG_SIZE,
1232             )
1233             .map_err(DeviceManagerError::BusError)?;
1234 
1235         self.pci_bus = Some(pci_bus);
1236 
1237         Ok(())
1238     }
1239 
1240     #[cfg(target_arch = "aarch64")]
1241     fn add_interrupt_controller(
1242         &mut self,
1243     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1244         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1245             gic::Gic::new(
1246                 self.config.lock().unwrap().cpus.boot_vcpus,
1247                 Arc::clone(&self.msi_interrupt_manager),
1248             )
1249             .map_err(DeviceManagerError::CreateInterruptController)?,
1250         ));
1251 
1252         self.interrupt_controller = Some(interrupt_controller.clone());
1253 
1254         // Unlike x86_64, the "interrupt_controller" here for AArch64 is only
1255         // a `Gic` object that implements the `InterruptController` to provide
1256         // interrupt delivery service. This is not the real GIC device so that
1257         // we do not need to insert it to the device tree.
1258 
1259         Ok(interrupt_controller)
1260     }
1261 
1262     #[cfg(target_arch = "aarch64")]
1263     pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
1264         self.interrupt_controller.as_ref()
1265     }
1266 
1267     #[cfg(target_arch = "x86_64")]
1268     fn add_interrupt_controller(
1269         &mut self,
1270     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1271         let id = String::from(IOAPIC_DEVICE_NAME);
1272 
1273         // Create IOAPIC
1274         let interrupt_controller = Arc::new(Mutex::new(
1275             ioapic::Ioapic::new(
1276                 id.clone(),
1277                 APIC_START,
1278                 Arc::clone(&self.msi_interrupt_manager),
1279             )
1280             .map_err(DeviceManagerError::CreateInterruptController)?,
1281         ));
1282 
1283         self.interrupt_controller = Some(interrupt_controller.clone());
1284 
1285         self.address_manager
1286             .mmio_bus
1287             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1288             .map_err(DeviceManagerError::BusError)?;
1289 
1290         self.bus_devices
1291             .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);
1292 
1293         // Fill the device tree with a new node. In case of restore, we
1294         // know there is nothing to do, so we can simply override the
1295         // existing entry.
1296         self.device_tree
1297             .lock()
1298             .unwrap()
1299             .insert(id.clone(), device_node!(id, interrupt_controller));
1300 
1301         Ok(interrupt_controller)
1302     }
1303 
1304     #[cfg(feature = "acpi")]
1305     fn add_acpi_devices(
1306         &mut self,
1307         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1308         reset_evt: EventFd,
1309         exit_evt: EventFd,
1310     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1311         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1312             exit_evt, reset_evt,
1313         )));
1314 
1315         self.bus_devices
1316             .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);
1317 
1318         #[cfg(target_arch = "x86_64")]
1319         {
1320             self.address_manager
1321                 .allocator
1322                 .lock()
1323                 .unwrap()
1324                 .allocate_io_addresses(Some(GuestAddress(0x3c0)), 0x8, None)
1325                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1326 
1327             self.address_manager
1328                 .io_bus
1329                 .insert(shutdown_device, 0x3c0, 0x4)
1330                 .map_err(DeviceManagerError::BusError)?;
1331         }
1332 
1333         let ged_irq = self
1334             .address_manager
1335             .allocator
1336             .lock()
1337             .unwrap()
1338             .allocate_irq()
1339             .unwrap();
1340         let interrupt_group = interrupt_manager
1341             .create_group(LegacyIrqGroupConfig {
1342                 irq: ged_irq as InterruptIndex,
1343             })
1344             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1345         let ged_address = self
1346             .address_manager
1347             .allocator
1348             .lock()
1349             .unwrap()
1350             .allocate_mmio_addresses(None, devices::acpi::GED_DEVICE_ACPI_SIZE as u64, None)
1351             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1352         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1353             interrupt_group,
1354             ged_irq,
1355             ged_address,
1356         )));
1357         self.address_manager
1358             .mmio_bus
1359             .insert(
1360                 ged_device.clone(),
1361                 ged_address.0,
1362                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1363             )
1364             .map_err(DeviceManagerError::BusError)?;
1365         self.bus_devices
1366             .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);
1367 
1368         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1369 
1370         self.bus_devices
1371             .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);
1372 
1373         #[cfg(target_arch = "x86_64")]
1374         {
1375             self.address_manager
1376                 .allocator
1377                 .lock()
1378                 .unwrap()
1379                 .allocate_io_addresses(Some(GuestAddress(0xb008)), 0x4, None)
1380                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1381 
1382             self.address_manager
1383                 .io_bus
1384                 .insert(pm_timer_device, 0xb008, 0x4)
1385                 .map_err(DeviceManagerError::BusError)?;
1386         }
1387 
1388         Ok(Some(ged_device))
1389     }
1390 
1391     #[cfg(target_arch = "x86_64")]
1392     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1393         // Add a shutdown device (i8042)
1394         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(reset_evt)));
1395 
1396         self.bus_devices
1397             .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);
1398 
1399         self.address_manager
1400             .io_bus
1401             .insert(i8042, 0x61, 0x4)
1402             .map_err(DeviceManagerError::BusError)?;
1403         #[cfg(feature = "cmos")]
1404         {
1405             // Add a CMOS emulated device
1406             let mem_size = self
1407                 .memory_manager
1408                 .lock()
1409                 .unwrap()
1410                 .guest_memory()
1411                 .memory()
1412                 .last_addr()
1413                 .0
1414                 + 1;
1415             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1416             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1417 
1418             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1419                 mem_below_4g,
1420                 mem_above_4g,
1421             )));
1422 
1423             self.bus_devices
1424                 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);
1425 
1426             self.address_manager
1427                 .io_bus
1428                 .insert(cmos, 0x70, 0x2)
1429                 .map_err(DeviceManagerError::BusError)?;
1430         }
1431         #[cfg(feature = "fwdebug")]
1432         {
1433             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1434 
1435             self.bus_devices
1436                 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);
1437 
1438             self.address_manager
1439                 .io_bus
1440                 .insert(fwdebug, 0x402, 0x1)
1441                 .map_err(DeviceManagerError::BusError)?;
1442         }
1443 
1444         Ok(())
1445     }
1446 
1447     #[cfg(target_arch = "aarch64")]
1448     fn add_legacy_devices(
1449         &mut self,
1450         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1451     ) -> DeviceManagerResult<()> {
1452         // Add a RTC device
1453         let rtc_irq = self
1454             .address_manager
1455             .allocator
1456             .lock()
1457             .unwrap()
1458             .allocate_irq()
1459             .unwrap();
1460 
1461         let interrupt_group = interrupt_manager
1462             .create_group(LegacyIrqGroupConfig {
1463                 irq: rtc_irq as InterruptIndex,
1464             })
1465             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1466 
1467         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1468 
1469         self.bus_devices
1470             .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);
1471 
1472         let addr = GuestAddress(arch::layout::LEGACY_RTC_MAPPED_IO_START);
1473 
1474         self.address_manager
1475             .mmio_bus
1476             .insert(rtc_device, addr.0, MMIO_LEN)
1477             .map_err(DeviceManagerError::BusError)?;
1478 
1479         self.id_to_dev_info.insert(
1480             (DeviceType::Rtc, "rtc".to_string()),
1481             MmioDeviceInfo {
1482                 addr: addr.0,
1483                 irq: rtc_irq,
1484             },
1485         );
1486 
1487         // Add a GPIO device
1488         let id = String::from(GPIO_DEVICE_NAME_PREFIX);
1489         let gpio_irq = self
1490             .address_manager
1491             .allocator
1492             .lock()
1493             .unwrap()
1494             .allocate_irq()
1495             .unwrap();
1496 
1497         let interrupt_group = interrupt_manager
1498             .create_group(LegacyIrqGroupConfig {
1499                 irq: gpio_irq as InterruptIndex,
1500             })
1501             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1502 
1503         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1504             id.clone(),
1505             interrupt_group,
1506         )));
1507 
1508         self.bus_devices
1509             .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);
1510 
1511         let addr = GuestAddress(arch::layout::LEGACY_GPIO_MAPPED_IO_START);
1512 
1513         self.address_manager
1514             .mmio_bus
1515             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1516             .map_err(DeviceManagerError::BusError)?;
1517 
1518         self.gpio_device = Some(gpio_device.clone());
1519 
1520         self.id_to_dev_info.insert(
1521             (DeviceType::Gpio, "gpio".to_string()),
1522             MmioDeviceInfo {
1523                 addr: addr.0,
1524                 irq: gpio_irq,
1525             },
1526         );
1527 
1528         self.device_tree
1529             .lock()
1530             .unwrap()
1531             .insert(id.clone(), device_node!(id, gpio_device));
1532 
1533         Ok(())
1534     }
1535 
1536     #[cfg(target_arch = "x86_64")]
1537     fn add_serial_device(
1538         &mut self,
1539         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1540         serial_writer: Option<Box<dyn io::Write + Send>>,
1541     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1542         // Serial is tied to IRQ #4
1543         let serial_irq = 4;
1544 
1545         let id = String::from(SERIAL_DEVICE_NAME_PREFIX);
1546 
1547         let interrupt_group = interrupt_manager
1548             .create_group(LegacyIrqGroupConfig {
1549                 irq: serial_irq as InterruptIndex,
1550             })
1551             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1552 
1553         let serial = Arc::new(Mutex::new(Serial::new(
1554             id.clone(),
1555             interrupt_group,
1556             serial_writer,
1557         )));
1558 
1559         self.bus_devices
1560             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1561 
1562         self.address_manager
1563             .allocator
1564             .lock()
1565             .unwrap()
1566             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1567             .ok_or(DeviceManagerError::AllocateIoPort)?;
1568 
1569         self.address_manager
1570             .io_bus
1571             .insert(serial.clone(), 0x3f8, 0x8)
1572             .map_err(DeviceManagerError::BusError)?;
1573 
1574         // Fill the device tree with a new node. In case of restore, we
1575         // know there is nothing to do, so we can simply override the
1576         // existing entry.
1577         self.device_tree
1578             .lock()
1579             .unwrap()
1580             .insert(id.clone(), device_node!(id, serial));
1581 
1582         Ok(serial)
1583     }
1584 
1585     #[cfg(target_arch = "aarch64")]
1586     fn add_serial_device(
1587         &mut self,
1588         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1589         serial_writer: Option<Box<dyn io::Write + Send>>,
1590     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1591         let id = String::from(SERIAL_DEVICE_NAME_PREFIX);
1592 
1593         let serial_irq = self
1594             .address_manager
1595             .allocator
1596             .lock()
1597             .unwrap()
1598             .allocate_irq()
1599             .unwrap();
1600 
1601         let interrupt_group = interrupt_manager
1602             .create_group(LegacyIrqGroupConfig {
1603                 irq: serial_irq as InterruptIndex,
1604             })
1605             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1606 
1607         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1608             id.clone(),
1609             interrupt_group,
1610             serial_writer,
1611         )));
1612 
1613         self.bus_devices
1614             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1615 
1616         let addr = GuestAddress(arch::layout::LEGACY_SERIAL_MAPPED_IO_START);
1617 
1618         self.address_manager
1619             .mmio_bus
1620             .insert(serial.clone(), addr.0, MMIO_LEN)
1621             .map_err(DeviceManagerError::BusError)?;
1622 
1623         self.id_to_dev_info.insert(
1624             (DeviceType::Serial, DeviceType::Serial.to_string()),
1625             MmioDeviceInfo {
1626                 addr: addr.0,
1627                 irq: serial_irq,
1628             },
1629         );
1630 
1631         self.cmdline_additions
1632             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1633 
1634         // Fill the device tree with a new node. In case of restore, we
1635         // know there is nothing to do, so we can simply override the
1636         // existing entry.
1637         self.device_tree
1638             .lock()
1639             .unwrap()
1640             .insert(id.clone(), device_node!(id, serial));
1641 
1642         Ok(serial)
1643     }
1644 
1645     fn modify_mode<F: FnOnce(&mut termios)>(
1646         &self,
1647         fd: RawFd,
1648         f: F,
1649     ) -> vmm_sys_util::errno::Result<()> {
1650         // Safe because we check the return value of isatty.
1651         if unsafe { isatty(fd) } != 1 {
1652             return Ok(());
1653         }
1654 
1655         // The following pair are safe because termios gets totally overwritten by tcgetattr and we
1656         // check the return result.
1657         let mut termios: termios = unsafe { zeroed() };
1658         let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
1659         if ret < 0 {
1660             return vmm_sys_util::errno::errno_result();
1661         }
1662         f(&mut termios);
1663         // Safe because the syscall will only read the extent of termios and we check the return result.
1664         let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
1665         if ret < 0 {
1666             return vmm_sys_util::errno::errno_result();
1667         }
1668 
1669         Ok(())
1670     }
1671 
1672     fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> {
1673         self.modify_mode(f.as_raw_fd(), |t| t.c_lflag &= !(ICANON | ECHO | ISIG))
1674     }
1675 
1676     fn add_console_device(
1677         &mut self,
1678         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1679         virtio_devices: &mut Vec<(VirtioDeviceArc, bool, String)>,
1680         serial_pty: Option<PtyPair>,
1681         console_pty: Option<PtyPair>,
1682     ) -> DeviceManagerResult<Arc<Console>> {
1683         let serial_config = self.config.lock().unwrap().serial.clone();
1684         let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
1685             ConsoleOutputMode::File => Some(Box::new(
1686                 File::create(serial_config.file.as_ref().unwrap())
1687                     .map_err(DeviceManagerError::SerialOutputFileOpen)?,
1688             )),
1689             ConsoleOutputMode::Pty => {
1690                 if let Some(pty) = serial_pty {
1691                     self.config.lock().unwrap().serial.file = Some(pty.path.clone());
1692                     let writer = pty.main.try_clone().unwrap();
1693                     self.serial_pty = Some(Arc::new(Mutex::new(pty)));
1694                     Some(Box::new(writer))
1695                 } else {
1696                     let (main, mut sub, path) =
1697                         create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
1698                     self.set_raw_mode(&mut sub)
1699                         .map_err(DeviceManagerError::SetPtyRaw)?;
1700                     self.config.lock().unwrap().serial.file = Some(path.clone());
1701                     let writer = main.try_clone().unwrap();
1702                     self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
1703                     Some(Box::new(writer))
1704                 }
1705             }
1706             ConsoleOutputMode::Tty => Some(Box::new(stdout())),
1707             ConsoleOutputMode::Off | ConsoleOutputMode::Null => None,
1708         };
1709         let serial = if serial_config.mode != ConsoleOutputMode::Off {
1710             Some(self.add_serial_device(interrupt_manager, serial_writer)?)
1711         } else {
1712             None
1713         };
1714 
1715         // Create serial and virtio-console
1716         let console_config = self.config.lock().unwrap().console.clone();
1717         let console_writer: Option<Box<dyn io::Write + Send + Sync>> = match console_config.mode {
1718             ConsoleOutputMode::File => Some(Box::new(
1719                 File::create(console_config.file.as_ref().unwrap())
1720                     .map_err(DeviceManagerError::ConsoleOutputFileOpen)?,
1721             )),
1722             ConsoleOutputMode::Pty => {
1723                 if let Some(pty) = console_pty {
1724                     self.config.lock().unwrap().console.file = Some(pty.path.clone());
1725                     let writer = pty.main.try_clone().unwrap();
1726                     self.console_pty = Some(Arc::new(Mutex::new(pty)));
1727                     Some(Box::new(writer))
1728                 } else {
1729                     let (main, mut sub, path) =
1730                         create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?;
1731                     self.set_raw_mode(&mut sub)
1732                         .map_err(DeviceManagerError::SetPtyRaw)?;
1733                     self.config.lock().unwrap().console.file = Some(path.clone());
1734                     let writer = main.try_clone().unwrap();
1735                     self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
1736                     Some(Box::new(writer))
1737                 }
1738             }
1739             ConsoleOutputMode::Tty => Some(Box::new(stdout())),
1740             ConsoleOutputMode::Null => Some(Box::new(sink())),
1741             ConsoleOutputMode::Off => None,
1742         };
1743         let (col, row) = get_win_size();
1744         let virtio_console_input = if let Some(writer) = console_writer {
1745             let id = String::from(CONSOLE_DEVICE_NAME);
1746 
1747             let (virtio_console_device, virtio_console_input) = virtio_devices::Console::new(
1748                 id.clone(),
1749                 writer,
1750                 col,
1751                 row,
1752                 self.force_iommu | console_config.iommu,
1753                 self.seccomp_action.clone(),
1754             )
1755             .map_err(DeviceManagerError::CreateVirtioConsole)?;
1756             let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
1757             virtio_devices.push((
1758                 Arc::clone(&virtio_console_device) as VirtioDeviceArc,
1759                 console_config.iommu,
1760                 id.clone(),
1761             ));
1762 
1763             // Fill the device tree with a new node. In case of restore, we
1764             // know there is nothing to do, so we can simply override the
1765             // existing entry.
1766             self.device_tree
1767                 .lock()
1768                 .unwrap()
1769                 .insert(id.clone(), device_node!(id, virtio_console_device));
1770 
1771             Some(virtio_console_input)
1772         } else {
1773             None
1774         };
1775 
1776         let input = if serial_config.mode.input_enabled() {
1777             Some(ConsoleInput::Serial)
1778         } else if console_config.mode.input_enabled() {
1779             Some(ConsoleInput::VirtioConsole)
1780         } else {
1781             None
1782         };
1783 
1784         Ok(Arc::new(Console {
1785             serial,
1786             virtio_console_input,
1787             input,
1788         }))
1789     }
1790 
1791     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
1792         let mut devices: Vec<(VirtioDeviceArc, bool, String)> = Vec::new();
1793 
1794         // Create "standard" virtio devices (net/block/rng)
1795         devices.append(&mut self.make_virtio_block_devices()?);
1796         devices.append(&mut self.make_virtio_net_devices()?);
1797         devices.append(&mut self.make_virtio_rng_devices()?);
1798 
1799         // Add virtio-fs if required
1800         devices.append(&mut self.make_virtio_fs_devices()?);
1801 
1802         // Add virtio-pmem if required
1803         devices.append(&mut self.make_virtio_pmem_devices()?);
1804 
1805         // Add virtio-vsock if required
1806         devices.append(&mut self.make_virtio_vsock_devices()?);
1807 
1808         devices.append(&mut self.make_virtio_mem_devices()?);
1809 
1810         // Add virtio-balloon if required
1811         devices.append(&mut self.make_virtio_balloon_devices()?);
1812 
1813         // Add virtio-watchdog device
1814         devices.append(&mut self.make_virtio_watchdog_devices()?);
1815 
1816         Ok(devices)
1817     }
1818 
1819     fn make_virtio_block_device(
1820         &mut self,
1821         disk_cfg: &mut DiskConfig,
1822     ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> {
1823         let id = if let Some(id) = &disk_cfg.id {
1824             id.clone()
1825         } else {
1826             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
1827             disk_cfg.id = Some(id.clone());
1828             id
1829         };
1830 
1831         info!("Creating virtio-block device: {:?}", disk_cfg);
1832 
1833         if disk_cfg.vhost_user {
1834             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
1835             let vu_cfg = VhostUserConfig {
1836                 socket,
1837                 num_queues: disk_cfg.num_queues,
1838                 queue_size: disk_cfg.queue_size,
1839             };
1840             let vhost_user_block_device = Arc::new(Mutex::new(
1841                 match virtio_devices::vhost_user::Blk::new(id.clone(), vu_cfg) {
1842                     Ok(vub_device) => vub_device,
1843                     Err(e) => {
1844                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
1845                     }
1846                 },
1847             ));
1848 
1849             // Fill the device tree with a new node. In case of restore, we
1850             // know there is nothing to do, so we can simply override the
1851             // existing entry.
1852             self.device_tree
1853                 .lock()
1854                 .unwrap()
1855                 .insert(id.clone(), device_node!(id, vhost_user_block_device));
1856 
1857             Ok((
1858                 Arc::clone(&vhost_user_block_device) as VirtioDeviceArc,
1859                 false,
1860                 id,
1861             ))
1862         } else {
1863             let mut options = OpenOptions::new();
1864             options.read(true);
1865             options.write(!disk_cfg.readonly);
1866             if disk_cfg.direct {
1867                 options.custom_flags(libc::O_DIRECT);
1868             }
1869             // Open block device path
1870             let mut file: File = options
1871                 .open(
1872                     disk_cfg
1873                         .path
1874                         .as_ref()
1875                         .ok_or(DeviceManagerError::NoDiskPath)?
1876                         .clone(),
1877                 )
1878                 .map_err(DeviceManagerError::Disk)?;
1879             let image_type =
1880                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
1881 
1882             let image = match image_type {
1883                 ImageType::FixedVhd => {
1884                     // Use asynchronous backend relying on io_uring if the
1885                     // syscalls are supported.
1886                     if block_io_uring_is_supported() && !disk_cfg.disable_io_uring {
1887                         info!("Using asynchronous fixed VHD disk file (io_uring)");
1888                         Box::new(
1889                             FixedVhdDiskAsync::new(file)
1890                                 .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
1891                         ) as Box<dyn DiskFile>
1892                     } else {
1893                         info!("Using synchronous fixed VHD disk file");
1894                         Box::new(
1895                             FixedVhdDiskSync::new(file)
1896                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
1897                         ) as Box<dyn DiskFile>
1898                     }
1899                 }
1900                 ImageType::Raw => {
1901                     // Use asynchronous backend relying on io_uring if the
1902                     // syscalls are supported.
1903                     if block_io_uring_is_supported() && !disk_cfg.disable_io_uring {
1904                         info!("Using asynchronous RAW disk file (io_uring)");
1905                         Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
1906                     } else {
1907                         info!("Using synchronous RAW disk file");
1908                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
1909                     }
1910                 }
1911                 ImageType::Qcow2 => {
1912                     info!("Using synchronous QCOW disk file");
1913                     Box::new(QcowDiskSync::new(file, disk_cfg.direct)) as Box<dyn DiskFile>
1914                 }
1915             };
1916 
1917             let dev = Arc::new(Mutex::new(
1918                 virtio_devices::Block::new(
1919                     id.clone(),
1920                     image,
1921                     disk_cfg
1922                         .path
1923                         .as_ref()
1924                         .ok_or(DeviceManagerError::NoDiskPath)?
1925                         .clone(),
1926                     disk_cfg.readonly,
1927                     self.force_iommu | disk_cfg.iommu,
1928                     disk_cfg.num_queues,
1929                     disk_cfg.queue_size,
1930                     self.seccomp_action.clone(),
1931                     disk_cfg.rate_limiter_config,
1932                 )
1933                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
1934             ));
1935 
1936             let virtio_device = Arc::clone(&dev) as VirtioDeviceArc;
1937             let migratable_device = dev as Arc<Mutex<dyn Migratable>>;
1938 
1939             // Fill the device tree with a new node. In case of restore, we
1940             // know there is nothing to do, so we can simply override the
1941             // existing entry.
1942             self.device_tree
1943                 .lock()
1944                 .unwrap()
1945                 .insert(id.clone(), device_node!(id, migratable_device));
1946 
1947             Ok((virtio_device, disk_cfg.iommu, id))
1948         }
1949     }
1950 
1951     fn make_virtio_block_devices(
1952         &mut self,
1953     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
1954         let mut devices = Vec::new();
1955 
1956         let mut block_devices = self.config.lock().unwrap().disks.clone();
1957         if let Some(disk_list_cfg) = &mut block_devices {
1958             for disk_cfg in disk_list_cfg.iter_mut() {
1959                 devices.push(self.make_virtio_block_device(disk_cfg)?);
1960             }
1961         }
1962         self.config.lock().unwrap().disks = block_devices;
1963 
1964         Ok(devices)
1965     }
1966 
1967     fn make_virtio_net_device(
1968         &mut self,
1969         net_cfg: &mut NetConfig,
1970     ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> {
1971         let id = if let Some(id) = &net_cfg.id {
1972             id.clone()
1973         } else {
1974             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
1975             net_cfg.id = Some(id.clone());
1976             id
1977         };
1978         info!("Creating virtio-net device: {:?}", net_cfg);
1979 
1980         if net_cfg.vhost_user {
1981             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
1982             let vu_cfg = VhostUserConfig {
1983                 socket,
1984                 num_queues: net_cfg.num_queues,
1985                 queue_size: net_cfg.queue_size,
1986             };
1987             let server = match net_cfg.vhost_mode {
1988                 VhostMode::Client => false,
1989                 VhostMode::Server => true,
1990             };
1991             let vhost_user_net_device = Arc::new(Mutex::new(
1992                 match virtio_devices::vhost_user::Net::new(
1993                     id.clone(),
1994                     net_cfg.mac,
1995                     vu_cfg,
1996                     server,
1997                     self.seccomp_action.clone(),
1998                 ) {
1999                     Ok(vun_device) => vun_device,
2000                     Err(e) => {
2001                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2002                     }
2003                 },
2004             ));
2005 
2006             // Fill the device tree with a new node. In case of restore, we
2007             // know there is nothing to do, so we can simply override the
2008             // existing entry.
2009             self.device_tree
2010                 .lock()
2011                 .unwrap()
2012                 .insert(id.clone(), device_node!(id, vhost_user_net_device));
2013 
2014             Ok((
2015                 Arc::clone(&vhost_user_net_device) as VirtioDeviceArc,
2016                 net_cfg.iommu,
2017                 id,
2018             ))
2019         } else {
2020             let virtio_net_device = if let Some(ref tap_if_name) = net_cfg.tap {
2021                 Arc::new(Mutex::new(
2022                     virtio_devices::Net::new(
2023                         id.clone(),
2024                         Some(tap_if_name),
2025                         None,
2026                         None,
2027                         Some(net_cfg.mac),
2028                         &mut net_cfg.host_mac,
2029                         self.force_iommu | net_cfg.iommu,
2030                         net_cfg.num_queues,
2031                         net_cfg.queue_size,
2032                         self.seccomp_action.clone(),
2033                         net_cfg.rate_limiter_config,
2034                     )
2035                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2036                 ))
2037             } else if let Some(fds) = &net_cfg.fds {
2038                 Arc::new(Mutex::new(
2039                     virtio_devices::Net::from_tap_fds(
2040                         id.clone(),
2041                         fds,
2042                         Some(net_cfg.mac),
2043                         self.force_iommu | net_cfg.iommu,
2044                         net_cfg.queue_size,
2045                         self.seccomp_action.clone(),
2046                         net_cfg.rate_limiter_config,
2047                     )
2048                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2049                 ))
2050             } else {
2051                 Arc::new(Mutex::new(
2052                     virtio_devices::Net::new(
2053                         id.clone(),
2054                         None,
2055                         Some(net_cfg.ip),
2056                         Some(net_cfg.mask),
2057                         Some(net_cfg.mac),
2058                         &mut net_cfg.host_mac,
2059                         self.force_iommu | net_cfg.iommu,
2060                         net_cfg.num_queues,
2061                         net_cfg.queue_size,
2062                         self.seccomp_action.clone(),
2063                         net_cfg.rate_limiter_config,
2064                     )
2065                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2066                 ))
2067             };
2068 
2069             // Fill the device tree with a new node. In case of restore, we
2070             // know there is nothing to do, so we can simply override the
2071             // existing entry.
2072             self.device_tree
2073                 .lock()
2074                 .unwrap()
2075                 .insert(id.clone(), device_node!(id, virtio_net_device));
2076 
2077             Ok((
2078                 Arc::clone(&virtio_net_device) as VirtioDeviceArc,
2079                 net_cfg.iommu,
2080                 id,
2081             ))
2082         }
2083     }
2084 
2085     /// Add virto-net and vhost-user-net devices
2086     fn make_virtio_net_devices(
2087         &mut self,
2088     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2089         let mut devices = Vec::new();
2090         let mut net_devices = self.config.lock().unwrap().net.clone();
2091         if let Some(net_list_cfg) = &mut net_devices {
2092             for net_cfg in net_list_cfg.iter_mut() {
2093                 devices.push(self.make_virtio_net_device(net_cfg)?);
2094             }
2095         }
2096         self.config.lock().unwrap().net = net_devices;
2097 
2098         Ok(devices)
2099     }
2100 
2101     fn make_virtio_rng_devices(
2102         &mut self,
2103     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2104         let mut devices = Vec::new();
2105 
2106         // Add virtio-rng if required
2107         let rng_config = self.config.lock().unwrap().rng.clone();
2108         if let Some(rng_path) = rng_config.src.to_str() {
2109             info!("Creating virtio-rng device: {:?}", rng_config);
2110             let id = String::from(RNG_DEVICE_NAME);
2111 
2112             let virtio_rng_device = Arc::new(Mutex::new(
2113                 virtio_devices::Rng::new(
2114                     id.clone(),
2115                     rng_path,
2116                     self.force_iommu | rng_config.iommu,
2117                     self.seccomp_action.clone(),
2118                 )
2119                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2120             ));
2121             devices.push((
2122                 Arc::clone(&virtio_rng_device) as VirtioDeviceArc,
2123                 rng_config.iommu,
2124                 id.clone(),
2125             ));
2126 
2127             // Fill the device tree with a new node. In case of restore, we
2128             // know there is nothing to do, so we can simply override the
2129             // existing entry.
2130             self.device_tree
2131                 .lock()
2132                 .unwrap()
2133                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2134         }
2135 
2136         Ok(devices)
2137     }
2138 
2139     fn make_virtio_fs_device(
2140         &mut self,
2141         fs_cfg: &mut FsConfig,
2142     ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> {
2143         let id = if let Some(id) = &fs_cfg.id {
2144             id.clone()
2145         } else {
2146             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2147             fs_cfg.id = Some(id.clone());
2148             id
2149         };
2150 
2151         info!("Creating virtio-fs device: {:?}", fs_cfg);
2152 
2153         let mut node = device_node!(id);
2154 
2155         // Look for the id in the device tree. If it can be found, that means
2156         // the device is being restored, otherwise it's created from scratch.
2157         let cache_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2158             debug!("Restoring virtio-fs {} resources", id);
2159 
2160             let mut cache_range: Option<(u64, u64)> = None;
2161             for resource in node.resources.iter() {
2162                 match resource {
2163                     Resource::MmioAddressRange { base, size } => {
2164                         if cache_range.is_some() {
2165                             return Err(DeviceManagerError::ResourceAlreadyExists);
2166                         }
2167 
2168                         cache_range = Some((*base, *size));
2169                     }
2170                     _ => {
2171                         error!("Unexpected resource {:?} for {}", resource, id);
2172                     }
2173                 }
2174             }
2175 
2176             cache_range
2177         } else {
2178             None
2179         };
2180 
2181         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2182             let cache = if fs_cfg.dax {
2183                 let (cache_base, cache_size) = if let Some((base, size)) = cache_range {
2184                     // The memory needs to be 2MiB aligned in order to support
2185                     // hugepages.
2186                     self.address_manager
2187                         .allocator
2188                         .lock()
2189                         .unwrap()
2190                         .allocate_mmio_addresses(
2191                             Some(GuestAddress(base)),
2192                             size as GuestUsize,
2193                             Some(0x0020_0000),
2194                         )
2195                         .ok_or(DeviceManagerError::FsRangeAllocation)?;
2196 
2197                     (base, size)
2198                 } else {
2199                     let size = fs_cfg.cache_size;
2200                     // The memory needs to be 2MiB aligned in order to support
2201                     // hugepages.
2202                     let base = self
2203                         .address_manager
2204                         .allocator
2205                         .lock()
2206                         .unwrap()
2207                         .allocate_mmio_addresses(None, size as GuestUsize, Some(0x0020_0000))
2208                         .ok_or(DeviceManagerError::FsRangeAllocation)?;
2209 
2210                     (base.raw_value(), size)
2211                 };
2212 
2213                 // Update the node with correct resource information.
2214                 node.resources.push(Resource::MmioAddressRange {
2215                     base: cache_base,
2216                     size: cache_size,
2217                 });
2218 
2219                 let mmap_region = MmapRegion::build(
2220                     None,
2221                     cache_size as usize,
2222                     libc::PROT_NONE,
2223                     libc::MAP_ANONYMOUS | libc::MAP_PRIVATE,
2224                 )
2225                 .map_err(DeviceManagerError::NewMmapRegion)?;
2226                 let host_addr: u64 = mmap_region.as_ptr() as u64;
2227 
2228                 let mem_slot = self
2229                     .memory_manager
2230                     .lock()
2231                     .unwrap()
2232                     .create_userspace_mapping(
2233                         cache_base, cache_size, host_addr, false, false, false,
2234                     )
2235                     .map_err(DeviceManagerError::MemoryManager)?;
2236 
2237                 let region_list = vec![VirtioSharedMemory {
2238                     offset: 0,
2239                     len: cache_size,
2240                 }];
2241 
2242                 Some((
2243                     VirtioSharedMemoryList {
2244                         host_addr,
2245                         mem_slot,
2246                         addr: GuestAddress(cache_base),
2247                         len: cache_size as GuestUsize,
2248                         region_list,
2249                     },
2250                     mmap_region,
2251                 ))
2252             } else {
2253                 None
2254             };
2255 
2256             let virtio_fs_device = Arc::new(Mutex::new(
2257                 virtio_devices::vhost_user::Fs::new(
2258                     id.clone(),
2259                     fs_socket,
2260                     &fs_cfg.tag,
2261                     fs_cfg.num_queues,
2262                     fs_cfg.queue_size,
2263                     cache,
2264                     self.seccomp_action.clone(),
2265                 )
2266                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2267             ));
2268 
2269             // Update the device tree with the migratable device.
2270             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2271             self.device_tree.lock().unwrap().insert(id.clone(), node);
2272 
2273             Ok((Arc::clone(&virtio_fs_device) as VirtioDeviceArc, false, id))
2274         } else {
2275             Err(DeviceManagerError::NoVirtioFsSock)
2276         }
2277     }
2278 
2279     fn make_virtio_fs_devices(
2280         &mut self,
2281     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2282         let mut devices = Vec::new();
2283 
2284         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2285         if let Some(fs_list_cfg) = &mut fs_devices {
2286             for fs_cfg in fs_list_cfg.iter_mut() {
2287                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2288             }
2289         }
2290         self.config.lock().unwrap().fs = fs_devices;
2291 
2292         Ok(devices)
2293     }
2294 
2295     fn make_virtio_pmem_device(
2296         &mut self,
2297         pmem_cfg: &mut PmemConfig,
2298     ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> {
2299         let id = if let Some(id) = &pmem_cfg.id {
2300             id.clone()
2301         } else {
2302             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2303             pmem_cfg.id = Some(id.clone());
2304             id
2305         };
2306 
2307         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2308 
2309         let mut node = device_node!(id);
2310 
2311         // Look for the id in the device tree. If it can be found, that means
2312         // the device is being restored, otherwise it's created from scratch.
2313         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2314             debug!("Restoring virtio-pmem {} resources", id);
2315 
2316             let mut region_range: Option<(u64, u64)> = None;
2317             for resource in node.resources.iter() {
2318                 match resource {
2319                     Resource::MmioAddressRange { base, size } => {
2320                         if region_range.is_some() {
2321                             return Err(DeviceManagerError::ResourceAlreadyExists);
2322                         }
2323 
2324                         region_range = Some((*base, *size));
2325                     }
2326                     _ => {
2327                         error!("Unexpected resource {:?} for {}", resource, id);
2328                     }
2329                 }
2330             }
2331 
2332             if region_range.is_none() {
2333                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2334             }
2335 
2336             region_range
2337         } else {
2338             None
2339         };
2340 
2341         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2342             if pmem_cfg.size.is_none() {
2343                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2344             }
2345             (O_TMPFILE, true)
2346         } else {
2347             (0, false)
2348         };
2349 
2350         let mut file = OpenOptions::new()
2351             .read(true)
2352             .write(!pmem_cfg.discard_writes)
2353             .custom_flags(custom_flags)
2354             .open(&pmem_cfg.file)
2355             .map_err(DeviceManagerError::PmemFileOpen)?;
2356 
2357         let size = if let Some(size) = pmem_cfg.size {
2358             if set_len {
2359                 file.set_len(size)
2360                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2361             }
2362             size
2363         } else {
2364             file.seek(SeekFrom::End(0))
2365                 .map_err(DeviceManagerError::PmemFileSetLen)?
2366         };
2367 
2368         if size % 0x20_0000 != 0 {
2369             return Err(DeviceManagerError::PmemSizeNotAligned);
2370         }
2371 
2372         let (region_base, region_size) = if let Some((base, size)) = region_range {
2373             // The memory needs to be 2MiB aligned in order to support
2374             // hugepages.
2375             self.address_manager
2376                 .allocator
2377                 .lock()
2378                 .unwrap()
2379                 .allocate_mmio_addresses(
2380                     Some(GuestAddress(base)),
2381                     size as GuestUsize,
2382                     Some(0x0020_0000),
2383                 )
2384                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2385 
2386             (base, size)
2387         } else {
2388             // The memory needs to be 2MiB aligned in order to support
2389             // hugepages.
2390             let base = self
2391                 .address_manager
2392                 .allocator
2393                 .lock()
2394                 .unwrap()
2395                 .allocate_mmio_addresses(None, size as GuestUsize, Some(0x0020_0000))
2396                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2397 
2398             (base.raw_value(), size)
2399         };
2400 
2401         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2402         let mmap_region = MmapRegion::build(
2403             Some(FileOffset::new(cloned_file, 0)),
2404             region_size as usize,
2405             PROT_READ | PROT_WRITE,
2406             MAP_NORESERVE
2407                 | if pmem_cfg.discard_writes {
2408                     MAP_PRIVATE
2409                 } else {
2410                     MAP_SHARED
2411                 },
2412         )
2413         .map_err(DeviceManagerError::NewMmapRegion)?;
2414         let host_addr: u64 = mmap_region.as_ptr() as u64;
2415 
2416         let mem_slot = self
2417             .memory_manager
2418             .lock()
2419             .unwrap()
2420             .create_userspace_mapping(
2421                 region_base,
2422                 region_size,
2423                 host_addr,
2424                 pmem_cfg.mergeable,
2425                 false,
2426                 false,
2427             )
2428             .map_err(DeviceManagerError::MemoryManager)?;
2429 
2430         let mapping = virtio_devices::UserspaceMapping {
2431             host_addr,
2432             mem_slot,
2433             addr: GuestAddress(region_base),
2434             len: region_size,
2435             mergeable: pmem_cfg.mergeable,
2436         };
2437 
2438         let virtio_pmem_device = Arc::new(Mutex::new(
2439             virtio_devices::Pmem::new(
2440                 id.clone(),
2441                 file,
2442                 GuestAddress(region_base),
2443                 mapping,
2444                 mmap_region,
2445                 self.force_iommu | pmem_cfg.iommu,
2446                 self.seccomp_action.clone(),
2447             )
2448             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2449         ));
2450 
2451         // Update the device tree with correct resource information and with
2452         // the migratable device.
2453         node.resources.push(Resource::MmioAddressRange {
2454             base: region_base,
2455             size: region_size,
2456         });
2457         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2458         self.device_tree.lock().unwrap().insert(id.clone(), node);
2459 
2460         Ok((
2461             Arc::clone(&virtio_pmem_device) as VirtioDeviceArc,
2462             pmem_cfg.iommu,
2463             id,
2464         ))
2465     }
2466 
2467     fn make_virtio_pmem_devices(
2468         &mut self,
2469     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2470         let mut devices = Vec::new();
2471         // Add virtio-pmem if required
2472         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2473         if let Some(pmem_list_cfg) = &mut pmem_devices {
2474             for pmem_cfg in pmem_list_cfg.iter_mut() {
2475                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2476             }
2477         }
2478         self.config.lock().unwrap().pmem = pmem_devices;
2479 
2480         Ok(devices)
2481     }
2482 
2483     fn make_virtio_vsock_device(
2484         &mut self,
2485         vsock_cfg: &mut VsockConfig,
2486     ) -> DeviceManagerResult<(VirtioDeviceArc, bool, String)> {
2487         let id = if let Some(id) = &vsock_cfg.id {
2488             id.clone()
2489         } else {
2490             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2491             vsock_cfg.id = Some(id.clone());
2492             id
2493         };
2494 
2495         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2496 
2497         let socket_path = vsock_cfg
2498             .socket
2499             .to_str()
2500             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2501         let backend =
2502             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2503                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2504 
2505         let vsock_device = Arc::new(Mutex::new(
2506             virtio_devices::Vsock::new(
2507                 id.clone(),
2508                 vsock_cfg.cid,
2509                 vsock_cfg.socket.clone(),
2510                 backend,
2511                 self.force_iommu | vsock_cfg.iommu,
2512                 self.seccomp_action.clone(),
2513             )
2514             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2515         ));
2516 
2517         // Fill the device tree with a new node. In case of restore, we
2518         // know there is nothing to do, so we can simply override the
2519         // existing entry.
2520         self.device_tree
2521             .lock()
2522             .unwrap()
2523             .insert(id.clone(), device_node!(id, vsock_device));
2524 
2525         Ok((
2526             Arc::clone(&vsock_device) as VirtioDeviceArc,
2527             vsock_cfg.iommu,
2528             id,
2529         ))
2530     }
2531 
2532     fn make_virtio_vsock_devices(
2533         &mut self,
2534     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2535         let mut devices = Vec::new();
2536 
2537         let mut vsock = self.config.lock().unwrap().vsock.clone();
2538         if let Some(ref mut vsock_cfg) = &mut vsock {
2539             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
2540         }
2541         self.config.lock().unwrap().vsock = vsock;
2542 
2543         Ok(devices)
2544     }
2545 
2546     fn make_virtio_mem_devices(
2547         &mut self,
2548     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2549         let mut devices = Vec::new();
2550 
2551         let mm = self.memory_manager.clone();
2552         let mm = mm.lock().unwrap();
2553         for (_memory_zone_id, memory_zone) in mm.memory_zones().iter() {
2554             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() {
2555                 let id = self.next_device_name(MEM_DEVICE_NAME_PREFIX)?;
2556                 info!("Creating virtio-mem device: id = {}", id);
2557                 #[cfg(not(feature = "acpi"))]
2558                 let node_id: Option<u16> = None;
2559                 #[cfg(feature = "acpi")]
2560                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, _memory_zone_id)
2561                     .map(|i| i as u16);
2562 
2563                 let virtio_mem_device = Arc::new(Mutex::new(
2564                     virtio_devices::Mem::new(
2565                         id.clone(),
2566                         virtio_mem_zone.region(),
2567                         virtio_mem_zone
2568                             .resize_handler()
2569                             .new_resize_sender()
2570                             .map_err(DeviceManagerError::CreateResizeSender)?,
2571                         self.seccomp_action.clone(),
2572                         node_id,
2573                         virtio_mem_zone.hotplugged_size(),
2574                         virtio_mem_zone.hugepages(),
2575                     )
2576                     .map_err(DeviceManagerError::CreateVirtioMem)?,
2577                 ));
2578 
2579                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
2580 
2581                 devices.push((
2582                     Arc::clone(&virtio_mem_device) as VirtioDeviceArc,
2583                     false,
2584                     id.clone(),
2585                 ));
2586 
2587                 // Fill the device tree with a new node. In case of restore, we
2588                 // know there is nothing to do, so we can simply override the
2589                 // existing entry.
2590                 self.device_tree
2591                     .lock()
2592                     .unwrap()
2593                     .insert(id.clone(), device_node!(id, virtio_mem_device));
2594             }
2595         }
2596 
2597         Ok(devices)
2598     }
2599 
2600     fn make_virtio_balloon_devices(
2601         &mut self,
2602     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2603         let mut devices = Vec::new();
2604 
2605         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
2606             let id = String::from(BALLOON_DEVICE_NAME);
2607             info!("Creating virtio-balloon device: id = {}", id);
2608 
2609             let virtio_balloon_device = Arc::new(Mutex::new(
2610                 virtio_devices::Balloon::new(
2611                     id.clone(),
2612                     balloon_config.size,
2613                     balloon_config.deflate_on_oom,
2614                     self.seccomp_action.clone(),
2615                 )
2616                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
2617             ));
2618 
2619             self.balloon = Some(virtio_balloon_device.clone());
2620 
2621             devices.push((
2622                 Arc::clone(&virtio_balloon_device) as VirtioDeviceArc,
2623                 false,
2624                 id.clone(),
2625             ));
2626 
2627             self.device_tree
2628                 .lock()
2629                 .unwrap()
2630                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
2631         }
2632 
2633         Ok(devices)
2634     }
2635 
2636     fn make_virtio_watchdog_devices(
2637         &mut self,
2638     ) -> DeviceManagerResult<Vec<(VirtioDeviceArc, bool, String)>> {
2639         let mut devices = Vec::new();
2640 
2641         if !self.config.lock().unwrap().watchdog {
2642             return Ok(devices);
2643         }
2644 
2645         let id = String::from(WATCHDOG_DEVICE_NAME);
2646         info!("Creating virtio-watchdog device: id = {}", id);
2647 
2648         let virtio_watchdog_device = Arc::new(Mutex::new(
2649             virtio_devices::Watchdog::new(
2650                 id.clone(),
2651                 self.reset_evt.try_clone().unwrap(),
2652                 self.seccomp_action.clone(),
2653             )
2654             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
2655         ));
2656         devices.push((
2657             Arc::clone(&virtio_watchdog_device) as VirtioDeviceArc,
2658             false,
2659             id.clone(),
2660         ));
2661 
2662         self.device_tree
2663             .lock()
2664             .unwrap()
2665             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
2666 
2667         Ok(devices)
2668     }
2669 
2670     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
2671         let start_id = self.device_id_cnt;
2672         loop {
2673             // Generate the temporary name.
2674             let name = format!("{}{}", prefix, self.device_id_cnt);
2675             // Increment the counter.
2676             self.device_id_cnt += Wrapping(1);
2677             // Check if the name is already in use.
2678             if !self.device_tree.lock().unwrap().contains_key(&name) {
2679                 return Ok(name);
2680             }
2681 
2682             if self.device_id_cnt == start_id {
2683                 // We went through a full loop and there's nothing else we can
2684                 // do.
2685                 break;
2686             }
2687         }
2688         Err(DeviceManagerError::NoAvailableDeviceName)
2689     }
2690 
2691     #[cfg_attr(not(feature = "kvm"), allow(unused_variables))]
2692     fn add_passthrough_device(
2693         &mut self,
2694         pci: &mut PciBus,
2695         device_cfg: &mut DeviceConfig,
2696     ) -> DeviceManagerResult<(u32, String)> {
2697         // If the passthrough device has not been created yet, it is created
2698         // here and stored in the DeviceManager structure for future needs.
2699         if self.passthrough_device.is_none() {
2700             self.passthrough_device = Some(
2701                 self.address_manager
2702                     .vm
2703                     .create_passthrough_device()
2704                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
2705             );
2706         }
2707 
2708         #[cfg(feature = "kvm")]
2709         return self.add_vfio_device(pci, device_cfg);
2710 
2711         #[cfg(not(feature = "kvm"))]
2712         Err(DeviceManagerError::NoDevicePassthroughSupport)
2713     }
2714 
    /// Creates a VFIO PCI device for `device_cfg.path`, plugs it on `pci` and
    /// records it in the device tree.
    ///
    /// The steps, in order:
    /// 1. take the next free PCI device ID from the bus,
    /// 2. build a `VfioContainer` on a duplicate of the passthrough device
    ///    file descriptor,
    /// 3. register a `VfioDmaMapping` either with the virtual IOMMU (when
    ///    `device_cfg.iommu` is set) or with every virtio-mem device,
    /// 4. create the `VfioPciDevice`, resolve its name, and map its MMIO
    ///    regions,
    /// 5. DMA-map every region of every memory zone,
    /// 6. add the device to the PCI bus and to the device tree.
    ///
    /// If `device_cfg.id` is `None`, a name is generated from
    /// `VFIO_DEVICE_NAME_PREFIX` and stored back into `device_cfg`.
    ///
    /// Returns the PCI b/d/f assigned to the device together with its name.
    #[cfg(feature = "kvm")]
    fn add_vfio_device(
        &mut self,
        pci: &mut PciBus,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(u32, String)> {
        let passthrough_device = self
            .passthrough_device
            .as_ref()
            .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;

        // We need to shift the device id since the 3 first bits
        // are dedicated to the PCI function, and we know we don't
        // do multifunction. Also, because we only support one PCI
        // bus, the bus 0, we don't need to add anything to the
        // global device ID.
        let pci_device_bdf = pci
            .next_device_id()
            .map_err(DeviceManagerError::NextPciDeviceId)?
            << 3;

        let memory = self.memory_manager.lock().unwrap().guest_memory();

        // Safe because we know the RawFd is valid.
        //
        // This dup() is mandatory to be able to give full ownership of the
        // file descriptor to the DeviceFd::from_raw_fd() function later in
        // the code.
        //
        // This is particularly needed so that VfioContainer will still have
        // a valid file descriptor even if DeviceManager, and therefore the
        // passthrough_device are dropped. In case of Drop, the file descriptor
        // would be closed, but Linux would still have the duplicated file
        // descriptor opened from DeviceFd, preventing from unexpected behavior
        // where the VfioContainer would try to use a closed file descriptor.
        //
        // NOTE(review): the value returned by dup() is handed to
        // DeviceFd::from_raw_fd() below without being checked for failure.
        let dup_device_fd = unsafe { libc::dup(passthrough_device.as_raw_fd()) };

        // SAFETY the raw fd conversion here is safe because:
        //   1. This function is only called on KVM, see the feature guard above.
        //   2. When running on KVM, passthrough_device wraps around DeviceFd.
        //   3. The conversion here extracts the raw fd and then turns the raw fd into a DeviceFd
        //      of the same (correct) type.
        let vfio_container = Arc::new(
            VfioContainer::new(Arc::new(unsafe { DeviceFd::from_raw_fd(dup_device_fd) }))
                .map_err(DeviceManagerError::VfioCreate)?,
        );

        let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
            .map_err(DeviceManagerError::VfioCreate)?;

        // Hand the DMA mapping handler either to the virtual IOMMU, when the
        // device is configured to sit behind it, or to every virtio-mem
        // device otherwise.
        let vfio_mapping = Arc::new(VfioDmaMapping::new(
            Arc::clone(&vfio_container),
            Arc::new(memory),
        ));
        if device_cfg.iommu {
            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf, vfio_mapping);
            }
        } else {
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(pci_device_bdf, vfio_mapping.clone())
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }
        }

        // A legacy interrupt group is only created when a legacy interrupt
        // manager is available. The IRQ comes from the slot table, indexed
        // by the PCI device number (b/d/f without the function bits).
        let legacy_interrupt_group = if let Some(legacy_interrupt_manager) =
            &self.legacy_interrupt_manager
        {
            Some(
                legacy_interrupt_manager
                    .create_group(LegacyIrqGroupConfig {
                        irq: self.pci_irq_slots[(pci_device_bdf >> 3) as usize] as InterruptIndex,
                    })
                    .map_err(DeviceManagerError::CreateInterruptGroup)?,
            )
        } else {
            None
        };

        let mut vfio_pci_device = VfioPciDevice::new(
            &self.address_manager.vm,
            vfio_device,
            vfio_container,
            &self.msi_interrupt_manager,
            legacy_interrupt_group,
            device_cfg.iommu,
        )
        .map_err(DeviceManagerError::VfioPciCreate)?;

        // Use the identifier from the configuration when there is one, and
        // refuse it if the device tree already knows that name. Otherwise
        // generate a name and store it back into the configuration.
        //
        // NOTE(review): by the time this check runs, a PCI device ID has
        // already been taken from the bus, and nothing in this function gives
        // it back on the error path.
        let vfio_name = if let Some(id) = &device_cfg.id {
            if self.device_tree.lock().unwrap().contains_key(id) {
                return Err(DeviceManagerError::DeviceIdAlreadyInUse);
            }

            id.clone()
        } else {
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        // Memory slots for the MMIO regions are obtained from the memory
        // manager through the closure.
        vfio_pci_device
            .map_mmio_regions(&self.address_manager.vm, || {
                self.memory_manager.lock().unwrap().allocate_memory_slot()
            })
            .map_err(DeviceManagerError::VfioMapRegion)?;

        let mut node = device_node!(vfio_name);

        // Record every MMIO region of the device as a resource of its node.
        for region in vfio_pci_device.mmio_regions() {
            node.resources.push(Resource::MmioAddressRange {
                base: region.start.0,
                size: region.length as u64,
            });
        }

        // Register DMA mapping in IOMMU.
        // Do not register virtio-mem regions, as they are handled directly by
        // virtio-mem device itself.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_pci_device
                    .dma_map(
                        region.start_addr().raw_value(),
                        region.len() as u64,
                        region.as_ptr() as u64,
                    )
                    .map_err(DeviceManagerError::VfioDmaMap)?;
            }
        }

        let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

        // The same object is passed twice: once as the bus device and once
        // as the PCI device.
        self.add_pci_device(
            pci,
            vfio_pci_device.clone(),
            vfio_pci_device.clone(),
            pci_device_bdf,
        )?;

        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_name.clone(), node);

        Ok((pci_device_bdf, vfio_name))
    }
2871 
2872     fn add_pci_device(
2873         &mut self,
2874         pci_bus: &mut PciBus,
2875         bus_device: Arc<Mutex<dyn BusDevice>>,
2876         pci_device: Arc<Mutex<dyn PciDevice>>,
2877         bdf: u32,
2878     ) -> DeviceManagerResult<Vec<(GuestAddress, GuestUsize, PciBarRegionType)>> {
2879         let bars = pci_device
2880             .lock()
2881             .unwrap()
2882             .allocate_bars(&mut self.address_manager.allocator.lock().unwrap())
2883             .map_err(DeviceManagerError::AllocateBars)?;
2884 
2885         pci_bus
2886             .add_device(bdf, pci_device)
2887             .map_err(DeviceManagerError::AddPciDevice)?;
2888 
2889         self.bus_devices.push(Arc::clone(&bus_device));
2890 
2891         pci_bus
2892             .register_mapping(
2893                 bus_device,
2894                 #[cfg(target_arch = "x86_64")]
2895                 self.address_manager.io_bus.as_ref(),
2896                 self.address_manager.mmio_bus.as_ref(),
2897                 bars.clone(),
2898             )
2899             .map_err(DeviceManagerError::AddPciDevice)?;
2900 
2901         Ok(bars)
2902     }
2903 
2904     fn add_vfio_devices(&mut self, pci: &mut PciBus) -> DeviceManagerResult<Vec<u32>> {
2905         let mut iommu_attached_device_ids = Vec::new();
2906         let mut devices = self.config.lock().unwrap().devices.clone();
2907 
2908         if let Some(device_list_cfg) = &mut devices {
2909             for device_cfg in device_list_cfg.iter_mut() {
2910                 let (device_id, _) = self.add_passthrough_device(pci, device_cfg)?;
2911                 if device_cfg.iommu && self.iommu_device.is_some() {
2912                     iommu_attached_device_ids.push(device_id);
2913                 }
2914             }
2915         }
2916 
2917         // Update the list of devices
2918         self.config.lock().unwrap().devices = devices;
2919 
2920         Ok(iommu_attached_device_ids)
2921     }
2922 
2923     fn add_virtio_pci_device(
2924         &mut self,
2925         virtio_device: VirtioDeviceArc,
2926         pci: &mut PciBus,
2927         iommu_mapping: &Option<Arc<IommuMapping>>,
2928         virtio_device_id: String,
2929     ) -> DeviceManagerResult<u32> {
2930         let id = format!("{}-{}", VIRTIO_PCI_DEVICE_NAME_PREFIX, virtio_device_id);
2931 
2932         // Add the new virtio-pci node to the device tree.
2933         let mut node = device_node!(id);
2934         node.children = vec![virtio_device_id.clone()];
2935 
2936         // Look for the id in the device tree. If it can be found, that means
2937         // the device is being restored, otherwise it's created from scratch.
2938         let (pci_device_bdf, config_bar_addr) =
2939             if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2940                 debug!("Restoring virtio-pci {} resources", id);
2941                 let pci_device_bdf = node
2942                     .pci_bdf
2943                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
2944 
2945                 pci.get_device_id((pci_device_bdf >> 3) as usize)
2946                     .map_err(DeviceManagerError::GetPciDeviceId)?;
2947 
2948                 if node.resources.is_empty() {
2949                     return Err(DeviceManagerError::MissingVirtioPciResources);
2950                 }
2951 
2952                 // We know the configuration BAR address is stored on the first
2953                 // resource in the list.
2954                 let config_bar_addr = match node.resources[0] {
2955                     Resource::MmioAddressRange { base, .. } => Some(base),
2956                     _ => {
2957                         error!("Unexpected resource {:?} for {}", node.resources[0], id);
2958                         return Err(DeviceManagerError::MissingVirtioPciResources);
2959                     }
2960                 };
2961 
2962                 (pci_device_bdf, config_bar_addr)
2963             } else {
2964                 // We need to shift the device id since the 3 first bits are dedicated
2965                 // to the PCI function, and we know we don't do multifunction.
2966                 // Also, because we only support one PCI bus, the bus 0, we don't need
2967                 // to add anything to the global device ID.
2968                 let pci_device_bdf = pci
2969                     .next_device_id()
2970                     .map_err(DeviceManagerError::NextPciDeviceId)?
2971                     << 3;
2972 
2973                 (pci_device_bdf, None)
2974             };
2975 
2976         // Update the existing virtio node by setting the parent.
2977         if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
2978             node.parent = Some(id.clone());
2979         } else {
2980             return Err(DeviceManagerError::MissingNode);
2981         }
2982 
2983         // Allows support for one MSI-X vector per queue. It also adds 1
2984         // as we need to take into account the dedicated vector to notify
2985         // about a virtio config change.
2986         let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;
2987 
2988         // Create the callback from the implementation of the DmaRemapping
2989         // trait. The point with the callback is to simplify the code as we
2990         // know about the device ID from this point.
2991         let iommu_mapping_cb: Option<Arc<VirtioIommuRemapping>> =
2992             if let Some(mapping) = iommu_mapping {
2993                 let mapping_clone = mapping.clone();
2994                 Some(Arc::new(Box::new(move |addr: u64| {
2995                     mapping_clone.translate(pci_device_bdf, addr).map_err(|e| {
2996                         std::io::Error::new(
2997                             std::io::ErrorKind::Other,
2998                             format!(
2999                                 "failed to translate addr 0x{:x} for device 00:{:02x}.0 {}",
3000                                 addr, pci_device_bdf, e
3001                             ),
3002                         )
3003                     })
3004                 }) as VirtioIommuRemapping))
3005             } else {
3006                 None
3007             };
3008 
3009         let memory = self.memory_manager.lock().unwrap().guest_memory();
3010         let mut virtio_pci_device = VirtioPciDevice::new(
3011             id.clone(),
3012             memory,
3013             virtio_device,
3014             msix_num,
3015             iommu_mapping_cb,
3016             &self.msi_interrupt_manager,
3017             pci_device_bdf,
3018             self.activate_evt
3019                 .try_clone()
3020                 .map_err(DeviceManagerError::EventFd)?,
3021         )
3022         .map_err(DeviceManagerError::VirtioDevice)?;
3023 
3024         // This is important as this will set the BAR address if it exists,
3025         // which is mandatory on the restore path.
3026         if let Some(addr) = config_bar_addr {
3027             virtio_pci_device.set_config_bar_addr(addr);
3028         }
3029 
3030         let virtio_pci_device = Arc::new(Mutex::new(virtio_pci_device));
3031         let bars = self.add_pci_device(
3032             pci,
3033             virtio_pci_device.clone(),
3034             virtio_pci_device.clone(),
3035             pci_device_bdf,
3036         )?;
3037 
3038         let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3039         for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3040             let io_addr = IoEventAddress::Mmio(addr);
3041             self.address_manager
3042                 .vm
3043                 .register_ioevent(event, &io_addr, None)
3044                 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
3045         }
3046 
3047         // Update the device tree with correct resource information.
3048         for pci_bar in bars.iter() {
3049             node.resources.push(Resource::MmioAddressRange {
3050                 base: pci_bar.0.raw_value(),
3051                 size: pci_bar.1 as u64,
3052             });
3053         }
3054         node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
3055         node.pci_bdf = Some(pci_device_bdf);
3056         node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
3057         self.device_tree.lock().unwrap().insert(id, node);
3058 
3059         Ok(pci_device_bdf)
3060     }
3061 
    /// Returns the I/O bus held by the address manager (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }
3066 
    /// Returns the MMIO bus held by the address manager.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }
3070 
    /// Returns the system allocator held by the address manager.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }
3074 
3075     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3076         self.interrupt_controller
3077             .as_ref()
3078             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3079     }
3080 
    /// Returns the console owned by the device manager.
    pub fn console(&self) -> &Arc<Console> {
        &self.console
    }
3084 
    /// Returns the strings stored in `cmdline_additions` as a slice.
    // NOTE(review): presumably extra kernel command line entries collected
    // while creating devices; confirm against the code filling this field.
    pub fn cmdline_additions(&self) -> &[String] {
        self.cmdline_additions.as_slice()
    }
3088 
3089     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3090         for (virtio_device, _, _) in self.virtio_devices.iter() {
3091             virtio_device
3092                 .lock()
3093                 .unwrap()
3094                 .add_memory_region(new_region)
3095                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3096         }
3097 
3098         // Take care of updating the memory for VFIO PCI devices.
3099         #[cfg(feature = "kvm")]
3100         {
3101             let device_tree = self.device_tree.lock().unwrap();
3102             for pci_device_node in device_tree.pci_devices() {
3103                 if let PciDeviceHandle::Vfio(vfio_pci_device) = pci_device_node
3104                     .pci_device_handle
3105                     .as_ref()
3106                     .ok_or(DeviceManagerError::MissingPciDevice)?
3107                 {
3108                     vfio_pci_device
3109                         .lock()
3110                         .unwrap()
3111                         .dma_map(
3112                             new_region.start_addr().raw_value(),
3113                             new_region.len() as u64,
3114                             new_region.as_ptr() as u64,
3115                         )
3116                         .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3117                 }
3118             }
3119         }
3120 
3121         Ok(())
3122     }
3123 
3124     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3125         // Find virtio pci devices and activate any pending ones
3126         let device_tree = self.device_tree.lock().unwrap();
3127         for pci_device_node in device_tree.pci_devices() {
3128             #[allow(irrefutable_let_patterns)]
3129             if let PciDeviceHandle::Virtio(virtio_pci_device) = &pci_device_node
3130                 .pci_device_handle
3131                 .as_ref()
3132                 .ok_or(DeviceManagerError::MissingPciDevice)?
3133             {
3134                 virtio_pci_device.lock().unwrap().maybe_activate();
3135             }
3136         }
3137         Ok(())
3138     }
3139 
3140     pub fn notify_hotplug(
3141         &self,
3142         _notification_type: AcpiNotificationFlags,
3143     ) -> DeviceManagerResult<()> {
3144         #[cfg(feature = "acpi")]
3145         return self
3146             .ged_notification_device
3147             .as_ref()
3148             .unwrap()
3149             .lock()
3150             .unwrap()
3151             .notify(_notification_type)
3152             .map_err(DeviceManagerError::HotPlugNotification);
3153         #[cfg(not(feature = "acpi"))]
3154         return Ok(());
3155     }
3156 
3157     pub fn add_device(
3158         &mut self,
3159         device_cfg: &mut DeviceConfig,
3160     ) -> DeviceManagerResult<PciDeviceInfo> {
3161         let pci = if let Some(pci_bus) = &self.pci_bus {
3162             Arc::clone(pci_bus)
3163         } else {
3164             return Err(DeviceManagerError::NoPciBus);
3165         };
3166 
3167         let (device_id, device_name) =
3168             self.add_passthrough_device(&mut pci.lock().unwrap(), device_cfg)?;
3169 
3170         // Update the PCIU bitmap
3171         self.pci_devices_up |= 1 << (device_id >> 3);
3172 
3173         Ok(PciDeviceInfo {
3174             id: device_name,
3175             bdf: device_id,
3176         })
3177     }
3178 
3179     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
3180         // The node can be directly a PCI node in case the 'id' refers to a
3181         // VFIO device or a virtio-pci one.
3182         // In case the 'id' refers to a virtio device, we must find the PCI
3183         // node by looking at the parent.
3184         let device_tree = self.device_tree.lock().unwrap();
3185         let node = device_tree
3186             .get(&id)
3187             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
3188 
3189         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
3190             node
3191         } else {
3192             let parent = node
3193                 .parent
3194                 .as_ref()
3195                 .ok_or(DeviceManagerError::MissingNode)?;
3196             device_tree
3197                 .get(parent)
3198                 .ok_or(DeviceManagerError::MissingNode)?
3199         };
3200 
3201         let pci_device_bdf = pci_device_node
3202             .pci_bdf
3203             .ok_or(DeviceManagerError::MissingPciDevice)?;
3204         let pci_device_handle = pci_device_node
3205             .pci_device_handle
3206             .as_ref()
3207             .ok_or(DeviceManagerError::MissingPciDevice)?;
3208         #[allow(irrefutable_let_patterns)]
3209         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
3210             let device_type = VirtioDeviceType::from(
3211                 virtio_pci_device
3212                     .lock()
3213                     .unwrap()
3214                     .virtio_device()
3215                     .lock()
3216                     .unwrap()
3217                     .device_type(),
3218             );
3219             match device_type {
3220                 VirtioDeviceType::Net
3221                 | VirtioDeviceType::Block
3222                 | VirtioDeviceType::Pmem
3223                 | VirtioDeviceType::Fs
3224                 | VirtioDeviceType::Vsock => {}
3225                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
3226             }
3227         }
3228 
3229         // Update the PCID bitmap
3230         self.pci_devices_down |= 1 << (pci_device_bdf >> 3);
3231 
3232         Ok(())
3233     }
3234 
    /// Tears down the PCI device occupying slot `device_id` and releases the
    /// resources the `DeviceManager` holds for it.
    ///
    /// The steps, in order, are:
    /// 1. return the slot ID to the PCI bus,
    /// 2. remove the device node (and its children) from the device tree,
    /// 3. run the handle-specific cleanup (VFIO DMA unmapping, or virtio
    ///    ioeventfd unregistration),
    /// 4. free the BARs and remove the device from the PCI, IO (x86_64 only)
    ///    and MMIO buses,
    /// 5. for virtio devices, remove userspace mappings, shut the device
    ///    down and drop it from `self.virtio_devices`.
    ///
    /// # Errors
    ///
    /// Returns `NoPciBus` when no PCI bus exists, `MissingPciDevice` when no
    /// device-tree node (or no PCI handle on that node) matches the slot, and
    /// the wrapped error of whichever teardown step fails.
    ///
    /// NOTE(review): the slot ID is given back to the bus *before* the device
    /// tree lookup, so a `MissingPciDevice` error is returned with the ID
    /// already released — confirm this ordering is intended.
    pub fn eject_device(&mut self, device_id: u8) -> DeviceManagerResult<()> {
        // Retrieve the PCI bus.
        let pci = if let Some(pci_bus) = &self.pci_bus {
            Arc::clone(pci_bus)
        } else {
            return Err(DeviceManagerError::NoPciBus);
        };

        // Convert the device ID into the corresponding b/d/f: the device
        // number is shifted left by 3 bits.
        let pci_device_bdf = (device_id as u32) << 3;

        // Give the PCI device ID back to the PCI bus.
        pci.lock()
            .unwrap()
            .put_device_id(device_id as usize)
            .map_err(DeviceManagerError::PutPciDeviceId)?;

        // Remove the device from the device tree along with its children.
        // The device tree guard taken here stays alive until the function
        // returns.
        let mut device_tree = self.device_tree.lock().unwrap();
        let pci_device_node = device_tree
            .remove_node_by_pci_bdf(pci_device_bdf)
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        for child in pci_device_node.children.iter() {
            device_tree.remove(child);
        }

        let pci_device_handle = pci_device_node
            .pci_device_handle
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        // Perform the handle-specific cleanup and produce the three views of
        // the device needed below: as a PciDevice, as a BusDevice and, for
        // virtio only, the underlying virtio device.
        let (pci_device, bus_device, virtio_device) = match pci_device_handle {
            #[cfg(feature = "kvm")]
            PciDeviceHandle::Vfio(vfio_pci_device) => {
                {
                    // Unregister DMA mapping in IOMMU.
                    // Do not unregister the virtio-mem region, as it is
                    // directly handled by the virtio-mem device.
                    let dev = vfio_pci_device.lock().unwrap();
                    for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                        for region in zone.regions() {
                            dev.dma_unmap(region.start_addr().raw_value(), region.len() as u64)
                                .map_err(DeviceManagerError::VfioDmaUnmap)?;
                        }
                    }

                    // Unregister the VFIO mapping handler from all virtio-mem
                    // devices. This is only done when the device is not
                    // attached to the IOMMU.
                    if !dev.iommu_attached() {
                        for virtio_mem_device in self.virtio_mem_devices.iter() {
                            virtio_mem_device
                                .lock()
                                .unwrap()
                                .remove_dma_mapping_handler(pci_device_bdf)
                                .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
                        }
                    }
                }

                (
                    Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    None as Option<VirtioDeviceArc>,
                )
            }
            PciDeviceHandle::Virtio(virtio_pci_device) => {
                // Unregister every ioeventfd exposed by the device for its
                // configuration BAR.
                let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
                for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.address_manager
                        .vm
                        .unregister_ioevent(event, &io_addr)
                        .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
                }

                (
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
                    Some(virtio_pci_device.lock().unwrap().virtio_device()),
                )
            }
        };

        // Free the allocated BARs
        pci_device
            .lock()
            .unwrap()
            .free_bars(&mut self.address_manager.allocator.lock().unwrap())
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        pci.lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            // Undo every userspace mapping the device reports before
            // shutting it down.
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|(d, _, _)| !Arc::ptr_eq(d, &virtio_device));
        }

        // At this point, the device has been removed from all the lists and
        // buses where it was stored. At the end of this function, once the
        // remaining local references (bus_device and pci_device) are
        // released, the actual device will be dropped.
        Ok(())
    }
3373 
3374     fn hotplug_virtio_pci_device(
3375         &mut self,
3376         device: VirtioDeviceArc,
3377         iommu_attached: bool,
3378         id: String,
3379     ) -> DeviceManagerResult<PciDeviceInfo> {
3380         if iommu_attached {
3381             warn!("Placing device behind vIOMMU is not available for hotplugged devices");
3382         }
3383 
3384         let pci = if let Some(pci_bus) = &self.pci_bus {
3385             Arc::clone(pci_bus)
3386         } else {
3387             return Err(DeviceManagerError::NoPciBus);
3388         };
3389 
3390         // Add the virtio device to the device manager list. This is important
3391         // as the list is used to notify virtio devices about memory updates
3392         // for instance.
3393         self.virtio_devices
3394             .push((device.clone(), iommu_attached, id.clone()));
3395 
3396         let device_id =
3397             self.add_virtio_pci_device(device, &mut pci.lock().unwrap(), &None, id.clone())?;
3398 
3399         // Update the PCIU bitmap
3400         self.pci_devices_up |= 1 << (device_id >> 3);
3401 
3402         Ok(PciDeviceInfo { id, bdf: device_id })
3403     }
3404 
3405     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
3406         let (device, iommu_attached, id) = self.make_virtio_block_device(disk_cfg)?;
3407         self.hotplug_virtio_pci_device(device, iommu_attached, id)
3408     }
3409 
3410     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
3411         let (device, iommu_attached, id) = self.make_virtio_fs_device(fs_cfg)?;
3412         self.hotplug_virtio_pci_device(device, iommu_attached, id)
3413     }
3414 
3415     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
3416         let (device, iommu_attached, id) = self.make_virtio_pmem_device(pmem_cfg)?;
3417         self.hotplug_virtio_pci_device(device, iommu_attached, id)
3418     }
3419 
3420     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
3421         let (device, iommu_attached, id) = self.make_virtio_net_device(net_cfg)?;
3422         self.hotplug_virtio_pci_device(device, iommu_attached, id)
3423     }
3424 
3425     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
3426         let (device, iommu_attached, id) = self.make_virtio_vsock_device(vsock_cfg)?;
3427         self.hotplug_virtio_pci_device(device, iommu_attached, id)
3428     }
3429 
3430     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
3431         let mut counters = HashMap::new();
3432 
3433         for (virtio_device, _, id) in &self.virtio_devices {
3434             let virtio_device = virtio_device.lock().unwrap();
3435             if let Some(device_counters) = virtio_device.counters() {
3436                 counters.insert(id.clone(), device_counters.clone());
3437             }
3438         }
3439 
3440         counters
3441     }
3442 
3443     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
3444         if let Some(balloon) = &self.balloon {
3445             return balloon
3446                 .lock()
3447                 .unwrap()
3448                 .resize(size)
3449                 .map_err(DeviceManagerError::VirtioBalloonResize);
3450         }
3451 
3452         warn!("No balloon setup: Can't resize the balloon");
3453         Err(DeviceManagerError::MissingVirtioBalloon)
3454     }
3455 
3456     pub fn balloon_size(&self) -> u64 {
3457         if let Some(balloon) = &self.balloon {
3458             return balloon.lock().unwrap().get_actual();
3459         }
3460 
3461         0
3462     }
3463 
3464     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
3465         self.device_tree.clone()
3466     }
3467 
3468     pub fn restore_devices(
3469         &mut self,
3470         snapshot: Snapshot,
3471     ) -> std::result::Result<(), MigratableError> {
3472         // Finally, restore all devices associated with the DeviceManager.
3473         // It's important to restore devices in the right order, that's why
3474         // the device tree is the right way to ensure we restore a child before
3475         // its parent node.
3476         for node in self
3477             .device_tree
3478             .lock()
3479             .unwrap()
3480             .breadth_first_traversal()
3481             .rev()
3482         {
3483             // Restore the node
3484             if let Some(migratable) = &node.migratable {
3485                 debug!("Restoring {} from DeviceManager", node.id);
3486                 if let Some(snapshot) = snapshot.snapshots.get(&node.id) {
3487                     migratable.lock().unwrap().pause()?;
3488                     migratable.lock().unwrap().restore(*snapshot.clone())?;
3489                 } else {
3490                     return Err(MigratableError::Restore(anyhow!(
3491                         "Missing device {}",
3492                         node.id
3493                     )));
3494                 }
3495             }
3496         }
3497 
3498         Ok(())
3499     }
3500 
3501     #[cfg(feature = "acpi")]
3502     #[cfg(target_arch = "x86_64")]
3503     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
3504         self.ged_notification_device
3505             .as_ref()
3506             .unwrap()
3507             .lock()
3508             .unwrap()
3509             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
3510             .map_err(DeviceManagerError::PowerButtonNotification)
3511     }
3512 
3513     #[cfg(target_arch = "aarch64")]
3514     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
3515         self.gpio_device
3516             .as_ref()
3517             .unwrap()
3518             .lock()
3519             .unwrap()
3520             .trigger_key(3)
3521             .map_err(DeviceManagerError::AArch64PowerButtonNotification)
3522     }
3523 
3524     pub fn iommu_attached_devices(&self) -> &Option<(u32, Vec<u32>)> {
3525         &self.iommu_attached_devices
3526     }
3527 }
3528 
/// Looks up the NUMA node owning the memory zone named `memory_zone_id`.
///
/// Returns the id of the first node (in `numa_nodes` iteration order) whose
/// memory zone list contains `memory_zone_id`, or `None` when no node
/// references that zone.
#[cfg(feature = "acpi")]
fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
    numa_nodes
        .iter()
        .find(|(_, numa_node)| {
            // Compare borrowed strings directly instead of allocating an
            // owned copy of the zone id for every node.
            numa_node
                .memory_zones()
                .iter()
                .any(|zone| zone.as_str() == memory_zone_id)
        })
        .map(|(numa_node_id, _)| *numa_node_id)
}
3542 
/// AML generator for a single PCI device slot.
#[cfg(feature = "acpi")]
struct PciDevSlot {
    /// Slot (device) number, used for the device name, `_SUN` and `_ADR`.
    device_id: u8,
}

#[cfg(feature = "acpi")]
impl Aml for PciDevSlot {
    /// Emits a device named `Sxxx` (zero-padded slot number) containing:
    /// - `_SUN`: the slot number,
    /// - `_ADR`: the slot number shifted into the upper 16 bits,
    /// - `_EJ0`: a method calling `\_SB_.PHPR.PCEJ` with `_SUN`.
    fn to_aml_bytes(&self) -> Vec<u8> {
        let slot_name = format!("S{:03}", self.device_id);
        let slot_number = self.device_id;
        let slot_address: u32 = u32::from(self.device_id) << 16;

        aml::Device::new(
            slot_name.as_str().into(),
            vec![
                &aml::Name::new("_SUN".into(), &slot_number),
                &aml::Name::new("_ADR".into(), &slot_address),
                &aml::Method::new(
                    "_EJ0".into(),
                    1,
                    true,
                    vec![&aml::MethodCall::new(
                        "\\_SB_.PHPR.PCEJ".into(),
                        vec![&aml::Path::new("_SUN")],
                    )],
                ),
            ],
        )
        .to_aml_bytes()
    }
}
3572 
/// AML generator for the per-slot notification snippet used inside the
/// `DVNT` method.
#[cfg(feature = "acpi")]
struct PciDevSlotNotify {
    /// Slot (device) number this snippet tests for and notifies.
    device_id: u8,
}

#[cfg(feature = "acpi")]
impl Aml for PciDevSlotNotify {
    /// Emits two AML statements: an `And` of `Arg0` with this slot's bit
    /// stored into `Local0`, followed by an `If` that notifies device `Sxxx`
    /// with `Arg1` when `Local0` equals that bit.
    fn to_aml_bytes(&self) -> Vec<u8> {
        let slot_bit: u32 = 1 << self.device_id;
        let slot_path = aml::Path::new(&format!("S{:03}", self.device_id));

        // Local0 = Arg0 & slot_bit
        let mut aml_bytes = aml::And::new(&aml::Local(0), &aml::Arg(0), &slot_bit).to_aml_bytes();

        // If (Local0 == slot_bit) { Notify (Sxxx, Arg1) }
        let notify_if_set = aml::If::new(
            &aml::Equal::new(&aml::Local(0), &slot_bit),
            vec![&aml::Notify::new(&slot_path, &aml::Arg(1))],
        )
        .to_aml_bytes();
        aml_bytes.extend_from_slice(&notify_if_set);

        aml_bytes
    }
}
3594 
/// AML generator for the `DVNT` and `PCNT` hotplug notification methods.
#[cfg(feature = "acpi")]
struct PciDevSlotMethods {}

#[cfg(feature = "acpi")]
impl Aml for PciDevSlotMethods {
    /// Emits two methods back to back:
    /// - `DVNT` (2 arguments): the notification snippets of the 32 slots,
    /// - `PCNT` (no argument): calls `DVNT` with the `PCIU` field and 1,
    ///   then with the `PCID` field and 3.
    fn to_aml_bytes(&self) -> Vec<u8> {
        let slot_notifies: Vec<PciDevSlotNotify> = (0..32)
            .map(|device_id| PciDevSlotNotify { device_id })
            .collect();
        let slot_notify_refs: Vec<&dyn aml::Aml> = slot_notifies
            .iter()
            .map(|slot_notify| slot_notify as &dyn aml::Aml)
            .collect();

        let mut aml_bytes =
            aml::Method::new("DVNT".into(), 2, true, slot_notify_refs).to_aml_bytes();

        let pcnt_bytes = aml::Method::new(
            "PCNT".into(),
            0,
            true,
            vec![
                &aml::MethodCall::new(
                    "DVNT".into(),
                    vec![&aml::Path::new("\\_SB_.PHPR.PCIU"), &aml::ONE],
                ),
                &aml::MethodCall::new(
                    "DVNT".into(),
                    vec![&aml::Path::new("\\_SB_.PHPR.PCID"), &3usize],
                ),
            ],
        )
        .to_aml_bytes();
        aml_bytes.extend_from_slice(&pcnt_bytes);

        aml_bytes
    }
}
3635 
/// AML generator for the `_DSM` method.
#[cfg(feature = "acpi")]
struct PciDsmMethod {}

#[cfg(feature = "acpi")]
impl Aml for PciDsmMethod {
    /// Emits the `_DSM` method described by the ASL listing below.
    fn to_aml_bytes(&self) -> Vec<u8> {
        // Refer to ACPI spec v6.3 Ch 9.1.1 and PCI Firmware spec v3.3 Ch 4.6.1
        // _DSM (Device Specific Method), the following is the implementation in ASL.
        /*
        Method (_DSM, 4, NotSerialized)  // _DSM: Device-Specific Method
        {
              If ((Arg0 == ToUUID ("e5c937d0-3553-4d7a-9117-ea4d19c3434d") /* Device Labeling Interface */))
              {
                  If ((Arg2 == Zero))
                  {
                      Return (Buffer (One) { 0x21 })
                  }
                  If ((Arg2 == 0x05))
                  {
                      Return (Zero)
                  }
              }

              Return (Buffer (One) { 0x00 })
        }
         */
        /*
         * As per ACPI v6.3 Ch 19.6.142, the UUID is required to be in mixed endian:
         * Among the fields of a UUID:
         *   {d1 (8 digits)} - {d2 (4 digits)} - {d3 (4 digits)} - {d4 (16 digits)}
         * d1 ~ d3 need to be little endian, d4 be big endian.
         * See https://en.wikipedia.org/wiki/Universally_unique_identifier#Encoding .
         */
        let dsm_uuid = Uuid::parse_str("E5C937D0-3553-4D7A-9117-EA4D19C3434D").unwrap();
        let (time_low, time_mid, time_hi, tail) = dsm_uuid.as_fields();

        // First three fields little endian, trailing bytes copied as-is.
        let mut uuid_bytes = Vec::with_capacity(16);
        uuid_bytes.extend_from_slice(&time_low.to_le_bytes());
        uuid_bytes.extend_from_slice(&time_mid.to_le_bytes());
        uuid_bytes.extend_from_slice(&time_hi.to_le_bytes());
        uuid_bytes.extend_from_slice(tail);

        aml::Method::new(
            "_DSM".into(),
            4,
            false,
            vec![
                &aml::If::new(
                    &aml::Equal::new(&aml::Arg(0), &aml::Buffer::new(uuid_bytes)),
                    vec![
                        &aml::If::new(
                            &aml::Equal::new(&aml::Arg(2), &aml::ZERO),
                            vec![&aml::Return::new(&aml::Buffer::new(vec![0x21]))],
                        ),
                        &aml::If::new(
                            &aml::Equal::new(&aml::Arg(2), &0x05u8),
                            vec![&aml::Return::new(&aml::ZERO)],
                        ),
                    ],
                ),
                &aml::Return::new(&aml::Buffer::new(vec![0])),
            ],
        )
        .to_aml_bytes()
    }
}
3700 
#[cfg(feature = "acpi")]
impl Aml for DeviceManager {
    /// Serializes the ACPI devices owned by the `DeviceManager`.
    ///
    /// The returned bytes are the concatenation, in this order, of:
    /// 1. `_SB_.PHPR`, the PCI hotplug controller,
    /// 2. `_SB_.PCI0`, the PCI bus with its 32 slots, hotplug methods and
    ///    interrupt routing table,
    /// 3. `_SB_.MBRD`, which reserves the PCI MMCONFIG range,
    /// 4. `_SB_.COM1`, the serial device (omitted when the serial console
    ///    mode is `Off`),
    /// 5. the `_S5_` sleep state package,
    /// 6. `_SB_.PWRB`, the power button,
    /// 7. the AML of the GED notification device.
    ///
    /// # Panics
    ///
    /// Panics if the GED notification device has not been created.
    fn to_aml_bytes(&self) -> Vec<u8> {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        let mut bytes = Vec::new();
        // PCI hotplug controller
        bytes.extend_from_slice(
            &aml::Device::new(
                "_SB_.PHPR".into(),
                vec![
                    &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
                    &aml::Name::new("_STA".into(), &0x0bu8),
                    &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                    &aml::Mutex::new("BLCK".into(), 0),
                    // Memory range of DEVICE_MANAGER_ACPI_SIZE bytes starting
                    // at the DeviceManager ACPI address.
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                            aml::AddressSpaceCachable::NotCacheable,
                            true,
                            self.acpi_address.0 as u64,
                            self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        )]),
                    ),
                    // OpRegion and Fields map MMIO range into individual field values
                    &aml::OpRegion::new(
                        "PCST".into(),
                        aml::OpRegionSpace::SystemMemory,
                        self.acpi_address.0 as usize,
                        DEVICE_MANAGER_ACPI_SIZE,
                    ),
                    // Three consecutive 32-bit fields: PCIU, PCID and B0EJ.
                    &aml::Field::new(
                        "PCST".into(),
                        aml::FieldAccessType::DWord,
                        aml::FieldUpdateRule::WriteAsZeroes,
                        vec![
                            aml::FieldEntry::Named(*b"PCIU", 32),
                            aml::FieldEntry::Named(*b"PCID", 32),
                            aml::FieldEntry::Named(*b"B0EJ", 32),
                        ],
                    ),
                    &aml::Method::new(
                        "PCEJ".into(),
                        1,
                        true,
                        vec![
                            // Take lock defined above
                            &aml::Acquire::new("BLCK".into(), 0xffff),
                            // Shift ONE left by the first argument and store
                            // the result in the B0EJ field of the PCST
                            // (SystemMemory) region.
                            // NOTE(review): presumably a one-hot mask of the
                            // slot to eject — confirm the argument order of
                            // aml::ShiftLeft::new.
                            &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                            // Release lock
                            &aml::Release::new("BLCK".into()),
                            // Return 0
                            &aml::Return::new(&aml::ZERO),
                        ],
                    ),
                ],
            )
            .to_aml_bytes(),
        );

        let start_of_device_area = self.memory_manager.lock().unwrap().start_of_device_area().0;
        let end_of_device_area = self.memory_manager.lock().unwrap().end_of_device_area().0;

        // Children of the _SB_.PCI0 device, accumulated as references to the
        // locals declared below.
        let mut pci_dsdt_inner_data: Vec<&dyn aml::Aml> = Vec::new();
        let hid = aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A08"));
        pci_dsdt_inner_data.push(&hid);
        let cid = aml::Name::new("_CID".into(), &aml::EisaName::new("PNP0A03"));
        pci_dsdt_inner_data.push(&cid);
        let adr = aml::Name::new("_ADR".into(), &aml::ZERO);
        pci_dsdt_inner_data.push(&adr);
        let seg = aml::Name::new("_SEG".into(), &aml::ZERO);
        pci_dsdt_inner_data.push(&seg);
        let uid = aml::Name::new("_UID".into(), &aml::ZERO);
        pci_dsdt_inner_data.push(&uid);
        let supp = aml::Name::new("SUPP".into(), &aml::ZERO);
        pci_dsdt_inner_data.push(&supp);

        // Since Cloud Hypervisor supports only one PCI bus, it can be tied
        // to the NUMA node 0. It's up to the user to organize the NUMA nodes
        // so that the PCI bus relates to the expected vCPUs and guest RAM.
        let proximity_domain = 0u32;
        let pxm_return = aml::Return::new(&proximity_domain);
        let pxm = aml::Method::new("_PXM".into(), 0, false, vec![&pxm_return]);
        pci_dsdt_inner_data.push(&pxm);

        let pci_dsm = PciDsmMethod {};
        pci_dsdt_inner_data.push(&pci_dsm);

        // Resources of the PCI bus: bus numbers, the architecture specific
        // configuration access window, the 32-bit device memory range, the
        // device area reported by the memory manager and, on x86_64, the I/O
        // port ranges on either side of 0xcf8-0xcff.
        let crs = aml::Name::new(
            "_CRS".into(),
            &aml::ResourceTemplate::new(vec![
                &aml::AddressSpace::new_bus_number(0x0u16, 0xffu16),
                #[cfg(target_arch = "x86_64")]
                &aml::Io::new(0xcf8, 0xcf8, 1, 0x8),
                #[cfg(target_arch = "aarch64")]
                &aml::Memory32Fixed::new(
                    true,
                    layout::PCI_MMCONFIG_START.0 as u32,
                    layout::PCI_MMCONFIG_SIZE as u32,
                ),
                &aml::AddressSpace::new_memory(
                    aml::AddressSpaceCachable::NotCacheable,
                    true,
                    layout::MEM_32BIT_DEVICES_START.0 as u32,
                    (layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE - 1) as u32,
                ),
                &aml::AddressSpace::new_memory(
                    aml::AddressSpaceCachable::NotCacheable,
                    true,
                    start_of_device_area,
                    end_of_device_area,
                ),
                #[cfg(target_arch = "x86_64")]
                &aml::AddressSpace::new_io(0u16, 0x0cf7u16),
                #[cfg(target_arch = "x86_64")]
                &aml::AddressSpace::new_io(0x0d00u16, 0xffffu16),
            ]),
        );
        pci_dsdt_inner_data.push(&crs);

        // One PciDevSlot per slot of the bus (32 slots). The slots are first
        // stored in a vector so that references to them stay valid while
        // pci_dsdt_inner_data is in use.
        let mut pci_devices = Vec::new();
        for device_id in 0..32 {
            let pci_device = PciDevSlot { device_id };
            pci_devices.push(pci_device);
        }
        for pci_device in pci_devices.iter() {
            pci_dsdt_inner_data.push(pci_device);
        }

        let pci_device_methods = PciDevSlotMethods {};
        pci_dsdt_inner_data.push(&pci_device_methods);

        // Build PCI routing table, listing IRQs assigned to PCI devices.
        // Each entry pairs an address made of the slot index (masked to 5
        // bits) in the upper 16 bits and 0xffff in the lower 16 bits with the
        // IRQ recorded for that slot in pci_irq_slots.
        let prt_package_list: Vec<(u32, u32)> = self
            .pci_irq_slots
            .iter()
            .enumerate()
            .map(|(i, irq)| (((((i as u32) & 0x1fu32) << 16) | 0xffffu32), *irq as u32))
            .collect();
        let prt_package_list: Vec<aml::Package> = prt_package_list
            .iter()
            .map(|(bdf, irq)| aml::Package::new(vec![bdf, &0u8, &0u8, irq]))
            .collect();
        let prt_package_list: Vec<&dyn Aml> = prt_package_list
            .iter()
            .map(|item| item as &dyn Aml)
            .collect();
        let prt = aml::Name::new("_PRT".into(), &aml::Package::new(prt_package_list));
        pci_dsdt_inner_data.push(&prt);

        let pci_dsdt_data =
            aml::Device::new("_SB_.PCI0".into(), pci_dsdt_inner_data).to_aml_bytes();

        // Device whose only resource is the PCI MMCONFIG range.
        let mbrd_dsdt_data = aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                        true,
                        layout::PCI_MMCONFIG_START.0 as u32,
                        layout::PCI_MMCONFIG_SIZE as u32,
                    )]),
                ),
            ],
        )
        .to_aml_bytes();

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        // The COM1 description is always built but only appended to the
        // output below when the serial console mode is not Off.
        let com1_dsdt_data = aml::Device::new(
            "_SB_.COM1".into(),
            vec![
                &aml::Name::new(
                    "_HID".into(),
                    #[cfg(target_arch = "x86_64")]
                    &aml::EisaName::new("PNP0501"),
                    #[cfg(target_arch = "aarch64")]
                    &"ARMH0011",
                ),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![
                        &aml::Interrupt::new(true, true, false, false, serial_irq),
                        #[cfg(target_arch = "x86_64")]
                        &aml::Io::new(0x3f8, 0x3f8, 0, 0x8),
                        #[cfg(target_arch = "aarch64")]
                        &aml::Memory32Fixed::new(
                            true,
                            arch::layout::LEGACY_SERIAL_MAPPED_IO_START as u32,
                            MMIO_LEN as u32,
                        ),
                    ]),
                ),
            ],
        )
        .to_aml_bytes();

        let s5_sleep_data =
            aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes();

        let power_button_dsdt_data = aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes();

        let ged_data = self
            .ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes();

        // Assemble the final byte stream; the hotplug controller has already
        // been written at the start of `bytes`.
        bytes.extend_from_slice(pci_dsdt_data.as_slice());
        bytes.extend_from_slice(mbrd_dsdt_data.as_slice());
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            bytes.extend_from_slice(com1_dsdt_data.as_slice());
        }
        bytes.extend_from_slice(s5_sleep_data.as_slice());
        bytes.extend_from_slice(power_button_dsdt_data.as_slice());
        bytes.extend_from_slice(ged_data.as_slice());
        bytes
    }
}
3948 
3949 impl Pausable for DeviceManager {
3950     fn pause(&mut self) -> result::Result<(), MigratableError> {
3951         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
3952             if let Some(migratable) = &device_node.migratable {
3953                 migratable.lock().unwrap().pause()?;
3954             }
3955         }
3956 
3957         Ok(())
3958     }
3959 
3960     fn resume(&mut self) -> result::Result<(), MigratableError> {
3961         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
3962             if let Some(migratable) = &device_node.migratable {
3963                 migratable.lock().unwrap().resume()?;
3964             }
3965         }
3966 
3967         Ok(())
3968     }
3969 }
3970 
3971 impl Snapshottable for DeviceManager {
3972     fn id(&self) -> String {
3973         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
3974     }
3975 
3976     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
3977         let mut snapshot = Snapshot::new(DEVICE_MANAGER_SNAPSHOT_ID);
3978 
3979         // We aggregate all devices snapshots.
3980         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
3981             if let Some(migratable) = &device_node.migratable {
3982                 let device_snapshot = migratable.lock().unwrap().snapshot()?;
3983                 snapshot.add_snapshot(device_snapshot);
3984             }
3985         }
3986 
3987         // Then we store the DeviceManager state.
3988         snapshot.add_data_section(SnapshotDataSection::new_from_state(
3989             DEVICE_MANAGER_SNAPSHOT_ID,
3990             &self.state(),
3991         )?);
3992 
3993         Ok(snapshot)
3994     }
3995 
3996     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
3997         // Let's first restore the DeviceManager.
3998 
3999         self.set_state(&snapshot.to_state(DEVICE_MANAGER_SNAPSHOT_ID)?);
4000 
4001         // Now that DeviceManager is updated with the right states, it's time
4002         // to create the devices based on the configuration.
4003         self.create_devices(None, None)
4004             .map_err(|e| MigratableError::Restore(anyhow!("Could not create devices {:?}", e)))?;
4005 
4006         Ok(())
4007     }
4008 }
4009 
4010 impl Transportable for DeviceManager {}
4011 
4012 impl Migratable for DeviceManager {
4013     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4014         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4015             if let Some(migratable) = &device_node.migratable {
4016                 migratable.lock().unwrap().start_dirty_log()?;
4017             }
4018         }
4019         Ok(())
4020     }
4021 
4022     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4023         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4024             if let Some(migratable) = &device_node.migratable {
4025                 migratable.lock().unwrap().stop_dirty_log()?;
4026             }
4027         }
4028         Ok(())
4029     }
4030 
4031     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4032         let mut tables = Vec::new();
4033         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4034             if let Some(migratable) = &device_node.migratable {
4035                 tables.push(migratable.lock().unwrap().dirty_log()?);
4036             }
4037         }
4038         Ok(MemoryRangeTable::new_from_tables(tables))
4039     }
4040 }
4041 
// Layout of the PCI hotplug register block served by the DeviceManager's
// `BusDevice` implementation. Each register is described by its byte offset
// within the block and by its size in bytes.

// PCIU: read through `pci_devices_up`.
const PCIU_FIELD_OFFSET: u64 = 0x0;
const PCIU_FIELD_SIZE: usize = 4;

// PCID: read through `pci_devices_down`.
const PCID_FIELD_OFFSET: u64 = 0x4;
const PCID_FIELD_SIZE: usize = 4;

// B0EJ: written to eject devices.
const B0EJ_FIELD_OFFSET: u64 = 0x8;
const B0EJ_FIELD_SIZE: usize = 4;
4049 
4050 impl BusDevice for DeviceManager {
4051     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4052         match offset {
4053             PCIU_FIELD_OFFSET => {
4054                 assert!(data.len() == PCIU_FIELD_SIZE);
4055                 data.copy_from_slice(&self.pci_devices_up.to_le_bytes());
4056                 // Clear the PCIU bitmap
4057                 self.pci_devices_up = 0;
4058             }
4059             PCID_FIELD_OFFSET => {
4060                 assert!(data.len() == PCID_FIELD_SIZE);
4061                 data.copy_from_slice(&self.pci_devices_down.to_le_bytes());
4062                 // Clear the PCID bitmap
4063                 self.pci_devices_down = 0;
4064             }
4065             B0EJ_FIELD_OFFSET => {
4066                 assert!(data.len() == B0EJ_FIELD_SIZE);
4067                 // Always return an empty bitmap since the eject is always
4068                 // taken care of right away during a write access.
4069                 data.copy_from_slice(&[0, 0, 0, 0]);
4070             }
4071             _ => error!(
4072                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4073                 base, offset
4074             ),
4075         }
4076 
4077         debug!(
4078             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4079             base, offset, data
4080         )
4081     }
4082 
4083     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
4084         match offset {
4085             B0EJ_FIELD_OFFSET => {
4086                 assert!(data.len() == B0EJ_FIELD_SIZE);
4087                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4088                 data_array.copy_from_slice(data);
4089                 let device_bitmap = u32::from_le_bytes(data_array);
4090 
4091                 for device_id in 0..32 {
4092                     let mask = 1u32 << device_id;
4093                     if (device_bitmap & mask) == mask {
4094                         if let Err(e) = self.eject_device(device_id as u8) {
4095                             error!("Failed ejecting device {}: {:?}", device_id, e);
4096                         }
4097                     }
4098                 }
4099             }
4100             _ => error!(
4101                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4102                 base, offset
4103             ),
4104         }
4105 
4106         debug!(
4107             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4108             base, offset, data
4109         );
4110 
4111         None
4112     }
4113 }
4114 
4115 impl Drop for DeviceManager {
4116     fn drop(&mut self) {
4117         for (device, _, _) in self.virtio_devices.drain(..) {
4118             device.lock().unwrap().shutdown();
4119         }
4120     }
4121 }
4122