xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision eea9bcea38e0c5649f444c829f3a4f9c22aa486c)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use crate::config::{
13     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
14     VdpaConfig, VhostMode, VmConfig, VsockConfig,
15 };
16 use crate::device_tree::{DeviceNode, DeviceTree};
17 use crate::interrupt::LegacyUserspaceInterruptManager;
18 use crate::interrupt::MsiInterruptManager;
19 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
20 use crate::pci_segment::PciSegment;
21 use crate::seccomp_filters::{get_seccomp_filter, Thread};
22 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
23 use crate::sigwinch_listener::start_sigwinch_listener;
24 use crate::GuestRegionMmap;
25 use crate::PciDeviceInfo;
26 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
27 use acpi_tables::sdt::GenericAddress;
28 use acpi_tables::{aml, aml::Aml};
29 use anyhow::anyhow;
30 use arch::layout;
31 #[cfg(target_arch = "x86_64")]
32 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
33 use arch::NumaNodes;
34 #[cfg(target_arch = "aarch64")]
35 use arch::{DeviceType, MmioDeviceInfo};
36 use block_util::{
37     async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
38     fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync,
39     raw_async::RawFileDisk, raw_sync::RawFileDiskSync, vhdx_sync::VhdxDiskSync, ImageType,
40 };
41 #[cfg(target_arch = "aarch64")]
42 use devices::gic;
43 #[cfg(target_arch = "x86_64")]
44 use devices::ioapic;
45 #[cfg(target_arch = "aarch64")]
46 use devices::legacy::Pl011;
47 #[cfg(target_arch = "x86_64")]
48 use devices::legacy::Serial;
49 use devices::{
50     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
51 };
52 use hypervisor::{HypervisorType, IoEventAddress};
53 use libc::{
54     cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED,
55     O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW,
56 };
57 #[cfg(target_arch = "x86_64")]
58 use pci::PciConfigIo;
59 use pci::{
60     DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping,
61     VfioUserPciDevice, VfioUserPciDeviceError,
62 };
63 use seccompiler::SeccompAction;
64 use serde::{Deserialize, Serialize};
65 use std::collections::{BTreeSet, HashMap};
66 use std::convert::TryInto;
67 use std::fs::{read_link, File, OpenOptions};
68 use std::io::{self, stdout, Seek, SeekFrom};
69 use std::mem::zeroed;
70 use std::num::Wrapping;
71 use std::os::unix::fs::OpenOptionsExt;
72 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
73 use std::path::PathBuf;
74 use std::result;
75 use std::sync::{Arc, Mutex};
76 use std::time::Instant;
77 use tracer::trace_scoped;
78 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd};
79 use virtio_devices::transport::VirtioTransport;
80 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator};
81 use virtio_devices::vhost_user::VhostUserConfig;
82 use virtio_devices::{
83     AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource,
84 };
85 use virtio_devices::{Endpoint, IommuMapping};
86 use vm_allocator::{AddressAllocator, SystemAllocator};
87 use vm_device::dma_mapping::vfio::VfioDmaMapping;
88 use vm_device::dma_mapping::ExternalDmaMapping;
89 use vm_device::interrupt::{
90     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
91 };
92 use vm_device::{Bus, BusDevice, Resource};
93 use vm_memory::guest_memory::FileOffset;
94 use vm_memory::GuestMemoryRegion;
95 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
96 #[cfg(target_arch = "x86_64")]
97 use vm_memory::{GuestAddressSpace, GuestMemory};
98 use vm_migration::{
99     protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
100     SnapshotDataSection, Snapshottable, Transportable,
101 };
102 use vm_virtio::AccessPlatform;
103 use vm_virtio::VirtioDeviceType;
104 use vmm_sys_util::eventfd::EventFd;
105 
// Length used for MMIO device windows on aarch64.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Fixed identifiers (double underscore prefix): singleton devices and
// devices the user cannot name.
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const IOMMU_DEVICE_NAME: &str = "__iommu";
const RNG_DEVICE_NAME: &str = "__rng";
const SERIAL_DEVICE_NAME: &str = "__serial";
// Grouped with the fixed names: like them it is a complete `__` identifier
// rather than a prefix.
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";

// Identifier prefixes (single underscore) for devices that the user may
// name; an identifier is generated from the prefix when the user does not
// provide one.
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
132 
/// Errors associated with device manager.
///
/// Each variant names one operation that failed. Variants carrying a payload
/// wrap the underlying error reported for that operation, or the offending
/// value (identifier, PCI b/d/f, device type).
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to detect the disk image type
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(vmm_sys_util::errno::Error),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(vmm_sys_util::errno::Error),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set the O_DIRECT flag on the file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create FixedVhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier
    InvalidIdentifier(String),

    /// Error activating virtio device
    VirtioActivate(ActivateError),
}
/// Result type whose error variant is [`DeviceManagerError`].
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// NOTE(review): presumably the size in bytes of the device manager's ACPI
// register window; it is not used in the visible part of the file, confirm
// against its users.
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;

// ioctl request numbers issued on the pty main device by `create_pty()`:
// TIOCSPTLCK is sent with a zeroed lock value, and TIOCGTPEER returns a file
// descriptor for the peer (sub) end of the pty.
const TIOCSPTLCK: libc::c_int = 0x4004_5431;
const TIOCGTPEER: libc::c_int = 0x5441;
476 
477 pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
478     // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
479     // This is done to try and use the devpts filesystem that
480     // could be available for use in the process's namespace first.
481     // Ideally these are all the same file though but different
482     // kernels could have things setup differently.
483     // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
484     // for further details.
485 
486     let custom_flags = libc::O_NONBLOCK;
487     let main = match OpenOptions::new()
488         .read(true)
489         .write(true)
490         .custom_flags(custom_flags)
491         .open("/dev/pts/ptmx")
492     {
493         Ok(f) => f,
494         _ => OpenOptions::new()
495             .read(true)
496             .write(true)
497             .custom_flags(custom_flags)
498             .open("/dev/ptmx")?,
499     };
500     let mut unlock: libc::c_ulong = 0;
501     // SAFETY: FFI call into libc, trivially safe
502     unsafe {
503         libc::ioctl(
504             main.as_raw_fd(),
505             TIOCSPTLCK.try_into().unwrap(),
506             &mut unlock,
507         )
508     };
509 
510     // SAFETY: FFI call into libc, trivally safe
511     let sub_fd = unsafe {
512         libc::ioctl(
513             main.as_raw_fd(),
514             TIOCGTPEER.try_into().unwrap(),
515             libc::O_NOCTTY | libc::O_RDWR,
516         )
517     };
518     if sub_fd == -1 {
519         return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
520     }
521 
522     let proc_path = PathBuf::from(format!("/proc/self/fd/{}", sub_fd));
523     let path = read_link(proc_path)?;
524 
525     // SAFETY: sub_fd is checked to be valid before being wrapped in File
526     Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
527 }
528 
529 #[derive(Default)]
530 pub struct Console {
531     console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
532 }
533 
534 impl Console {
535     pub fn update_console_size(&self) {
536         if let Some(resizer) = self.console_resizer.as_ref() {
537             resizer.update_console_size()
538         }
539     }
540 }
541 
/// Shared state needed to keep address bookkeeping consistent when a PCI BAR
/// is relocated (see the `DeviceRelocation` implementation for this type).
pub(crate) struct AddressManager {
    // System-wide allocator; `move_bar` uses it for I/O port ranges and for
    // 32-bit MMIO hole ranges.
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    // Port I/O bus (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub(crate) io_bus: Arc<Bus>,
    // MMIO bus.
    pub(crate) mmio_bus: Arc<Bus>,
    // Hypervisor VM handle, used to (un)register ioevents and user memory
    // regions.
    vm: Arc<dyn hypervisor::Vm>,
    // Device tree whose `Resource::PciBar` entries are updated on relocation.
    device_tree: Arc<Mutex<DeviceTree>>,
    // Allocators that 64-bit memory BARs are allocated from; `move_bar` picks
    // the one whose range contains the BAR's old base.
    pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}
551 
impl DeviceRelocation for AddressManager {
    /// Relocates a PCI BAR of `len` bytes from `old_base` to `new_base`.
    ///
    /// The steps are performed in this order:
    /// 1. free the old range and allocate the new one in the matching
    ///    allocator, then move the range on the I/O or MMIO bus;
    /// 2. update the matching `Resource::PciBar` entry of the device's node
    ///    in the device tree (only when the device reports an id);
    /// 3. for virtio-pci devices, either re-register the ioeventfds or remap
    ///    the shared memory region that lived at `old_base`;
    /// 4. let the device itself record the move via `pci_dev.move_bar()`.
    ///
    /// # Errors
    ///
    /// Returns an `io::Error` of kind `Other` as soon as one step fails.
    /// Steps that already completed are not rolled back.
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                #[cfg(target_arch = "x86_64")]
                {
                    // Update system allocator
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_io_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            None,
                        )
                        .ok_or_else(|| {
                            io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
                        })?;

                    // Update PIO bus
                    self.io_bus
                        .update_range(old_base, len, new_base, len)
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
                }
                // On aarch64 the request is only logged; execution continues
                // with the device tree update below.
                #[cfg(target_arch = "aarch64")]
                error!("I/O region is not supported");
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                // Update system allocator
                if region_type == PciBarRegionType::Memory32BitRegion {
                    // 32-bit BARs are tracked by the system allocator's MMIO
                    // hole; the new range is requested aligned on its size.
                    self.allocator
                        .lock()
                        .unwrap()
                        .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);

                    self.allocator
                        .lock()
                        .unwrap()
                        .allocate_mmio_hole_addresses(
                            Some(GuestAddress(new_base)),
                            len as GuestUsize,
                            Some(len),
                        )
                        .ok_or_else(|| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                "failed allocating new 32 bits MMIO range",
                            )
                        })?;
                } else {
                    // Find the specific allocator that this BAR was allocated from and use it for new one
                    // If no allocator's range contains `old_base`, the loop
                    // ends without touching any allocator and the MMIO bus
                    // is still updated below.
                    for allocator in &self.pci_mmio_allocators {
                        let allocator_base = allocator.lock().unwrap().base();
                        let allocator_end = allocator.lock().unwrap().end();

                        if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                            allocator
                                .lock()
                                .unwrap()
                                .free(GuestAddress(old_base), len as GuestUsize);

                            allocator
                                .lock()
                                .unwrap()
                                .allocate(
                                    Some(GuestAddress(new_base)),
                                    len as GuestUsize,
                                    Some(len),
                                )
                                .ok_or_else(|| {
                                    io::Error::new(
                                        io::ErrorKind::Other,
                                        "failed allocating new 64 bits MMIO range",
                                    )
                                })?;

                            break;
                        }
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            }
        }

        // Update the device_tree resources associated with the device
        // Devices that report no id are skipped here.
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                // Only the first BAR resource with the same region type and
                // the old base address is rewritten.
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                if !resource_updated {
                    return Err(io::Error::new(
                        io::ErrorKind::Other,
                        format!(
                            "Couldn't find a resource with base 0x{:x} for device {}",
                            old_base, id
                        ),
                    ));
                }
            } else {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    format!("Couldn't find device {} from device tree", id),
                ));
            }
        }

        // Virtio-pci devices carry extra hypervisor state tied to BAR
        // addresses; other device types skip this section.
        let any_dev = pci_dev.as_any();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The device's config BAR address equals `new_base`: drop the
                // ioevents computed for the old base and register the ones
                // computed for the new base.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::new(
                            io::ErrorKind::Other,
                            format!("failed to unregister ioevent: {:?}", e),
                        )
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to register ioevent: {:?}", e),
                            )
                        })?;
                }
            } else {
                // Otherwise, if the device exposes shared memory regions that
                // start at `old_base`, move the corresponding user memory
                // region to `new_base`, reusing the same slot, length and
                // host address.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        // Remove the existing mapping at the old base.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to remove user memory region: {:?}", e),
                            )
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to create user memory regions: {:?}", e),
                            )
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::new(
                                io::ErrorKind::Other,
                                format!("failed to update shared memory regions: {:?}", e),
                            )
                        })?;
                    }
                }
            }
        }

        // Finally let the device itself record the relocation.
        pci_dev.move_bar(old_base, new_base)
    }
}
761 
/// Serializable subset of the device manager's state.
// NOTE(review): presumably the payload stored under
// DEVICE_MANAGER_SNAPSHOT_ID for snapshot/restore — confirm against the
// Snapshottable implementation, which is outside this view.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    // Tree of devices.
    device_tree: DeviceTree,
    // Wrapping counter of consumed device IDs.
    device_id_cnt: Wrapping<usize>,
}
767 
/// The main end of a pty together with the filesystem path associated
/// with it.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    /// Returns a pair whose `main` is a duplicate of this pair's file handle
    /// and whose `path` is a copy of this pair's path.
    ///
    /// # Panics
    ///
    /// Panics if the file handle cannot be duplicated.
    fn clone(&self) -> Self {
        let main = self.main.try_clone().unwrap();
        let path = self.path.to_path_buf();
        Self { main, path }
    }
}
782 
/// Shared handle to a PCI device, tagged by the kind of device it wraps.
///
/// Cloning a handle clones the inner `Arc`, so all clones refer to the same
/// device instance.
#[derive(Clone)]
pub enum PciDeviceHandle {
    /// A VFIO PCI device.
    Vfio(Arc<Mutex<VfioPciDevice>>),
    /// A virtio PCI device.
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    /// A vfio-user PCI device.
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}
789 
/// A virtio device together with the metadata the device manager keeps
/// about it.
#[derive(Clone)]
struct MetaVirtioDevice {
    // The virtio device itself, shared behind a mutex.
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // NOTE(review): presumably whether the device is placed behind the
    // virtual IOMMU — confirm against the code consuming this flag.
    iommu: bool,
    // Identifier of the device.
    id: String,
    // Index of the PCI segment associated with the device.
    pci_segment: u16,
    // Optional external DMA mapping handler associated with the device.
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}
798 
/// Addresses of platform registers described to the guest through ACPI.
///
/// Every field is optional; the default value has all of them unset.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    /// Address of the PM timer, if any.
    pub pm_timer_address: Option<GenericAddress>,
    /// Address of the reset register, if any.
    pub reset_reg_address: Option<GenericAddress>,
    /// Address of the sleep control register, if any.
    pub sleep_control_reg_address: Option<GenericAddress>,
    /// Address of the sleep status register, if any.
    pub sleep_status_reg_address: Option<GenericAddress>,
}
806 
/// Owner of every device created for the VM.
///
/// The manager keeps strong references to the devices it creates, the buses
/// and allocators they are attached to (through the `AddressManager`), and
/// the bookkeeping (device tree, device ID counter) used for snapshot/restore.
pub struct DeviceManager {
    // The underlying hypervisor
    hypervisor_type: HypervisorType,

    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // console PTY
    console_pty: Option<Arc<Mutex<PtyPair>>>,

    // serial PTY
    serial_pty: Option<Arc<Mutex<PtyPair>>>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // Pipe associated with console resize handling. It is handed to
    // `create_devices` and exposed again through `console_resize_pipe()`.
    // NOTE(review): the producer/consumer of this pipe is not visible in this
    // part of the file - confirm before relying on this description.
    console_resize_pipe: Option<Arc<File>>,

    // Interrupt controller (IOAPIC on x86_64, GIC on aarch64)
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device, set by `create_devices`
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    // PCI segments; index 0 is the default segment created in `new`.
    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager, created in `create_devices` once the
    // interrupt controller exists.
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU and the mapping object created alongside it
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit and reset events (clones of the EventFds passed to `new`)
    exit_evt: EventFd,
    reset_evt: EventFd,

    // Address, length and IRQ of the MMIO devices registered so far, keyed by
    // device type and name.
    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    // MMIO address at which the DeviceManager itself is registered on the
    // MMIO bus (allocated in `new`).
    acpi_address: GuestAddress,

    // Index of the currently selected PCI segment; initialised to 0.
    selected_segment: usize,

    // Handles to the virtio-mem devices
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // Helps identify if the VM is currently being restored
    restoring: bool,

    // io_uring availability if detected (`None` until it has been probed)
    io_uring_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,
}
944 
945 impl DeviceManager {
946     #[allow(clippy::too_many_arguments)]
947     pub fn new(
948         hypervisor_type: HypervisorType,
949         vm: Arc<dyn hypervisor::Vm>,
950         config: Arc<Mutex<VmConfig>>,
951         memory_manager: Arc<Mutex<MemoryManager>>,
952         exit_evt: &EventFd,
953         reset_evt: &EventFd,
954         seccomp_action: SeccompAction,
955         numa_nodes: NumaNodes,
956         activate_evt: &EventFd,
957         force_iommu: bool,
958         restoring: bool,
959         boot_id_list: BTreeSet<String>,
960         timestamp: Instant,
961     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
962         trace_scoped!("DeviceManager::new");
963 
964         let device_tree = Arc::new(Mutex::new(DeviceTree::new()));
965 
966         let num_pci_segments =
967             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
968                 platform_config.num_pci_segments
969             } else {
970                 1
971             };
972 
973         let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
974         let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;
975 
976         // Start each PCI segment range on a 4GiB boundary
977         let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
978             / ((4 << 30) * num_pci_segments as u64)
979             * (4 << 30);
980 
981         let mut pci_mmio_allocators = vec![];
982         for i in 0..num_pci_segments as u64 {
983             let mmio_start = start_of_device_area + i * pci_segment_size;
984             let allocator = Arc::new(Mutex::new(
985                 AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
986             ));
987             pci_mmio_allocators.push(allocator)
988         }
989 
990         let address_manager = Arc::new(AddressManager {
991             allocator: memory_manager.lock().unwrap().allocator(),
992             #[cfg(target_arch = "x86_64")]
993             io_bus: Arc::new(Bus::new()),
994             mmio_bus: Arc::new(Bus::new()),
995             vm: vm.clone(),
996             device_tree: Arc::clone(&device_tree),
997             pci_mmio_allocators,
998         });
999 
1000         // First we create the MSI interrupt manager, the legacy one is created
1001         // later, after the IOAPIC device creation.
1002         // The reason we create the MSI one first is because the IOAPIC needs it,
1003         // and then the legacy interrupt manager needs an IOAPIC. So we're
1004         // handling a linear dependency chain:
1005         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1006         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1007             Arc::new(MsiInterruptManager::new(
1008                 Arc::clone(&address_manager.allocator),
1009                 vm,
1010             ));
1011 
1012         let acpi_address = address_manager
1013             .allocator
1014             .lock()
1015             .unwrap()
1016             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1017             .ok_or(DeviceManagerError::AllocateIoPort)?;
1018 
1019         let mut pci_irq_slots = [0; 32];
1020         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1021             &address_manager,
1022             &mut pci_irq_slots,
1023         )?;
1024 
1025         let mut pci_segments = vec![PciSegment::new_default_segment(
1026             &address_manager,
1027             Arc::clone(&address_manager.pci_mmio_allocators[0]),
1028             &pci_irq_slots,
1029         )?];
1030 
1031         for i in 1..num_pci_segments as usize {
1032             pci_segments.push(PciSegment::new(
1033                 i as u16,
1034                 &address_manager,
1035                 Arc::clone(&address_manager.pci_mmio_allocators[i]),
1036                 &pci_irq_slots,
1037             )?);
1038         }
1039 
1040         let device_manager = DeviceManager {
1041             hypervisor_type,
1042             address_manager: Arc::clone(&address_manager),
1043             console: Arc::new(Console::default()),
1044             interrupt_controller: None,
1045             #[cfg(target_arch = "aarch64")]
1046             cmdline_additions: Vec::new(),
1047             ged_notification_device: None,
1048             config,
1049             memory_manager,
1050             virtio_devices: Vec::new(),
1051             bus_devices: Vec::new(),
1052             device_id_cnt: Wrapping(0),
1053             msi_interrupt_manager,
1054             legacy_interrupt_manager: None,
1055             passthrough_device: None,
1056             vfio_container: None,
1057             iommu_device: None,
1058             iommu_mapping: None,
1059             iommu_attached_devices: None,
1060             pci_segments,
1061             device_tree,
1062             exit_evt: exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
1063             reset_evt: reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
1064             #[cfg(target_arch = "aarch64")]
1065             id_to_dev_info: HashMap::new(),
1066             seccomp_action,
1067             numa_nodes,
1068             balloon: None,
1069             activate_evt: activate_evt
1070                 .try_clone()
1071                 .map_err(DeviceManagerError::EventFd)?,
1072             acpi_address,
1073             selected_segment: 0,
1074             serial_pty: None,
1075             serial_manager: None,
1076             console_pty: None,
1077             console_resize_pipe: None,
1078             virtio_mem_devices: Vec::new(),
1079             #[cfg(target_arch = "aarch64")]
1080             gpio_device: None,
1081             force_iommu,
1082             restoring,
1083             io_uring_supported: None,
1084             boot_id_list,
1085             timestamp,
1086             pending_activations: Arc::new(Mutex::new(Vec::default())),
1087             acpi_platform_addresses: AcpiPlatformAddresses::default(),
1088         };
1089 
1090         let device_manager = Arc::new(Mutex::new(device_manager));
1091 
1092         address_manager
1093             .mmio_bus
1094             .insert(
1095                 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
1096                 acpi_address.0,
1097                 DEVICE_MANAGER_ACPI_SIZE as u64,
1098             )
1099             .map_err(DeviceManagerError::BusError)?;
1100 
1101         Ok(device_manager)
1102     }
1103 
1104     pub fn serial_pty(&self) -> Option<PtyPair> {
1105         self.serial_pty
1106             .as_ref()
1107             .map(|pty| pty.lock().unwrap().clone())
1108     }
1109 
1110     pub fn console_pty(&self) -> Option<PtyPair> {
1111         self.console_pty
1112             .as_ref()
1113             .map(|pty| pty.lock().unwrap().clone())
1114     }
1115 
1116     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1117         self.console_resize_pipe.as_ref().map(Arc::clone)
1118     }
1119 
1120     pub fn create_devices(
1121         &mut self,
1122         serial_pty: Option<PtyPair>,
1123         console_pty: Option<PtyPair>,
1124         console_resize_pipe: Option<File>,
1125     ) -> DeviceManagerResult<()> {
1126         trace_scoped!("create_devices");
1127 
1128         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1129 
1130         let interrupt_controller = self.add_interrupt_controller()?;
1131 
1132         // Now we can create the legacy interrupt manager, which needs the freshly
1133         // formed IOAPIC device.
1134         let legacy_interrupt_manager: Arc<
1135             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1136         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1137             &interrupt_controller,
1138         )));
1139 
1140         {
1141             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1142                 self.address_manager
1143                     .mmio_bus
1144                     .insert(
1145                         Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
1146                         acpi_address.0,
1147                         MEMORY_MANAGER_ACPI_SIZE as u64,
1148                     )
1149                     .map_err(DeviceManagerError::BusError)?;
1150             }
1151         }
1152 
1153         #[cfg(target_arch = "x86_64")]
1154         self.add_legacy_devices(
1155             self.reset_evt
1156                 .try_clone()
1157                 .map_err(DeviceManagerError::EventFd)?,
1158         )?;
1159 
1160         #[cfg(target_arch = "aarch64")]
1161         self.add_legacy_devices(&legacy_interrupt_manager)?;
1162 
1163         {
1164             self.ged_notification_device = self.add_acpi_devices(
1165                 &legacy_interrupt_manager,
1166                 self.reset_evt
1167                     .try_clone()
1168                     .map_err(DeviceManagerError::EventFd)?,
1169                 self.exit_evt
1170                     .try_clone()
1171                     .map_err(DeviceManagerError::EventFd)?,
1172             )?;
1173         }
1174 
1175         self.console = self.add_console_device(
1176             &legacy_interrupt_manager,
1177             &mut virtio_devices,
1178             serial_pty,
1179             console_pty,
1180             console_resize_pipe,
1181         )?;
1182 
1183         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1184 
1185         virtio_devices.append(&mut self.make_virtio_devices()?);
1186 
1187         self.add_pci_devices(virtio_devices.clone())?;
1188 
1189         self.virtio_devices = virtio_devices;
1190 
1191         Ok(())
1192     }
1193 
1194     fn state(&self) -> DeviceManagerState {
1195         DeviceManagerState {
1196             device_tree: self.device_tree.lock().unwrap().clone(),
1197             device_id_cnt: self.device_id_cnt,
1198         }
1199     }
1200 
1201     fn set_state(&mut self, state: &DeviceManagerState) {
1202         *self.device_tree.lock().unwrap() = state.device_tree.clone();
1203         self.device_id_cnt = state.device_id_cnt;
1204     }
1205 
1206     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1207         #[cfg(target_arch = "aarch64")]
1208         {
1209             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1210             let vgic_config = gic::Gic::create_default_config(vcpus.into());
1211             (
1212                 vgic_config.msi_addr,
1213                 vgic_config.msi_addr + vgic_config.msi_size - 1,
1214             )
1215         }
1216         #[cfg(target_arch = "x86_64")]
1217         (0xfee0_0000, 0xfeef_ffff)
1218     }
1219 
    #[cfg(target_arch = "aarch64")]
    /// Gets the information of the devices registered up to some point in time.
    ///
    /// The returned map is keyed by device type and device name and holds the
    /// MMIO address, length and IRQ recorded for each device. It only contains
    /// the devices that have been registered by the time of the call.
    pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
        &self.id_to_dev_info
    }
1225 
1226     #[allow(unused_variables)]
1227     fn add_pci_devices(
1228         &mut self,
1229         virtio_devices: Vec<MetaVirtioDevice>,
1230     ) -> DeviceManagerResult<()> {
1231         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1232 
1233         let iommu_device = if self.config.lock().unwrap().iommu {
1234             let (device, mapping) = virtio_devices::Iommu::new(
1235                 iommu_id.clone(),
1236                 self.seccomp_action.clone(),
1237                 self.exit_evt
1238                     .try_clone()
1239                     .map_err(DeviceManagerError::EventFd)?,
1240                 self.get_msi_iova_space(),
1241             )
1242             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1243             let device = Arc::new(Mutex::new(device));
1244             self.iommu_device = Some(Arc::clone(&device));
1245             self.iommu_mapping = Some(mapping);
1246 
1247             // Fill the device tree with a new node. In case of restore, we
1248             // know there is nothing to do, so we can simply override the
1249             // existing entry.
1250             self.device_tree
1251                 .lock()
1252                 .unwrap()
1253                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1254 
1255             Some(device)
1256         } else {
1257             None
1258         };
1259 
1260         let mut iommu_attached_devices = Vec::new();
1261         {
1262             for handle in virtio_devices {
1263                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1264                     self.iommu_mapping.clone()
1265                 } else {
1266                     None
1267                 };
1268 
1269                 let dev_id = self.add_virtio_pci_device(
1270                     handle.virtio_device,
1271                     &mapping,
1272                     handle.id,
1273                     handle.pci_segment,
1274                     handle.dma_handler,
1275                 )?;
1276 
1277                 if handle.iommu {
1278                     iommu_attached_devices.push(dev_id);
1279                 }
1280             }
1281 
1282             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1283             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1284 
1285             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1286             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1287 
1288             // Add all devices from forced iommu segments
1289             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1290                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1291                     for segment in iommu_segments {
1292                         for device in 0..32 {
1293                             let bdf = PciBdf::new(*segment, 0, device, 0);
1294                             if !iommu_attached_devices.contains(&bdf) {
1295                                 iommu_attached_devices.push(bdf);
1296                             }
1297                         }
1298                     }
1299                 }
1300             }
1301 
1302             if let Some(iommu_device) = iommu_device {
1303                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1304                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1305             }
1306         }
1307 
1308         for segment in &self.pci_segments {
1309             #[cfg(target_arch = "x86_64")]
1310             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1311                 self.bus_devices
1312                     .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
1313             }
1314 
1315             self.bus_devices
1316                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
1317         }
1318 
1319         Ok(())
1320     }
1321 
1322     #[cfg(target_arch = "aarch64")]
1323     fn add_interrupt_controller(
1324         &mut self,
1325     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1326         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1327             gic::Gic::new(
1328                 self.config.lock().unwrap().cpus.boot_vcpus,
1329                 Arc::clone(&self.msi_interrupt_manager),
1330             )
1331             .map_err(DeviceManagerError::CreateInterruptController)?,
1332         ));
1333 
1334         self.interrupt_controller = Some(interrupt_controller.clone());
1335 
1336         // Unlike x86_64, the "interrupt_controller" here for AArch64 is only
1337         // a `Gic` object that implements the `InterruptController` to provide
1338         // interrupt delivery service. This is not the real GIC device so that
1339         // we do not need to insert it to the device tree.
1340 
1341         Ok(interrupt_controller)
1342     }
1343 
    /// Returns a reference to the `Gic` interrupt controller, or `None` when it
    /// has not been created yet (it is set by `add_interrupt_controller`).
    // NOTE(review): the body only reads `self`; `&mut self` looks stricter
    // than needed - confirm against the callers before relaxing it.
    #[cfg(target_arch = "aarch64")]
    pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
        self.interrupt_controller.as_ref()
    }
1348 
1349     #[cfg(target_arch = "x86_64")]
1350     fn add_interrupt_controller(
1351         &mut self,
1352     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1353         let id = String::from(IOAPIC_DEVICE_NAME);
1354 
1355         // Create IOAPIC
1356         let interrupt_controller = Arc::new(Mutex::new(
1357             ioapic::Ioapic::new(
1358                 id.clone(),
1359                 APIC_START,
1360                 Arc::clone(&self.msi_interrupt_manager),
1361             )
1362             .map_err(DeviceManagerError::CreateInterruptController)?,
1363         ));
1364 
1365         self.interrupt_controller = Some(interrupt_controller.clone());
1366 
1367         self.address_manager
1368             .mmio_bus
1369             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1370             .map_err(DeviceManagerError::BusError)?;
1371 
1372         self.bus_devices
1373             .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);
1374 
1375         // Fill the device tree with a new node. In case of restore, we
1376         // know there is nothing to do, so we can simply override the
1377         // existing entry.
1378         self.device_tree
1379             .lock()
1380             .unwrap()
1381             .insert(id.clone(), device_node!(id, interrupt_controller));
1382 
1383         Ok(interrupt_controller)
1384     }
1385 
1386     fn add_acpi_devices(
1387         &mut self,
1388         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1389         reset_evt: EventFd,
1390         exit_evt: EventFd,
1391     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1392         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1393             exit_evt, reset_evt,
1394         )));
1395 
1396         self.bus_devices
1397             .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);
1398 
1399         #[cfg(target_arch = "x86_64")]
1400         {
1401             let shutdown_pio_address: u16 = 0x600;
1402 
1403             // TODO: Remove the entry for 0x3c0 once all firmwares will have been
1404             // updated with the new value.
1405             self.address_manager
1406                 .allocator
1407                 .lock()
1408                 .unwrap()
1409                 .allocate_io_addresses(Some(GuestAddress(0x3c0)), 0x8, None)
1410                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1411 
1412             self.address_manager
1413                 .allocator
1414                 .lock()
1415                 .unwrap()
1416                 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
1417                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1418 
1419             // TODO: Remove the entry for 0x3c0 once all firmwares will have been
1420             // updated with the new value.
1421             self.address_manager
1422                 .io_bus
1423                 .insert(shutdown_device.clone(), 0x3c0, 0x4)
1424                 .map_err(DeviceManagerError::BusError)?;
1425 
1426             self.address_manager
1427                 .io_bus
1428                 .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
1429                 .map_err(DeviceManagerError::BusError)?;
1430 
1431             self.acpi_platform_addresses.sleep_control_reg_address =
1432                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1433             self.acpi_platform_addresses.sleep_status_reg_address =
1434                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1435             self.acpi_platform_addresses.reset_reg_address =
1436                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1437         }
1438 
1439         let ged_irq = self
1440             .address_manager
1441             .allocator
1442             .lock()
1443             .unwrap()
1444             .allocate_irq()
1445             .unwrap();
1446         let interrupt_group = interrupt_manager
1447             .create_group(LegacyIrqGroupConfig {
1448                 irq: ged_irq as InterruptIndex,
1449             })
1450             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1451         let ged_address = self
1452             .address_manager
1453             .allocator
1454             .lock()
1455             .unwrap()
1456             .allocate_platform_mmio_addresses(
1457                 None,
1458                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1459                 None,
1460             )
1461             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1462         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1463             interrupt_group,
1464             ged_irq,
1465             ged_address,
1466         )));
1467         self.address_manager
1468             .mmio_bus
1469             .insert(
1470                 ged_device.clone(),
1471                 ged_address.0,
1472                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1473             )
1474             .map_err(DeviceManagerError::BusError)?;
1475         self.bus_devices
1476             .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);
1477 
1478         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1479 
1480         self.bus_devices
1481             .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);
1482 
1483         #[cfg(target_arch = "x86_64")]
1484         {
1485             let pm_timer_pio_address: u16 = 0x608;
1486 
1487             // TODO: Remove the entry for 0xb008 once all firmwares will have been
1488             // updated with the new value.
1489             self.address_manager
1490                 .allocator
1491                 .lock()
1492                 .unwrap()
1493                 .allocate_io_addresses(Some(GuestAddress(0xb008)), 0x4, None)
1494                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1495 
1496             self.address_manager
1497                 .allocator
1498                 .lock()
1499                 .unwrap()
1500                 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
1501                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1502 
1503             // TODO: Remove the entry for 0xb008 once all firmwares will have been
1504             // updated with the new value.
1505             self.address_manager
1506                 .io_bus
1507                 .insert(pm_timer_device.clone(), 0xb008, 0x4)
1508                 .map_err(DeviceManagerError::BusError)?;
1509 
1510             self.address_manager
1511                 .io_bus
1512                 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
1513                 .map_err(DeviceManagerError::BusError)?;
1514 
1515             self.acpi_platform_addresses.pm_timer_address =
1516                 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
1517         }
1518 
1519         Ok(Some(ged_device))
1520     }
1521 
1522     #[cfg(target_arch = "x86_64")]
1523     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1524         // Add a shutdown device (i8042)
1525         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1526             reset_evt.try_clone().unwrap(),
1527         )));
1528 
1529         self.bus_devices
1530             .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);
1531 
1532         self.address_manager
1533             .io_bus
1534             .insert(i8042, 0x61, 0x4)
1535             .map_err(DeviceManagerError::BusError)?;
1536         {
1537             // Add a CMOS emulated device
1538             let mem_size = self
1539                 .memory_manager
1540                 .lock()
1541                 .unwrap()
1542                 .guest_memory()
1543                 .memory()
1544                 .last_addr()
1545                 .0
1546                 + 1;
1547             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1548             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1549 
1550             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1551                 mem_below_4g,
1552                 mem_above_4g,
1553                 reset_evt,
1554             )));
1555 
1556             self.bus_devices
1557                 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);
1558 
1559             self.address_manager
1560                 .io_bus
1561                 .insert(cmos, 0x70, 0x2)
1562                 .map_err(DeviceManagerError::BusError)?;
1563 
1564             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1565 
1566             self.bus_devices
1567                 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);
1568 
1569             self.address_manager
1570                 .io_bus
1571                 .insert(fwdebug, 0x402, 0x1)
1572                 .map_err(DeviceManagerError::BusError)?;
1573         }
1574 
1575         // 0x80 debug port
1576         let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
1577         self.bus_devices
1578             .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
1579         self.address_manager
1580             .io_bus
1581             .insert(debug_port, 0x80, 0x1)
1582             .map_err(DeviceManagerError::BusError)?;
1583 
1584         Ok(())
1585     }
1586 
1587     #[cfg(target_arch = "aarch64")]
1588     fn add_legacy_devices(
1589         &mut self,
1590         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1591     ) -> DeviceManagerResult<()> {
1592         // Add a RTC device
1593         let rtc_irq = self
1594             .address_manager
1595             .allocator
1596             .lock()
1597             .unwrap()
1598             .allocate_irq()
1599             .unwrap();
1600 
1601         let interrupt_group = interrupt_manager
1602             .create_group(LegacyIrqGroupConfig {
1603                 irq: rtc_irq as InterruptIndex,
1604             })
1605             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1606 
1607         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1608 
1609         self.bus_devices
1610             .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);
1611 
1612         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1613 
1614         self.address_manager
1615             .mmio_bus
1616             .insert(rtc_device, addr.0, MMIO_LEN)
1617             .map_err(DeviceManagerError::BusError)?;
1618 
1619         self.id_to_dev_info.insert(
1620             (DeviceType::Rtc, "rtc".to_string()),
1621             MmioDeviceInfo {
1622                 addr: addr.0,
1623                 len: MMIO_LEN,
1624                 irq: rtc_irq,
1625             },
1626         );
1627 
1628         // Add a GPIO device
1629         let id = String::from(GPIO_DEVICE_NAME);
1630         let gpio_irq = self
1631             .address_manager
1632             .allocator
1633             .lock()
1634             .unwrap()
1635             .allocate_irq()
1636             .unwrap();
1637 
1638         let interrupt_group = interrupt_manager
1639             .create_group(LegacyIrqGroupConfig {
1640                 irq: gpio_irq as InterruptIndex,
1641             })
1642             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1643 
1644         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1645             id.clone(),
1646             interrupt_group,
1647         )));
1648 
1649         self.bus_devices
1650             .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);
1651 
1652         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1653 
1654         self.address_manager
1655             .mmio_bus
1656             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1657             .map_err(DeviceManagerError::BusError)?;
1658 
1659         self.gpio_device = Some(gpio_device.clone());
1660 
1661         self.id_to_dev_info.insert(
1662             (DeviceType::Gpio, "gpio".to_string()),
1663             MmioDeviceInfo {
1664                 addr: addr.0,
1665                 len: MMIO_LEN,
1666                 irq: gpio_irq,
1667             },
1668         );
1669 
1670         self.device_tree
1671             .lock()
1672             .unwrap()
1673             .insert(id.clone(), device_node!(id, gpio_device));
1674 
1675         Ok(())
1676     }
1677 
1678     #[cfg(target_arch = "x86_64")]
1679     fn add_serial_device(
1680         &mut self,
1681         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1682         serial_writer: Option<Box<dyn io::Write + Send>>,
1683     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1684         // Serial is tied to IRQ #4
1685         let serial_irq = 4;
1686 
1687         let id = String::from(SERIAL_DEVICE_NAME);
1688 
1689         let interrupt_group = interrupt_manager
1690             .create_group(LegacyIrqGroupConfig {
1691                 irq: serial_irq as InterruptIndex,
1692             })
1693             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1694 
1695         let serial = Arc::new(Mutex::new(Serial::new(
1696             id.clone(),
1697             interrupt_group,
1698             serial_writer,
1699         )));
1700 
1701         self.bus_devices
1702             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1703 
1704         self.address_manager
1705             .allocator
1706             .lock()
1707             .unwrap()
1708             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1709             .ok_or(DeviceManagerError::AllocateIoPort)?;
1710 
1711         self.address_manager
1712             .io_bus
1713             .insert(serial.clone(), 0x3f8, 0x8)
1714             .map_err(DeviceManagerError::BusError)?;
1715 
1716         // Fill the device tree with a new node. In case of restore, we
1717         // know there is nothing to do, so we can simply override the
1718         // existing entry.
1719         self.device_tree
1720             .lock()
1721             .unwrap()
1722             .insert(id.clone(), device_node!(id, serial));
1723 
1724         Ok(serial)
1725     }
1726 
1727     #[cfg(target_arch = "aarch64")]
1728     fn add_serial_device(
1729         &mut self,
1730         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1731         serial_writer: Option<Box<dyn io::Write + Send>>,
1732     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1733         let id = String::from(SERIAL_DEVICE_NAME);
1734 
1735         let serial_irq = self
1736             .address_manager
1737             .allocator
1738             .lock()
1739             .unwrap()
1740             .allocate_irq()
1741             .unwrap();
1742 
1743         let interrupt_group = interrupt_manager
1744             .create_group(LegacyIrqGroupConfig {
1745                 irq: serial_irq as InterruptIndex,
1746             })
1747             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1748 
1749         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1750             id.clone(),
1751             interrupt_group,
1752             serial_writer,
1753             self.timestamp,
1754         )));
1755 
1756         self.bus_devices
1757             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1758 
1759         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
1760 
1761         self.address_manager
1762             .mmio_bus
1763             .insert(serial.clone(), addr.0, MMIO_LEN)
1764             .map_err(DeviceManagerError::BusError)?;
1765 
1766         self.id_to_dev_info.insert(
1767             (DeviceType::Serial, DeviceType::Serial.to_string()),
1768             MmioDeviceInfo {
1769                 addr: addr.0,
1770                 len: MMIO_LEN,
1771                 irq: serial_irq,
1772             },
1773         );
1774 
1775         self.cmdline_additions
1776             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1777 
1778         // Fill the device tree with a new node. In case of restore, we
1779         // know there is nothing to do, so we can simply override the
1780         // existing entry.
1781         self.device_tree
1782             .lock()
1783             .unwrap()
1784             .insert(id.clone(), device_node!(id, serial));
1785 
1786         Ok(serial)
1787     }
1788 
1789     fn modify_mode<F: FnOnce(&mut termios)>(
1790         &self,
1791         fd: RawFd,
1792         f: F,
1793     ) -> vmm_sys_util::errno::Result<()> {
1794         // SAFETY: safe because we check the return value of isatty.
1795         if unsafe { isatty(fd) } != 1 {
1796             return Ok(());
1797         }
1798 
1799         // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr
1800         // and we check the return result.
1801         let mut termios: termios = unsafe { zeroed() };
1802         let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
1803         if ret < 0 {
1804             return vmm_sys_util::errno::errno_result();
1805         }
1806         f(&mut termios);
1807         // SAFETY: Safe because the syscall will only read the extent of termios and we check
1808         // the return result.
1809         let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
1810         if ret < 0 {
1811             return vmm_sys_util::errno::errno_result();
1812         }
1813 
1814         Ok(())
1815     }
1816 
1817     fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> {
1818         // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
1819         self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
1820     }
1821 
1822     fn listen_for_sigwinch_on_tty(&mut self, pty_main: File, pty_sub: File) -> std::io::Result<()> {
1823         let seccomp_filter = get_seccomp_filter(
1824             &self.seccomp_action,
1825             Thread::PtyForeground,
1826             self.hypervisor_type,
1827         )
1828         .unwrap();
1829 
1830         match start_sigwinch_listener(seccomp_filter, pty_main, pty_sub) {
1831             Ok(pipe) => {
1832                 self.console_resize_pipe = Some(Arc::new(pipe));
1833             }
1834             Err(e) => {
1835                 warn!("Ignoring error from setting up SIGWINCH listener: {}", e)
1836             }
1837         }
1838 
1839         Ok(())
1840     }
1841 
    /// Creates the virtio console device according to the configured console
    /// mode and appends it to `virtio_devices`.
    ///
    /// * `virtio_devices` - list the newly created console device is pushed to.
    /// * `console_pty` - an already existing pty to reuse in `Pty` mode; when
    ///   `None`, a fresh pty is created in that mode.
    /// * `resize_pipe` - stored as `console_resize_pipe` when an existing pty
    ///   is reused; must be `None` when a fresh pty has to be created.
    ///
    /// Returns `Ok(None)` without creating any device when the console mode is
    /// `Off`. Otherwise the device is created and the console resizer is
    /// returned only when the console mode is `Tty`.
    ///
    /// # Panics
    ///
    /// Panics on several `unwrap()` calls: a missing console file path in
    /// `File` mode, failures to clone file descriptors, a failure reported by
    /// `listen_for_sigwinch_on_tty`, and when `resize_pipe` is provided
    /// although a new pty must be created.
    fn add_virtio_console_device(
        &mut self,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_pty: Option<PtyPair>,
        resize_pipe: Option<File>,
    ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
        let console_config = self.config.lock().unwrap().console.clone();
        let endpoint = match console_config.mode {
            ConsoleOutputMode::File => {
                // NOTE(review): presumably the configuration is validated so that
                // `file` is always set in File mode - confirm against the caller.
                let file = File::create(console_config.file.as_ref().unwrap())
                    .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
                Endpoint::File(file)
            }
            ConsoleOutputMode::Pty => {
                if let Some(pty) = console_pty {
                    // Reuse the pty handed in by the caller and publish its
                    // path through the configuration.
                    self.config.lock().unwrap().console.file = Some(pty.path.clone());
                    let file = pty.main.try_clone().unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(pty)));
                    self.console_resize_pipe = resize_pipe.map(Arc::new);
                    Endpoint::PtyPair(file.try_clone().unwrap(), file)
                } else {
                    // No pty was provided: create one, switch its sub side to
                    // raw mode and publish its path through the configuration.
                    let (main, mut sub, path) =
                        create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?;
                    self.set_raw_mode(&mut sub)
                        .map_err(DeviceManagerError::SetPtyRaw)?;
                    self.config.lock().unwrap().console.file = Some(path.clone());
                    let file = main.try_clone().unwrap();
                    // A resize pipe only makes sense together with an existing pty.
                    assert!(resize_pipe.is_none());
                    // The listener takes ownership of `sub` and of a clone of `main`.
                    self.listen_for_sigwinch_on_tty(main.try_clone().unwrap(), sub)
                        .unwrap();
                    self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
                    Endpoint::PtyPair(file.try_clone().unwrap(), file)
                }
            }
            ConsoleOutputMode::Tty => {
                // Duplicating the file descriptors like this is needed as otherwise
                // they will be closed on a reboot and the numbers reused

                // SAFETY: FFI call to dup. Trivially safe.
                let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
                if stdout == -1 {
                    return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
                }
                // SAFETY: stdout is valid and owned solely by us.
                let stdout = unsafe { File::from_raw_fd(stdout) };

                // If an interactive TTY then we can accept input
                // SAFETY: FFI call. Trivially safe.
                if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
                    // SAFETY: FFI call to dup. Trivially safe.
                    let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
                    if stdin == -1 {
                        return vmm_sys_util::errno::errno_result()
                            .map_err(DeviceManagerError::DupFd);
                    }
                    // SAFETY: stdin is valid and owned solely by us.
                    let stdin = unsafe { File::from_raw_fd(stdin) };

                    Endpoint::FilePair(stdout, stdin)
                } else {
                    // stdin is not a terminal: output only.
                    Endpoint::File(stdout)
                }
            }
            ConsoleOutputMode::Null => Endpoint::Null,
            // No console device at all when the console is turned off.
            ConsoleOutputMode::Off => return Ok(None),
        };
        let id = String::from(CONSOLE_DEVICE_NAME);

        // The device gets its own clone of the resize pipe, if there is one.
        let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
            id.clone(),
            endpoint,
            self.console_resize_pipe
                .as_ref()
                .map(|p| p.try_clone().unwrap()),
            self.force_iommu | console_config.iommu,
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )
        .map_err(DeviceManagerError::CreateVirtioConsole)?;
        let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
        virtio_devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_console_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: console_config.iommu,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_console_device));

        // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
        Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
            Some(console_resizer)
        } else {
            None
        })
    }
1948 
1949     fn add_console_device(
1950         &mut self,
1951         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1952         virtio_devices: &mut Vec<MetaVirtioDevice>,
1953         serial_pty: Option<PtyPair>,
1954         console_pty: Option<PtyPair>,
1955         console_resize_pipe: Option<File>,
1956     ) -> DeviceManagerResult<Arc<Console>> {
1957         let serial_config = self.config.lock().unwrap().serial.clone();
1958         let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
1959             ConsoleOutputMode::File => Some(Box::new(
1960                 File::create(serial_config.file.as_ref().unwrap())
1961                     .map_err(DeviceManagerError::SerialOutputFileOpen)?,
1962             )),
1963             ConsoleOutputMode::Pty => {
1964                 if let Some(pty) = serial_pty {
1965                     self.config.lock().unwrap().serial.file = Some(pty.path.clone());
1966                     self.serial_pty = Some(Arc::new(Mutex::new(pty)));
1967                 } else {
1968                     let (main, mut sub, path) =
1969                         create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
1970                     self.set_raw_mode(&mut sub)
1971                         .map_err(DeviceManagerError::SetPtyRaw)?;
1972                     self.config.lock().unwrap().serial.file = Some(path.clone());
1973                     self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
1974                 }
1975                 None
1976             }
1977             ConsoleOutputMode::Tty => Some(Box::new(stdout())),
1978             ConsoleOutputMode::Off | ConsoleOutputMode::Null => None,
1979         };
1980         if serial_config.mode != ConsoleOutputMode::Off {
1981             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
1982             self.serial_manager = match serial_config.mode {
1983                 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty => {
1984                     let serial_manager =
1985                         SerialManager::new(serial, self.serial_pty.clone(), serial_config.mode)
1986                             .map_err(DeviceManagerError::CreateSerialManager)?;
1987                     if let Some(mut serial_manager) = serial_manager {
1988                         serial_manager
1989                             .start_thread(
1990                                 self.exit_evt
1991                                     .try_clone()
1992                                     .map_err(DeviceManagerError::EventFd)?,
1993                             )
1994                             .map_err(DeviceManagerError::SpawnSerialManager)?;
1995                         Some(Arc::new(serial_manager))
1996                     } else {
1997                         None
1998                     }
1999                 }
2000                 _ => None,
2001             };
2002         }
2003 
2004         let console_resizer =
2005             self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;
2006 
2007         Ok(Arc::new(Console { console_resizer }))
2008     }
2009 
2010     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2011         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
2012 
2013         // Create "standard" virtio devices (net/block/rng)
2014         devices.append(&mut self.make_virtio_block_devices()?);
2015         devices.append(&mut self.make_virtio_net_devices()?);
2016         devices.append(&mut self.make_virtio_rng_devices()?);
2017 
2018         // Add virtio-fs if required
2019         devices.append(&mut self.make_virtio_fs_devices()?);
2020 
2021         // Add virtio-pmem if required
2022         devices.append(&mut self.make_virtio_pmem_devices()?);
2023 
2024         // Add virtio-vsock if required
2025         devices.append(&mut self.make_virtio_vsock_devices()?);
2026 
2027         devices.append(&mut self.make_virtio_mem_devices()?);
2028 
2029         // Add virtio-balloon if required
2030         devices.append(&mut self.make_virtio_balloon_devices()?);
2031 
2032         // Add virtio-watchdog device
2033         devices.append(&mut self.make_virtio_watchdog_devices()?);
2034 
2035         // Add vDPA devices if required
2036         devices.append(&mut self.make_vdpa_devices()?);
2037 
2038         Ok(devices)
2039     }
2040 
2041     // Cache whether io_uring is supported to avoid probing for very block device
2042     fn io_uring_is_supported(&mut self) -> bool {
2043         if let Some(supported) = self.io_uring_supported {
2044             return supported;
2045         }
2046 
2047         let supported = block_io_uring_is_supported();
2048         self.io_uring_supported = Some(supported);
2049         supported
2050     }
2051 
2052     fn make_virtio_block_device(
2053         &mut self,
2054         disk_cfg: &mut DiskConfig,
2055     ) -> DeviceManagerResult<MetaVirtioDevice> {
2056         let id = if let Some(id) = &disk_cfg.id {
2057             id.clone()
2058         } else {
2059             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
2060             disk_cfg.id = Some(id.clone());
2061             id
2062         };
2063 
2064         info!("Creating virtio-block device: {:?}", disk_cfg);
2065 
2066         let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
2067             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
2068             let vu_cfg = VhostUserConfig {
2069                 socket,
2070                 num_queues: disk_cfg.num_queues,
2071                 queue_size: disk_cfg.queue_size,
2072             };
2073             let vhost_user_block = Arc::new(Mutex::new(
2074                 match virtio_devices::vhost_user::Blk::new(
2075                     id.clone(),
2076                     vu_cfg,
2077                     self.restoring,
2078                     self.seccomp_action.clone(),
2079                     self.exit_evt
2080                         .try_clone()
2081                         .map_err(DeviceManagerError::EventFd)?,
2082                     self.force_iommu,
2083                 ) {
2084                     Ok(vub_device) => vub_device,
2085                     Err(e) => {
2086                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2087                     }
2088                 },
2089             ));
2090 
2091             (
2092                 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2093                 vhost_user_block as Arc<Mutex<dyn Migratable>>,
2094             )
2095         } else {
2096             let mut options = OpenOptions::new();
2097             options.read(true);
2098             options.write(!disk_cfg.readonly);
2099             if disk_cfg.direct {
2100                 options.custom_flags(libc::O_DIRECT);
2101             }
2102             // Open block device path
2103             let mut file: File = options
2104                 .open(
2105                     disk_cfg
2106                         .path
2107                         .as_ref()
2108                         .ok_or(DeviceManagerError::NoDiskPath)?
2109                         .clone(),
2110                 )
2111                 .map_err(DeviceManagerError::Disk)?;
2112             let image_type =
2113                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2114 
2115             let image = match image_type {
2116                 ImageType::FixedVhd => {
2117                     // Use asynchronous backend relying on io_uring if the
2118                     // syscalls are supported.
2119                     if self.io_uring_is_supported() && !disk_cfg.disable_io_uring {
2120                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2121                         Box::new(
2122                             FixedVhdDiskAsync::new(file)
2123                                 .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2124                         ) as Box<dyn DiskFile>
2125                     } else {
2126                         info!("Using synchronous fixed VHD disk file");
2127                         Box::new(
2128                             FixedVhdDiskSync::new(file)
2129                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2130                         ) as Box<dyn DiskFile>
2131                     }
2132                 }
2133                 ImageType::Raw => {
2134                     // Use asynchronous backend relying on io_uring if the
2135                     // syscalls are supported.
2136                     if self.io_uring_is_supported() && !disk_cfg.disable_io_uring {
2137                         info!("Using asynchronous RAW disk file (io_uring)");
2138                         Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2139                     } else {
2140                         info!("Using synchronous RAW disk file");
2141                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2142                     }
2143                 }
2144                 ImageType::Qcow2 => {
2145                     info!("Using synchronous QCOW disk file");
2146                     Box::new(
2147                         QcowDiskSync::new(file, disk_cfg.direct)
2148                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2149                     ) as Box<dyn DiskFile>
2150                 }
2151                 ImageType::Vhdx => {
2152                     info!("Using synchronous VHDX disk file");
2153                     Box::new(
2154                         VhdxDiskSync::new(file)
2155                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2156                     ) as Box<dyn DiskFile>
2157                 }
2158             };
2159 
2160             let virtio_block = Arc::new(Mutex::new(
2161                 virtio_devices::Block::new(
2162                     id.clone(),
2163                     image,
2164                     disk_cfg
2165                         .path
2166                         .as_ref()
2167                         .ok_or(DeviceManagerError::NoDiskPath)?
2168                         .clone(),
2169                     disk_cfg.readonly,
2170                     self.force_iommu | disk_cfg.iommu,
2171                     disk_cfg.num_queues,
2172                     disk_cfg.queue_size,
2173                     self.seccomp_action.clone(),
2174                     disk_cfg.rate_limiter_config,
2175                     self.exit_evt
2176                         .try_clone()
2177                         .map_err(DeviceManagerError::EventFd)?,
2178                 )
2179                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2180             ));
2181 
2182             (
2183                 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2184                 virtio_block as Arc<Mutex<dyn Migratable>>,
2185             )
2186         };
2187 
2188         // Fill the device tree with a new node. In case of restore, we
2189         // know there is nothing to do, so we can simply override the
2190         // existing entry.
2191         self.device_tree
2192             .lock()
2193             .unwrap()
2194             .insert(id.clone(), device_node!(id, migratable_device));
2195 
2196         Ok(MetaVirtioDevice {
2197             virtio_device,
2198             iommu: disk_cfg.iommu,
2199             id,
2200             pci_segment: disk_cfg.pci_segment,
2201             dma_handler: None,
2202         })
2203     }
2204 
2205     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2206         let mut devices = Vec::new();
2207 
2208         let mut block_devices = self.config.lock().unwrap().disks.clone();
2209         if let Some(disk_list_cfg) = &mut block_devices {
2210             for disk_cfg in disk_list_cfg.iter_mut() {
2211                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2212             }
2213         }
2214         self.config.lock().unwrap().disks = block_devices;
2215 
2216         Ok(devices)
2217     }
2218 
2219     fn make_virtio_net_device(
2220         &mut self,
2221         net_cfg: &mut NetConfig,
2222     ) -> DeviceManagerResult<MetaVirtioDevice> {
2223         let id = if let Some(id) = &net_cfg.id {
2224             id.clone()
2225         } else {
2226             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2227             net_cfg.id = Some(id.clone());
2228             id
2229         };
2230         info!("Creating virtio-net device: {:?}", net_cfg);
2231 
2232         let (virtio_device, migratable_device) = if net_cfg.vhost_user {
2233             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2234             let vu_cfg = VhostUserConfig {
2235                 socket,
2236                 num_queues: net_cfg.num_queues,
2237                 queue_size: net_cfg.queue_size,
2238             };
2239             let server = match net_cfg.vhost_mode {
2240                 VhostMode::Client => false,
2241                 VhostMode::Server => true,
2242             };
2243             let vhost_user_net = Arc::new(Mutex::new(
2244                 match virtio_devices::vhost_user::Net::new(
2245                     id.clone(),
2246                     net_cfg.mac,
2247                     net_cfg.mtu,
2248                     vu_cfg,
2249                     server,
2250                     self.seccomp_action.clone(),
2251                     self.restoring,
2252                     self.exit_evt
2253                         .try_clone()
2254                         .map_err(DeviceManagerError::EventFd)?,
2255                     self.force_iommu,
2256                 ) {
2257                     Ok(vun_device) => vun_device,
2258                     Err(e) => {
2259                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2260                     }
2261                 },
2262             ));
2263 
2264             (
2265                 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2266                 vhost_user_net as Arc<Mutex<dyn Migratable>>,
2267             )
2268         } else {
2269             let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
2270                 Arc::new(Mutex::new(
2271                     virtio_devices::Net::new(
2272                         id.clone(),
2273                         Some(tap_if_name),
2274                         None,
2275                         None,
2276                         Some(net_cfg.mac),
2277                         &mut net_cfg.host_mac,
2278                         net_cfg.mtu,
2279                         self.force_iommu | net_cfg.iommu,
2280                         net_cfg.num_queues,
2281                         net_cfg.queue_size,
2282                         self.seccomp_action.clone(),
2283                         net_cfg.rate_limiter_config,
2284                         self.exit_evt
2285                             .try_clone()
2286                             .map_err(DeviceManagerError::EventFd)?,
2287                     )
2288                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2289                 ))
2290             } else if let Some(fds) = &net_cfg.fds {
2291                 Arc::new(Mutex::new(
2292                     virtio_devices::Net::from_tap_fds(
2293                         id.clone(),
2294                         fds,
2295                         Some(net_cfg.mac),
2296                         net_cfg.mtu,
2297                         self.force_iommu | net_cfg.iommu,
2298                         net_cfg.queue_size,
2299                         self.seccomp_action.clone(),
2300                         net_cfg.rate_limiter_config,
2301                         self.exit_evt
2302                             .try_clone()
2303                             .map_err(DeviceManagerError::EventFd)?,
2304                     )
2305                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2306                 ))
2307             } else {
2308                 Arc::new(Mutex::new(
2309                     virtio_devices::Net::new(
2310                         id.clone(),
2311                         None,
2312                         Some(net_cfg.ip),
2313                         Some(net_cfg.mask),
2314                         Some(net_cfg.mac),
2315                         &mut net_cfg.host_mac,
2316                         net_cfg.mtu,
2317                         self.force_iommu | net_cfg.iommu,
2318                         net_cfg.num_queues,
2319                         net_cfg.queue_size,
2320                         self.seccomp_action.clone(),
2321                         net_cfg.rate_limiter_config,
2322                         self.exit_evt
2323                             .try_clone()
2324                             .map_err(DeviceManagerError::EventFd)?,
2325                     )
2326                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2327                 ))
2328             };
2329 
2330             (
2331                 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2332                 virtio_net as Arc<Mutex<dyn Migratable>>,
2333             )
2334         };
2335 
2336         // Fill the device tree with a new node. In case of restore, we
2337         // know there is nothing to do, so we can simply override the
2338         // existing entry.
2339         self.device_tree
2340             .lock()
2341             .unwrap()
2342             .insert(id.clone(), device_node!(id, migratable_device));
2343 
2344         Ok(MetaVirtioDevice {
2345             virtio_device,
2346             iommu: net_cfg.iommu,
2347             id,
2348             pci_segment: net_cfg.pci_segment,
2349             dma_handler: None,
2350         })
2351     }
2352 
2353     /// Add virto-net and vhost-user-net devices
2354     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2355         let mut devices = Vec::new();
2356         let mut net_devices = self.config.lock().unwrap().net.clone();
2357         if let Some(net_list_cfg) = &mut net_devices {
2358             for net_cfg in net_list_cfg.iter_mut() {
2359                 devices.push(self.make_virtio_net_device(net_cfg)?);
2360             }
2361         }
2362         self.config.lock().unwrap().net = net_devices;
2363 
2364         Ok(devices)
2365     }
2366 
2367     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2368         let mut devices = Vec::new();
2369 
2370         // Add virtio-rng if required
2371         let rng_config = self.config.lock().unwrap().rng.clone();
2372         if let Some(rng_path) = rng_config.src.to_str() {
2373             info!("Creating virtio-rng device: {:?}", rng_config);
2374             let id = String::from(RNG_DEVICE_NAME);
2375 
2376             let virtio_rng_device = Arc::new(Mutex::new(
2377                 virtio_devices::Rng::new(
2378                     id.clone(),
2379                     rng_path,
2380                     self.force_iommu | rng_config.iommu,
2381                     self.seccomp_action.clone(),
2382                     self.exit_evt
2383                         .try_clone()
2384                         .map_err(DeviceManagerError::EventFd)?,
2385                 )
2386                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2387             ));
2388             devices.push(MetaVirtioDevice {
2389                 virtio_device: Arc::clone(&virtio_rng_device)
2390                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2391                 iommu: rng_config.iommu,
2392                 id: id.clone(),
2393                 pci_segment: 0,
2394                 dma_handler: None,
2395             });
2396 
2397             // Fill the device tree with a new node. In case of restore, we
2398             // know there is nothing to do, so we can simply override the
2399             // existing entry.
2400             self.device_tree
2401                 .lock()
2402                 .unwrap()
2403                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2404         }
2405 
2406         Ok(devices)
2407     }
2408 
2409     fn make_virtio_fs_device(
2410         &mut self,
2411         fs_cfg: &mut FsConfig,
2412     ) -> DeviceManagerResult<MetaVirtioDevice> {
2413         let id = if let Some(id) = &fs_cfg.id {
2414             id.clone()
2415         } else {
2416             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2417             fs_cfg.id = Some(id.clone());
2418             id
2419         };
2420 
2421         info!("Creating virtio-fs device: {:?}", fs_cfg);
2422 
2423         let mut node = device_node!(id);
2424 
2425         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2426             let virtio_fs_device = Arc::new(Mutex::new(
2427                 virtio_devices::vhost_user::Fs::new(
2428                     id.clone(),
2429                     fs_socket,
2430                     &fs_cfg.tag,
2431                     fs_cfg.num_queues,
2432                     fs_cfg.queue_size,
2433                     None,
2434                     self.seccomp_action.clone(),
2435                     self.restoring,
2436                     self.exit_evt
2437                         .try_clone()
2438                         .map_err(DeviceManagerError::EventFd)?,
2439                     self.force_iommu,
2440                 )
2441                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2442             ));
2443 
2444             // Update the device tree with the migratable device.
2445             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2446             self.device_tree.lock().unwrap().insert(id.clone(), node);
2447 
2448             Ok(MetaVirtioDevice {
2449                 virtio_device: Arc::clone(&virtio_fs_device)
2450                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2451                 iommu: false,
2452                 id,
2453                 pci_segment: fs_cfg.pci_segment,
2454                 dma_handler: None,
2455             })
2456         } else {
2457             Err(DeviceManagerError::NoVirtioFsSock)
2458         }
2459     }
2460 
2461     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2462         let mut devices = Vec::new();
2463 
2464         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2465         if let Some(fs_list_cfg) = &mut fs_devices {
2466             for fs_cfg in fs_list_cfg.iter_mut() {
2467                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2468             }
2469         }
2470         self.config.lock().unwrap().fs = fs_devices;
2471 
2472         Ok(devices)
2473     }
2474 
2475     fn make_virtio_pmem_device(
2476         &mut self,
2477         pmem_cfg: &mut PmemConfig,
2478     ) -> DeviceManagerResult<MetaVirtioDevice> {
2479         let id = if let Some(id) = &pmem_cfg.id {
2480             id.clone()
2481         } else {
2482             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2483             pmem_cfg.id = Some(id.clone());
2484             id
2485         };
2486 
2487         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2488 
2489         let mut node = device_node!(id);
2490 
2491         // Look for the id in the device tree. If it can be found, that means
2492         // the device is being restored, otherwise it's created from scratch.
2493         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2494             info!("Restoring virtio-pmem {} resources", id);
2495 
2496             let mut region_range: Option<(u64, u64)> = None;
2497             for resource in node.resources.iter() {
2498                 match resource {
2499                     Resource::MmioAddressRange { base, size } => {
2500                         if region_range.is_some() {
2501                             return Err(DeviceManagerError::ResourceAlreadyExists);
2502                         }
2503 
2504                         region_range = Some((*base, *size));
2505                     }
2506                     _ => {
2507                         error!("Unexpected resource {:?} for {}", resource, id);
2508                     }
2509                 }
2510             }
2511 
2512             if region_range.is_none() {
2513                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2514             }
2515 
2516             region_range
2517         } else {
2518             None
2519         };
2520 
2521         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2522             if pmem_cfg.size.is_none() {
2523                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2524             }
2525             (O_TMPFILE, true)
2526         } else {
2527             (0, false)
2528         };
2529 
2530         let mut file = OpenOptions::new()
2531             .read(true)
2532             .write(!pmem_cfg.discard_writes)
2533             .custom_flags(custom_flags)
2534             .open(&pmem_cfg.file)
2535             .map_err(DeviceManagerError::PmemFileOpen)?;
2536 
2537         let size = if let Some(size) = pmem_cfg.size {
2538             if set_len {
2539                 file.set_len(size)
2540                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2541             }
2542             size
2543         } else {
2544             file.seek(SeekFrom::End(0))
2545                 .map_err(DeviceManagerError::PmemFileSetLen)?
2546         };
2547 
2548         if size % 0x20_0000 != 0 {
2549             return Err(DeviceManagerError::PmemSizeNotAligned);
2550         }
2551 
2552         let (region_base, region_size) = if let Some((base, size)) = region_range {
2553             // The memory needs to be 2MiB aligned in order to support
2554             // hugepages.
2555             self.pci_segments[pmem_cfg.pci_segment as usize]
2556                 .allocator
2557                 .lock()
2558                 .unwrap()
2559                 .allocate(
2560                     Some(GuestAddress(base)),
2561                     size as GuestUsize,
2562                     Some(0x0020_0000),
2563                 )
2564                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2565 
2566             (base, size)
2567         } else {
2568             // The memory needs to be 2MiB aligned in order to support
2569             // hugepages.
2570             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2571                 .allocator
2572                 .lock()
2573                 .unwrap()
2574                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
2575                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2576 
2577             (base.raw_value(), size)
2578         };
2579 
2580         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2581         let mmap_region = MmapRegion::build(
2582             Some(FileOffset::new(cloned_file, 0)),
2583             region_size as usize,
2584             PROT_READ | PROT_WRITE,
2585             MAP_NORESERVE
2586                 | if pmem_cfg.discard_writes {
2587                     MAP_PRIVATE
2588                 } else {
2589                     MAP_SHARED
2590                 },
2591         )
2592         .map_err(DeviceManagerError::NewMmapRegion)?;
2593         let host_addr: u64 = mmap_region.as_ptr() as u64;
2594 
2595         let mem_slot = self
2596             .memory_manager
2597             .lock()
2598             .unwrap()
2599             .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
2600             .map_err(DeviceManagerError::MemoryManager)?;
2601 
2602         let mapping = virtio_devices::UserspaceMapping {
2603             host_addr,
2604             mem_slot,
2605             addr: GuestAddress(region_base),
2606             len: region_size,
2607             mergeable: false,
2608         };
2609 
2610         let virtio_pmem_device = Arc::new(Mutex::new(
2611             virtio_devices::Pmem::new(
2612                 id.clone(),
2613                 file,
2614                 GuestAddress(region_base),
2615                 mapping,
2616                 mmap_region,
2617                 self.force_iommu | pmem_cfg.iommu,
2618                 self.seccomp_action.clone(),
2619                 self.exit_evt
2620                     .try_clone()
2621                     .map_err(DeviceManagerError::EventFd)?,
2622             )
2623             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2624         ));
2625 
2626         // Update the device tree with correct resource information and with
2627         // the migratable device.
2628         node.resources.push(Resource::MmioAddressRange {
2629             base: region_base,
2630             size: region_size,
2631         });
2632         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2633         self.device_tree.lock().unwrap().insert(id.clone(), node);
2634 
2635         Ok(MetaVirtioDevice {
2636             virtio_device: Arc::clone(&virtio_pmem_device)
2637                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2638             iommu: pmem_cfg.iommu,
2639             id,
2640             pci_segment: pmem_cfg.pci_segment,
2641             dma_handler: None,
2642         })
2643     }
2644 
2645     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2646         let mut devices = Vec::new();
2647         // Add virtio-pmem if required
2648         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2649         if let Some(pmem_list_cfg) = &mut pmem_devices {
2650             for pmem_cfg in pmem_list_cfg.iter_mut() {
2651                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2652             }
2653         }
2654         self.config.lock().unwrap().pmem = pmem_devices;
2655 
2656         Ok(devices)
2657     }
2658 
2659     fn make_virtio_vsock_device(
2660         &mut self,
2661         vsock_cfg: &mut VsockConfig,
2662     ) -> DeviceManagerResult<MetaVirtioDevice> {
2663         let id = if let Some(id) = &vsock_cfg.id {
2664             id.clone()
2665         } else {
2666             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2667             vsock_cfg.id = Some(id.clone());
2668             id
2669         };
2670 
2671         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2672 
2673         let socket_path = vsock_cfg
2674             .socket
2675             .to_str()
2676             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2677         let backend =
2678             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2679                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2680 
2681         let vsock_device = Arc::new(Mutex::new(
2682             virtio_devices::Vsock::new(
2683                 id.clone(),
2684                 vsock_cfg.cid,
2685                 vsock_cfg.socket.clone(),
2686                 backend,
2687                 self.force_iommu | vsock_cfg.iommu,
2688                 self.seccomp_action.clone(),
2689                 self.exit_evt
2690                     .try_clone()
2691                     .map_err(DeviceManagerError::EventFd)?,
2692             )
2693             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2694         ));
2695 
2696         // Fill the device tree with a new node. In case of restore, we
2697         // know there is nothing to do, so we can simply override the
2698         // existing entry.
2699         self.device_tree
2700             .lock()
2701             .unwrap()
2702             .insert(id.clone(), device_node!(id, vsock_device));
2703 
2704         Ok(MetaVirtioDevice {
2705             virtio_device: Arc::clone(&vsock_device)
2706                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2707             iommu: vsock_cfg.iommu,
2708             id,
2709             pci_segment: vsock_cfg.pci_segment,
2710             dma_handler: None,
2711         })
2712     }
2713 
2714     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2715         let mut devices = Vec::new();
2716 
2717         let mut vsock = self.config.lock().unwrap().vsock.clone();
2718         if let Some(ref mut vsock_cfg) = &mut vsock {
2719             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
2720         }
2721         self.config.lock().unwrap().vsock = vsock;
2722 
2723         Ok(devices)
2724     }
2725 
2726     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2727         let mut devices = Vec::new();
2728 
2729         let mm = self.memory_manager.clone();
2730         let mut mm = mm.lock().unwrap();
2731         for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
2732             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
2733                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
2734 
2735                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
2736                     .map(|i| i as u16);
2737 
2738                 let virtio_mem_device = Arc::new(Mutex::new(
2739                     virtio_devices::Mem::new(
2740                         memory_zone_id.clone(),
2741                         virtio_mem_zone.region(),
2742                         self.seccomp_action.clone(),
2743                         node_id,
2744                         virtio_mem_zone.hotplugged_size(),
2745                         virtio_mem_zone.hugepages(),
2746                         self.exit_evt
2747                             .try_clone()
2748                             .map_err(DeviceManagerError::EventFd)?,
2749                         virtio_mem_zone.blocks_state().clone(),
2750                     )
2751                     .map_err(DeviceManagerError::CreateVirtioMem)?,
2752                 ));
2753 
2754                 // Update the virtio-mem zone so that it has a handle onto the
2755                 // virtio-mem device, which will be used for triggering a resize
2756                 // if needed.
2757                 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));
2758 
2759                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
2760 
2761                 devices.push(MetaVirtioDevice {
2762                     virtio_device: Arc::clone(&virtio_mem_device)
2763                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2764                     iommu: false,
2765                     id: memory_zone_id.clone(),
2766                     pci_segment: 0,
2767                     dma_handler: None,
2768                 });
2769 
2770                 // Fill the device tree with a new node. In case of restore, we
2771                 // know there is nothing to do, so we can simply override the
2772                 // existing entry.
2773                 self.device_tree.lock().unwrap().insert(
2774                     memory_zone_id.clone(),
2775                     device_node!(memory_zone_id, virtio_mem_device),
2776                 );
2777             }
2778         }
2779 
2780         Ok(devices)
2781     }
2782 
2783     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2784         let mut devices = Vec::new();
2785 
2786         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
2787             let id = String::from(BALLOON_DEVICE_NAME);
2788             info!("Creating virtio-balloon device: id = {}", id);
2789 
2790             let virtio_balloon_device = Arc::new(Mutex::new(
2791                 virtio_devices::Balloon::new(
2792                     id.clone(),
2793                     balloon_config.size,
2794                     balloon_config.deflate_on_oom,
2795                     balloon_config.free_page_reporting,
2796                     self.seccomp_action.clone(),
2797                     self.exit_evt
2798                         .try_clone()
2799                         .map_err(DeviceManagerError::EventFd)?,
2800                 )
2801                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
2802             ));
2803 
2804             self.balloon = Some(virtio_balloon_device.clone());
2805 
2806             devices.push(MetaVirtioDevice {
2807                 virtio_device: Arc::clone(&virtio_balloon_device)
2808                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2809                 iommu: false,
2810                 id: id.clone(),
2811                 pci_segment: 0,
2812                 dma_handler: None,
2813             });
2814 
2815             self.device_tree
2816                 .lock()
2817                 .unwrap()
2818                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
2819         }
2820 
2821         Ok(devices)
2822     }
2823 
2824     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2825         let mut devices = Vec::new();
2826 
2827         if !self.config.lock().unwrap().watchdog {
2828             return Ok(devices);
2829         }
2830 
2831         let id = String::from(WATCHDOG_DEVICE_NAME);
2832         info!("Creating virtio-watchdog device: id = {}", id);
2833 
2834         let virtio_watchdog_device = Arc::new(Mutex::new(
2835             virtio_devices::Watchdog::new(
2836                 id.clone(),
2837                 self.reset_evt.try_clone().unwrap(),
2838                 self.seccomp_action.clone(),
2839                 self.exit_evt
2840                     .try_clone()
2841                     .map_err(DeviceManagerError::EventFd)?,
2842             )
2843             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
2844         ));
2845         devices.push(MetaVirtioDevice {
2846             virtio_device: Arc::clone(&virtio_watchdog_device)
2847                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2848             iommu: false,
2849             id: id.clone(),
2850             pci_segment: 0,
2851             dma_handler: None,
2852         });
2853 
2854         self.device_tree
2855             .lock()
2856             .unwrap()
2857             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
2858 
2859         Ok(devices)
2860     }
2861 
2862     fn make_vdpa_device(
2863         &mut self,
2864         vdpa_cfg: &mut VdpaConfig,
2865     ) -> DeviceManagerResult<MetaVirtioDevice> {
2866         let id = if let Some(id) = &vdpa_cfg.id {
2867             id.clone()
2868         } else {
2869             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
2870             vdpa_cfg.id = Some(id.clone());
2871             id
2872         };
2873 
2874         info!("Creating vDPA device: {:?}", vdpa_cfg);
2875 
2876         let device_path = vdpa_cfg
2877             .path
2878             .to_str()
2879             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
2880 
2881         let vdpa_device = Arc::new(Mutex::new(
2882             virtio_devices::Vdpa::new(
2883                 id.clone(),
2884                 device_path,
2885                 self.memory_manager.lock().unwrap().guest_memory(),
2886                 vdpa_cfg.num_queues as u16,
2887             )
2888             .map_err(DeviceManagerError::CreateVdpa)?,
2889         ));
2890 
2891         // Create the DMA handler that is required by the vDPA device
2892         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
2893             Arc::clone(&vdpa_device),
2894             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
2895         ));
2896 
2897         self.device_tree
2898             .lock()
2899             .unwrap()
2900             .insert(id.clone(), device_node!(id));
2901 
2902         Ok(MetaVirtioDevice {
2903             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2904             iommu: vdpa_cfg.iommu,
2905             id,
2906             pci_segment: vdpa_cfg.pci_segment,
2907             dma_handler: Some(vdpa_mapping),
2908         })
2909     }
2910 
2911     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2912         let mut devices = Vec::new();
2913         // Add vdpa if required
2914         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
2915         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
2916             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
2917                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
2918             }
2919         }
2920         self.config.lock().unwrap().vdpa = vdpa_devices;
2921 
2922         Ok(devices)
2923     }
2924 
2925     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
2926         let start_id = self.device_id_cnt;
2927         loop {
2928             // Generate the temporary name.
2929             let name = format!("{}{}", prefix, self.device_id_cnt);
2930             // Increment the counter.
2931             self.device_id_cnt += Wrapping(1);
2932             // Check if the name is already in use.
2933             if !self.boot_id_list.contains(&name)
2934                 && !self.device_tree.lock().unwrap().contains_key(&name)
2935             {
2936                 return Ok(name);
2937             }
2938 
2939             if self.device_id_cnt == start_id {
2940                 // We went through a full loop and there's nothing else we can
2941                 // do.
2942                 break;
2943             }
2944         }
2945         Err(DeviceManagerError::NoAvailableDeviceName)
2946     }
2947 
2948     fn add_passthrough_device(
2949         &mut self,
2950         device_cfg: &mut DeviceConfig,
2951     ) -> DeviceManagerResult<(PciBdf, String)> {
2952         // If the passthrough device has not been created yet, it is created
2953         // here and stored in the DeviceManager structure for future needs.
2954         if self.passthrough_device.is_none() {
2955             self.passthrough_device = Some(
2956                 self.address_manager
2957                     .vm
2958                     .create_passthrough_device()
2959                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
2960             );
2961         }
2962 
2963         self.add_vfio_device(device_cfg)
2964     }
2965 
2966     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
2967         let passthrough_device = self
2968             .passthrough_device
2969             .as_ref()
2970             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
2971 
2972         let dup = passthrough_device
2973             .try_clone()
2974             .map_err(DeviceManagerError::VfioCreate)?;
2975 
2976         Ok(Arc::new(
2977             VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
2978         ))
2979     }
2980 
2981     fn add_vfio_device(
2982         &mut self,
2983         device_cfg: &mut DeviceConfig,
2984     ) -> DeviceManagerResult<(PciBdf, String)> {
2985         let vfio_name = if let Some(id) = &device_cfg.id {
2986             id.clone()
2987         } else {
2988             let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
2989             device_cfg.id = Some(id.clone());
2990             id
2991         };
2992 
2993         let (pci_segment_id, pci_device_bdf, resources) =
2994             self.pci_resources(&vfio_name, device_cfg.pci_segment)?;
2995 
2996         let mut needs_dma_mapping = false;
2997 
2998         // Here we create a new VFIO container for two reasons. Either this is
2999         // the first VFIO device, meaning we need a new VFIO container, which
3000         // will be shared with other VFIO devices. Or the new VFIO device is
3001         // attached to a vIOMMU, meaning we must create a dedicated VFIO
3002         // container. In the vIOMMU use case, we can't let all devices under
3003         // the same VFIO container since we couldn't map/unmap memory for each
3004         // device. That's simply because the map/unmap operations happen at the
3005         // VFIO container level.
3006         let vfio_container = if device_cfg.iommu {
3007             let vfio_container = self.create_vfio_container()?;
3008 
3009             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3010                 Arc::clone(&vfio_container),
3011                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3012             ));
3013 
3014             if let Some(iommu) = &self.iommu_device {
3015                 iommu
3016                     .lock()
3017                     .unwrap()
3018                     .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
3019             } else {
3020                 return Err(DeviceManagerError::MissingVirtualIommu);
3021             }
3022 
3023             vfio_container
3024         } else if let Some(vfio_container) = &self.vfio_container {
3025             Arc::clone(vfio_container)
3026         } else {
3027             let vfio_container = self.create_vfio_container()?;
3028             needs_dma_mapping = true;
3029             self.vfio_container = Some(Arc::clone(&vfio_container));
3030 
3031             vfio_container
3032         };
3033 
3034         let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
3035             .map_err(DeviceManagerError::VfioCreate)?;
3036 
3037         if needs_dma_mapping {
3038             // Register DMA mapping in IOMMU.
3039             // Do not register virtio-mem regions, as they are handled directly by
3040             // virtio-mem device itself.
3041             for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3042                 for region in zone.regions() {
3043                     vfio_container
3044                         .vfio_dma_map(
3045                             region.start_addr().raw_value(),
3046                             region.len() as u64,
3047                             region.as_ptr() as u64,
3048                         )
3049                         .map_err(DeviceManagerError::VfioDmaMap)?;
3050                 }
3051             }
3052 
3053             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3054                 Arc::clone(&vfio_container),
3055                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3056             ));
3057 
3058             for virtio_mem_device in self.virtio_mem_devices.iter() {
3059                 virtio_mem_device
3060                     .lock()
3061                     .unwrap()
3062                     .add_dma_mapping_handler(
3063                         VirtioMemMappingSource::Container,
3064                         vfio_mapping.clone(),
3065                     )
3066                     .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3067             }
3068         }
3069 
3070         let legacy_interrupt_group =
3071             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3072                 Some(
3073                     legacy_interrupt_manager
3074                         .create_group(LegacyIrqGroupConfig {
3075                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3076                                 [pci_device_bdf.device() as usize]
3077                                 as InterruptIndex,
3078                         })
3079                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3080                 )
3081             } else {
3082                 None
3083             };
3084 
3085         let memory_manager = self.memory_manager.clone();
3086 
3087         let vfio_pci_device = VfioPciDevice::new(
3088             vfio_name.clone(),
3089             &self.address_manager.vm,
3090             vfio_device,
3091             vfio_container,
3092             self.msi_interrupt_manager.clone(),
3093             legacy_interrupt_group,
3094             device_cfg.iommu,
3095             pci_device_bdf,
3096             self.restoring,
3097             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3098         )
3099         .map_err(DeviceManagerError::VfioPciCreate)?;
3100 
3101         let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));
3102 
3103         let new_resources = self.add_pci_device(
3104             vfio_pci_device.clone(),
3105             vfio_pci_device.clone(),
3106             pci_segment_id,
3107             pci_device_bdf,
3108             resources,
3109         )?;
3110 
3111         // When restoring a VM, the restore codepath will take care of mapping
3112         // the MMIO regions based on the information from the snapshot.
3113         if !self.restoring {
3114             vfio_pci_device
3115                 .lock()
3116                 .unwrap()
3117                 .map_mmio_regions()
3118                 .map_err(DeviceManagerError::VfioMapRegion)?;
3119         }
3120 
3121         let mut node = device_node!(vfio_name, vfio_pci_device);
3122 
3123         // Update the device tree with correct resource information.
3124         node.resources = new_resources;
3125         node.pci_bdf = Some(pci_device_bdf);
3126         node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));
3127 
3128         self.device_tree
3129             .lock()
3130             .unwrap()
3131             .insert(vfio_name.clone(), node);
3132 
3133         Ok((pci_device_bdf, vfio_name))
3134     }
3135 
3136     fn add_pci_device(
3137         &mut self,
3138         bus_device: Arc<Mutex<dyn BusDevice>>,
3139         pci_device: Arc<Mutex<dyn PciDevice>>,
3140         segment_id: u16,
3141         bdf: PciBdf,
3142         resources: Option<Vec<Resource>>,
3143     ) -> DeviceManagerResult<Vec<Resource>> {
3144         let bars = pci_device
3145             .lock()
3146             .unwrap()
3147             .allocate_bars(
3148                 &self.address_manager.allocator,
3149                 &mut self.pci_segments[segment_id as usize]
3150                     .allocator
3151                     .lock()
3152                     .unwrap(),
3153                 resources,
3154             )
3155             .map_err(DeviceManagerError::AllocateBars)?;
3156 
3157         let mut pci_bus = self.pci_segments[segment_id as usize]
3158             .pci_bus
3159             .lock()
3160             .unwrap();
3161 
3162         pci_bus
3163             .add_device(bdf.device() as u32, pci_device)
3164             .map_err(DeviceManagerError::AddPciDevice)?;
3165 
3166         self.bus_devices.push(Arc::clone(&bus_device));
3167 
3168         pci_bus
3169             .register_mapping(
3170                 bus_device,
3171                 #[cfg(target_arch = "x86_64")]
3172                 self.address_manager.io_bus.as_ref(),
3173                 self.address_manager.mmio_bus.as_ref(),
3174                 bars.clone(),
3175             )
3176             .map_err(DeviceManagerError::AddPciDevice)?;
3177 
3178         let mut new_resources = Vec::new();
3179         for bar in bars {
3180             new_resources.push(Resource::PciBar {
3181                 index: bar.idx(),
3182                 base: bar.addr(),
3183                 size: bar.size(),
3184                 type_: bar.region_type().into(),
3185                 prefetchable: bar.prefetchable().into(),
3186             });
3187         }
3188 
3189         Ok(new_resources)
3190     }
3191 
3192     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3193         let mut iommu_attached_device_ids = Vec::new();
3194         let mut devices = self.config.lock().unwrap().devices.clone();
3195 
3196         if let Some(device_list_cfg) = &mut devices {
3197             for device_cfg in device_list_cfg.iter_mut() {
3198                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3199                 if device_cfg.iommu && self.iommu_device.is_some() {
3200                     iommu_attached_device_ids.push(device_id);
3201                 }
3202             }
3203         }
3204 
3205         // Update the list of devices
3206         self.config.lock().unwrap().devices = devices;
3207 
3208         Ok(iommu_attached_device_ids)
3209     }
3210 
3211     fn add_vfio_user_device(
3212         &mut self,
3213         device_cfg: &mut UserDeviceConfig,
3214     ) -> DeviceManagerResult<(PciBdf, String)> {
3215         let vfio_user_name = if let Some(id) = &device_cfg.id {
3216             id.clone()
3217         } else {
3218             let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
3219             device_cfg.id = Some(id.clone());
3220             id
3221         };
3222 
3223         let (pci_segment_id, pci_device_bdf, resources) =
3224             self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;
3225 
3226         let legacy_interrupt_group =
3227             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3228                 Some(
3229                     legacy_interrupt_manager
3230                         .create_group(LegacyIrqGroupConfig {
3231                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3232                                 [pci_device_bdf.device() as usize]
3233                                 as InterruptIndex,
3234                         })
3235                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3236                 )
3237             } else {
3238                 None
3239             };
3240 
3241         let client = Arc::new(Mutex::new(
3242             vfio_user::Client::new(&device_cfg.socket)
3243                 .map_err(DeviceManagerError::VfioUserCreateClient)?,
3244         ));
3245 
3246         let memory_manager = self.memory_manager.clone();
3247 
3248         let mut vfio_user_pci_device = VfioUserPciDevice::new(
3249             vfio_user_name.clone(),
3250             &self.address_manager.vm,
3251             client.clone(),
3252             self.msi_interrupt_manager.clone(),
3253             legacy_interrupt_group,
3254             pci_device_bdf,
3255             self.restoring,
3256             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3257         )
3258         .map_err(DeviceManagerError::VfioUserCreate)?;
3259 
3260         let memory = self.memory_manager.lock().unwrap().guest_memory();
3261         let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
3262         for virtio_mem_device in self.virtio_mem_devices.iter() {
3263             virtio_mem_device
3264                 .lock()
3265                 .unwrap()
3266                 .add_dma_mapping_handler(
3267                     VirtioMemMappingSource::Device(pci_device_bdf.into()),
3268                     vfio_user_mapping.clone(),
3269                 )
3270                 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3271         }
3272 
3273         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3274             for region in zone.regions() {
3275                 vfio_user_pci_device
3276                     .dma_map(region)
3277                     .map_err(DeviceManagerError::VfioUserDmaMap)?;
3278             }
3279         }
3280 
3281         let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));
3282 
3283         let new_resources = self.add_pci_device(
3284             vfio_user_pci_device.clone(),
3285             vfio_user_pci_device.clone(),
3286             pci_segment_id,
3287             pci_device_bdf,
3288             resources,
3289         )?;
3290 
3291         // When restoring a VM, the restore codepath will take care of mapping
3292         // the MMIO regions based on the information from the snapshot.
3293         if !self.restoring {
3294             // Note it is required to call 'add_pci_device()' in advance to have the list of
3295             // mmio regions provisioned correctly
3296             vfio_user_pci_device
3297                 .lock()
3298                 .unwrap()
3299                 .map_mmio_regions()
3300                 .map_err(DeviceManagerError::VfioUserMapRegion)?;
3301         }
3302 
3303         let mut node = device_node!(vfio_user_name, vfio_user_pci_device);
3304 
3305         // Update the device tree with correct resource information.
3306         node.resources = new_resources;
3307         node.pci_bdf = Some(pci_device_bdf);
3308         node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));
3309 
3310         self.device_tree
3311             .lock()
3312             .unwrap()
3313             .insert(vfio_user_name.clone(), node);
3314 
3315         Ok((pci_device_bdf, vfio_user_name))
3316     }
3317 
3318     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3319         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3320 
3321         if let Some(device_list_cfg) = &mut user_devices {
3322             for device_cfg in device_list_cfg.iter_mut() {
3323                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3324             }
3325         }
3326 
3327         // Update the list of devices
3328         self.config.lock().unwrap().user_devices = user_devices;
3329 
3330         Ok(vec![])
3331     }
3332 
3333     fn add_virtio_pci_device(
3334         &mut self,
3335         virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3336         iommu_mapping: &Option<Arc<IommuMapping>>,
3337         virtio_device_id: String,
3338         pci_segment_id: u16,
3339         dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
3340     ) -> DeviceManagerResult<PciBdf> {
3341         let id = format!("{}-{}", VIRTIO_PCI_DEVICE_NAME_PREFIX, virtio_device_id);
3342 
3343         // Add the new virtio-pci node to the device tree.
3344         let mut node = device_node!(id);
3345         node.children = vec![virtio_device_id.clone()];
3346 
3347         let (pci_segment_id, pci_device_bdf, resources) =
3348             self.pci_resources(&id, pci_segment_id)?;
3349 
3350         // Update the existing virtio node by setting the parent.
3351         if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
3352             node.parent = Some(id.clone());
3353         } else {
3354             return Err(DeviceManagerError::MissingNode);
3355         }
3356 
3357         // Allows support for one MSI-X vector per queue. It also adds 1
3358         // as we need to take into account the dedicated vector to notify
3359         // about a virtio config change.
3360         let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;
3361 
3362         // Create the AccessPlatform trait from the implementation IommuMapping.
3363         // This will provide address translation for any virtio device sitting
3364         // behind a vIOMMU.
3365         let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
3366         {
3367             Some(Arc::new(AccessPlatformMapping::new(
3368                 pci_device_bdf.into(),
3369                 mapping.clone(),
3370             )))
3371         } else {
3372             None
3373         };
3374 
3375         let memory = self.memory_manager.lock().unwrap().guest_memory();
3376 
3377         // Map DMA ranges if a DMA handler is available and if the device is
3378         // not attached to a virtual IOMMU.
3379         if let Some(dma_handler) = &dma_handler {
3380             if iommu_mapping.is_some() {
3381                 if let Some(iommu) = &self.iommu_device {
3382                     iommu
3383                         .lock()
3384                         .unwrap()
3385                         .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
3386                 } else {
3387                     return Err(DeviceManagerError::MissingVirtualIommu);
3388                 }
3389             } else {
3390                 // Let every virtio-mem device handle the DMA map/unmap through the
3391                 // DMA handler provided.
3392                 for virtio_mem_device in self.virtio_mem_devices.iter() {
3393                     virtio_mem_device
3394                         .lock()
3395                         .unwrap()
3396                         .add_dma_mapping_handler(
3397                             VirtioMemMappingSource::Device(pci_device_bdf.into()),
3398                             dma_handler.clone(),
3399                         )
3400                         .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3401                 }
3402 
3403                 // Do not register virtio-mem regions, as they are handled directly by
3404                 // virtio-mem devices.
3405                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3406                     for region in zone.regions() {
3407                         let gpa = region.start_addr().0;
3408                         let size = region.len();
3409                         dma_handler
3410                             .map(gpa, gpa, size)
3411                             .map_err(DeviceManagerError::VirtioDmaMap)?;
3412                     }
3413                 }
3414             }
3415         }
3416 
3417         let device_type = virtio_device.lock().unwrap().device_type();
3418         let virtio_pci_device = Arc::new(Mutex::new(
3419             VirtioPciDevice::new(
3420                 id.clone(),
3421                 memory,
3422                 virtio_device,
3423                 msix_num,
3424                 access_platform,
3425                 &self.msi_interrupt_manager,
3426                 pci_device_bdf.into(),
3427                 self.activate_evt
3428                     .try_clone()
3429                     .map_err(DeviceManagerError::EventFd)?,
3430                 // All device types *except* virtio block devices should be allocated a 64-bit bar
3431                 // The block devices should be given a 32-bit BAR so that they are easily accessible
3432                 // to firmware without requiring excessive identity mapping.
3433                 // The exception being if not on the default PCI segment.
3434                 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
3435                 dma_handler,
3436                 self.pending_activations.clone(),
3437             )
3438             .map_err(DeviceManagerError::VirtioDevice)?,
3439         ));
3440 
3441         let new_resources = self.add_pci_device(
3442             virtio_pci_device.clone(),
3443             virtio_pci_device.clone(),
3444             pci_segment_id,
3445             pci_device_bdf,
3446             resources,
3447         )?;
3448 
3449         let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3450         for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3451             let io_addr = IoEventAddress::Mmio(addr);
3452             self.address_manager
3453                 .vm
3454                 .register_ioevent(event, &io_addr, None)
3455                 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
3456         }
3457 
3458         // Update the device tree with correct resource information.
3459         node.resources = new_resources;
3460         node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
3461         node.pci_bdf = Some(pci_device_bdf);
3462         node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
3463         self.device_tree.lock().unwrap().insert(id, node);
3464 
3465         Ok(pci_device_bdf)
3466     }
3467 
3468     fn pci_resources(
3469         &self,
3470         id: &str,
3471         pci_segment_id: u16,
3472     ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
3473         // Look for the id in the device tree. If it can be found, that means
3474         // the device is being restored, otherwise it's created from scratch.
3475         Ok(
3476             if let Some(node) = self.device_tree.lock().unwrap().get(id) {
3477                 info!("Restoring virtio-pci {} resources", id);
3478                 let pci_device_bdf: PciBdf = node
3479                     .pci_bdf
3480                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3481                 let pci_segment_id = pci_device_bdf.segment();
3482 
3483                 self.pci_segments[pci_segment_id as usize]
3484                     .pci_bus
3485                     .lock()
3486                     .unwrap()
3487                     .get_device_id(pci_device_bdf.device() as usize)
3488                     .map_err(DeviceManagerError::GetPciDeviceId)?;
3489 
3490                 (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
3491             } else {
3492                 let pci_device_bdf =
3493                     self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3494 
3495                 (pci_segment_id, pci_device_bdf, None)
3496             },
3497         )
3498     }
3499 
3500     #[cfg(target_arch = "x86_64")]
3501     pub fn io_bus(&self) -> &Arc<Bus> {
3502         &self.address_manager.io_bus
3503     }
3504 
3505     pub fn mmio_bus(&self) -> &Arc<Bus> {
3506         &self.address_manager.mmio_bus
3507     }
3508 
3509     pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
3510         &self.address_manager.allocator
3511     }
3512 
3513     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3514         self.interrupt_controller
3515             .as_ref()
3516             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3517     }
3518 
3519     #[cfg(target_arch = "x86_64")]
3520     // Used to provide a fast path for handling PIO exits
3521     pub fn pci_config_io(&self) -> Arc<Mutex<PciConfigIo>> {
3522         Arc::clone(self.pci_segments[0].pci_config_io.as_ref().unwrap())
3523     }
3524 
3525     pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
3526         &self.pci_segments
3527     }
3528 
3529     pub fn console(&self) -> &Arc<Console> {
3530         &self.console
3531     }
3532 
3533     #[cfg(target_arch = "aarch64")]
3534     pub fn cmdline_additions(&self) -> &[String] {
3535         self.cmdline_additions.as_slice()
3536     }
3537 
3538     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3539         for handle in self.virtio_devices.iter() {
3540             handle
3541                 .virtio_device
3542                 .lock()
3543                 .unwrap()
3544                 .add_memory_region(new_region)
3545                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3546 
3547             if let Some(dma_handler) = &handle.dma_handler {
3548                 if !handle.iommu {
3549                     let gpa = new_region.start_addr().0;
3550                     let size = new_region.len();
3551                     dma_handler
3552                         .map(gpa, gpa, size)
3553                         .map_err(DeviceManagerError::VirtioDmaMap)?;
3554                 }
3555             }
3556         }
3557 
3558         // Take care of updating the memory for VFIO PCI devices.
3559         if let Some(vfio_container) = &self.vfio_container {
3560             vfio_container
3561                 .vfio_dma_map(
3562                     new_region.start_addr().raw_value(),
3563                     new_region.len() as u64,
3564                     new_region.as_ptr() as u64,
3565                 )
3566                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3567         }
3568 
3569         // Take care of updating the memory for vfio-user devices.
3570         {
3571             let device_tree = self.device_tree.lock().unwrap();
3572             for pci_device_node in device_tree.pci_devices() {
3573                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
3574                     .pci_device_handle
3575                     .as_ref()
3576                     .ok_or(DeviceManagerError::MissingPciDevice)?
3577                 {
3578                     vfio_user_pci_device
3579                         .lock()
3580                         .unwrap()
3581                         .dma_map(new_region)
3582                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
3583                 }
3584             }
3585         }
3586 
3587         Ok(())
3588     }
3589 
3590     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3591         for mut activator in self.pending_activations.lock().unwrap().drain(..) {
3592             activator
3593                 .activate()
3594                 .map_err(DeviceManagerError::VirtioActivate)?;
3595         }
3596         Ok(())
3597     }
3598 
3599     pub fn notify_hotplug(
3600         &self,
3601         _notification_type: AcpiNotificationFlags,
3602     ) -> DeviceManagerResult<()> {
3603         return self
3604             .ged_notification_device
3605             .as_ref()
3606             .unwrap()
3607             .lock()
3608             .unwrap()
3609             .notify(_notification_type)
3610             .map_err(DeviceManagerError::HotPlugNotification);
3611     }
3612 
3613     pub fn add_device(
3614         &mut self,
3615         device_cfg: &mut DeviceConfig,
3616     ) -> DeviceManagerResult<PciDeviceInfo> {
3617         self.validate_identifier(&device_cfg.id)?;
3618 
3619         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
3620             return Err(DeviceManagerError::InvalidIommuHotplug);
3621         }
3622 
3623         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
3624 
3625         // Update the PCIU bitmap
3626         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3627 
3628         Ok(PciDeviceInfo {
3629             id: device_name,
3630             bdf,
3631         })
3632     }
3633 
3634     pub fn add_user_device(
3635         &mut self,
3636         device_cfg: &mut UserDeviceConfig,
3637     ) -> DeviceManagerResult<PciDeviceInfo> {
3638         self.validate_identifier(&device_cfg.id)?;
3639 
3640         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
3641 
3642         // Update the PCIU bitmap
3643         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3644 
3645         Ok(PciDeviceInfo {
3646             id: device_name,
3647             bdf,
3648         })
3649     }
3650 
3651     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
3652         // The node can be directly a PCI node in case the 'id' refers to a
3653         // VFIO device or a virtio-pci one.
3654         // In case the 'id' refers to a virtio device, we must find the PCI
3655         // node by looking at the parent.
3656         let device_tree = self.device_tree.lock().unwrap();
3657         let node = device_tree
3658             .get(&id)
3659             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
3660 
3661         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
3662             node
3663         } else {
3664             let parent = node
3665                 .parent
3666                 .as_ref()
3667                 .ok_or(DeviceManagerError::MissingNode)?;
3668             device_tree
3669                 .get(parent)
3670                 .ok_or(DeviceManagerError::MissingNode)?
3671         };
3672 
3673         let pci_device_bdf: PciBdf = pci_device_node
3674             .pci_bdf
3675             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3676         let pci_segment_id = pci_device_bdf.segment();
3677 
3678         let pci_device_handle = pci_device_node
3679             .pci_device_handle
3680             .as_ref()
3681             .ok_or(DeviceManagerError::MissingPciDevice)?;
3682         #[allow(irrefutable_let_patterns)]
3683         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
3684             let device_type = VirtioDeviceType::from(
3685                 virtio_pci_device
3686                     .lock()
3687                     .unwrap()
3688                     .virtio_device()
3689                     .lock()
3690                     .unwrap()
3691                     .device_type(),
3692             );
3693             match device_type {
3694                 VirtioDeviceType::Net
3695                 | VirtioDeviceType::Block
3696                 | VirtioDeviceType::Pmem
3697                 | VirtioDeviceType::Fs
3698                 | VirtioDeviceType::Vsock => {}
3699                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
3700             }
3701         }
3702 
3703         // Update the PCID bitmap
3704         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
3705 
3706         Ok(())
3707     }
3708 
3709     pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
3710         info!(
3711             "Ejecting device_id = {} on segment_id={}",
3712             device_id, pci_segment_id
3713         );
3714 
3715         // Convert the device ID into the corresponding b/d/f.
3716         let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);
3717 
3718         // Give the PCI device ID back to the PCI bus.
3719         self.pci_segments[pci_segment_id as usize]
3720             .pci_bus
3721             .lock()
3722             .unwrap()
3723             .put_device_id(device_id as usize)
3724             .map_err(DeviceManagerError::PutPciDeviceId)?;
3725 
3726         // Remove the device from the device tree along with its children.
3727         let mut device_tree = self.device_tree.lock().unwrap();
3728         let pci_device_node = device_tree
3729             .remove_node_by_pci_bdf(pci_device_bdf)
3730             .ok_or(DeviceManagerError::MissingPciDevice)?;
3731 
3732         // For VFIO and vfio-user the PCI device id is the id.
3733         // For virtio we overwrite it later as we want the id of the
3734         // underlying device.
3735         let mut id = pci_device_node.id;
3736         let pci_device_handle = pci_device_node
3737             .pci_device_handle
3738             .ok_or(DeviceManagerError::MissingPciDevice)?;
3739         if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
3740             // The virtio-pci device has a single child
3741             if !pci_device_node.children.is_empty() {
3742                 assert_eq!(pci_device_node.children.len(), 1);
3743                 let child_id = &pci_device_node.children[0];
3744                 id = child_id.clone();
3745             }
3746         }
3747         for child in pci_device_node.children.iter() {
3748             device_tree.remove(child);
3749         }
3750 
3751         let mut iommu_attached = false;
3752         if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
3753             if iommu_attached_devices.contains(&pci_device_bdf) {
3754                 iommu_attached = true;
3755             }
3756         }
3757 
3758         let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
3759             // No need to remove any virtio-mem mapping here as the container outlives all devices
3760             PciDeviceHandle::Vfio(vfio_pci_device) => (
3761                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
3762                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
3763                 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
3764                 false,
3765             ),
3766             PciDeviceHandle::Virtio(virtio_pci_device) => {
3767                 let dev = virtio_pci_device.lock().unwrap();
3768                 let bar_addr = dev.config_bar_addr();
3769                 for (event, addr) in dev.ioeventfds(bar_addr) {
3770                     let io_addr = IoEventAddress::Mmio(addr);
3771                     self.address_manager
3772                         .vm
3773                         .unregister_ioevent(event, &io_addr)
3774                         .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
3775                 }
3776 
3777                 if let Some(dma_handler) = dev.dma_handler() {
3778                     if !iommu_attached {
3779                         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3780                             for region in zone.regions() {
3781                                 let iova = region.start_addr().0;
3782                                 let size = region.len();
3783                                 dma_handler
3784                                     .unmap(iova, size)
3785                                     .map_err(DeviceManagerError::VirtioDmaUnmap)?;
3786                             }
3787                         }
3788                     }
3789                 }
3790 
3791                 (
3792                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
3793                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
3794                     Some(dev.virtio_device()),
3795                     dev.dma_handler().is_some() && !iommu_attached,
3796                 )
3797             }
3798             PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
3799                 let mut dev = vfio_user_pci_device.lock().unwrap();
3800                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3801                     for region in zone.regions() {
3802                         dev.dma_unmap(region)
3803                             .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
3804                     }
3805                 }
3806 
3807                 (
3808                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
3809                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
3810                     None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
3811                     true,
3812                 )
3813             }
3814         };
3815 
3816         if remove_dma_handler {
3817             for virtio_mem_device in self.virtio_mem_devices.iter() {
3818                 virtio_mem_device
3819                     .lock()
3820                     .unwrap()
3821                     .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
3822                         pci_device_bdf.into(),
3823                     ))
3824                     .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
3825             }
3826         }
3827 
3828         // Free the allocated BARs
3829         pci_device
3830             .lock()
3831             .unwrap()
3832             .free_bars(
3833                 &mut self.address_manager.allocator.lock().unwrap(),
3834                 &mut self.pci_segments[pci_segment_id as usize]
3835                     .allocator
3836                     .lock()
3837                     .unwrap(),
3838             )
3839             .map_err(DeviceManagerError::FreePciBars)?;
3840 
3841         // Remove the device from the PCI bus
3842         self.pci_segments[pci_segment_id as usize]
3843             .pci_bus
3844             .lock()
3845             .unwrap()
3846             .remove_by_device(&pci_device)
3847             .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;
3848 
3849         #[cfg(target_arch = "x86_64")]
3850         // Remove the device from the IO bus
3851         self.io_bus()
3852             .remove_by_device(&bus_device)
3853             .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;
3854 
3855         // Remove the device from the MMIO bus
3856         self.mmio_bus()
3857             .remove_by_device(&bus_device)
3858             .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;
3859 
3860         // Remove the device from the list of BusDevice held by the
3861         // DeviceManager.
3862         self.bus_devices
3863             .retain(|dev| !Arc::ptr_eq(dev, &bus_device));
3864 
3865         // Shutdown and remove the underlying virtio-device if present
3866         if let Some(virtio_device) = virtio_device {
3867             for mapping in virtio_device.lock().unwrap().userspace_mappings() {
3868                 self.memory_manager
3869                     .lock()
3870                     .unwrap()
3871                     .remove_userspace_mapping(
3872                         mapping.addr.raw_value(),
3873                         mapping.len,
3874                         mapping.host_addr,
3875                         mapping.mergeable,
3876                         mapping.mem_slot,
3877                     )
3878                     .map_err(DeviceManagerError::MemoryManager)?;
3879             }
3880 
3881             virtio_device.lock().unwrap().shutdown();
3882 
3883             self.virtio_devices
3884                 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
3885         }
3886 
3887         event!(
3888             "vm",
3889             "device-removed",
3890             "id",
3891             &id,
3892             "bdf",
3893             pci_device_bdf.to_string()
3894         );
3895 
3896         // At this point, the device has been removed from all the list and
3897         // buses where it was stored. At the end of this function, after
3898         // any_device, bus_device and pci_device are released, the actual
3899         // device will be dropped.
3900         Ok(())
3901     }
3902 
3903     fn hotplug_virtio_pci_device(
3904         &mut self,
3905         handle: MetaVirtioDevice,
3906     ) -> DeviceManagerResult<PciDeviceInfo> {
3907         // Add the virtio device to the device manager list. This is important
3908         // as the list is used to notify virtio devices about memory updates
3909         // for instance.
3910         self.virtio_devices.push(handle.clone());
3911 
3912         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
3913             self.iommu_mapping.clone()
3914         } else {
3915             None
3916         };
3917 
3918         let bdf = self.add_virtio_pci_device(
3919             handle.virtio_device,
3920             &mapping,
3921             handle.id.clone(),
3922             handle.pci_segment,
3923             handle.dma_handler,
3924         )?;
3925 
3926         // Update the PCIU bitmap
3927         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3928 
3929         Ok(PciDeviceInfo { id: handle.id, bdf })
3930     }
3931 
3932     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
3933         self.config
3934             .lock()
3935             .as_ref()
3936             .unwrap()
3937             .platform
3938             .as_ref()
3939             .map(|pc| {
3940                 pc.iommu_segments
3941                     .as_ref()
3942                     .map(|v| v.contains(&pci_segment_id))
3943                     .unwrap_or_default()
3944             })
3945             .unwrap_or_default()
3946     }
3947 
3948     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
3949         self.validate_identifier(&disk_cfg.id)?;
3950 
3951         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
3952             return Err(DeviceManagerError::InvalidIommuHotplug);
3953         }
3954 
3955         let device = self.make_virtio_block_device(disk_cfg)?;
3956         self.hotplug_virtio_pci_device(device)
3957     }
3958 
3959     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
3960         self.validate_identifier(&fs_cfg.id)?;
3961 
3962         let device = self.make_virtio_fs_device(fs_cfg)?;
3963         self.hotplug_virtio_pci_device(device)
3964     }
3965 
3966     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
3967         self.validate_identifier(&pmem_cfg.id)?;
3968 
3969         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
3970             return Err(DeviceManagerError::InvalidIommuHotplug);
3971         }
3972 
3973         let device = self.make_virtio_pmem_device(pmem_cfg)?;
3974         self.hotplug_virtio_pci_device(device)
3975     }
3976 
3977     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
3978         self.validate_identifier(&net_cfg.id)?;
3979 
3980         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
3981             return Err(DeviceManagerError::InvalidIommuHotplug);
3982         }
3983 
3984         let device = self.make_virtio_net_device(net_cfg)?;
3985         self.hotplug_virtio_pci_device(device)
3986     }
3987 
3988     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
3989         self.validate_identifier(&vdpa_cfg.id)?;
3990 
3991         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
3992             return Err(DeviceManagerError::InvalidIommuHotplug);
3993         }
3994 
3995         let device = self.make_vdpa_device(vdpa_cfg)?;
3996         self.hotplug_virtio_pci_device(device)
3997     }
3998 
3999     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4000         self.validate_identifier(&vsock_cfg.id)?;
4001 
4002         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4003             return Err(DeviceManagerError::InvalidIommuHotplug);
4004         }
4005 
4006         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4007         self.hotplug_virtio_pci_device(device)
4008     }
4009 
4010     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4011         let mut counters = HashMap::new();
4012 
4013         for handle in &self.virtio_devices {
4014             let virtio_device = handle.virtio_device.lock().unwrap();
4015             if let Some(device_counters) = virtio_device.counters() {
4016                 counters.insert(handle.id.clone(), device_counters.clone());
4017             }
4018         }
4019 
4020         counters
4021     }
4022 
4023     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4024         if let Some(balloon) = &self.balloon {
4025             return balloon
4026                 .lock()
4027                 .unwrap()
4028                 .resize(size)
4029                 .map_err(DeviceManagerError::VirtioBalloonResize);
4030         }
4031 
4032         warn!("No balloon setup: Can't resize the balloon");
4033         Err(DeviceManagerError::MissingVirtioBalloon)
4034     }
4035 
4036     pub fn balloon_size(&self) -> u64 {
4037         if let Some(balloon) = &self.balloon {
4038             return balloon.lock().unwrap().get_actual();
4039         }
4040 
4041         0
4042     }
4043 
4044     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4045         self.device_tree.clone()
4046     }
4047 
4048     pub fn restore_devices(
4049         &mut self,
4050         snapshot: Snapshot,
4051     ) -> std::result::Result<(), MigratableError> {
4052         // Finally, restore all devices associated with the DeviceManager.
4053         // It's important to restore devices in the right order, that's why
4054         // the device tree is the right way to ensure we restore a child before
4055         // its parent node.
4056         for node in self
4057             .device_tree
4058             .lock()
4059             .unwrap()
4060             .breadth_first_traversal()
4061             .rev()
4062         {
4063             // Restore the node
4064             if let Some(migratable) = &node.migratable {
4065                 info!("Restoring {} from DeviceManager", node.id);
4066                 if let Some(snapshot) = snapshot.snapshots.get(&node.id) {
4067                     migratable.lock().unwrap().pause()?;
4068                     migratable.lock().unwrap().restore(*snapshot.clone())?;
4069                 } else {
4070                     return Err(MigratableError::Restore(anyhow!(
4071                         "Missing device {}",
4072                         node.id
4073                     )));
4074                 }
4075             }
4076         }
4077 
4078         // The devices have been fully restored, we can now update the
4079         // restoring state of the DeviceManager.
4080         self.restoring = false;
4081 
4082         Ok(())
4083     }
4084 
4085     #[cfg(target_arch = "x86_64")]
4086     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4087         self.ged_notification_device
4088             .as_ref()
4089             .unwrap()
4090             .lock()
4091             .unwrap()
4092             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4093             .map_err(DeviceManagerError::PowerButtonNotification)
4094     }
4095 
4096     #[cfg(target_arch = "aarch64")]
4097     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4098         // There are two use cases:
4099         // 1. Users will use direct kernel boot with device tree.
4100         // 2. Users will use ACPI+UEFI boot.
4101 
4102         // Trigger a GPIO pin 3 event to satisify use case 1.
4103         self.gpio_device
4104             .as_ref()
4105             .unwrap()
4106             .lock()
4107             .unwrap()
4108             .trigger_key(3)
4109             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4110         // Trigger a GED power button event to satisify use case 2.
4111         return self
4112             .ged_notification_device
4113             .as_ref()
4114             .unwrap()
4115             .lock()
4116             .unwrap()
4117             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4118             .map_err(DeviceManagerError::PowerButtonNotification);
4119     }
4120 
4121     pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
4122         &self.iommu_attached_devices
4123     }
4124 
4125     fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4126         if let Some(id) = id {
4127             if id.starts_with("__") {
4128                 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4129             }
4130 
4131             if self.device_tree.lock().unwrap().contains_key(id) {
4132                 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4133             }
4134         }
4135 
4136         Ok(())
4137     }
4138 
4139     pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
4140         &self.acpi_platform_addresses
4141     }
4142 }
4143 
4144 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4145     for (numa_node_id, numa_node) in numa_nodes.iter() {
4146         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4147             return Some(*numa_node_id);
4148         }
4149     }
4150 
4151     None
4152 }
4153 
4154 impl Aml for DeviceManager {
4155     fn append_aml_bytes(&self, bytes: &mut Vec<u8>) {
4156         #[cfg(target_arch = "aarch64")]
4157         use arch::aarch64::DeviceInfoForFdt;
4158 
4159         let mut pci_scan_methods = Vec::new();
4160         for i in 0..self.pci_segments.len() {
4161             pci_scan_methods.push(aml::MethodCall::new(
4162                 format!("\\_SB_.PCI{:X}.PCNT", i).as_str().into(),
4163                 vec![],
4164             ));
4165         }
4166         let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
4167         for method in &pci_scan_methods {
4168             pci_scan_inner.push(method)
4169         }
4170 
4171         // PCI hotplug controller
4172         aml::Device::new(
4173             "_SB_.PHPR".into(),
4174             vec![
4175                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
4176                 &aml::Name::new("_STA".into(), &0x0bu8),
4177                 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
4178                 &aml::Mutex::new("BLCK".into(), 0),
4179                 &aml::Name::new(
4180                     "_CRS".into(),
4181                     &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
4182                         aml::AddressSpaceCachable::NotCacheable,
4183                         true,
4184                         self.acpi_address.0 as u64,
4185                         self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
4186                     )]),
4187                 ),
4188                 // OpRegion and Fields map MMIO range into individual field values
4189                 &aml::OpRegion::new(
4190                     "PCST".into(),
4191                     aml::OpRegionSpace::SystemMemory,
4192                     self.acpi_address.0 as usize,
4193                     DEVICE_MANAGER_ACPI_SIZE,
4194                 ),
4195                 &aml::Field::new(
4196                     "PCST".into(),
4197                     aml::FieldAccessType::DWord,
4198                     aml::FieldUpdateRule::WriteAsZeroes,
4199                     vec![
4200                         aml::FieldEntry::Named(*b"PCIU", 32),
4201                         aml::FieldEntry::Named(*b"PCID", 32),
4202                         aml::FieldEntry::Named(*b"B0EJ", 32),
4203                         aml::FieldEntry::Named(*b"PSEG", 32),
4204                     ],
4205                 ),
4206                 &aml::Method::new(
4207                     "PCEJ".into(),
4208                     2,
4209                     true,
4210                     vec![
4211                         // Take lock defined above
4212                         &aml::Acquire::new("BLCK".into(), 0xffff),
4213                         // Choose the current segment
4214                         &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
4215                         // Write PCI bus number (in first argument) to I/O port via field
4216                         &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
4217                         // Release lock
4218                         &aml::Release::new("BLCK".into()),
4219                         // Return 0
4220                         &aml::Return::new(&aml::ZERO),
4221                     ],
4222                 ),
4223                 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
4224             ],
4225         )
4226         .append_aml_bytes(bytes);
4227 
4228         for segment in &self.pci_segments {
4229             segment.append_aml_bytes(bytes);
4230         }
4231 
4232         let mut mbrd_memory = Vec::new();
4233 
4234         for segment in &self.pci_segments {
4235             mbrd_memory.push(aml::Memory32Fixed::new(
4236                 true,
4237                 segment.mmio_config_address as u32,
4238                 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
4239             ))
4240         }
4241 
4242         let mut mbrd_memory_refs = Vec::new();
4243         for mbrd_memory_ref in &mbrd_memory {
4244             mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
4245         }
4246 
4247         aml::Device::new(
4248             "_SB_.MBRD".into(),
4249             vec![
4250                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")),
4251                 &aml::Name::new("_UID".into(), &aml::ZERO),
4252                 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
4253             ],
4254         )
4255         .append_aml_bytes(bytes);
4256 
4257         // Serial device
4258         #[cfg(target_arch = "x86_64")]
4259         let serial_irq = 4;
4260         #[cfg(target_arch = "aarch64")]
4261         let serial_irq =
4262             if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
4263                 self.get_device_info()
4264                     .clone()
4265                     .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
4266                     .unwrap()
4267                     .irq()
4268             } else {
4269                 // If serial is turned off, add a fake device with invalid irq.
4270                 31
4271             };
4272         if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
4273             aml::Device::new(
4274                 "_SB_.COM1".into(),
4275                 vec![
4276                     &aml::Name::new(
4277                         "_HID".into(),
4278                         #[cfg(target_arch = "x86_64")]
4279                         &aml::EisaName::new("PNP0501"),
4280                         #[cfg(target_arch = "aarch64")]
4281                         &"ARMH0011",
4282                     ),
4283                     &aml::Name::new("_UID".into(), &aml::ZERO),
4284                     &aml::Name::new("_DDN".into(), &"COM1"),
4285                     &aml::Name::new(
4286                         "_CRS".into(),
4287                         &aml::ResourceTemplate::new(vec![
4288                             &aml::Interrupt::new(true, true, false, false, serial_irq),
4289                             #[cfg(target_arch = "x86_64")]
4290                             &aml::Io::new(0x3f8, 0x3f8, 0, 0x8),
4291                             #[cfg(target_arch = "aarch64")]
4292                             &aml::Memory32Fixed::new(
4293                                 true,
4294                                 arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
4295                                 MMIO_LEN as u32,
4296                             ),
4297                         ]),
4298                     ),
4299                 ],
4300             )
4301             .append_aml_bytes(bytes);
4302         }
4303 
4304         aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).append_aml_bytes(bytes);
4305 
4306         aml::Device::new(
4307             "_SB_.PWRB".into(),
4308             vec![
4309                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")),
4310                 &aml::Name::new("_UID".into(), &aml::ZERO),
4311             ],
4312         )
4313         .append_aml_bytes(bytes);
4314 
4315         self.ged_notification_device
4316             .as_ref()
4317             .unwrap()
4318             .lock()
4319             .unwrap()
4320             .append_aml_bytes(bytes);
4321     }
4322 }
4323 
4324 impl Pausable for DeviceManager {
4325     fn pause(&mut self) -> result::Result<(), MigratableError> {
4326         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4327             if let Some(migratable) = &device_node.migratable {
4328                 migratable.lock().unwrap().pause()?;
4329             }
4330         }
4331         // On AArch64, the pause of device manager needs to trigger
4332         // a "pause" of GIC, which will flush the GIC pending tables
4333         // and ITS tables to guest RAM.
4334         #[cfg(target_arch = "aarch64")]
4335         {
4336             self.get_interrupt_controller()
4337                 .unwrap()
4338                 .lock()
4339                 .unwrap()
4340                 .pause()?;
4341         };
4342 
4343         Ok(())
4344     }
4345 
4346     fn resume(&mut self) -> result::Result<(), MigratableError> {
4347         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4348             if let Some(migratable) = &device_node.migratable {
4349                 migratable.lock().unwrap().resume()?;
4350             }
4351         }
4352 
4353         Ok(())
4354     }
4355 }
4356 
4357 impl Snapshottable for DeviceManager {
4358     fn id(&self) -> String {
4359         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4360     }
4361 
4362     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4363         let mut snapshot = Snapshot::new(DEVICE_MANAGER_SNAPSHOT_ID);
4364 
4365         // We aggregate all devices snapshots.
4366         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4367             if let Some(migratable) = &device_node.migratable {
4368                 let device_snapshot = migratable.lock().unwrap().snapshot()?;
4369                 snapshot.add_snapshot(device_snapshot);
4370             }
4371         }
4372 
4373         // Then we store the DeviceManager state.
4374         snapshot.add_data_section(SnapshotDataSection::new_from_state(
4375             DEVICE_MANAGER_SNAPSHOT_ID,
4376             &self.state(),
4377         )?);
4378 
4379         Ok(snapshot)
4380     }
4381 
4382     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
4383         // Let's first restore the DeviceManager.
4384 
4385         self.set_state(&snapshot.to_state(DEVICE_MANAGER_SNAPSHOT_ID)?);
4386 
4387         // Now that DeviceManager is updated with the right states, it's time
4388         // to create the devices based on the configuration.
4389         self.create_devices(None, None, None)
4390             .map_err(|e| MigratableError::Restore(anyhow!("Could not create devices {:?}", e)))?;
4391 
4392         Ok(())
4393     }
4394 }
4395 
4396 impl Transportable for DeviceManager {}
4397 
4398 impl Migratable for DeviceManager {
4399     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4400         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4401             if let Some(migratable) = &device_node.migratable {
4402                 migratable.lock().unwrap().start_dirty_log()?;
4403             }
4404         }
4405         Ok(())
4406     }
4407 
4408     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4409         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4410             if let Some(migratable) = &device_node.migratable {
4411                 migratable.lock().unwrap().stop_dirty_log()?;
4412             }
4413         }
4414         Ok(())
4415     }
4416 
4417     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4418         let mut tables = Vec::new();
4419         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4420             if let Some(migratable) = &device_node.migratable {
4421                 tables.push(migratable.lock().unwrap().dirty_log()?);
4422             }
4423         }
4424         Ok(MemoryRangeTable::new_from_tables(tables))
4425     }
4426 
4427     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4428         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4429             if let Some(migratable) = &device_node.migratable {
4430                 migratable.lock().unwrap().start_migration()?;
4431             }
4432         }
4433         Ok(())
4434     }
4435 
4436     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4437         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4438             if let Some(migratable) = &device_node.migratable {
4439                 migratable.lock().unwrap().complete_migration()?;
4440             }
4441         }
4442         Ok(())
4443     }
4444 }
4445 
// Layout of the PCI hotplug register block: four consecutive 4-byte fields
// (PCIU, PCID, B0EJ, PSEG), in the same order as the AML `PCST` field
// declaration. Each offset is derived from the size of the fields before it.
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;
const PCIU_FIELD_OFFSET: u64 = 0;
const PCID_FIELD_OFFSET: u64 = PCIU_FIELD_OFFSET + PCIU_FIELD_SIZE as u64;
const B0EJ_FIELD_OFFSET: u64 = PCID_FIELD_OFFSET + PCID_FIELD_SIZE as u64;
const PSEG_FIELD_OFFSET: u64 = B0EJ_FIELD_OFFSET + B0EJ_FIELD_SIZE as u64;
4454 
4455 impl BusDevice for DeviceManager {
4456     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4457         match offset {
4458             PCIU_FIELD_OFFSET => {
4459                 assert!(data.len() == PCIU_FIELD_SIZE);
4460                 data.copy_from_slice(
4461                     &self.pci_segments[self.selected_segment]
4462                         .pci_devices_up
4463                         .to_le_bytes(),
4464                 );
4465                 // Clear the PCIU bitmap
4466                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4467             }
4468             PCID_FIELD_OFFSET => {
4469                 assert!(data.len() == PCID_FIELD_SIZE);
4470                 data.copy_from_slice(
4471                     &self.pci_segments[self.selected_segment]
4472                         .pci_devices_down
4473                         .to_le_bytes(),
4474                 );
4475                 // Clear the PCID bitmap
4476                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4477             }
4478             B0EJ_FIELD_OFFSET => {
4479                 assert!(data.len() == B0EJ_FIELD_SIZE);
4480                 // Always return an empty bitmap since the eject is always
4481                 // taken care of right away during a write access.
4482                 data.fill(0);
4483             }
4484             PSEG_FIELD_OFFSET => {
4485                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4486                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4487             }
4488             _ => error!(
4489                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4490                 base, offset
4491             ),
4492         }
4493 
4494         debug!(
4495             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4496             base, offset, data
4497         )
4498     }
4499 
4500     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4501         match offset {
4502             B0EJ_FIELD_OFFSET => {
4503                 assert!(data.len() == B0EJ_FIELD_SIZE);
4504                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4505                 data_array.copy_from_slice(data);
4506                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4507 
4508                 while slot_bitmap > 0 {
4509                     let slot_id = slot_bitmap.trailing_zeros();
4510                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4511                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4512                     }
4513                     slot_bitmap &= !(1 << slot_id);
4514                 }
4515             }
4516             PSEG_FIELD_OFFSET => {
4517                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4518                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4519                 data_array.copy_from_slice(data);
4520                 let selected_segment = u32::from_le_bytes(data_array) as usize;
4521                 if selected_segment >= self.pci_segments.len() {
4522                     error!(
4523                         "Segment selection out of range: {} >= {}",
4524                         selected_segment,
4525                         self.pci_segments.len()
4526                     );
4527                     return None;
4528                 }
4529                 self.selected_segment = selected_segment;
4530             }
4531             _ => error!(
4532                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4533                 base, offset
4534             ),
4535         }
4536 
4537         debug!(
4538             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4539             base, offset, data
4540         );
4541 
4542         None
4543     }
4544 }
4545 
4546 impl Drop for DeviceManager {
4547     fn drop(&mut self) {
4548         for handle in self.virtio_devices.drain(..) {
4549             handle.virtio_device.lock().unwrap().shutdown();
4550         }
4551     }
4552 }
4553