xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision b440cb7d2330770cd415b63544a371d4caa2db3a)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use crate::config::{
13     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
14     VdpaConfig, VhostMode, VmConfig, VsockConfig,
15 };
16 use crate::device_tree::{DeviceNode, DeviceTree};
17 use crate::interrupt::LegacyUserspaceInterruptManager;
18 use crate::interrupt::MsiInterruptManager;
19 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
20 use crate::pci_segment::PciSegment;
21 use crate::seccomp_filters::{get_seccomp_filter, Thread};
22 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
23 use crate::sigwinch_listener::start_sigwinch_listener;
24 #[cfg(target_arch = "aarch64")]
25 use crate::GuestMemoryMmap;
26 use crate::GuestRegionMmap;
27 use crate::PciDeviceInfo;
28 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
29 use acpi_tables::{aml, aml::Aml};
30 use anyhow::anyhow;
31 use arch::layout;
32 #[cfg(target_arch = "x86_64")]
33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
34 use arch::NumaNodes;
35 #[cfg(target_arch = "aarch64")]
36 use arch::{DeviceType, MmioDeviceInfo};
37 use block_util::{
38     async_io::DiskFile, block_io_uring_is_supported, detect_image_type,
39     fixed_vhd_async::FixedVhdDiskAsync, fixed_vhd_sync::FixedVhdDiskSync, qcow_sync::QcowDiskSync,
40     raw_async::RawFileDisk, raw_sync::RawFileDiskSync, vhdx_sync::VhdxDiskSync, ImageType,
41 };
42 #[cfg(target_arch = "aarch64")]
43 use devices::gic;
44 #[cfg(target_arch = "x86_64")]
45 use devices::ioapic;
46 #[cfg(target_arch = "aarch64")]
47 use devices::legacy::Pl011;
48 #[cfg(target_arch = "x86_64")]
49 use devices::legacy::Serial;
50 use devices::{
51     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
52 };
53 use hypervisor::{DeviceFd, HypervisorVmError, IoEventAddress};
54 use libc::{
55     cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED,
56     O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW,
57 };
58 #[cfg(target_arch = "x86_64")]
59 use pci::PciConfigIo;
60 use pci::{
61     DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping,
62     VfioUserPciDevice, VfioUserPciDeviceError,
63 };
64 use seccompiler::SeccompAction;
65 use serde::{Deserialize, Serialize};
66 use std::collections::{BTreeSet, HashMap};
67 use std::convert::TryInto;
68 use std::fs::{read_link, File, OpenOptions};
69 use std::io::{self, stdout, Seek, SeekFrom};
70 use std::mem::zeroed;
71 use std::num::Wrapping;
72 use std::os::unix::fs::OpenOptionsExt;
73 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
74 use std::path::PathBuf;
75 use std::result;
76 use std::sync::{Arc, Mutex};
77 use std::time::Instant;
78 use vfio_ioctls::{VfioContainer, VfioDevice};
79 use virtio_devices::transport::VirtioTransport;
80 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator};
81 use virtio_devices::vhost_user::VhostUserConfig;
82 use virtio_devices::{
83     AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource,
84 };
85 use virtio_devices::{Endpoint, IommuMapping};
86 use vm_allocator::{AddressAllocator, SystemAllocator};
87 use vm_device::dma_mapping::vfio::VfioDmaMapping;
88 use vm_device::dma_mapping::ExternalDmaMapping;
89 use vm_device::interrupt::{
90     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
91 };
92 use vm_device::{Bus, BusDevice, Resource};
93 use vm_memory::guest_memory::FileOffset;
94 #[cfg(target_arch = "aarch64")]
95 use vm_memory::GuestMemoryAtomic;
96 use vm_memory::GuestMemoryRegion;
97 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
98 #[cfg(target_arch = "x86_64")]
99 use vm_memory::{GuestAddressSpace, GuestMemory};
100 use vm_migration::{
101     protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
102     SnapshotDataSection, Snapshottable, Transportable,
103 };
104 use vm_virtio::AccessPlatform;
105 use vm_virtio::VirtioDeviceType;
106 use vmm_sys_util::eventfd::EventFd;
107 
108 #[cfg(target_arch = "aarch64")]
109 const MMIO_LEN: u64 = 0x1000;
110 
111 // Singleton devices / devices the user cannot name
112 #[cfg(target_arch = "x86_64")]
113 const IOAPIC_DEVICE_NAME: &str = "__ioapic";
114 const SERIAL_DEVICE_NAME: &str = "__serial";
115 #[cfg(target_arch = "aarch64")]
116 const GPIO_DEVICE_NAME: &str = "__gpio";
117 const RNG_DEVICE_NAME: &str = "__rng";
118 const IOMMU_DEVICE_NAME: &str = "__iommu";
119 const BALLOON_DEVICE_NAME: &str = "__balloon";
120 const CONSOLE_DEVICE_NAME: &str = "__console";
121 
122 // Devices that the user may name and for which we generate
123 // identifiers if the user doesn't give one
124 const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
125 const FS_DEVICE_NAME_PREFIX: &str = "_fs";
126 const NET_DEVICE_NAME_PREFIX: &str = "_net";
127 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
128 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
129 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
130 const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
131 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
132 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
133 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
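// For example, a disk added without an explicit `id` is typically given a
// generated identifier such as "_disk0", built from the prefix above and the
// DeviceManager's device ID counter.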
134 
135 /// Errors associated with device manager
136 #[derive(Debug)]
137 pub enum DeviceManagerError {
138     /// Cannot create EventFd.
139     EventFd(io::Error),
140 
141     /// Cannot open disk path
142     Disk(io::Error),
143 
144     /// Cannot create vhost-user-net device
145     CreateVhostUserNet(virtio_devices::vhost_user::Error),
146 
147     /// Cannot create virtio-blk device
148     CreateVirtioBlock(io::Error),
149 
150     /// Cannot create virtio-net device
151     CreateVirtioNet(virtio_devices::net::Error),
152 
153     /// Cannot create virtio-console device
154     CreateVirtioConsole(io::Error),
155 
156     /// Cannot create virtio-rng device
157     CreateVirtioRng(io::Error),
158 
159     /// Cannot create virtio-fs device
160     CreateVirtioFs(virtio_devices::vhost_user::Error),
161 
162     /// Virtio-fs device was created without a socket.
163     NoVirtioFsSock,
164 
165     /// Cannot create vhost-user-blk device
166     CreateVhostUserBlk(virtio_devices::vhost_user::Error),
167 
168     /// Cannot create virtio-pmem device
169     CreateVirtioPmem(io::Error),
170 
171     /// Cannot create vDPA device
172     CreateVdpa(virtio_devices::vdpa::Error),
173 
174     /// Cannot create virtio-vsock device
175     CreateVirtioVsock(io::Error),
176 
177     /// Failed to convert Path to &str for the vDPA device.
178     CreateVdpaConvertPath,
179 
180     /// Failed to convert Path to &str for the virtio-vsock device.
181     CreateVsockConvertPath,
182 
183     /// Cannot create virtio-vsock backend
184     CreateVsockBackend(virtio_devices::vsock::VsockUnixError),
185 
186     /// Cannot create virtio-iommu device
187     CreateVirtioIommu(io::Error),
188 
189     /// Cannot create virtio-balloon device
190     CreateVirtioBalloon(io::Error),
191 
192     /// Cannot create virtio-watchdog device
193     CreateVirtioWatchdog(io::Error),
194 
195     /// Failed to parse disk image format
196     DetectImageType(io::Error),
197 
198     /// Cannot open qcow disk path
199     QcowDeviceCreate(qcow::Error),
200 
201     /// Cannot create serial manager
202     CreateSerialManager(SerialManagerError),
203 
204     /// Cannot spawn the serial manager thread
205     SpawnSerialManager(SerialManagerError),
206 
207     /// Cannot open tap interface
208     OpenTap(net_util::TapError),
209 
210     /// Cannot allocate IRQ.
211     AllocateIrq,
212 
213     /// Cannot configure the IRQ.
214     Irq(vmm_sys_util::errno::Error),
215 
216     /// Cannot allocate PCI BARs
217     AllocateBars(pci::PciDeviceError),
218 
219     /// Could not free the BARs associated with a PCI device.
220     FreePciBars(pci::PciDeviceError),
221 
222     /// Cannot register ioevent.
223     RegisterIoevent(anyhow::Error),
224 
225     /// Cannot unregister ioevent.
226     UnRegisterIoevent(anyhow::Error),
227 
228     /// Cannot create virtio device
229     VirtioDevice(vmm_sys_util::errno::Error),
230 
231     /// Cannot add PCI device
232     AddPciDevice(pci::PciRootError),
233 
234     /// Cannot open persistent memory file
235     PmemFileOpen(io::Error),
236 
237     /// Cannot set persistent memory file size
238     PmemFileSetLen(io::Error),
239 
240     /// Cannot find a memory range for persistent memory
241     PmemRangeAllocation,
242 
243     /// Cannot find a memory range for virtio-fs
244     FsRangeAllocation,
245 
246     /// Error creating serial output file
247     SerialOutputFileOpen(io::Error),
248 
249     /// Error creating console output file
250     ConsoleOutputFileOpen(io::Error),
251 
252     /// Error creating serial pty
253     SerialPtyOpen(io::Error),
254 
255     /// Error creating console pty
256     ConsolePtyOpen(io::Error),
257 
258     /// Error setting pty raw mode
259     SetPtyRaw(vmm_sys_util::errno::Error),
260 
261     /// Error getting pty peer
262     GetPtyPeer(vmm_sys_util::errno::Error),
263 
264     /// Cannot create a VFIO device
265     VfioCreate(vfio_ioctls::VfioError),
266 
267     /// Cannot create a VFIO PCI device
268     VfioPciCreate(pci::VfioPciError),
269 
270     /// Failed to map VFIO MMIO region.
271     VfioMapRegion(pci::VfioPciError),
272 
273     /// Failed to DMA map VFIO device.
274     VfioDmaMap(vfio_ioctls::VfioError),
275 
276     /// Failed to DMA unmap VFIO device.
277     VfioDmaUnmap(pci::VfioPciError),
278 
279     /// Failed to create the passthrough device.
280     CreatePassthroughDevice(anyhow::Error),
281 
282     /// Failed to memory map.
283     Mmap(io::Error),
284 
285     /// Cannot add legacy device to Bus.
286     BusError(vm_device::BusError),
287 
288     /// Failed to allocate IO port
289     AllocateIoPort,
290 
291     /// Failed to allocate MMIO address
292     AllocateMmioAddress,
293 
294     /// Failed to make hotplug notification
295     HotPlugNotification(io::Error),
296 
297     /// Error from a memory manager operation
298     MemoryManager(MemoryManagerError),
299 
300     /// Failed to create new interrupt source group.
301     CreateInterruptGroup(io::Error),
302 
303     /// Failed to update interrupt source group.
304     UpdateInterruptGroup(io::Error),
305 
306     /// Failed to create interrupt controller.
307     CreateInterruptController(interrupt_controller::Error),
308 
309     /// Failed to create a new MmapRegion instance.
310     NewMmapRegion(vm_memory::mmap::MmapRegionError),
311 
312     /// Failed to clone a File.
313     CloneFile(io::Error),
314 
315     /// Failed to create socket file
316     CreateSocketFile(io::Error),
317 
318     /// Failed to spawn the network backend
319     SpawnNetBackend(io::Error),
320 
321     /// Failed to spawn the block backend
322     SpawnBlockBackend(io::Error),
323 
324     /// Missing PCI bus.
325     NoPciBus,
326 
327     /// Could not find an available device name.
328     NoAvailableDeviceName,
329 
330     /// Missing PCI device.
331     MissingPciDevice,
332 
333     /// Failed to remove a PCI device from the PCI bus.
334     RemoveDeviceFromPciBus(pci::PciRootError),
335 
336     /// Failed to remove a bus device from the IO bus.
337     RemoveDeviceFromIoBus(vm_device::BusError),
338 
339     /// Failed to remove a bus device from the MMIO bus.
340     RemoveDeviceFromMmioBus(vm_device::BusError),
341 
342     /// Failed to find the device corresponding to a specific PCI b/d/f.
343     UnknownPciBdf(u32),
344 
345     /// Not allowed to remove this type of device from the VM.
346     RemovalNotAllowed(vm_virtio::VirtioDeviceType),
347 
348     /// Failed to find device corresponding to the given identifier.
349     UnknownDeviceId(String),
350 
351     /// Failed to find an available PCI device ID.
352     NextPciDeviceId(pci::PciRootError),
353 
354     /// Could not reserve the PCI device ID.
355     GetPciDeviceId(pci::PciRootError),
356 
357     /// Could not give the PCI device ID back.
358     PutPciDeviceId(pci::PciRootError),
359 
360     /// No disk path was specified when one was expected
361     NoDiskPath,
362 
363     /// Failed to update guest memory for virtio device.
364     UpdateMemoryForVirtioDevice(virtio_devices::Error),
365 
366     /// Cannot create virtio-mem device
367     CreateVirtioMem(io::Error),
368 
369     /// Cannot generate a ResizeSender from the Resize object.
370     CreateResizeSender(virtio_devices::mem::Error),
371 
372     /// Cannot find a memory range for virtio-mem memory
373     VirtioMemRangeAllocation,
374 
375     /// Failed to update guest memory for VFIO PCI device.
376     UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),
377 
378     /// Trying to use a directory for pmem but no size specified
379     PmemWithDirectorySizeMissing,
380 
381     /// Trying to use a size that is not multiple of 2MiB
382     PmemSizeNotAligned,
383 
384     /// Could not find the node in the device tree.
385     MissingNode,
386 
387     /// Resource was already found.
388     ResourceAlreadyExists,
389 
390     /// Expected resources for virtio-pmem could not be found.
391     MissingVirtioPmemResources,
392 
393     /// Missing PCI b/d/f from the DeviceNode.
394     MissingDeviceNodePciBdf,
395 
396     /// No support for device passthrough
397     NoDevicePassthroughSupport,
398 
399     /// Failed to resize virtio-balloon
400     VirtioBalloonResize(virtio_devices::balloon::Error),
401 
402     /// Missing virtio-balloon, can't proceed as expected.
403     MissingVirtioBalloon,
404 
405     /// Missing virtual IOMMU device
406     MissingVirtualIommu,
407 
408     /// Failed to do power button notification
409     PowerButtonNotification(io::Error),
410 
411     /// Failed to do AArch64 GPIO power button notification
412     #[cfg(target_arch = "aarch64")]
413     AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),
414 
415     /// Failed to set the O_DIRECT flag on a file descriptor
416     SetDirectIo,
417 
418     /// Failed to create FixedVhdDiskAsync
419     CreateFixedVhdDiskAsync(io::Error),
420 
421     /// Failed to create FixedVhdDiskSync
422     CreateFixedVhdDiskSync(io::Error),
423 
424     /// Failed to create QcowDiskSync
425     CreateQcowDiskSync(qcow::Error),
426 
427     /// Failed to create FixedVhdxDiskSync
428     CreateFixedVhdxDiskSync(vhdx::vhdx::VhdxError),
429 
430     /// Failed to add DMA mapping handler to virtio-mem device.
431     AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
432 
433     /// Failed to remove DMA mapping handler from virtio-mem device.
434     RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
435 
436     /// Failed to create vfio-user client
437     VfioUserCreateClient(vfio_user::Error),
438 
439     /// Failed to create VFIO user device
440     VfioUserCreate(VfioUserPciDeviceError),
441 
442     /// Failed to map region from VFIO user device into guest
443     VfioUserMapRegion(VfioUserPciDeviceError),
444 
445     /// Failed to DMA map VFIO user device.
446     VfioUserDmaMap(VfioUserPciDeviceError),
447 
448     /// Failed to DMA unmap VFIO user device.
449     VfioUserDmaUnmap(VfioUserPciDeviceError),
450 
451     /// Failed to update memory mappings for VFIO user device
452     UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),
453 
454     /// Cannot duplicate file descriptor
455     DupFd(vmm_sys_util::errno::Error),
456 
457     /// Failed to DMA map virtio device.
458     VirtioDmaMap(std::io::Error),
459 
460     /// Failed to DMA unmap virtio device.
461     VirtioDmaUnmap(std::io::Error),
462 
463     /// Cannot hotplug device behind vIOMMU
464     InvalidIommuHotplug,
465 
466     /// Failed to create UEFI flash
467     CreateUefiFlash(HypervisorVmError),
468 
469     /// Invalid identifier as it is not unique.
470     IdentifierNotUnique(String),
471 
472     /// Invalid identifier
473     InvalidIdentifier(String),
474 
475     /// Error activating virtio device
476     VirtioActivate(ActivateError),
477 }
478 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;
479 
480 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;
481 
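// ioctl request numbers used when setting up a pseudo-terminal:
// - TIOCSPTLCK clears the pty lock so that the peer end can be opened.
// - TIOCGTPEER opens the peer (sub) end of the pty and returns its fd.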
482 const TIOCSPTLCK: libc::c_int = 0x4004_5431;
483 const TIOCGTPEER: libc::c_int = 0x5441;
484 
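/// Allocate a new pseudo-terminal, returning the main end, the sub (peer)
/// end and the filesystem path of the sub end.
///
/// Illustrative usage (a sketch only; error handling and the way the ends
/// are wired up are left to the caller):
///
/// ```ignore
/// let (main, sub, path) = create_pty(true)?;
/// // Keep `main` in the VMM; expose `sub`/`path` for the user to attach to.
/// ```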
485 pub fn create_pty(non_blocking: bool) -> io::Result<(File, File, PathBuf)> {
486     // Try to use /dev/pts/ptmx first, then fall back to /dev/ptmx.
487     // This is done to try to use the devpts filesystem that
488     // may be available in the process's namespace first.
489     // Ideally these would all be the same file, but different
490     // kernels could have things set up differently.
491     // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
492     // for further details.
493 
494     let custom_flags = libc::O_NOCTTY | if non_blocking { libc::O_NONBLOCK } else { 0 };
495     let main = match OpenOptions::new()
496         .read(true)
497         .write(true)
498         .custom_flags(custom_flags)
499         .open("/dev/pts/ptmx")
500     {
501         Ok(f) => f,
502         _ => OpenOptions::new()
503             .read(true)
504             .write(true)
505             .custom_flags(custom_flags)
506             .open("/dev/ptmx")?,
507     };
508     let mut unlock: libc::c_ulong = 0;
509     // SAFETY: FFI call into libc, trivially safe
510     unsafe {
511         libc::ioctl(
512             main.as_raw_fd(),
513             TIOCSPTLCK.try_into().unwrap(),
514             &mut unlock,
515         )
516     };
517 
518     // SAFETY: FFI call into libc, trivially safe
519     let sub_fd = unsafe {
520         libc::ioctl(
521             main.as_raw_fd(),
522             TIOCGTPEER.try_into().unwrap(),
523             libc::O_NOCTTY | libc::O_RDWR,
524         )
525     };
526     if sub_fd == -1 {
527         return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
528     }
529 
530     let proc_path = PathBuf::from(format!("/proc/self/fd/{}", sub_fd));
531     let path = read_link(proc_path)?;
532 
533     // SAFETY: sub_fd is checked to be valid before being wrapped in File
534     Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
535 }
536 
537 #[derive(Default)]
538 pub struct Console {
539     console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
540 }
541 
542 impl Console {
543     pub fn update_console_size(&self) {
544         if let Some(resizer) = self.console_resizer.as_ref() {
545             resizer.update_console_size()
546         }
547     }
548 }
549 
550 pub(crate) struct AddressManager {
551     pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
552     #[cfg(target_arch = "x86_64")]
553     pub(crate) io_bus: Arc<Bus>,
554     pub(crate) mmio_bus: Arc<Bus>,
555     vm: Arc<dyn hypervisor::Vm>,
556     device_tree: Arc<Mutex<DeviceTree>>,
557     pci_mmio_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
558 }
559 
560 impl DeviceRelocation for AddressManager {
561     fn move_bar(
562         &self,
563         old_base: u64,
564         new_base: u64,
565         len: u64,
566         pci_dev: &mut dyn PciDevice,
567         region_type: PciBarRegionType,
568     ) -> std::result::Result<(), std::io::Error> {
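        // Relocating a BAR involves several coordinated updates:
        //   1. free the old range and allocate the new one from the matching
        //      allocator (PIO, 32-bit MMIO hole, or the per-segment 64-bit
        //      MMIO allocator the BAR originally came from);
        //   2. update the corresponding IO or MMIO bus range;
        //   3. patch the PciBar resource recorded in the device tree;
        //   4. for virtio-pci devices, re-register ioeventfds or remap any
        //      shared memory region that was living at the old address;
        //   5. finally let the PCI device itself update its BAR register.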
569         match region_type {
570             PciBarRegionType::IoRegion => {
571                 #[cfg(target_arch = "x86_64")]
572                 {
573                     // Update system allocator
574                     self.allocator
575                         .lock()
576                         .unwrap()
577                         .free_io_addresses(GuestAddress(old_base), len as GuestUsize);
578 
579                     self.allocator
580                         .lock()
581                         .unwrap()
582                         .allocate_io_addresses(
583                             Some(GuestAddress(new_base)),
584                             len as GuestUsize,
585                             None,
586                         )
587                         .ok_or_else(|| {
588                             io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
589                         })?;
590 
591                     // Update PIO bus
592                     self.io_bus
593                         .update_range(old_base, len, new_base, len)
594                         .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
595                 }
596                 #[cfg(target_arch = "aarch64")]
597                 error!("I/O region is not supported");
598             }
599             PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
600                 // Update system allocator
601                 if region_type == PciBarRegionType::Memory32BitRegion {
602                     self.allocator
603                         .lock()
604                         .unwrap()
605                         .free_mmio_hole_addresses(GuestAddress(old_base), len as GuestUsize);
606 
607                     self.allocator
608                         .lock()
609                         .unwrap()
610                         .allocate_mmio_hole_addresses(
611                             Some(GuestAddress(new_base)),
612                             len as GuestUsize,
613                             Some(len),
614                         )
615                         .ok_or_else(|| {
616                             io::Error::new(
617                                 io::ErrorKind::Other,
618                                 "failed allocating new 32-bit MMIO range",
619                             )
620                         })?;
621                 } else {
622                     // Find the specific allocator this BAR was allocated from and use it for the new one
623                     for allocator in &self.pci_mmio_allocators {
624                         let allocator_base = allocator.lock().unwrap().base();
625                         let allocator_end = allocator.lock().unwrap().end();
626 
627                         if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
628                             allocator
629                                 .lock()
630                                 .unwrap()
631                                 .free(GuestAddress(old_base), len as GuestUsize);
632 
633                             allocator
634                                 .lock()
635                                 .unwrap()
636                                 .allocate(
637                                     Some(GuestAddress(new_base)),
638                                     len as GuestUsize,
639                                     Some(len),
640                                 )
641                                 .ok_or_else(|| {
642                                     io::Error::new(
643                                         io::ErrorKind::Other,
644                                         "failed allocating new 64-bit MMIO range",
645                                     )
646                                 })?;
647 
648                             break;
649                         }
650                     }
651                 }
652 
653                 // Update MMIO bus
654                 self.mmio_bus
655                     .update_range(old_base, len, new_base, len)
656                     .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
657             }
658         }
659 
660         // Update the device_tree resources associated with the device
661         if let Some(id) = pci_dev.id() {
662             if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
663                 let mut resource_updated = false;
664                 for resource in node.resources.iter_mut() {
665                     if let Resource::PciBar { base, type_, .. } = resource {
666                         if PciBarRegionType::from(*type_) == region_type && *base == old_base {
667                             *base = new_base;
668                             resource_updated = true;
669                             break;
670                         }
671                     }
672                 }
673 
674                 if !resource_updated {
675                     return Err(io::Error::new(
676                         io::ErrorKind::Other,
677                         format!(
678                             "Couldn't find a resource with base 0x{:x} for device {}",
679                             old_base, id
680                         ),
681                     ));
682                 }
683             } else {
684                 return Err(io::Error::new(
685                     io::ErrorKind::Other,
686                     format!("Couldn't find device {} from device tree", id),
687                 ));
688             }
689         }
690 
691         let any_dev = pci_dev.as_any();
692         if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
693             let bar_addr = virtio_pci_dev.config_bar_addr();
694             if bar_addr == new_base {
695                 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
696                     let io_addr = IoEventAddress::Mmio(addr);
697                     self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
698                         io::Error::new(
699                             io::ErrorKind::Other,
700                             format!("failed to unregister ioevent: {:?}", e),
701                         )
702                     })?;
703                 }
704                 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
705                     let io_addr = IoEventAddress::Mmio(addr);
706                     self.vm
707                         .register_ioevent(event, &io_addr, None)
708                         .map_err(|e| {
709                             io::Error::new(
710                                 io::ErrorKind::Other,
711                                 format!("failed to register ioevent: {:?}", e),
712                             )
713                         })?;
714                 }
715             } else {
716                 let virtio_dev = virtio_pci_dev.virtio_device();
717                 let mut virtio_dev = virtio_dev.lock().unwrap();
718                 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
719                     if shm_regions.addr.raw_value() == old_base {
720                         let mem_region = self.vm.make_user_memory_region(
721                             shm_regions.mem_slot,
722                             old_base,
723                             shm_regions.len,
724                             shm_regions.host_addr,
725                             false,
726                             false,
727                         );
728 
729                         self.vm.remove_user_memory_region(mem_region).map_err(|e| {
730                             io::Error::new(
731                                 io::ErrorKind::Other,
732                                 format!("failed to remove user memory region: {:?}", e),
733                             )
734                         })?;
735 
736                         // Create the new mapping by inserting the new region into KVM.
737                         let mem_region = self.vm.make_user_memory_region(
738                             shm_regions.mem_slot,
739                             new_base,
740                             shm_regions.len,
741                             shm_regions.host_addr,
742                             false,
743                             false,
744                         );
745 
746                         self.vm.create_user_memory_region(mem_region).map_err(|e| {
747                             io::Error::new(
748                                 io::ErrorKind::Other,
749                                 format!("failed to create user memory regions: {:?}", e),
750                             )
751                         })?;
752 
753                         // Update shared memory regions to reflect the new mapping.
754                         shm_regions.addr = GuestAddress(new_base);
755                         virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
756                             io::Error::new(
757                                 io::ErrorKind::Other,
758                                 format!("failed to update shared memory regions: {:?}", e),
759                             )
760                         })?;
761                     }
762                 }
763             }
764         }
765 
766         pci_dev.move_bar(old_base, new_base)
767     }
768 }
769 
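// DeviceManager bookkeeping that is persisted across snapshot/restore:
// the device tree and the counter used to generate device identifiers.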
770 #[derive(Serialize, Deserialize)]
771 struct DeviceManagerState {
772     device_tree: DeviceTree,
773     device_id_cnt: Wrapping<usize>,
774 }
775 
776 #[derive(Debug)]
777 pub struct PtyPair {
778     pub main: File,
779     pub sub: File,
780     pub path: PathBuf,
781 }
782 
783 impl Clone for PtyPair {
784     fn clone(&self) -> Self {
785         PtyPair {
786             main: self.main.try_clone().unwrap(),
787             sub: self.sub.try_clone().unwrap(),
788             path: self.path.clone(),
789         }
790     }
791 }
792 
793 #[derive(Clone)]
794 pub enum PciDeviceHandle {
795     Vfio(Arc<Mutex<VfioPciDevice>>),
796     Virtio(Arc<Mutex<VirtioPciDevice>>),
797     VfioUser(Arc<Mutex<VfioUserPciDevice>>),
798 }
799 
800 #[derive(Clone)]
801 struct MetaVirtioDevice {
802     virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
803     iommu: bool,
804     id: String,
805     pci_segment: u16,
806     dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
807 }
808 
809 pub struct DeviceManager {
810     // Manage address space related to devices
811     address_manager: Arc<AddressManager>,
812 
813     // Console abstraction
814     console: Arc<Console>,
815 
816     // console PTY
817     console_pty: Option<Arc<Mutex<PtyPair>>>,
818 
819     // serial PTY
820     serial_pty: Option<Arc<Mutex<PtyPair>>>,
821 
822     // Serial Manager
823     serial_manager: Option<Arc<SerialManager>>,
824 
825     // PTY foreground status
826     console_resize_pipe: Option<Arc<File>>,
827 
828     // Interrupt controller
829     #[cfg(target_arch = "x86_64")]
830     interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
831     #[cfg(target_arch = "aarch64")]
832     interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,
833 
834     // Things to be added to the commandline (e.g. aarch64 early console)
835     #[cfg(target_arch = "aarch64")]
836     cmdline_additions: Vec<String>,
837 
838     // ACPI GED notification device
839     ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,
840 
841     // VM configuration
842     config: Arc<Mutex<VmConfig>>,
843 
844     // Memory Manager
845     memory_manager: Arc<Mutex<MemoryManager>>,
846 
847     // The virtio devices on the system
848     virtio_devices: Vec<MetaVirtioDevice>,
849 
850     // List of bus devices
851     // Let the DeviceManager keep strong references to the BusDevice devices.
852     // This allows the IO and MMIO buses to be provided with Weak references,
853     // which prevents cyclic dependencies.
854     bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,
855 
856     // Counter to keep track of the consumed device IDs.
857     device_id_cnt: Wrapping<usize>,
858 
859     pci_segments: Vec<PciSegment>,
860 
861     #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
862     // MSI Interrupt Manager
863     msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
864 
865     #[cfg_attr(feature = "mshv", allow(dead_code))]
866     // Legacy Interrupt Manager
867     legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,
868 
869     // Passthrough device handle
870     passthrough_device: Option<Arc<dyn hypervisor::Device>>,
871 
872     // VFIO container
873     // Only one container can be created, therefore it is stored as part of the
874     // DeviceManager to be reused.
875     vfio_container: Option<Arc<VfioContainer>>,
876 
877     // Paravirtualized IOMMU
878     iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
879     iommu_mapping: Option<Arc<IommuMapping>>,
880 
881     // PCI information about devices attached to the paravirtualized IOMMU
882     // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
883     // representing the devices attached to the virtual IOMMU. This is useful
884     // information for filling the ACPI VIOT table.
885     iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,
886 
887     // Tree of devices, representing the dependencies between devices.
888     // Useful for introspection, snapshot and restore.
889     device_tree: Arc<Mutex<DeviceTree>>,
890 
891     // Exit event
892     exit_evt: EventFd,
893     reset_evt: EventFd,
894 
895     #[cfg(target_arch = "aarch64")]
896     id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,
897 
898     // seccomp action
899     seccomp_action: SeccompAction,
900 
901     // List of guest NUMA nodes.
902     numa_nodes: NumaNodes,
903 
904     // Possible handle to the virtio-balloon device
905     balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,
906 
907     // Virtio device activation EventFd, allowing the VMM thread to trigger
908     // device activation and thus spawn the device threads from the VMM thread
909     activate_evt: EventFd,
910 
911     acpi_address: GuestAddress,
912 
913     selected_segment: usize,
914 
915     // Possible handles to the virtio-mem devices
916     virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,
917 
918     #[cfg(target_arch = "aarch64")]
919     // GPIO device for AArch64
920     gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,
921 
922     #[cfg(target_arch = "aarch64")]
923     // Flash device for UEFI on AArch64
924     uefi_flash: Option<GuestMemoryAtomic<GuestMemoryMmap>>,
925 
926     // Flag to force setting the iommu on virtio devices
927     force_iommu: bool,
928 
929     // Helps identify if the VM is currently being restored
930     restoring: bool,
931 
932     // io_uring availability (None until it has been probed)
933     io_uring_supported: Option<bool>,
934 
935     // List of unique identifiers provided at boot through the configuration.
936     boot_id_list: BTreeSet<String>,
937 
938     // Start time of the VM
939     timestamp: Instant,
940 
941     // Pending activations
942     pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,
943 }
944 
945 impl DeviceManager {
946     #[allow(clippy::too_many_arguments)]
947     pub fn new(
948         vm: Arc<dyn hypervisor::Vm>,
949         config: Arc<Mutex<VmConfig>>,
950         memory_manager: Arc<Mutex<MemoryManager>>,
951         exit_evt: &EventFd,
952         reset_evt: &EventFd,
953         seccomp_action: SeccompAction,
954         numa_nodes: NumaNodes,
955         activate_evt: &EventFd,
956         force_iommu: bool,
957         restoring: bool,
958         boot_id_list: BTreeSet<String>,
959         timestamp: Instant,
960     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
961         let device_tree = Arc::new(Mutex::new(DeviceTree::new()));
962 
963         let num_pci_segments =
964             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
965                 platform_config.num_pci_segments
966             } else {
967                 1
968             };
969 
970         let start_of_device_area = memory_manager.lock().unwrap().start_of_device_area().0;
971         let end_of_device_area = memory_manager.lock().unwrap().end_of_device_area().0;
972 
973         // Start each PCI segment range on a 4GiB boundary
974         let pci_segment_size = (end_of_device_area - start_of_device_area + 1)
975             / ((4 << 30) * num_pci_segments as u64)
976             * (4 << 30);
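        // For instance (illustrative numbers only): with a 96 GiB device area
        // and num_pci_segments = 2, 96 GiB / (4 GiB * 2) = 12, so each segment
        // gets a pci_segment_size of 12 * 4 GiB = 48 GiB.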
977 
978         let mut pci_mmio_allocators = vec![];
979         for i in 0..num_pci_segments as u64 {
980             let mmio_start = start_of_device_area + i * pci_segment_size;
981             let allocator = Arc::new(Mutex::new(
982                 AddressAllocator::new(GuestAddress(mmio_start), pci_segment_size).unwrap(),
983             ));
984             pci_mmio_allocators.push(allocator)
985         }
986 
987         let address_manager = Arc::new(AddressManager {
988             allocator: memory_manager.lock().unwrap().allocator(),
989             #[cfg(target_arch = "x86_64")]
990             io_bus: Arc::new(Bus::new()),
991             mmio_bus: Arc::new(Bus::new()),
992             vm: vm.clone(),
993             device_tree: Arc::clone(&device_tree),
994             pci_mmio_allocators,
995         });
996 
997         // First we create the MSI interrupt manager, the legacy one is created
998         // later, after the IOAPIC device creation.
999         // The reason we create the MSI one first is that the IOAPIC needs it,
1000         // and then the legacy interrupt manager needs an IOAPIC. So we're
1001         // handling a linear dependency chain:
1002         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1003         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1004             Arc::new(MsiInterruptManager::new(
1005                 Arc::clone(&address_manager.allocator),
1006                 vm,
1007             ));
1008 
1009         let acpi_address = address_manager
1010             .allocator
1011             .lock()
1012             .unwrap()
1013             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1014             .ok_or(DeviceManagerError::AllocateIoPort)?;
1015 
1016         let mut pci_irq_slots = [0; 32];
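        // One legacy (INTx) IRQ slot per possible PCI device slot; the actual
        // IRQ numbers are filled in by the reservation call just below.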
1017         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1018             &address_manager,
1019             &mut pci_irq_slots,
1020         )?;
1021 
1022         let mut pci_segments = vec![PciSegment::new_default_segment(
1023             &address_manager,
1024             Arc::clone(&address_manager.pci_mmio_allocators[0]),
1025             &pci_irq_slots,
1026         )?];
1027 
1028         for i in 1..num_pci_segments as usize {
1029             pci_segments.push(PciSegment::new(
1030                 i as u16,
1031                 &address_manager,
1032                 Arc::clone(&address_manager.pci_mmio_allocators[i]),
1033                 &pci_irq_slots,
1034             )?);
1035         }
1036 
1037         let device_manager = DeviceManager {
1038             address_manager: Arc::clone(&address_manager),
1039             console: Arc::new(Console::default()),
1040             interrupt_controller: None,
1041             #[cfg(target_arch = "aarch64")]
1042             cmdline_additions: Vec::new(),
1043             ged_notification_device: None,
1044             config,
1045             memory_manager,
1046             virtio_devices: Vec::new(),
1047             bus_devices: Vec::new(),
1048             device_id_cnt: Wrapping(0),
1049             msi_interrupt_manager,
1050             legacy_interrupt_manager: None,
1051             passthrough_device: None,
1052             vfio_container: None,
1053             iommu_device: None,
1054             iommu_mapping: None,
1055             iommu_attached_devices: None,
1056             pci_segments,
1057             device_tree,
1058             exit_evt: exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
1059             reset_evt: reset_evt.try_clone().map_err(DeviceManagerError::EventFd)?,
1060             #[cfg(target_arch = "aarch64")]
1061             id_to_dev_info: HashMap::new(),
1062             seccomp_action,
1063             numa_nodes,
1064             balloon: None,
1065             activate_evt: activate_evt
1066                 .try_clone()
1067                 .map_err(DeviceManagerError::EventFd)?,
1068             acpi_address,
1069             selected_segment: 0,
1070             serial_pty: None,
1071             serial_manager: None,
1072             console_pty: None,
1073             console_resize_pipe: None,
1074             virtio_mem_devices: Vec::new(),
1075             #[cfg(target_arch = "aarch64")]
1076             gpio_device: None,
1077             #[cfg(target_arch = "aarch64")]
1078             uefi_flash: None,
1079             force_iommu,
1080             restoring,
1081             io_uring_supported: None,
1082             boot_id_list,
1083             timestamp,
1084             pending_activations: Arc::new(Mutex::new(Vec::default())),
1085         };
1086 
1087         let device_manager = Arc::new(Mutex::new(device_manager));
1088 
1089         address_manager
1090             .mmio_bus
1091             .insert(
1092                 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
1093                 acpi_address.0,
1094                 DEVICE_MANAGER_ACPI_SIZE as u64,
1095             )
1096             .map_err(DeviceManagerError::BusError)?;
1097 
1098         Ok(device_manager)
1099     }
1100 
1101     pub fn serial_pty(&self) -> Option<PtyPair> {
1102         self.serial_pty
1103             .as_ref()
1104             .map(|pty| pty.lock().unwrap().clone())
1105     }
1106 
1107     pub fn console_pty(&self) -> Option<PtyPair> {
1108         self.console_pty
1109             .as_ref()
1110             .map(|pty| pty.lock().unwrap().clone())
1111     }
1112 
1113     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1114         self.console_resize_pipe.as_ref().map(Arc::clone)
1115     }
1116 
1117     pub fn create_devices(
1118         &mut self,
1119         serial_pty: Option<PtyPair>,
1120         console_pty: Option<PtyPair>,
1121         console_resize_pipe: Option<File>,
1122     ) -> DeviceManagerResult<()> {
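        // Creation order matters here: the interrupt controller comes first,
        // the legacy interrupt manager is built on top of it, then the
        // memory-manager ACPI region, legacy devices, ACPI devices and the
        // console are added, and finally the remaining virtio devices are
        // created and plugged onto the PCI segments.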
1123         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1124 
1125         let interrupt_controller = self.add_interrupt_controller()?;
1126 
1127         // Now we can create the legacy interrupt manager, which needs the freshly
1128         // formed IOAPIC device.
1129         let legacy_interrupt_manager: Arc<
1130             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1131         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1132             &interrupt_controller,
1133         )));
1134 
1135         {
1136             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1137                 self.address_manager
1138                     .mmio_bus
1139                     .insert(
1140                         Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
1141                         acpi_address.0,
1142                         MEMORY_MANAGER_ACPI_SIZE as u64,
1143                     )
1144                     .map_err(DeviceManagerError::BusError)?;
1145             }
1146         }
1147 
1148         #[cfg(target_arch = "x86_64")]
1149         self.add_legacy_devices(
1150             self.reset_evt
1151                 .try_clone()
1152                 .map_err(DeviceManagerError::EventFd)?,
1153         )?;
1154 
1155         #[cfg(target_arch = "aarch64")]
1156         self.add_legacy_devices(&legacy_interrupt_manager)?;
1157 
1158         {
1159             self.ged_notification_device = self.add_acpi_devices(
1160                 &legacy_interrupt_manager,
1161                 self.reset_evt
1162                     .try_clone()
1163                     .map_err(DeviceManagerError::EventFd)?,
1164                 self.exit_evt
1165                     .try_clone()
1166                     .map_err(DeviceManagerError::EventFd)?,
1167             )?;
1168         }
1169 
1170         self.console = self.add_console_device(
1171             &legacy_interrupt_manager,
1172             &mut virtio_devices,
1173             serial_pty,
1174             console_pty,
1175             console_resize_pipe,
1176         )?;
1177 
1178         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1179 
1180         virtio_devices.append(&mut self.make_virtio_devices()?);
1181 
1182         self.add_pci_devices(virtio_devices.clone())?;
1183 
1184         self.virtio_devices = virtio_devices;
1185 
1186         Ok(())
1187     }
1188 
1189     fn state(&self) -> DeviceManagerState {
1190         DeviceManagerState {
1191             device_tree: self.device_tree.lock().unwrap().clone(),
1192             device_id_cnt: self.device_id_cnt,
1193         }
1194     }
1195 
1196     fn set_state(&mut self, state: &DeviceManagerState) {
1197         *self.device_tree.lock().unwrap() = state.device_tree.clone();
1198         self.device_id_cnt = state.device_id_cnt;
1199     }
1200 
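    // Returns the inclusive (start, end) of the guest-physical range used for
    // MSI doorbells: the fixed APIC window on x86_64, or the GIC ITS window
    // derived from the GIC layout on aarch64. The virtio-iommu is given this
    // range so it can treat it as reserved for MSIs.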
1201     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1202         #[cfg(target_arch = "aarch64")]
1203         {
1204             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1205             let msi_start = arch::layout::GIC_V3_DIST_START.raw_value()
1206                 - arch::layout::GIC_V3_REDIST_SIZE * (vcpus as u64)
1207                 - arch::layout::GIC_V3_ITS_SIZE;
1208             let msi_end = msi_start + arch::layout::GIC_V3_ITS_SIZE - 1;
1209             (msi_start, msi_end)
1210         }
1211         #[cfg(target_arch = "x86_64")]
1212         (0xfee0_0000, 0xfeef_ffff)
1213     }
1214 
1215     #[cfg(target_arch = "aarch64")]
1216     /// Gets information about the devices registered up to this point in time.
1217     pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
1218         &self.id_to_dev_info
1219     }
1220 
1221     #[allow(unused_variables)]
1222     fn add_pci_devices(
1223         &mut self,
1224         virtio_devices: Vec<MetaVirtioDevice>,
1225     ) -> DeviceManagerResult<()> {
1226         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1227 
1228         let iommu_device = if self.config.lock().unwrap().iommu {
1229             let (device, mapping) = virtio_devices::Iommu::new(
1230                 iommu_id.clone(),
1231                 self.seccomp_action.clone(),
1232                 self.exit_evt
1233                     .try_clone()
1234                     .map_err(DeviceManagerError::EventFd)?,
1235                 self.get_msi_iova_space(),
1236             )
1237             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1238             let device = Arc::new(Mutex::new(device));
1239             self.iommu_device = Some(Arc::clone(&device));
1240             self.iommu_mapping = Some(mapping);
1241 
1242             // Fill the device tree with a new node. In case of restore, we
1243             // know there is nothing to do, so we can simply override the
1244             // existing entry.
1245             self.device_tree
1246                 .lock()
1247                 .unwrap()
1248                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1249 
1250             Some(device)
1251         } else {
1252             None
1253         };
1254 
1255         let mut iommu_attached_devices = Vec::new();
1256         {
1257             for handle in virtio_devices {
1258                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1259                     self.iommu_mapping.clone()
1260                 } else {
1261                     None
1262                 };
1263 
1264                 let dev_id = self.add_virtio_pci_device(
1265                     handle.virtio_device,
1266                     &mapping,
1267                     handle.id,
1268                     handle.pci_segment,
1269                     handle.dma_handler,
1270                 )?;
1271 
1272                 if handle.iommu {
1273                     iommu_attached_devices.push(dev_id);
1274                 }
1275             }
1276 
1277             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1278             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1279 
1280             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1281             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1282 
1283             // Add all devices from forced iommu segments
1284             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1285                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1286                     for segment in iommu_segments {
1287                         for device in 0..32 {
1288                             let bdf = PciBdf::new(*segment, 0, device, 0);
1289                             if !iommu_attached_devices.contains(&bdf) {
1290                                 iommu_attached_devices.push(bdf);
1291                             }
1292                         }
1293                     }
1294                 }
1295             }
1296 
1297             if let Some(iommu_device) = iommu_device {
1298                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1299                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1300             }
1301         }
1302 
1303         for segment in &self.pci_segments {
1304             #[cfg(target_arch = "x86_64")]
1305             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1306                 self.bus_devices
1307                     .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
1308             }
1309 
1310             self.bus_devices
1311                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
1312         }
1313 
1314         Ok(())
1315     }
1316 
1317     #[cfg(target_arch = "aarch64")]
1318     fn add_interrupt_controller(
1319         &mut self,
1320     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1321         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1322             gic::Gic::new(
1323                 self.config.lock().unwrap().cpus.boot_vcpus,
1324                 Arc::clone(&self.msi_interrupt_manager),
1325             )
1326             .map_err(DeviceManagerError::CreateInterruptController)?,
1327         ));
1328 
1329         self.interrupt_controller = Some(interrupt_controller.clone());
1330 
1331         // Unlike x86_64, the "interrupt_controller" here for AArch64 is only
1332         // a `Gic` object that implements the `InterruptController` trait to
1333         // provide the interrupt delivery service. It is not the real GIC device,
1334         // so we do not need to insert it into the device tree.
1335 
1336         Ok(interrupt_controller)
1337     }
1338 
1339     #[cfg(target_arch = "aarch64")]
1340     pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
1341         self.interrupt_controller.as_ref()
1342     }
1343 
1344     #[cfg(target_arch = "x86_64")]
1345     fn add_interrupt_controller(
1346         &mut self,
1347     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1348         let id = String::from(IOAPIC_DEVICE_NAME);
1349 
1350         // Create IOAPIC
1351         let interrupt_controller = Arc::new(Mutex::new(
1352             ioapic::Ioapic::new(
1353                 id.clone(),
1354                 APIC_START,
1355                 Arc::clone(&self.msi_interrupt_manager),
1356             )
1357             .map_err(DeviceManagerError::CreateInterruptController)?,
1358         ));
1359 
1360         self.interrupt_controller = Some(interrupt_controller.clone());
1361 
1362         self.address_manager
1363             .mmio_bus
1364             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1365             .map_err(DeviceManagerError::BusError)?;
1366 
1367         self.bus_devices
1368             .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);
1369 
1370         // Fill the device tree with a new node. In case of restore, we
1371         // know there is nothing to do, so we can simply override the
1372         // existing entry.
1373         self.device_tree
1374             .lock()
1375             .unwrap()
1376             .insert(id.clone(), device_node!(id, interrupt_controller));
1377 
1378         Ok(interrupt_controller)
1379     }
1380 
1381     fn add_acpi_devices(
1382         &mut self,
1383         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1384         reset_evt: EventFd,
1385         exit_evt: EventFd,
1386     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1387         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1388             exit_evt, reset_evt,
1389         )));
1390 
1391         self.bus_devices
1392             .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);
1393 
1394         #[cfg(target_arch = "x86_64")]
1395         {
1396             self.address_manager
1397                 .allocator
1398                 .lock()
1399                 .unwrap()
1400                 .allocate_io_addresses(Some(GuestAddress(0x3c0)), 0x8, None)
1401                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1402 
1403             self.address_manager
1404                 .io_bus
1405                 .insert(shutdown_device, 0x3c0, 0x4)
1406                 .map_err(DeviceManagerError::BusError)?;
1407         }
1408 
1409         let ged_irq = self
1410             .address_manager
1411             .allocator
1412             .lock()
1413             .unwrap()
1414             .allocate_irq()
1415             .unwrap();
1416         let interrupt_group = interrupt_manager
1417             .create_group(LegacyIrqGroupConfig {
1418                 irq: ged_irq as InterruptIndex,
1419             })
1420             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1421         let ged_address = self
1422             .address_manager
1423             .allocator
1424             .lock()
1425             .unwrap()
1426             .allocate_platform_mmio_addresses(
1427                 None,
1428                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1429                 None,
1430             )
1431             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1432         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1433             interrupt_group,
1434             ged_irq,
1435             ged_address,
1436         )));
1437         self.address_manager
1438             .mmio_bus
1439             .insert(
1440                 ged_device.clone(),
1441                 ged_address.0,
1442                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1443             )
1444             .map_err(DeviceManagerError::BusError)?;
1445         self.bus_devices
1446             .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);
1447 
1448         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1449 
1450         self.bus_devices
1451             .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);
1452 
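        // On x86_64, expose the PM timer at I/O port 0xb008, the port
        // conventionally used for the ACPI PM timer block.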
1453         #[cfg(target_arch = "x86_64")]
1454         {
1455             self.address_manager
1456                 .allocator
1457                 .lock()
1458                 .unwrap()
1459                 .allocate_io_addresses(Some(GuestAddress(0xb008)), 0x4, None)
1460                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1461 
1462             self.address_manager
1463                 .io_bus
1464                 .insert(pm_timer_device, 0xb008, 0x4)
1465                 .map_err(DeviceManagerError::BusError)?;
1466         }
1467 
1468         Ok(Some(ged_device))
1469     }
1470 
1471     #[cfg(target_arch = "x86_64")]
1472     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1473         // Add a shutdown device (i8042)
1474         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1475             reset_evt.try_clone().unwrap(),
1476         )));
1477 
1478         self.bus_devices
1479             .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);
1480 
1481         self.address_manager
1482             .io_bus
1483             .insert(i8042, 0x61, 0x4)
1484             .map_err(DeviceManagerError::BusError)?;
1485         {
1486             // Add a CMOS emulated device
1487             // Add an emulated CMOS device
1488                 .memory_manager
1489                 .lock()
1490                 .unwrap()
1491                 .guest_memory()
1492                 .memory()
1493                 .last_addr()
1494                 .0
1495                 + 1;
1496             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1497             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1498 
1499             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1500                 mem_below_4g,
1501                 mem_above_4g,
1502                 reset_evt,
1503             )));
1504 
1505             self.bus_devices
1506                 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);
1507 
1508             self.address_manager
1509                 .io_bus
1510                 .insert(cmos, 0x70, 0x2)
1511                 .map_err(DeviceManagerError::BusError)?;
1512         }
1513         #[cfg(feature = "fwdebug")]
1514         {
1515             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1516 
1517             self.bus_devices
1518                 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);
1519 
1520             self.address_manager
1521                 .io_bus
1522                 .insert(fwdebug, 0x402, 0x1)
1523                 .map_err(DeviceManagerError::BusError)?;
1524         }
1525 
1526         // 0x80 debug port
1527         let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
1528         self.bus_devices
1529             .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
1530         self.address_manager
1531             .io_bus
1532             .insert(debug_port, 0x80, 0x1)
1533             .map_err(DeviceManagerError::BusError)?;
1534 
1535         Ok(())
1536     }
1537 
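         /// Adds the AArch64 legacy devices: an MMIO RTC and a GPIO controller, both
         /// with dynamically allocated IRQs, plus a 4 MiB memory region mapped at
         /// UEFI_START to act as the UEFI flash.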
1538     #[cfg(target_arch = "aarch64")]
1539     fn add_legacy_devices(
1540         &mut self,
1541         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1542     ) -> DeviceManagerResult<()> {
1543         // Add an RTC device
1544         let rtc_irq = self
1545             .address_manager
1546             .allocator
1547             .lock()
1548             .unwrap()
1549             .allocate_irq()
1550             .unwrap();
1551 
1552         let interrupt_group = interrupt_manager
1553             .create_group(LegacyIrqGroupConfig {
1554                 irq: rtc_irq as InterruptIndex,
1555             })
1556             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1557 
1558         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1559 
1560         self.bus_devices
1561             .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);
1562 
1563         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1564 
1565         self.address_manager
1566             .mmio_bus
1567             .insert(rtc_device, addr.0, MMIO_LEN)
1568             .map_err(DeviceManagerError::BusError)?;
1569 
1570         self.id_to_dev_info.insert(
1571             (DeviceType::Rtc, "rtc".to_string()),
1572             MmioDeviceInfo {
1573                 addr: addr.0,
1574                 len: MMIO_LEN,
1575                 irq: rtc_irq,
1576             },
1577         );
1578 
1579         // Add a GPIO device
1580         let id = String::from(GPIO_DEVICE_NAME);
1581         let gpio_irq = self
1582             .address_manager
1583             .allocator
1584             .lock()
1585             .unwrap()
1586             .allocate_irq()
1587             .unwrap();
1588 
1589         let interrupt_group = interrupt_manager
1590             .create_group(LegacyIrqGroupConfig {
1591                 irq: gpio_irq as InterruptIndex,
1592             })
1593             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1594 
1595         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1596             id.clone(),
1597             interrupt_group,
1598         )));
1599 
1600         self.bus_devices
1601             .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);
1602 
1603         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1604 
1605         self.address_manager
1606             .mmio_bus
1607             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1608             .map_err(DeviceManagerError::BusError)?;
1609 
1610         self.gpio_device = Some(gpio_device.clone());
1611 
1612         self.id_to_dev_info.insert(
1613             (DeviceType::Gpio, "gpio".to_string()),
1614             MmioDeviceInfo {
1615                 addr: addr.0,
1616                 len: MMIO_LEN,
1617                 irq: gpio_irq,
1618             },
1619         );
1620 
1621         self.device_tree
1622             .lock()
1623             .unwrap()
1624             .insert(id.clone(), device_node!(id, gpio_device));
1625 
1626         // On AArch64, the UEFI binary requires a flash device at address 0.
1627         // A 4 MiB memory region is mapped to simulate the flash.
1628         let uefi_mem_slot = self.memory_manager.lock().unwrap().allocate_memory_slot();
1629         let uefi_region = GuestRegionMmap::new(
1630             MmapRegion::new(arch::layout::UEFI_SIZE as usize).unwrap(),
1631             arch::layout::UEFI_START,
1632         )
1633         .unwrap();
1634         let uefi_mem_region = self
1635             .memory_manager
1636             .lock()
1637             .unwrap()
1638             .vm
1639             .make_user_memory_region(
1640                 uefi_mem_slot,
1641                 uefi_region.start_addr().raw_value(),
1642                 uefi_region.len() as u64,
1643                 uefi_region.as_ptr() as u64,
1644                 false,
1645                 false,
1646             );
1647         self.memory_manager
1648             .lock()
1649             .unwrap()
1650             .vm
1651             .create_user_memory_region(uefi_mem_region)
1652             .map_err(DeviceManagerError::CreateUefiFlash)?;
1653 
1654         let uefi_flash =
1655             GuestMemoryAtomic::new(GuestMemoryMmap::from_regions(vec![uefi_region]).unwrap());
1656         self.uefi_flash = Some(uefi_flash);
1657 
1658         Ok(())
1659     }
1660 
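         /// Creates the legacy serial device on I/O ports 0x3f8..0x3ff with the fixed
         /// IRQ 4, registers it on the I/O bus and records it in the device tree.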
1661     #[cfg(target_arch = "x86_64")]
1662     fn add_serial_device(
1663         &mut self,
1664         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1665         serial_writer: Option<Box<dyn io::Write + Send>>,
1666     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1667         // Serial is tied to IRQ #4
1668         let serial_irq = 4;
1669 
1670         let id = String::from(SERIAL_DEVICE_NAME);
1671 
1672         let interrupt_group = interrupt_manager
1673             .create_group(LegacyIrqGroupConfig {
1674                 irq: serial_irq as InterruptIndex,
1675             })
1676             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1677 
1678         let serial = Arc::new(Mutex::new(Serial::new(
1679             id.clone(),
1680             interrupt_group,
1681             serial_writer,
1682         )));
1683 
1684         self.bus_devices
1685             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1686 
1687         self.address_manager
1688             .allocator
1689             .lock()
1690             .unwrap()
1691             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1692             .ok_or(DeviceManagerError::AllocateIoPort)?;
1693 
1694         self.address_manager
1695             .io_bus
1696             .insert(serial.clone(), 0x3f8, 0x8)
1697             .map_err(DeviceManagerError::BusError)?;
1698 
1699         // Fill the device tree with a new node. In case of restore, we
1700         // know there is nothing to do, so we can simply override the
1701         // existing entry.
1702         self.device_tree
1703             .lock()
1704             .unwrap()
1705             .insert(id.clone(), device_node!(id, serial));
1706 
1707         Ok(serial)
1708     }
1709 
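         /// Creates a PL011 UART at LEGACY_SERIAL_MAPPED_IO_START with a dynamically
         /// allocated IRQ, registers it on the MMIO bus and appends an earlycon entry
         /// to the kernel command line.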
1710     #[cfg(target_arch = "aarch64")]
1711     fn add_serial_device(
1712         &mut self,
1713         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1714         serial_writer: Option<Box<dyn io::Write + Send>>,
1715     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1716         let id = String::from(SERIAL_DEVICE_NAME);
1717 
1718         let serial_irq = self
1719             .address_manager
1720             .allocator
1721             .lock()
1722             .unwrap()
1723             .allocate_irq()
1724             .unwrap();
1725 
1726         let interrupt_group = interrupt_manager
1727             .create_group(LegacyIrqGroupConfig {
1728                 irq: serial_irq as InterruptIndex,
1729             })
1730             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1731 
1732         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1733             id.clone(),
1734             interrupt_group,
1735             serial_writer,
1736             self.timestamp,
1737         )));
1738 
1739         self.bus_devices
1740             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1741 
1742         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
1743 
1744         self.address_manager
1745             .mmio_bus
1746             .insert(serial.clone(), addr.0, MMIO_LEN)
1747             .map_err(DeviceManagerError::BusError)?;
1748 
1749         self.id_to_dev_info.insert(
1750             (DeviceType::Serial, DeviceType::Serial.to_string()),
1751             MmioDeviceInfo {
1752                 addr: addr.0,
1753                 len: MMIO_LEN,
1754                 irq: serial_irq,
1755             },
1756         );
1757 
1758         self.cmdline_additions
1759             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1760 
1761         // Fill the device tree with a new node. In case of restore, we
1762         // know there is nothing to do, so we can simply override the
1763         // existing entry.
1764         self.device_tree
1765             .lock()
1766             .unwrap()
1767             .insert(id.clone(), device_node!(id, serial));
1768 
1769         Ok(serial)
1770     }
1771 
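         /// Applies `f` to the termios settings of `fd`. If `fd` does not refer to a
         /// TTY this is a no-op.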
1772     fn modify_mode<F: FnOnce(&mut termios)>(
1773         &self,
1774         fd: RawFd,
1775         f: F,
1776     ) -> vmm_sys_util::errno::Result<()> {
1777         // SAFETY: safe because we check the return value of isatty.
1778         if unsafe { isatty(fd) } != 1 {
1779             return Ok(());
1780         }
1781 
1782         // SAFETY: The following pair is safe because termios gets totally overwritten by tcgetattr
1783         // and we check the return result.
1784         let mut termios: termios = unsafe { zeroed() };
1785         let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
1786         if ret < 0 {
1787             return vmm_sys_util::errno::errno_result();
1788         }
1789         f(&mut termios);
1790         // SAFETY: Safe because the syscall will only read the extent of termios and we check
1791         // the return result.
1792         let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
1793         if ret < 0 {
1794             return vmm_sys_util::errno::errno_result();
1795         }
1796 
1797         Ok(())
1798     }
1799 
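         /// Switches the terminal backing `f` to raw mode via cfmakeraw.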
1800     fn set_raw_mode(&self, f: &mut File) -> vmm_sys_util::errno::Result<()> {
1801         // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
1802         self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
1803     }
1804 
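         /// Starts the SIGWINCH listener for the given PTY and keeps the resize pipe
         /// it returns; any setup error is logged and otherwise ignored.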
1805     fn listen_for_sigwinch_on_tty(&mut self, pty: &File) -> std::io::Result<()> {
1806         let seccomp_filter =
1807             get_seccomp_filter(&self.seccomp_action, Thread::PtyForeground).unwrap();
1808 
1809         match start_sigwinch_listener(seccomp_filter, pty) {
1810             Ok(pipe) => {
1811                 self.console_resize_pipe = Some(Arc::new(pipe));
1812             }
1813             Err(e) => {
1814                 warn!("Ignoring error from setting up SIGWINCH listener: {}", e)
1815             }
1816         }
1817 
1818         Ok(())
1819     }
1820 
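         /// Creates the virtio-console device according to the configured output mode
         /// (File, Pty, Tty, Null or Off) and returns a resizer only when the console
         /// is attached to the TTY.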
1821     fn add_virtio_console_device(
1822         &mut self,
1823         virtio_devices: &mut Vec<MetaVirtioDevice>,
1824         console_pty: Option<PtyPair>,
1825         resize_pipe: Option<File>,
1826     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
1827         let console_config = self.config.lock().unwrap().console.clone();
1828         let endpoint = match console_config.mode {
1829             ConsoleOutputMode::File => {
1830                 let file = File::create(console_config.file.as_ref().unwrap())
1831                     .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
1832                 Endpoint::File(file)
1833             }
1834             ConsoleOutputMode::Pty => {
1835                 if let Some(pty) = console_pty {
1836                     self.config.lock().unwrap().console.file = Some(pty.path.clone());
1837                     let file = pty.main.try_clone().unwrap();
1838                     self.console_pty = Some(Arc::new(Mutex::new(pty)));
1839                     self.console_resize_pipe = resize_pipe.map(Arc::new);
1840                     Endpoint::FilePair(file.try_clone().unwrap(), file)
1841                 } else {
1842                     let (main, mut sub, path) =
1843                         create_pty(false).map_err(DeviceManagerError::ConsolePtyOpen)?;
1844                     self.set_raw_mode(&mut sub)
1845                         .map_err(DeviceManagerError::SetPtyRaw)?;
1846                     self.config.lock().unwrap().console.file = Some(path.clone());
1847                     let file = main.try_clone().unwrap();
1848                     assert!(resize_pipe.is_none());
1849                     self.listen_for_sigwinch_on_tty(&sub).unwrap();
1850                     self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
1851                     Endpoint::FilePair(file.try_clone().unwrap(), file)
1852                 }
1853             }
1854             ConsoleOutputMode::Tty => {
1855                 // Duplicating the file descriptors like this is needed as otherwise
1856                 // they will be closed on a reboot and the numbers reused
1857 
1858                 // SAFETY: FFI call to dup. Trivially safe.
1859                 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
1860                 if stdout == -1 {
1861                     return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
1862                 }
1863                 // SAFETY: stdout is valid and owned solely by us.
1864                 let stdout = unsafe { File::from_raw_fd(stdout) };
1865 
1866                 // If this is an interactive TTY then we can accept input
1867                 // SAFETY: FFI call. Trivially safe.
1868                 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
1869                     // SAFETY: FFI call to dup. Trivially safe.
1870                     let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
1871                     if stdin == -1 {
1872                         return vmm_sys_util::errno::errno_result()
1873                             .map_err(DeviceManagerError::DupFd);
1874                     }
1875                     // SAFETY: stdin is valid and owned solely by us.
1876                     let stdin = unsafe { File::from_raw_fd(stdin) };
1877 
1878                     Endpoint::FilePair(stdout, stdin)
1879                 } else {
1880                     Endpoint::File(stdout)
1881                 }
1882             }
1883             ConsoleOutputMode::Null => Endpoint::Null,
1884             ConsoleOutputMode::Off => return Ok(None),
1885         };
1886         let id = String::from(CONSOLE_DEVICE_NAME);
1887 
1888         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
1889             id.clone(),
1890             endpoint,
1891             self.console_resize_pipe
1892                 .as_ref()
1893                 .map(|p| p.try_clone().unwrap()),
1894             self.force_iommu | console_config.iommu,
1895             self.seccomp_action.clone(),
1896             self.exit_evt
1897                 .try_clone()
1898                 .map_err(DeviceManagerError::EventFd)?,
1899         )
1900         .map_err(DeviceManagerError::CreateVirtioConsole)?;
1901         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
1902         virtio_devices.push(MetaVirtioDevice {
1903             virtio_device: Arc::clone(&virtio_console_device)
1904                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
1905             iommu: console_config.iommu,
1906             id: id.clone(),
1907             pci_segment: 0,
1908             dma_handler: None,
1909         });
1910 
1911         // Fill the device tree with a new node. In case of restore, we
1912         // know there is nothing to do, so we can simply override the
1913         // existing entry.
1914         self.device_tree
1915             .lock()
1916             .unwrap()
1917             .insert(id.clone(), device_node!(id, virtio_console_device));
1918 
1919         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
1920         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
1921             Some(console_resizer)
1922         } else {
1923             None
1924         })
1925     }
1926 
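         /// Sets up the serial device (with a SerialManager thread for the Pty and Tty
         /// modes) and the virtio-console device, returning the resulting Console handle.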
1927     fn add_console_device(
1928         &mut self,
1929         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1930         virtio_devices: &mut Vec<MetaVirtioDevice>,
1931         serial_pty: Option<PtyPair>,
1932         console_pty: Option<PtyPair>,
1933         console_resize_pipe: Option<File>,
1934     ) -> DeviceManagerResult<Arc<Console>> {
1935         let serial_config = self.config.lock().unwrap().serial.clone();
1936         let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
1937             ConsoleOutputMode::File => Some(Box::new(
1938                 File::create(serial_config.file.as_ref().unwrap())
1939                     .map_err(DeviceManagerError::SerialOutputFileOpen)?,
1940             )),
1941             ConsoleOutputMode::Pty => {
1942                 if let Some(pty) = serial_pty {
1943                     self.config.lock().unwrap().serial.file = Some(pty.path.clone());
1944                     self.serial_pty = Some(Arc::new(Mutex::new(pty)));
1945                 } else {
1946                     let (main, mut sub, path) =
1947                         create_pty(true).map_err(DeviceManagerError::SerialPtyOpen)?;
1948                     self.set_raw_mode(&mut sub)
1949                         .map_err(DeviceManagerError::SetPtyRaw)?;
1950                     self.config.lock().unwrap().serial.file = Some(path.clone());
1951                     self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, sub, path })));
1952                 }
1953                 None
1954             }
1955             ConsoleOutputMode::Tty => Some(Box::new(stdout())),
1956             ConsoleOutputMode::Off | ConsoleOutputMode::Null => None,
1957         };
1958         if serial_config.mode != ConsoleOutputMode::Off {
1959             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
1960             self.serial_manager = match serial_config.mode {
1961                 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty => {
1962                     let serial_manager =
1963                         SerialManager::new(serial, self.serial_pty.clone(), serial_config.mode)
1964                             .map_err(DeviceManagerError::CreateSerialManager)?;
1965                     if let Some(mut serial_manager) = serial_manager {
1966                         serial_manager
1967                             .start_thread(
1968                                 self.exit_evt
1969                                     .try_clone()
1970                                     .map_err(DeviceManagerError::EventFd)?,
1971                             )
1972                             .map_err(DeviceManagerError::SpawnSerialManager)?;
1973                         Some(Arc::new(serial_manager))
1974                     } else {
1975                         None
1976                     }
1977                 }
1978                 _ => None,
1979             };
1980         }
1981 
1982         let console_resizer =
1983             self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;
1984 
1985         Ok(Arc::new(Console { console_resizer }))
1986     }
1987 
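         /// Builds every configured virtio device (block, net, rng, fs, pmem, vsock,
         /// mem, balloon, watchdog and vDPA) in a fixed order.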
1988     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
1989         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
1990 
1991         // Create "standard" virtio devices (net/block/rng)
1992         devices.append(&mut self.make_virtio_block_devices()?);
1993         devices.append(&mut self.make_virtio_net_devices()?);
1994         devices.append(&mut self.make_virtio_rng_devices()?);
1995 
1996         // Add virtio-fs if required
1997         devices.append(&mut self.make_virtio_fs_devices()?);
1998 
1999         // Add virtio-pmem if required
2000         devices.append(&mut self.make_virtio_pmem_devices()?);
2001 
2002         // Add virtio-vsock if required
2003         devices.append(&mut self.make_virtio_vsock_devices()?);
2004 
2005         devices.append(&mut self.make_virtio_mem_devices()?);
2006 
2007         // Add virtio-balloon if required
2008         devices.append(&mut self.make_virtio_balloon_devices()?);
2009 
2010         // Add virtio-watchdog device
2011         devices.append(&mut self.make_virtio_watchdog_devices()?);
2012 
2013         // Add vDPA devices if required
2014         devices.append(&mut self.make_vdpa_devices()?);
2015 
2016         Ok(devices)
2017     }
2018 
2019     // Cache whether io_uring is supported to avoid probing for every block device
2020     fn io_uring_is_supported(&mut self) -> bool {
2021         if let Some(supported) = self.io_uring_supported {
2022             return supported;
2023         }
2024 
2025         let supported = block_io_uring_is_supported();
2026         self.io_uring_supported = Some(supported);
2027         supported
2028     }
2029 
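         /// Creates a single block device: either a vhost-user-blk frontend or a
         /// virtio-block device whose backend (raw, fixed VHD, QCOW2 or VHDX, using
         /// io_uring when available) is chosen from the detected image type.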
2030     fn make_virtio_block_device(
2031         &mut self,
2032         disk_cfg: &mut DiskConfig,
2033     ) -> DeviceManagerResult<MetaVirtioDevice> {
2034         let id = if let Some(id) = &disk_cfg.id {
2035             id.clone()
2036         } else {
2037             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
2038             disk_cfg.id = Some(id.clone());
2039             id
2040         };
2041 
2042         info!("Creating virtio-block device: {:?}", disk_cfg);
2043 
2044         let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
2045             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
2046             let vu_cfg = VhostUserConfig {
2047                 socket,
2048                 num_queues: disk_cfg.num_queues,
2049                 queue_size: disk_cfg.queue_size,
2050             };
2051             let vhost_user_block = Arc::new(Mutex::new(
2052                 match virtio_devices::vhost_user::Blk::new(
2053                     id.clone(),
2054                     vu_cfg,
2055                     self.restoring,
2056                     self.seccomp_action.clone(),
2057                     self.exit_evt
2058                         .try_clone()
2059                         .map_err(DeviceManagerError::EventFd)?,
2060                     self.force_iommu,
2061                 ) {
2062                     Ok(vub_device) => vub_device,
2063                     Err(e) => {
2064                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2065                     }
2066                 },
2067             ));
2068 
2069             (
2070                 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2071                 vhost_user_block as Arc<Mutex<dyn Migratable>>,
2072             )
2073         } else {
2074             let mut options = OpenOptions::new();
2075             options.read(true);
2076             options.write(!disk_cfg.readonly);
2077             if disk_cfg.direct {
2078                 options.custom_flags(libc::O_DIRECT);
2079             }
2080             // Open block device path
2081             let mut file: File = options
2082                 .open(
2083                     disk_cfg
2084                         .path
2085                         .as_ref()
2086                         .ok_or(DeviceManagerError::NoDiskPath)?
2087                         .clone(),
2088                 )
2089                 .map_err(DeviceManagerError::Disk)?;
2090             let image_type =
2091                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2092 
2093             let image = match image_type {
2094                 ImageType::FixedVhd => {
2095                     // Use asynchronous backend relying on io_uring if the
2096                     // syscalls are supported.
2097                     if self.io_uring_is_supported() && !disk_cfg.disable_io_uring {
2098                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2099                         Box::new(
2100                             FixedVhdDiskAsync::new(file)
2101                                 .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2102                         ) as Box<dyn DiskFile>
2103                     } else {
2104                         info!("Using synchronous fixed VHD disk file");
2105                         Box::new(
2106                             FixedVhdDiskSync::new(file)
2107                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2108                         ) as Box<dyn DiskFile>
2109                     }
2110                 }
2111                 ImageType::Raw => {
2112                     // Use asynchronous backend relying on io_uring if the
2113                     // syscalls are supported.
2114                     if self.io_uring_is_supported() && !disk_cfg.disable_io_uring {
2115                         info!("Using asynchronous RAW disk file (io_uring)");
2116                         Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2117                     } else {
2118                         info!("Using synchronous RAW disk file");
2119                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2120                     }
2121                 }
2122                 ImageType::Qcow2 => {
2123                     info!("Using synchronous QCOW disk file");
2124                     Box::new(
2125                         QcowDiskSync::new(file, disk_cfg.direct)
2126                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2127                     ) as Box<dyn DiskFile>
2128                 }
2129                 ImageType::Vhdx => {
2130                     info!("Using synchronous VHDX disk file");
2131                     Box::new(
2132                         VhdxDiskSync::new(file)
2133                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2134                     ) as Box<dyn DiskFile>
2135                 }
2136             };
2137 
2138             let virtio_block = Arc::new(Mutex::new(
2139                 virtio_devices::Block::new(
2140                     id.clone(),
2141                     image,
2142                     disk_cfg
2143                         .path
2144                         .as_ref()
2145                         .ok_or(DeviceManagerError::NoDiskPath)?
2146                         .clone(),
2147                     disk_cfg.readonly,
2148                     self.force_iommu | disk_cfg.iommu,
2149                     disk_cfg.num_queues,
2150                     disk_cfg.queue_size,
2151                     self.seccomp_action.clone(),
2152                     disk_cfg.rate_limiter_config,
2153                     self.exit_evt
2154                         .try_clone()
2155                         .map_err(DeviceManagerError::EventFd)?,
2156                 )
2157                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2158             ));
2159 
2160             (
2161                 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2162                 virtio_block as Arc<Mutex<dyn Migratable>>,
2163             )
2164         };
2165 
2166         // Fill the device tree with a new node. In case of restore, we
2167         // know there is nothing to do, so we can simply override the
2168         // existing entry.
2169         self.device_tree
2170             .lock()
2171             .unwrap()
2172             .insert(id.clone(), device_node!(id, migratable_device));
2173 
2174         Ok(MetaVirtioDevice {
2175             virtio_device,
2176             iommu: disk_cfg.iommu,
2177             id,
2178             pci_segment: disk_cfg.pci_segment,
2179             dma_handler: None,
2180         })
2181     }
2182 
2183     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2184         let mut devices = Vec::new();
2185 
2186         let mut block_devices = self.config.lock().unwrap().disks.clone();
2187         if let Some(disk_list_cfg) = &mut block_devices {
2188             for disk_cfg in disk_list_cfg.iter_mut() {
2189                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2190             }
2191         }
2192         self.config.lock().unwrap().disks = block_devices;
2193 
2194         Ok(devices)
2195     }
2196 
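         /// Creates a single network device: a vhost-user-net frontend when requested,
         /// otherwise a virtio-net device backed by an existing TAP interface,
         /// pre-opened TAP fds, or a newly created TAP interface.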
2197     fn make_virtio_net_device(
2198         &mut self,
2199         net_cfg: &mut NetConfig,
2200     ) -> DeviceManagerResult<MetaVirtioDevice> {
2201         let id = if let Some(id) = &net_cfg.id {
2202             id.clone()
2203         } else {
2204             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2205             net_cfg.id = Some(id.clone());
2206             id
2207         };
2208         info!("Creating virtio-net device: {:?}", net_cfg);
2209 
2210         let (virtio_device, migratable_device) = if net_cfg.vhost_user {
2211             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2212             let vu_cfg = VhostUserConfig {
2213                 socket,
2214                 num_queues: net_cfg.num_queues,
2215                 queue_size: net_cfg.queue_size,
2216             };
2217             let server = match net_cfg.vhost_mode {
2218                 VhostMode::Client => false,
2219                 VhostMode::Server => true,
2220             };
2221             let vhost_user_net = Arc::new(Mutex::new(
2222                 match virtio_devices::vhost_user::Net::new(
2223                     id.clone(),
2224                     net_cfg.mac,
2225                     vu_cfg,
2226                     server,
2227                     self.seccomp_action.clone(),
2228                     self.restoring,
2229                     self.exit_evt
2230                         .try_clone()
2231                         .map_err(DeviceManagerError::EventFd)?,
2232                     self.force_iommu,
2233                 ) {
2234                     Ok(vun_device) => vun_device,
2235                     Err(e) => {
2236                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2237                     }
2238                 },
2239             ));
2240 
2241             (
2242                 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2243                 vhost_user_net as Arc<Mutex<dyn Migratable>>,
2244             )
2245         } else {
2246             let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
2247                 Arc::new(Mutex::new(
2248                     virtio_devices::Net::new(
2249                         id.clone(),
2250                         Some(tap_if_name),
2251                         None,
2252                         None,
2253                         Some(net_cfg.mac),
2254                         &mut net_cfg.host_mac,
2255                         self.force_iommu | net_cfg.iommu,
2256                         net_cfg.num_queues,
2257                         net_cfg.queue_size,
2258                         self.seccomp_action.clone(),
2259                         net_cfg.rate_limiter_config,
2260                         self.exit_evt
2261                             .try_clone()
2262                             .map_err(DeviceManagerError::EventFd)?,
2263                     )
2264                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2265                 ))
2266             } else if let Some(fds) = &net_cfg.fds {
2267                 Arc::new(Mutex::new(
2268                     virtio_devices::Net::from_tap_fds(
2269                         id.clone(),
2270                         fds,
2271                         Some(net_cfg.mac),
2272                         self.force_iommu | net_cfg.iommu,
2273                         net_cfg.queue_size,
2274                         self.seccomp_action.clone(),
2275                         net_cfg.rate_limiter_config,
2276                         self.exit_evt
2277                             .try_clone()
2278                             .map_err(DeviceManagerError::EventFd)?,
2279                     )
2280                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2281                 ))
2282             } else {
2283                 Arc::new(Mutex::new(
2284                     virtio_devices::Net::new(
2285                         id.clone(),
2286                         None,
2287                         Some(net_cfg.ip),
2288                         Some(net_cfg.mask),
2289                         Some(net_cfg.mac),
2290                         &mut net_cfg.host_mac,
2291                         self.force_iommu | net_cfg.iommu,
2292                         net_cfg.num_queues,
2293                         net_cfg.queue_size,
2294                         self.seccomp_action.clone(),
2295                         net_cfg.rate_limiter_config,
2296                         self.exit_evt
2297                             .try_clone()
2298                             .map_err(DeviceManagerError::EventFd)?,
2299                     )
2300                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2301                 ))
2302             };
2303 
2304             (
2305                 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2306                 virtio_net as Arc<Mutex<dyn Migratable>>,
2307             )
2308         };
2309 
2310         // Fill the device tree with a new node. In case of restore, we
2311         // know there is nothing to do, so we can simply override the
2312         // existing entry.
2313         self.device_tree
2314             .lock()
2315             .unwrap()
2316             .insert(id.clone(), device_node!(id, migratable_device));
2317 
2318         Ok(MetaVirtioDevice {
2319             virtio_device,
2320             iommu: net_cfg.iommu,
2321             id,
2322             pci_segment: net_cfg.pci_segment,
2323             dma_handler: None,
2324         })
2325     }
2326 
2327     /// Add virtio-net and vhost-user-net devices
2328     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2329         let mut devices = Vec::new();
2330         let mut net_devices = self.config.lock().unwrap().net.clone();
2331         if let Some(net_list_cfg) = &mut net_devices {
2332             for net_cfg in net_list_cfg.iter_mut() {
2333                 devices.push(self.make_virtio_net_device(net_cfg)?);
2334             }
2335         }
2336         self.config.lock().unwrap().net = net_devices;
2337 
2338         Ok(devices)
2339     }
2340 
2341     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2342         let mut devices = Vec::new();
2343 
2344         // Add virtio-rng if required
2345         let rng_config = self.config.lock().unwrap().rng.clone();
2346         if let Some(rng_path) = rng_config.src.to_str() {
2347             info!("Creating virtio-rng device: {:?}", rng_config);
2348             let id = String::from(RNG_DEVICE_NAME);
2349 
2350             let virtio_rng_device = Arc::new(Mutex::new(
2351                 virtio_devices::Rng::new(
2352                     id.clone(),
2353                     rng_path,
2354                     self.force_iommu | rng_config.iommu,
2355                     self.seccomp_action.clone(),
2356                     self.exit_evt
2357                         .try_clone()
2358                         .map_err(DeviceManagerError::EventFd)?,
2359                 )
2360                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2361             ));
2362             devices.push(MetaVirtioDevice {
2363                 virtio_device: Arc::clone(&virtio_rng_device)
2364                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2365                 iommu: rng_config.iommu,
2366                 id: id.clone(),
2367                 pci_segment: 0,
2368                 dma_handler: None,
2369             });
2370 
2371             // Fill the device tree with a new node. In case of restore, we
2372             // know there is nothing to do, so we can simply override the
2373             // existing entry.
2374             self.device_tree
2375                 .lock()
2376                 .unwrap()
2377                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2378         }
2379 
2380         Ok(devices)
2381     }
2382 
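         /// Creates a vhost-user-fs device for the configured socket and tag; fails if
         /// the socket path is missing.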
2383     fn make_virtio_fs_device(
2384         &mut self,
2385         fs_cfg: &mut FsConfig,
2386     ) -> DeviceManagerResult<MetaVirtioDevice> {
2387         let id = if let Some(id) = &fs_cfg.id {
2388             id.clone()
2389         } else {
2390             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2391             fs_cfg.id = Some(id.clone());
2392             id
2393         };
2394 
2395         info!("Creating virtio-fs device: {:?}", fs_cfg);
2396 
2397         let mut node = device_node!(id);
2398 
2399         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2400             let virtio_fs_device = Arc::new(Mutex::new(
2401                 virtio_devices::vhost_user::Fs::new(
2402                     id.clone(),
2403                     fs_socket,
2404                     &fs_cfg.tag,
2405                     fs_cfg.num_queues,
2406                     fs_cfg.queue_size,
2407                     None,
2408                     self.seccomp_action.clone(),
2409                     self.restoring,
2410                     self.exit_evt
2411                         .try_clone()
2412                         .map_err(DeviceManagerError::EventFd)?,
2413                     self.force_iommu,
2414                 )
2415                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2416             ));
2417 
2418             // Update the device tree with the migratable device.
2419             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2420             self.device_tree.lock().unwrap().insert(id.clone(), node);
2421 
2422             Ok(MetaVirtioDevice {
2423                 virtio_device: Arc::clone(&virtio_fs_device)
2424                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2425                 iommu: false,
2426                 id,
2427                 pci_segment: fs_cfg.pci_segment,
2428                 dma_handler: None,
2429             })
2430         } else {
2431             Err(DeviceManagerError::NoVirtioFsSock)
2432         }
2433     }
2434 
2435     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2436         let mut devices = Vec::new();
2437 
2438         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2439         if let Some(fs_list_cfg) = &mut fs_devices {
2440             for fs_cfg in fs_list_cfg.iter_mut() {
2441                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2442             }
2443         }
2444         self.config.lock().unwrap().fs = fs_devices;
2445 
2446         Ok(devices)
2447     }
2448 
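         /// Creates a virtio-pmem device backed by a memory-mapped file, reusing the
         /// MMIO range recorded in the device tree when the device is being restored
         /// and allocating a new 2 MiB aligned range otherwise.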
2449     fn make_virtio_pmem_device(
2450         &mut self,
2451         pmem_cfg: &mut PmemConfig,
2452     ) -> DeviceManagerResult<MetaVirtioDevice> {
2453         let id = if let Some(id) = &pmem_cfg.id {
2454             id.clone()
2455         } else {
2456             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2457             pmem_cfg.id = Some(id.clone());
2458             id
2459         };
2460 
2461         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2462 
2463         let mut node = device_node!(id);
2464 
2465         // Look for the id in the device tree. If it can be found, that means
2466         // the device is being restored; otherwise it's created from scratch.
2467         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2468             info!("Restoring virtio-pmem {} resources", id);
2469 
2470             let mut region_range: Option<(u64, u64)> = None;
2471             for resource in node.resources.iter() {
2472                 match resource {
2473                     Resource::MmioAddressRange { base, size } => {
2474                         if region_range.is_some() {
2475                             return Err(DeviceManagerError::ResourceAlreadyExists);
2476                         }
2477 
2478                         region_range = Some((*base, *size));
2479                     }
2480                     _ => {
2481                         error!("Unexpected resource {:?} for {}", resource, id);
2482                     }
2483                 }
2484             }
2485 
2486             if region_range.is_none() {
2487                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2488             }
2489 
2490             region_range
2491         } else {
2492             None
2493         };
2494 
2495         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2496             if pmem_cfg.size.is_none() {
2497                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2498             }
2499             (O_TMPFILE, true)
2500         } else {
2501             (0, false)
2502         };
2503 
2504         let mut file = OpenOptions::new()
2505             .read(true)
2506             .write(!pmem_cfg.discard_writes)
2507             .custom_flags(custom_flags)
2508             .open(&pmem_cfg.file)
2509             .map_err(DeviceManagerError::PmemFileOpen)?;
2510 
2511         let size = if let Some(size) = pmem_cfg.size {
2512             if set_len {
2513                 file.set_len(size)
2514                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2515             }
2516             size
2517         } else {
2518             file.seek(SeekFrom::End(0))
2519                 .map_err(DeviceManagerError::PmemFileSetLen)?
2520         };
2521 
2522         if size % 0x20_0000 != 0 {
2523             return Err(DeviceManagerError::PmemSizeNotAligned);
2524         }
2525 
2526         let (region_base, region_size) = if let Some((base, size)) = region_range {
2527             // The memory needs to be 2MiB aligned in order to support
2528             // hugepages.
2529             self.pci_segments[pmem_cfg.pci_segment as usize]
2530                 .allocator
2531                 .lock()
2532                 .unwrap()
2533                 .allocate(
2534                     Some(GuestAddress(base)),
2535                     size as GuestUsize,
2536                     Some(0x0020_0000),
2537                 )
2538                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2539 
2540             (base, size)
2541         } else {
2542             // The memory needs to be 2MiB aligned in order to support
2543             // hugepages.
2544             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2545                 .allocator
2546                 .lock()
2547                 .unwrap()
2548                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
2549                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2550 
2551             (base.raw_value(), size)
2552         };
2553 
2554         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2555         let mmap_region = MmapRegion::build(
2556             Some(FileOffset::new(cloned_file, 0)),
2557             region_size as usize,
2558             PROT_READ | PROT_WRITE,
2559             MAP_NORESERVE
2560                 | if pmem_cfg.discard_writes {
2561                     MAP_PRIVATE
2562                 } else {
2563                     MAP_SHARED
2564                 },
2565         )
2566         .map_err(DeviceManagerError::NewMmapRegion)?;
2567         let host_addr: u64 = mmap_region.as_ptr() as u64;
2568 
2569         let mem_slot = self
2570             .memory_manager
2571             .lock()
2572             .unwrap()
2573             .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
2574             .map_err(DeviceManagerError::MemoryManager)?;
2575 
2576         let mapping = virtio_devices::UserspaceMapping {
2577             host_addr,
2578             mem_slot,
2579             addr: GuestAddress(region_base),
2580             len: region_size,
2581             mergeable: false,
2582         };
2583 
2584         let virtio_pmem_device = Arc::new(Mutex::new(
2585             virtio_devices::Pmem::new(
2586                 id.clone(),
2587                 file,
2588                 GuestAddress(region_base),
2589                 mapping,
2590                 mmap_region,
2591                 self.force_iommu | pmem_cfg.iommu,
2592                 self.seccomp_action.clone(),
2593                 self.exit_evt
2594                     .try_clone()
2595                     .map_err(DeviceManagerError::EventFd)?,
2596             )
2597             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2598         ));
2599 
2600         // Update the device tree with correct resource information and with
2601         // the migratable device.
2602         node.resources.push(Resource::MmioAddressRange {
2603             base: region_base,
2604             size: region_size,
2605         });
2606         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2607         self.device_tree.lock().unwrap().insert(id.clone(), node);
2608 
2609         Ok(MetaVirtioDevice {
2610             virtio_device: Arc::clone(&virtio_pmem_device)
2611                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2612             iommu: pmem_cfg.iommu,
2613             id,
2614             pci_segment: pmem_cfg.pci_segment,
2615             dma_handler: None,
2616         })
2617     }
2618 
2619     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2620         let mut devices = Vec::new();
2621         // Add virtio-pmem if required
2622         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2623         if let Some(pmem_list_cfg) = &mut pmem_devices {
2624             for pmem_cfg in pmem_list_cfg.iter_mut() {
2625                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2626             }
2627         }
2628         self.config.lock().unwrap().pmem = pmem_devices;
2629 
2630         Ok(devices)
2631     }
2632 
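         /// Creates a virtio-vsock device bound to the configured CID and Unix socket
         /// backend.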
2633     fn make_virtio_vsock_device(
2634         &mut self,
2635         vsock_cfg: &mut VsockConfig,
2636     ) -> DeviceManagerResult<MetaVirtioDevice> {
2637         let id = if let Some(id) = &vsock_cfg.id {
2638             id.clone()
2639         } else {
2640             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2641             vsock_cfg.id = Some(id.clone());
2642             id
2643         };
2644 
2645         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2646 
2647         let socket_path = vsock_cfg
2648             .socket
2649             .to_str()
2650             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2651         let backend =
2652             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2653                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2654 
2655         let vsock_device = Arc::new(Mutex::new(
2656             virtio_devices::Vsock::new(
2657                 id.clone(),
2658                 vsock_cfg.cid,
2659                 vsock_cfg.socket.clone(),
2660                 backend,
2661                 self.force_iommu | vsock_cfg.iommu,
2662                 self.seccomp_action.clone(),
2663                 self.exit_evt
2664                     .try_clone()
2665                     .map_err(DeviceManagerError::EventFd)?,
2666             )
2667             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2668         ));
2669 
2670         // Fill the device tree with a new node. In case of restore, we
2671         // know there is nothing to do, so we can simply override the
2672         // existing entry.
2673         self.device_tree
2674             .lock()
2675             .unwrap()
2676             .insert(id.clone(), device_node!(id, vsock_device));
2677 
2678         Ok(MetaVirtioDevice {
2679             virtio_device: Arc::clone(&vsock_device)
2680                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2681             iommu: vsock_cfg.iommu,
2682             id,
2683             pci_segment: vsock_cfg.pci_segment,
2684             dma_handler: None,
2685         })
2686     }
2687 
2688     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2689         let mut devices = Vec::new();
2690 
2691         let mut vsock = self.config.lock().unwrap().vsock.clone();
2692         if let Some(ref mut vsock_cfg) = &mut vsock {
2693             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
2694         }
2695         self.config.lock().unwrap().vsock = vsock;
2696 
2697         Ok(devices)
2698     }
2699 
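         /// Creates one virtio-mem device per memory zone that has a virtio-mem
         /// region, wiring each device to its NUMA node and resize handler.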
2700     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2701         let mut devices = Vec::new();
2702 
2703         let mm = self.memory_manager.clone();
2704         let mm = mm.lock().unwrap();
2705         for (memory_zone_id, memory_zone) in mm.memory_zones().iter() {
2706             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone() {
2707                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
2708 
2709                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
2710                     .map(|i| i as u16);
2711 
2712                 let virtio_mem_device = Arc::new(Mutex::new(
2713                     virtio_devices::Mem::new(
2714                         memory_zone_id.clone(),
2715                         virtio_mem_zone.region(),
2716                         virtio_mem_zone
2717                             .resize_handler()
2718                             .new_resize_sender()
2719                             .map_err(DeviceManagerError::CreateResizeSender)?,
2720                         self.seccomp_action.clone(),
2721                         node_id,
2722                         virtio_mem_zone.hotplugged_size(),
2723                         virtio_mem_zone.hugepages(),
2724                         self.exit_evt
2725                             .try_clone()
2726                             .map_err(DeviceManagerError::EventFd)?,
2727                         virtio_mem_zone.blocks_state().clone(),
2728                     )
2729                     .map_err(DeviceManagerError::CreateVirtioMem)?,
2730                 ));
2731 
2732                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
2733 
2734                 devices.push(MetaVirtioDevice {
2735                     virtio_device: Arc::clone(&virtio_mem_device)
2736                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2737                     iommu: false,
2738                     id: memory_zone_id.clone(),
2739                     pci_segment: 0,
2740                     dma_handler: None,
2741                 });
2742 
2743                 // Fill the device tree with a new node. In case of restore, we
2744                 // know there is nothing to do, so we can simply override the
2745                 // existing entry.
2746                 self.device_tree.lock().unwrap().insert(
2747                     memory_zone_id.clone(),
2748                     device_node!(memory_zone_id, virtio_mem_device),
2749                 );
2750             }
2751         }
2752 
2753         Ok(devices)
2754     }
2755 
2756     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2757         let mut devices = Vec::new();
2758 
2759         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
2760             let id = String::from(BALLOON_DEVICE_NAME);
2761             info!("Creating virtio-balloon device: id = {}", id);
2762 
2763             let virtio_balloon_device = Arc::new(Mutex::new(
2764                 virtio_devices::Balloon::new(
2765                     id.clone(),
2766                     balloon_config.size,
2767                     balloon_config.deflate_on_oom,
2768                     balloon_config.free_page_reporting,
2769                     self.seccomp_action.clone(),
2770                     self.exit_evt
2771                         .try_clone()
2772                         .map_err(DeviceManagerError::EventFd)?,
2773                 )
2774                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
2775             ));
2776 
2777             self.balloon = Some(virtio_balloon_device.clone());
2778 
2779             devices.push(MetaVirtioDevice {
2780                 virtio_device: Arc::clone(&virtio_balloon_device)
2781                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2782                 iommu: false,
2783                 id: id.clone(),
2784                 pci_segment: 0,
2785                 dma_handler: None,
2786             });
2787 
2788             self.device_tree
2789                 .lock()
2790                 .unwrap()
2791                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
2792         }
2793 
2794         Ok(devices)
2795     }
2796 
2797     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2798         let mut devices = Vec::new();
2799 
2800         if !self.config.lock().unwrap().watchdog {
2801             return Ok(devices);
2802         }
2803 
2804         let id = String::from(WATCHDOG_DEVICE_NAME);
2805         info!("Creating virtio-watchdog device: id = {}", id);
2806 
2807         let virtio_watchdog_device = Arc::new(Mutex::new(
2808             virtio_devices::Watchdog::new(
2809                 id.clone(),
2810                 self.reset_evt.try_clone().unwrap(),
2811                 self.seccomp_action.clone(),
2812                 self.exit_evt
2813                     .try_clone()
2814                     .map_err(DeviceManagerError::EventFd)?,
2815             )
2816             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
2817         ));
2818         devices.push(MetaVirtioDevice {
2819             virtio_device: Arc::clone(&virtio_watchdog_device)
2820                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2821             iommu: false,
2822             id: id.clone(),
2823             pci_segment: 0,
2824             dma_handler: None,
2825         });
2826 
2827         self.device_tree
2828             .lock()
2829             .unwrap()
2830             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
2831 
2832         Ok(devices)
2833     }
2834 
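         /// Builds a vDPA device from its configuration, allocating a device
         /// name if none was provided, along with the VdpaDmaMapping handler
         /// the device relies on for DMA map/unmap operations.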
2835     fn make_vdpa_device(
2836         &mut self,
2837         vdpa_cfg: &mut VdpaConfig,
2838     ) -> DeviceManagerResult<MetaVirtioDevice> {
2839         let id = if let Some(id) = &vdpa_cfg.id {
2840             id.clone()
2841         } else {
2842             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
2843             vdpa_cfg.id = Some(id.clone());
2844             id
2845         };
2846 
2847         info!("Creating vDPA device: {:?}", vdpa_cfg);
2848 
2849         let device_path = vdpa_cfg
2850             .path
2851             .to_str()
2852             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
2853 
2854         let vdpa_device = Arc::new(Mutex::new(
2855             virtio_devices::Vdpa::new(
2856                 id.clone(),
2857                 device_path,
2858                 self.memory_manager.lock().unwrap().guest_memory(),
2859                 vdpa_cfg.num_queues as u16,
2860             )
2861             .map_err(DeviceManagerError::CreateVdpa)?,
2862         ));
2863 
2864         // Create the DMA handler that is required by the vDPA device
2865         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
2866             Arc::clone(&vdpa_device),
2867             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
2868         ));
2869 
2870         self.device_tree
2871             .lock()
2872             .unwrap()
2873             .insert(id.clone(), device_node!(id));
2874 
2875         Ok(MetaVirtioDevice {
2876             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2877             iommu: vdpa_cfg.iommu,
2878             id,
2879             pci_segment: vdpa_cfg.pci_segment,
2880             dma_handler: Some(vdpa_mapping),
2881         })
2882     }
2883 
2884     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2885         let mut devices = Vec::new();
2886         // Add vdpa if required
2887         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
2888         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
2889             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
2890                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
2891             }
2892         }
2893         self.config.lock().unwrap().vdpa = vdpa_devices;
2894 
2895         Ok(devices)
2896     }
2897 
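         /// Generates a unique device name by appending a wrapping counter to
         /// the given prefix, skipping names already present in the boot id
         /// list or in the device tree. Fails only once the counter has gone
         /// through a full loop without finding a free name.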
2898     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
2899         let start_id = self.device_id_cnt;
2900         loop {
2901             // Generate the temporary name.
2902             let name = format!("{}{}", prefix, self.device_id_cnt);
2903             // Increment the counter.
2904             self.device_id_cnt += Wrapping(1);
2905             // Check if the name is already in use.
2906             if !self.boot_id_list.contains(&name)
2907                 && !self.device_tree.lock().unwrap().contains_key(&name)
2908             {
2909                 return Ok(name);
2910             }
2911 
2912             if self.device_id_cnt == start_id {
2913                 // We went through a full loop and there's nothing else we can
2914                 // do.
2915                 break;
2916             }
2917         }
2918         Err(DeviceManagerError::NoAvailableDeviceName)
2919     }
2920 
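         /// Lazily creates the hypervisor passthrough device on first use
         /// (it is shared by all passthrough devices), then defers to
         /// add_vfio_device() for the actual VFIO plumbing.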
2921     fn add_passthrough_device(
2922         &mut self,
2923         device_cfg: &mut DeviceConfig,
2924     ) -> DeviceManagerResult<(PciBdf, String)> {
2925         // If the passthrough device has not been created yet, it is created
2926         // here and stored in the DeviceManager structure for later use.
2927         if self.passthrough_device.is_none() {
2928             self.passthrough_device = Some(
2929                 self.address_manager
2930                     .vm
2931                     .create_passthrough_device()
2932                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
2933             );
2934         }
2935 
2936         self.add_vfio_device(device_cfg)
2937     }
2938 
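         /// Creates a VfioContainer backed by a dup() of the passthrough
         /// device file descriptor, so that the container keeps a valid fd
         /// even if the DeviceManager and its passthrough_device are dropped.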
2939     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
2940         let passthrough_device = self
2941             .passthrough_device
2942             .as_ref()
2943             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
2944 
2945         // SAFETY: we know the RawFd is valid.
2946         //
2947         // This dup() is mandatory to be able to give full ownership of the
2948         // file descriptor to the DeviceFd::from_raw_fd() function later in
2949         // the code.
2950         //
2951         // This is particularly needed so that VfioContainer will still have
2952         // a valid file descriptor even if DeviceManager, and therefore the
2953         // passthrough_device are dropped. In case of Drop, the file descriptor
2954         // would be closed, but Linux would still have the duplicated file
2955         // descriptor opened from DeviceFd, preventing unexpected behavior
2956         // where the VfioContainer would try to use a closed file descriptor.
2957         let dup_device_fd = unsafe { libc::dup(passthrough_device.as_raw_fd()) };
2958         if dup_device_fd == -1 {
2959             return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
2960         }
2961 
2962         assert!(passthrough_device.as_any().is::<DeviceFd>());
2963 
2964         // SAFETY: the raw fd conversion here is safe because:
2965         //   1. When running on KVM or MSHV, passthrough_device wraps around DeviceFd.
2966         //   2. The conversion here extracts the raw fd and then turns the raw fd into a DeviceFd
2967         //      of the same (correct) type.
2968         Ok(Arc::new(
2969             VfioContainer::new(Arc::new(unsafe { DeviceFd::from_raw_fd(dup_device_fd) }))
2970                 .map_err(DeviceManagerError::VfioCreate)?,
2971         ))
2972     }
2973 
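         /// Adds a VFIO PCI device: allocates (or restores) its BDF and
         /// resources, selects the shared VFIO container or creates a
         /// dedicated one for the vIOMMU case, maps guest memory for DMA when
         /// needed, and plugs the resulting VfioPciDevice into the PCI bus
         /// and the device tree.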
2974     fn add_vfio_device(
2975         &mut self,
2976         device_cfg: &mut DeviceConfig,
2977     ) -> DeviceManagerResult<(PciBdf, String)> {
2978         let vfio_name = if let Some(id) = &device_cfg.id {
2979             id.clone()
2980         } else {
2981             let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
2982             device_cfg.id = Some(id.clone());
2983             id
2984         };
2985 
2986         let (pci_segment_id, pci_device_bdf, resources) =
2987             self.pci_resources(&vfio_name, device_cfg.pci_segment)?;
2988 
2989         let mut needs_dma_mapping = false;
2990 
2991         // Here we create a new VFIO container for one of two reasons: either
2992         // this is the first VFIO device, meaning we need a new VFIO container
2993         // that will be shared with other VFIO devices, or the new VFIO device
2994         // is attached to a vIOMMU, meaning we must create a dedicated VFIO
2995         // container. In the vIOMMU use case, we can't put all devices under
2996         // the same VFIO container since we couldn't map/unmap memory for each
2997         // device. That's simply because the map/unmap operations happen at the
2998         // VFIO container level.
2999         let vfio_container = if device_cfg.iommu {
3000             let vfio_container = self.create_vfio_container()?;
3001 
3002             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3003                 Arc::clone(&vfio_container),
3004                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3005             ));
3006 
3007             if let Some(iommu) = &self.iommu_device {
3008                 iommu
3009                     .lock()
3010                     .unwrap()
3011                     .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
3012             } else {
3013                 return Err(DeviceManagerError::MissingVirtualIommu);
3014             }
3015 
3016             vfio_container
3017         } else if let Some(vfio_container) = &self.vfio_container {
3018             Arc::clone(vfio_container)
3019         } else {
3020             let vfio_container = self.create_vfio_container()?;
3021             needs_dma_mapping = true;
3022             self.vfio_container = Some(Arc::clone(&vfio_container));
3023 
3024             vfio_container
3025         };
3026 
3027         let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
3028             .map_err(DeviceManagerError::VfioCreate)?;
3029 
3030         if needs_dma_mapping {
3031             // Register DMA mapping in IOMMU.
3032             // Do not register virtio-mem regions, as they are handled directly by
3033             // the virtio-mem device itself.
3034             for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3035                 for region in zone.regions() {
3036                     vfio_container
3037                         .vfio_dma_map(
3038                             region.start_addr().raw_value(),
3039                             region.len() as u64,
3040                             region.as_ptr() as u64,
3041                         )
3042                         .map_err(DeviceManagerError::VfioDmaMap)?;
3043                 }
3044             }
3045 
3046             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3047                 Arc::clone(&vfio_container),
3048                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3049             ));
3050 
3051             for virtio_mem_device in self.virtio_mem_devices.iter() {
3052                 virtio_mem_device
3053                     .lock()
3054                     .unwrap()
3055                     .add_dma_mapping_handler(
3056                         VirtioMemMappingSource::Container,
3057                         vfio_mapping.clone(),
3058                     )
3059                     .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3060             }
3061         }
3062 
3063         let legacy_interrupt_group =
3064             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3065                 Some(
3066                     legacy_interrupt_manager
3067                         .create_group(LegacyIrqGroupConfig {
3068                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3069                                 [pci_device_bdf.device() as usize]
3070                                 as InterruptIndex,
3071                         })
3072                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3073                 )
3074             } else {
3075                 None
3076             };
3077 
3078         let memory_manager = self.memory_manager.clone();
3079 
3080         let vfio_pci_device = VfioPciDevice::new(
3081             vfio_name.clone(),
3082             &self.address_manager.vm,
3083             vfio_device,
3084             vfio_container,
3085             self.msi_interrupt_manager.clone(),
3086             legacy_interrupt_group,
3087             device_cfg.iommu,
3088             pci_device_bdf,
3089             self.restoring,
3090             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3091         )
3092         .map_err(DeviceManagerError::VfioPciCreate)?;
3093 
3094         let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));
3095 
3096         let new_resources = self.add_pci_device(
3097             vfio_pci_device.clone(),
3098             vfio_pci_device.clone(),
3099             pci_segment_id,
3100             pci_device_bdf,
3101             resources,
3102         )?;
3103 
3104         // When restoring a VM, the restore codepath will take care of mapping
3105         // the MMIO regions based on the information from the snapshot.
3106         if !self.restoring {
3107             vfio_pci_device
3108                 .lock()
3109                 .unwrap()
3110                 .map_mmio_regions()
3111                 .map_err(DeviceManagerError::VfioMapRegion)?;
3112         }
3113 
3114         let mut node = device_node!(vfio_name, vfio_pci_device);
3115 
3116         // Update the device tree with correct resource information.
3117         node.resources = new_resources;
3118         node.pci_bdf = Some(pci_device_bdf);
3119         node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));
3120 
3121         self.device_tree
3122             .lock()
3123             .unwrap()
3124             .insert(vfio_name.clone(), node);
3125 
3126         Ok((pci_device_bdf, vfio_name))
3127     }
3128 
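         /// Common PCI plumbing: allocates the device BARs, adds the device
         /// to its PCI segment bus, registers the BAR mappings on the MMIO
         /// bus (and the I/O bus on x86_64), and returns the resulting
         /// resources so they can be recorded in the device tree.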
3129     fn add_pci_device(
3130         &mut self,
3131         bus_device: Arc<Mutex<dyn BusDevice>>,
3132         pci_device: Arc<Mutex<dyn PciDevice>>,
3133         segment_id: u16,
3134         bdf: PciBdf,
3135         resources: Option<Vec<Resource>>,
3136     ) -> DeviceManagerResult<Vec<Resource>> {
3137         let bars = pci_device
3138             .lock()
3139             .unwrap()
3140             .allocate_bars(
3141                 &self.address_manager.allocator,
3142                 &mut self.pci_segments[segment_id as usize]
3143                     .allocator
3144                     .lock()
3145                     .unwrap(),
3146                 resources,
3147             )
3148             .map_err(DeviceManagerError::AllocateBars)?;
3149 
3150         let mut pci_bus = self.pci_segments[segment_id as usize]
3151             .pci_bus
3152             .lock()
3153             .unwrap();
3154 
3155         pci_bus
3156             .add_device(bdf.device() as u32, pci_device)
3157             .map_err(DeviceManagerError::AddPciDevice)?;
3158 
3159         self.bus_devices.push(Arc::clone(&bus_device));
3160 
3161         pci_bus
3162             .register_mapping(
3163                 bus_device,
3164                 #[cfg(target_arch = "x86_64")]
3165                 self.address_manager.io_bus.as_ref(),
3166                 self.address_manager.mmio_bus.as_ref(),
3167                 bars.clone(),
3168             )
3169             .map_err(DeviceManagerError::AddPciDevice)?;
3170 
3171         let mut new_resources = Vec::new();
3172         for bar in bars {
3173             new_resources.push(Resource::PciBar {
3174                 index: bar.idx(),
3175                 base: bar.addr(),
3176                 size: bar.size(),
3177                 type_: bar.region_type().into(),
3178                 prefetchable: bar.prefetchable().into(),
3179             });
3180         }
3181 
3182         Ok(new_resources)
3183     }
3184 
3185     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3186         let mut iommu_attached_device_ids = Vec::new();
3187         let mut devices = self.config.lock().unwrap().devices.clone();
3188 
3189         if let Some(device_list_cfg) = &mut devices {
3190             for device_cfg in device_list_cfg.iter_mut() {
3191                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3192                 if device_cfg.iommu && self.iommu_device.is_some() {
3193                     iommu_attached_device_ids.push(device_id);
3194                 }
3195             }
3196         }
3197 
3198         // Update the list of devices
3199         self.config.lock().unwrap().devices = devices;
3200 
3201         Ok(iommu_attached_device_ids)
3202     }
3203 
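         /// Adds a vfio-user PCI device: connects to the backend socket, sets
         /// up DMA mappings for the existing memory zones and virtio-mem
         /// devices, and plugs the resulting VfioUserPciDevice into the PCI
         /// bus and the device tree.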
3204     fn add_vfio_user_device(
3205         &mut self,
3206         device_cfg: &mut UserDeviceConfig,
3207     ) -> DeviceManagerResult<(PciBdf, String)> {
3208         let vfio_user_name = if let Some(id) = &device_cfg.id {
3209             id.clone()
3210         } else {
3211             let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
3212             device_cfg.id = Some(id.clone());
3213             id
3214         };
3215 
3216         let (pci_segment_id, pci_device_bdf, resources) =
3217             self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;
3218 
3219         let legacy_interrupt_group =
3220             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3221                 Some(
3222                     legacy_interrupt_manager
3223                         .create_group(LegacyIrqGroupConfig {
3224                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3225                                 [pci_device_bdf.device() as usize]
3226                                 as InterruptIndex,
3227                         })
3228                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3229                 )
3230             } else {
3231                 None
3232             };
3233 
3234         let client = Arc::new(Mutex::new(
3235             vfio_user::Client::new(&device_cfg.socket)
3236                 .map_err(DeviceManagerError::VfioUserCreateClient)?,
3237         ));
3238 
3239         let memory_manager = self.memory_manager.clone();
3240 
3241         let mut vfio_user_pci_device = VfioUserPciDevice::new(
3242             vfio_user_name.clone(),
3243             &self.address_manager.vm,
3244             client.clone(),
3245             self.msi_interrupt_manager.clone(),
3246             legacy_interrupt_group,
3247             pci_device_bdf,
3248             self.restoring,
3249             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3250         )
3251         .map_err(DeviceManagerError::VfioUserCreate)?;
3252 
3253         let memory = self.memory_manager.lock().unwrap().guest_memory();
3254         let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
3255         for virtio_mem_device in self.virtio_mem_devices.iter() {
3256             virtio_mem_device
3257                 .lock()
3258                 .unwrap()
3259                 .add_dma_mapping_handler(
3260                     VirtioMemMappingSource::Device(pci_device_bdf.into()),
3261                     vfio_user_mapping.clone(),
3262                 )
3263                 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3264         }
3265 
3266         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3267             for region in zone.regions() {
3268                 vfio_user_pci_device
3269                     .dma_map(region)
3270                     .map_err(DeviceManagerError::VfioUserDmaMap)?;
3271             }
3272         }
3273 
3274         let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));
3275 
3276         let new_resources = self.add_pci_device(
3277             vfio_user_pci_device.clone(),
3278             vfio_user_pci_device.clone(),
3279             pci_segment_id,
3280             pci_device_bdf,
3281             resources,
3282         )?;
3283 
3284         // When restoring a VM, the restore codepath will take care of mapping
3285         // the MMIO regions based on the information from the snapshot.
3286         if !self.restoring {
3287             // Note that 'add_pci_device()' must have been called beforehand so that
3288             // the list of MMIO regions is provisioned correctly.
3289             vfio_user_pci_device
3290                 .lock()
3291                 .unwrap()
3292                 .map_mmio_regions()
3293                 .map_err(DeviceManagerError::VfioUserMapRegion)?;
3294         }
3295 
3296         let mut node = device_node!(vfio_user_name, vfio_user_pci_device);
3297 
3298         // Update the device tree with correct resource information.
3299         node.resources = new_resources;
3300         node.pci_bdf = Some(pci_device_bdf);
3301         node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));
3302 
3303         self.device_tree
3304             .lock()
3305             .unwrap()
3306             .insert(vfio_user_name.clone(), node);
3307 
3308         Ok((pci_device_bdf, vfio_user_name))
3309     }
3310 
3311     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3312         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3313 
3314         if let Some(device_list_cfg) = &mut user_devices {
3315             for device_cfg in device_list_cfg.iter_mut() {
3316                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3317             }
3318         }
3319 
3320         // Update the list of devices
3321         self.config.lock().unwrap().user_devices = user_devices;
3322 
3323         Ok(vec![])
3324     }
3325 
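         /// Wraps a virtio device into a virtio-pci transport: allocates (or
         /// restores) its BDF and resources, sets up optional vIOMMU address
         /// translation and DMA mappings, registers the ioeventfds, and
         /// records the new virtio-pci node in the device tree.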
3326     fn add_virtio_pci_device(
3327         &mut self,
3328         virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3329         iommu_mapping: &Option<Arc<IommuMapping>>,
3330         virtio_device_id: String,
3331         pci_segment_id: u16,
3332         dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
3333     ) -> DeviceManagerResult<PciBdf> {
3334         let id = format!("{}-{}", VIRTIO_PCI_DEVICE_NAME_PREFIX, virtio_device_id);
3335 
3336         // Add the new virtio-pci node to the device tree.
3337         let mut node = device_node!(id);
3338         node.children = vec![virtio_device_id.clone()];
3339 
3340         let (pci_segment_id, pci_device_bdf, resources) =
3341             self.pci_resources(&id, pci_segment_id)?;
3342 
3343         // Update the existing virtio node by setting the parent.
3344         if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
3345             node.parent = Some(id.clone());
3346         } else {
3347             return Err(DeviceManagerError::MissingNode);
3348         }
3349 
3350         // Allocate one MSI-X vector per queue, plus one extra vector
3351         // dedicated to notifying the driver about a virtio configuration
3352         // change.
3353         let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;
3354 
3355         // Create the AccessPlatform trait from the implementation IommuMapping.
3356         // Create the AccessPlatform object from the IommuMapping implementation.
3357         // behind a vIOMMU.
3358         let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
3359         {
3360             Some(Arc::new(AccessPlatformMapping::new(
3361                 pci_device_bdf.into(),
3362                 mapping.clone(),
3363             )))
3364         } else {
3365             None
3366         };
3367 
3368         let memory = self.memory_manager.lock().unwrap().guest_memory();
3369 
3370         // If a DMA handler is available, either register it with the virtual
3371         // IOMMU the device is attached to, or map the DMA ranges directly.
3372         if let Some(dma_handler) = &dma_handler {
3373             if iommu_mapping.is_some() {
3374                 if let Some(iommu) = &self.iommu_device {
3375                     iommu
3376                         .lock()
3377                         .unwrap()
3378                         .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
3379                 } else {
3380                     return Err(DeviceManagerError::MissingVirtualIommu);
3381                 }
3382             } else {
3383                 // Let every virtio-mem device handle the DMA map/unmap through the
3384                 // DMA handler provided.
3385                 for virtio_mem_device in self.virtio_mem_devices.iter() {
3386                     virtio_mem_device
3387                         .lock()
3388                         .unwrap()
3389                         .add_dma_mapping_handler(
3390                             VirtioMemMappingSource::Device(pci_device_bdf.into()),
3391                             dma_handler.clone(),
3392                         )
3393                         .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3394                 }
3395 
3396                 // Do not register virtio-mem regions, as they are handled directly by
3397                 // virtio-mem devices.
3398                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3399                     for region in zone.regions() {
3400                         let gpa = region.start_addr().0;
3401                         let size = region.len();
3402                         dma_handler
3403                             .map(gpa, gpa, size)
3404                             .map_err(DeviceManagerError::VirtioDmaMap)?;
3405                     }
3406                 }
3407             }
3408         }
3409 
3410         let device_type = virtio_device.lock().unwrap().device_type();
3411         let virtio_pci_device = Arc::new(Mutex::new(
3412             VirtioPciDevice::new(
3413                 id.clone(),
3414                 memory,
3415                 virtio_device,
3416                 msix_num,
3417                 access_platform,
3418                 &self.msi_interrupt_manager,
3419                 pci_device_bdf.into(),
3420                 self.activate_evt
3421                     .try_clone()
3422                     .map_err(DeviceManagerError::EventFd)?,
3423                 // All device types *except* virtio block devices should be allocated a 64-bit BAR.
3424                 // Block devices are given a 32-bit BAR so that they are easily accessible
3425                 // to firmware without requiring excessive identity mapping,
3426                 // unless they sit on a non-default PCI segment.
3427                 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
3428                 dma_handler,
3429                 self.pending_activations.clone(),
3430             )
3431             .map_err(DeviceManagerError::VirtioDevice)?,
3432         ));
3433 
3434         let new_resources = self.add_pci_device(
3435             virtio_pci_device.clone(),
3436             virtio_pci_device.clone(),
3437             pci_segment_id,
3438             pci_device_bdf,
3439             resources,
3440         )?;
3441 
3442         let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3443         for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3444             let io_addr = IoEventAddress::Mmio(addr);
3445             self.address_manager
3446                 .vm
3447                 .register_ioevent(event, &io_addr, None)
3448                 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
3449         }
3450 
3451         // Update the device tree with correct resource information.
3452         node.resources = new_resources;
3453         node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
3454         node.pci_bdf = Some(pci_device_bdf);
3455         node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
3456         self.device_tree.lock().unwrap().insert(id, node);
3457 
3458         Ok(pci_device_bdf)
3459     }
3460 
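         /// Returns the PCI segment id, the BDF and, when the device is being
         /// restored, the resources previously recorded in the device tree;
         /// otherwise a fresh BDF is allocated on the requested segment.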
3461     fn pci_resources(
3462         &self,
3463         id: &str,
3464         pci_segment_id: u16,
3465     ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
3466         // Look for the id in the device tree. If it can be found, that means
3467         // the device is being restored, otherwise it's created from scratch.
3468         Ok(
3469             if let Some(node) = self.device_tree.lock().unwrap().get(id) {
3470                 info!("Restoring virtio-pci {} resources", id);
3471                 let pci_device_bdf: PciBdf = node
3472                     .pci_bdf
3473                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3474                 let pci_segment_id = pci_device_bdf.segment();
3475 
3476                 self.pci_segments[pci_segment_id as usize]
3477                     .pci_bus
3478                     .lock()
3479                     .unwrap()
3480                     .get_device_id(pci_device_bdf.device() as usize)
3481                     .map_err(DeviceManagerError::GetPciDeviceId)?;
3482 
3483                 (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
3484             } else {
3485                 let pci_device_bdf =
3486                     self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3487 
3488                 (pci_segment_id, pci_device_bdf, None)
3489             },
3490         )
3491     }
3492 
3493     #[cfg(target_arch = "x86_64")]
3494     pub fn io_bus(&self) -> &Arc<Bus> {
3495         &self.address_manager.io_bus
3496     }
3497 
3498     pub fn mmio_bus(&self) -> &Arc<Bus> {
3499         &self.address_manager.mmio_bus
3500     }
3501 
3502     pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
3503         &self.address_manager.allocator
3504     }
3505 
3506     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3507         self.interrupt_controller
3508             .as_ref()
3509             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3510     }
3511 
3512     #[cfg(target_arch = "x86_64")]
3513     // Used to provide a fast path for handling PIO exits
3514     pub fn pci_config_io(&self) -> Arc<Mutex<PciConfigIo>> {
3515         Arc::clone(self.pci_segments[0].pci_config_io.as_ref().unwrap())
3516     }
3517 
3518     pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
3519         &self.pci_segments
3520     }
3521 
3522     pub fn console(&self) -> &Arc<Console> {
3523         &self.console
3524     }
3525 
3526     #[cfg(target_arch = "aarch64")]
3527     pub fn cmdline_additions(&self) -> &[String] {
3528         self.cmdline_additions.as_slice()
3529     }
3530 
3531     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3532         for handle in self.virtio_devices.iter() {
3533             handle
3534                 .virtio_device
3535                 .lock()
3536                 .unwrap()
3537                 .add_memory_region(new_region)
3538                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3539 
3540             if let Some(dma_handler) = &handle.dma_handler {
3541                 if !handle.iommu {
3542                     let gpa = new_region.start_addr().0;
3543                     let size = new_region.len();
3544                     dma_handler
3545                         .map(gpa, gpa, size)
3546                         .map_err(DeviceManagerError::VirtioDmaMap)?;
3547                 }
3548             }
3549         }
3550 
3551         // Take care of updating the memory for VFIO PCI devices.
3552         if let Some(vfio_container) = &self.vfio_container {
3553             vfio_container
3554                 .vfio_dma_map(
3555                     new_region.start_addr().raw_value(),
3556                     new_region.len() as u64,
3557                     new_region.as_ptr() as u64,
3558                 )
3559                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3560         }
3561 
3562         // Take care of updating the memory for vfio-user devices.
3563         {
3564             let device_tree = self.device_tree.lock().unwrap();
3565             for pci_device_node in device_tree.pci_devices() {
3566                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
3567                     .pci_device_handle
3568                     .as_ref()
3569                     .ok_or(DeviceManagerError::MissingPciDevice)?
3570                 {
3571                     vfio_user_pci_device
3572                         .lock()
3573                         .unwrap()
3574                         .dma_map(new_region)
3575                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
3576                 }
3577             }
3578         }
3579 
3580         Ok(())
3581     }
3582 
3583     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3584         for mut activator in self.pending_activations.lock().unwrap().drain(..) {
3585             activator
3586                 .activate()
3587                 .map_err(DeviceManagerError::VirtioActivate)?;
3588         }
3589         Ok(())
3590     }
3591 
3592     pub fn notify_hotplug(
3593         &self,
3594         notification_type: AcpiNotificationFlags,
3595     ) -> DeviceManagerResult<()> {
3596         return self
3597             .ged_notification_device
3598             .as_ref()
3599             .unwrap()
3600             .lock()
3601             .unwrap()
3602             .notify(notification_type)
3603             .map_err(DeviceManagerError::HotPlugNotification);
3604     }
3605 
3606     pub fn add_device(
3607         &mut self,
3608         device_cfg: &mut DeviceConfig,
3609     ) -> DeviceManagerResult<PciDeviceInfo> {
3610         self.validate_identifier(&device_cfg.id)?;
3611 
3612         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
3613             return Err(DeviceManagerError::InvalidIommuHotplug);
3614         }
3615 
3616         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
3617 
3618         // Update the PCIU bitmap
3619         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3620 
3621         Ok(PciDeviceInfo {
3622             id: device_name,
3623             bdf,
3624         })
3625     }
3626 
3627     pub fn add_user_device(
3628         &mut self,
3629         device_cfg: &mut UserDeviceConfig,
3630     ) -> DeviceManagerResult<PciDeviceInfo> {
3631         self.validate_identifier(&device_cfg.id)?;
3632 
3633         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
3634 
3635         // Update the PCIU bitmap
3636         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3637 
3638         Ok(PciDeviceInfo {
3639             id: device_name,
3640             bdf,
3641         })
3642     }
3643 
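         /// Requests the removal of a hot-pluggable device. This only marks
         /// the device in the PCID (pci_devices_down) bitmap; the actual
         /// teardown is expected to happen later through eject_device(),
         /// typically once the guest has acknowledged the ejection.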
3644     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
3645         // The node is directly a PCI node when the 'id' refers to a VFIO
3646         // device or a virtio-pci one.
3647         // When the 'id' refers to a virtio device, we must find the PCI
3648         // node by looking at its parent.
3649         let device_tree = self.device_tree.lock().unwrap();
3650         let node = device_tree
3651             .get(&id)
3652             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
3653 
3654         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
3655             node
3656         } else {
3657             let parent = node
3658                 .parent
3659                 .as_ref()
3660                 .ok_or(DeviceManagerError::MissingNode)?;
3661             device_tree
3662                 .get(parent)
3663                 .ok_or(DeviceManagerError::MissingNode)?
3664         };
3665 
3666         let pci_device_bdf: PciBdf = pci_device_node
3667             .pci_bdf
3668             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3669         let pci_segment_id = pci_device_bdf.segment();
3670 
3671         let pci_device_handle = pci_device_node
3672             .pci_device_handle
3673             .as_ref()
3674             .ok_or(DeviceManagerError::MissingPciDevice)?;
3675         #[allow(irrefutable_let_patterns)]
3676         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
3677             let device_type = VirtioDeviceType::from(
3678                 virtio_pci_device
3679                     .lock()
3680                     .unwrap()
3681                     .virtio_device()
3682                     .lock()
3683                     .unwrap()
3684                     .device_type(),
3685             );
3686             match device_type {
3687                 VirtioDeviceType::Net
3688                 | VirtioDeviceType::Block
3689                 | VirtioDeviceType::Pmem
3690                 | VirtioDeviceType::Fs
3691                 | VirtioDeviceType::Vsock => {}
3692                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
3693             }
3694         }
3695 
3696         // Update the PCID bitmap
3697         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
3698 
3699         Ok(())
3700     }
3701 
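         /// Completes the removal of a PCI device: gives its slot back to the
         /// PCI bus, drops it (and its children) from the device tree, unmaps
         /// DMA where applicable, frees the BARs and detaches it from the
         /// buses so that the underlying object can finally be dropped.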
3702     pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
3703         info!(
3704             "Ejecting device_id = {} on segment_id={}",
3705             device_id, pci_segment_id
3706         );
3707 
3708         // Convert the device ID into the corresponding b/d/f.
3709         let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);
3710 
3711         // Give the PCI device ID back to the PCI bus.
3712         self.pci_segments[pci_segment_id as usize]
3713             .pci_bus
3714             .lock()
3715             .unwrap()
3716             .put_device_id(device_id as usize)
3717             .map_err(DeviceManagerError::PutPciDeviceId)?;
3718 
3719         // Remove the device from the device tree along with its children.
3720         let mut device_tree = self.device_tree.lock().unwrap();
3721         let pci_device_node = device_tree
3722             .remove_node_by_pci_bdf(pci_device_bdf)
3723             .ok_or(DeviceManagerError::MissingPciDevice)?;
3724 
3725         // For VFIO and vfio-user the PCI device id is the id.
3726         // For virtio we overwrite it later as we want the id of the
3727         // underlying device.
3728         let mut id = pci_device_node.id;
3729         let pci_device_handle = pci_device_node
3730             .pci_device_handle
3731             .ok_or(DeviceManagerError::MissingPciDevice)?;
3732         if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
3733             // The virtio-pci device has a single child
3734             if !pci_device_node.children.is_empty() {
3735                 assert_eq!(pci_device_node.children.len(), 1);
3736                 let child_id = &pci_device_node.children[0];
3737                 id = child_id.clone();
3738             }
3739         }
3740         for child in pci_device_node.children.iter() {
3741             device_tree.remove(child);
3742         }
3743 
3744         let mut iommu_attached = false;
3745         if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
3746             if iommu_attached_devices.contains(&pci_device_bdf) {
3747                 iommu_attached = true;
3748             }
3749         }
3750 
3751         let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
3752             // No need to remove any virtio-mem mapping here as the container outlives all devices
3753             PciDeviceHandle::Vfio(vfio_pci_device) => (
3754                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
3755                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
3756                 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
3757                 false,
3758             ),
3759             PciDeviceHandle::Virtio(virtio_pci_device) => {
3760                 let dev = virtio_pci_device.lock().unwrap();
3761                 let bar_addr = dev.config_bar_addr();
3762                 for (event, addr) in dev.ioeventfds(bar_addr) {
3763                     let io_addr = IoEventAddress::Mmio(addr);
3764                     self.address_manager
3765                         .vm
3766                         .unregister_ioevent(event, &io_addr)
3767                         .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
3768                 }
3769 
3770                 if let Some(dma_handler) = dev.dma_handler() {
3771                     if !iommu_attached {
3772                         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3773                             for region in zone.regions() {
3774                                 let iova = region.start_addr().0;
3775                                 let size = region.len();
3776                                 dma_handler
3777                                     .unmap(iova, size)
3778                                     .map_err(DeviceManagerError::VirtioDmaUnmap)?;
3779                             }
3780                         }
3781                     }
3782                 }
3783 
3784                 (
3785                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
3786                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
3787                     Some(dev.virtio_device()),
3788                     dev.dma_handler().is_some() && !iommu_attached,
3789                 )
3790             }
3791             PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
3792                 let mut dev = vfio_user_pci_device.lock().unwrap();
3793                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3794                     for region in zone.regions() {
3795                         dev.dma_unmap(region)
3796                             .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
3797                     }
3798                 }
3799 
3800                 (
3801                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
3802                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
3803                     None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
3804                     true,
3805                 )
3806             }
3807         };
3808 
3809         if remove_dma_handler {
3810             for virtio_mem_device in self.virtio_mem_devices.iter() {
3811                 virtio_mem_device
3812                     .lock()
3813                     .unwrap()
3814                     .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
3815                         pci_device_bdf.into(),
3816                     ))
3817                     .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
3818             }
3819         }
3820 
3821         // Free the allocated BARs
3822         pci_device
3823             .lock()
3824             .unwrap()
3825             .free_bars(
3826                 &mut self.address_manager.allocator.lock().unwrap(),
3827                 &mut self.pci_segments[pci_segment_id as usize]
3828                     .allocator
3829                     .lock()
3830                     .unwrap(),
3831             )
3832             .map_err(DeviceManagerError::FreePciBars)?;
3833 
3834         // Remove the device from the PCI bus
3835         self.pci_segments[pci_segment_id as usize]
3836             .pci_bus
3837             .lock()
3838             .unwrap()
3839             .remove_by_device(&pci_device)
3840             .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;
3841 
3842         #[cfg(target_arch = "x86_64")]
3843         // Remove the device from the IO bus
3844         self.io_bus()
3845             .remove_by_device(&bus_device)
3846             .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;
3847 
3848         // Remove the device from the MMIO bus
3849         self.mmio_bus()
3850             .remove_by_device(&bus_device)
3851             .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;
3852 
3853         // Remove the device from the list of BusDevice held by the
3854         // DeviceManager.
3855         self.bus_devices
3856             .retain(|dev| !Arc::ptr_eq(dev, &bus_device));
3857 
3858         // Shutdown and remove the underlying virtio-device if present
3859         // Shut down and remove the underlying virtio-device if present
3860             for mapping in virtio_device.lock().unwrap().userspace_mappings() {
3861                 self.memory_manager
3862                     .lock()
3863                     .unwrap()
3864                     .remove_userspace_mapping(
3865                         mapping.addr.raw_value(),
3866                         mapping.len,
3867                         mapping.host_addr,
3868                         mapping.mergeable,
3869                         mapping.mem_slot,
3870                     )
3871                     .map_err(DeviceManagerError::MemoryManager)?;
3872             }
3873 
3874             virtio_device.lock().unwrap().shutdown();
3875 
3876             self.virtio_devices
3877                 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
3878         }
3879 
3880         event!(
3881             "vm",
3882             "device-removed",
3883             "id",
3884             &id,
3885             "bdf",
3886             pci_device_bdf.to_string()
3887         );
3888 
3889         // At this point, the device has been removed from all the lists and
3890         // buses where it was stored. At the end of this function, once
3891         // pci_device, bus_device and virtio_device are released, the actual
3892         // device will be dropped.
3893         Ok(())
3894     }
3895 
3896     fn hotplug_virtio_pci_device(
3897         &mut self,
3898         handle: MetaVirtioDevice,
3899     ) -> DeviceManagerResult<PciDeviceInfo> {
3900         // Add the virtio device to the device manager list. This is important
3901         // as the list is used, for instance, to notify virtio devices about
3902         // memory updates.
3903         self.virtio_devices.push(handle.clone());
3904 
3905         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
3906             self.iommu_mapping.clone()
3907         } else {
3908             None
3909         };
3910 
3911         let bdf = self.add_virtio_pci_device(
3912             handle.virtio_device,
3913             &mapping,
3914             handle.id.clone(),
3915             handle.pci_segment,
3916             handle.dma_handler,
3917         )?;
3918 
3919         // Update the PCIU bitmap
3920         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3921 
3922         Ok(PciDeviceInfo { id: handle.id, bdf })
3923     }
3924 
3925     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
3926         self.config
3927             .lock()
3928             .as_ref()
3929             .unwrap()
3930             .platform
3931             .as_ref()
3932             .map(|pc| {
3933                 pc.iommu_segments
3934                     .as_ref()
3935                     .map(|v| v.contains(&pci_segment_id))
3936                     .unwrap_or_default()
3937             })
3938             .unwrap_or_default()
3939     }
3940 
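         // The add_* methods below are the hotplug entry points. They all
         // follow the same pattern: validate the identifier, reject IOMMU
         // attachment on non-IOMMU segments where relevant, build the
         // corresponding virtio device and plug it behind a virtio-pci
         // transport.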
3941     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
3942         self.validate_identifier(&disk_cfg.id)?;
3943 
3944         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
3945             return Err(DeviceManagerError::InvalidIommuHotplug);
3946         }
3947 
3948         let device = self.make_virtio_block_device(disk_cfg)?;
3949         self.hotplug_virtio_pci_device(device)
3950     }
3951 
3952     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
3953         self.validate_identifier(&fs_cfg.id)?;
3954 
3955         let device = self.make_virtio_fs_device(fs_cfg)?;
3956         self.hotplug_virtio_pci_device(device)
3957     }
3958 
3959     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
3960         self.validate_identifier(&pmem_cfg.id)?;
3961 
3962         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
3963             return Err(DeviceManagerError::InvalidIommuHotplug);
3964         }
3965 
3966         let device = self.make_virtio_pmem_device(pmem_cfg)?;
3967         self.hotplug_virtio_pci_device(device)
3968     }
3969 
3970     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
3971         self.validate_identifier(&net_cfg.id)?;
3972 
3973         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
3974             return Err(DeviceManagerError::InvalidIommuHotplug);
3975         }
3976 
3977         let device = self.make_virtio_net_device(net_cfg)?;
3978         self.hotplug_virtio_pci_device(device)
3979     }
3980 
3981     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
3982         self.validate_identifier(&vdpa_cfg.id)?;
3983 
3984         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
3985             return Err(DeviceManagerError::InvalidIommuHotplug);
3986         }
3987 
3988         let device = self.make_vdpa_device(vdpa_cfg)?;
3989         self.hotplug_virtio_pci_device(device)
3990     }
3991 
3992     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
3993         self.validate_identifier(&vsock_cfg.id)?;
3994 
3995         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
3996             return Err(DeviceManagerError::InvalidIommuHotplug);
3997         }
3998 
3999         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4000         self.hotplug_virtio_pci_device(device)
4001     }
4002 
4003     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4004         let mut counters = HashMap::new();
4005 
4006         for handle in &self.virtio_devices {
4007             let virtio_device = handle.virtio_device.lock().unwrap();
4008             if let Some(device_counters) = virtio_device.counters() {
4009                 counters.insert(handle.id.clone(), device_counters.clone());
4010             }
4011         }
4012 
4013         counters
4014     }
4015 
4016     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4017         if let Some(balloon) = &self.balloon {
4018             return balloon
4019                 .lock()
4020                 .unwrap()
4021                 .resize(size)
4022                 .map_err(DeviceManagerError::VirtioBalloonResize);
4023         }
4024 
4025         warn!("No balloon setup: Can't resize the balloon");
4026         Err(DeviceManagerError::MissingVirtioBalloon)
4027     }
4028 
4029     pub fn balloon_size(&self) -> u64 {
4030         if let Some(balloon) = &self.balloon {
4031             return balloon.lock().unwrap().get_actual();
4032         }
4033 
4034         0
4035     }
4036 
4037     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4038         self.device_tree.clone()
4039     }
4040 
4041     pub fn restore_devices(
4042         &mut self,
4043         snapshot: Snapshot,
4044     ) -> std::result::Result<(), MigratableError> {
4045         // Finally, restore all devices associated with the DeviceManager.
4046         // It's important to restore devices in the right order; walking the
4047         // device tree in reverse breadth-first order ensures a child is
4048         // restored before its parent node.
4049         for node in self
4050             .device_tree
4051             .lock()
4052             .unwrap()
4053             .breadth_first_traversal()
4054             .rev()
4055         {
4056             // Restore the node
4057             if let Some(migratable) = &node.migratable {
4058                 info!("Restoring {} from DeviceManager", node.id);
4059                 if let Some(snapshot) = snapshot.snapshots.get(&node.id) {
4060                     migratable.lock().unwrap().pause()?;
4061                     migratable.lock().unwrap().restore(*snapshot.clone())?;
4062                 } else {
4063                     return Err(MigratableError::Restore(anyhow!(
4064                         "Missing device {}",
4065                         node.id
4066                     )));
4067                 }
4068             }
4069         }
4070 
4071         // The devices have been fully restored, so we can now update the
4072         // restoring state of the DeviceManager.
4073         self.restoring = false;
4074 
4075         Ok(())
4076     }
4077 
4078     #[cfg(target_arch = "x86_64")]
4079     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4080         self.ged_notification_device
4081             .as_ref()
4082             .unwrap()
4083             .lock()
4084             .unwrap()
4085             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4086             .map_err(DeviceManagerError::PowerButtonNotification)
4087     }
4088 
4089     #[cfg(target_arch = "aarch64")]
4090     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4091         // There are two use cases:
4092         // 1. Users will use direct kernel boot with device tree.
4093         // 2. Users will use ACPI+UEFI boot.
4094 
4095         // Trigger a GPIO pin 3 event to satisfy use case 1.
4096         self.gpio_device
4097             .as_ref()
4098             .unwrap()
4099             .lock()
4100             .unwrap()
4101             .trigger_key(3)
4102             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4103         // Trigger a GED power button event to satisfy use case 2.
4104         return self
4105             .ged_notification_device
4106             .as_ref()
4107             .unwrap()
4108             .lock()
4109             .unwrap()
4110             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4111             .map_err(DeviceManagerError::PowerButtonNotification);
4112     }
4113 
4114     pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
4115         &self.iommu_attached_devices
4116     }
4117 
4118     #[cfg(target_arch = "aarch64")]
4119     pub fn uefi_flash(&self) -> GuestMemoryAtomic<GuestMemoryMmap> {
4120         self.uefi_flash.as_ref().unwrap().clone()
4121     }
4122 
4123     fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4124         if let Some(id) = id {
4125             if id.starts_with("__") {
4126                 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4127             }
4128 
4129             if self.device_tree.lock().unwrap().contains_key(id) {
4130                 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4131             }
4132         }
4133 
4134         Ok(())
4135     }
4136 }
4137 
4138 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4139     for (numa_node_id, numa_node) in numa_nodes.iter() {
4140         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4141             return Some(*numa_node_id);
4142         }
4143     }
4144 
4145     None
4146 }
4147 
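     // The AML generated below describes the PCI hotplug controller (PHPR)
     // with its PCIU/PCID/B0EJ/PSEG fields, the per-segment PCI buses, the
     // motherboard resources (MBRD) covering the PCI MMIO config windows,
     // and the serial device when it is enabled.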
4148 impl Aml for DeviceManager {
4149     fn append_aml_bytes(&self, bytes: &mut Vec<u8>) {
4150         #[cfg(target_arch = "aarch64")]
4151         use arch::aarch64::DeviceInfoForFdt;
4152 
4153         let mut pci_scan_methods = Vec::new();
4154         for i in 0..self.pci_segments.len() {
4155             pci_scan_methods.push(aml::MethodCall::new(
4156                 format!("\\_SB_.PCI{:X}.PCNT", i).as_str().into(),
4157                 vec![],
4158             ));
4159         }
4160         let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
4161         for method in &pci_scan_methods {
4162             pci_scan_inner.push(method)
4163         }
4164 
4165         // PCI hotplug controller
4166         aml::Device::new(
4167             "_SB_.PHPR".into(),
4168             vec![
4169                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0A06")),
4170                 &aml::Name::new("_STA".into(), &0x0bu8),
4171                 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
4172                 &aml::Mutex::new("BLCK".into(), 0),
4173                 &aml::Name::new(
4174                     "_CRS".into(),
4175                     &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
4176                         aml::AddressSpaceCachable::NotCacheable,
4177                         true,
4178                         self.acpi_address.0 as u64,
4179                         self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
4180                     )]),
4181                 ),
4182                 // OpRegion and Fields map MMIO range into individual field values
4183                 &aml::OpRegion::new(
4184                     "PCST".into(),
4185                     aml::OpRegionSpace::SystemMemory,
4186                     self.acpi_address.0 as usize,
4187                     DEVICE_MANAGER_ACPI_SIZE,
4188                 ),
4189                 &aml::Field::new(
4190                     "PCST".into(),
4191                     aml::FieldAccessType::DWord,
4192                     aml::FieldUpdateRule::WriteAsZeroes,
4193                     vec![
4194                         aml::FieldEntry::Named(*b"PCIU", 32),
4195                         aml::FieldEntry::Named(*b"PCID", 32),
4196                         aml::FieldEntry::Named(*b"B0EJ", 32),
4197                         aml::FieldEntry::Named(*b"PSEG", 32),
4198                     ],
4199                 ),
4200                 &aml::Method::new(
4201                     "PCEJ".into(),
4202                     2,
4203                     true,
4204                     vec![
4205                         // Take lock defined above
4206                         &aml::Acquire::new("BLCK".into(), 0xffff),
4207                         // Choose the current segment
4208                         &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
4209                         // Set the bit for the slot to eject (first argument) in the B0EJ field
4210                         &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
4211                         // Release lock
4212                         &aml::Release::new("BLCK".into()),
4213                         // Return 0
4214                         &aml::Return::new(&aml::ZERO),
4215                     ],
4216                 ),
4217                 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
4218             ],
4219         )
4220         .append_aml_bytes(bytes);
4221 
4222         for segment in &self.pci_segments {
4223             segment.append_aml_bytes(bytes);
4224         }
4225 
4226         let mut mbrd_memory = Vec::new();
4227 
4228         for segment in &self.pci_segments {
4229             mbrd_memory.push(aml::Memory32Fixed::new(
4230                 true,
4231                 segment.mmio_config_address as u32,
4232                 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
4233             ))
4234         }
4235 
4236         let mut mbrd_memory_refs = Vec::new();
4237         for mbrd_memory_ref in &mbrd_memory {
4238             mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
4239         }
4240 
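        // "Motherboard resources" device (PNP0C02): reserves each segment's
        // PCI MMIO config (ECAM) window so the guest OS does not reassign
        // those ranges.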
4241         aml::Device::new(
4242             "_SB_.MBRD".into(),
4243             vec![
4244                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C02")),
4245                 &aml::Name::new("_UID".into(), &aml::ZERO),
4246                 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
4247             ],
4248         )
4249         .append_aml_bytes(bytes);
4250 
4251         // Serial device
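        // COM1 resources differ per architecture: on x86_64 a 16550-style
        // port at I/O 0x3f8 with IRQ 4 (PNP0501); on aarch64 a PL011
        // ("ARMH0011") mapped at LEGACY_SERIAL_MAPPED_IO_START with the irq
        // taken from the recorded device info.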
4252         #[cfg(target_arch = "x86_64")]
4253         let serial_irq = 4;
4254         #[cfg(target_arch = "aarch64")]
4255         let serial_irq =
4256             if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
4257                 self.get_device_info()
4259                     .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
4260                     .unwrap()
4261                     .irq()
4262             } else {
4263                 // Serial is turned off: use a placeholder irq, the COM1 device below is not generated.
4264                 31
4265             };
4266         if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
4267             aml::Device::new(
4268                 "_SB_.COM1".into(),
4269                 vec![
4270                     &aml::Name::new(
4271                         "_HID".into(),
4272                         #[cfg(target_arch = "x86_64")]
4273                         &aml::EisaName::new("PNP0501"),
4274                         #[cfg(target_arch = "aarch64")]
4275                         &"ARMH0011",
4276                     ),
4277                     &aml::Name::new("_UID".into(), &aml::ZERO),
4278                     &aml::Name::new("_DDN".into(), &"COM1"),
4279                     &aml::Name::new(
4280                         "_CRS".into(),
4281                         &aml::ResourceTemplate::new(vec![
4282                             &aml::Interrupt::new(true, true, false, false, serial_irq),
4283                             #[cfg(target_arch = "x86_64")]
4284                             &aml::Io::new(0x3f8, 0x3f8, 0, 0x8),
4285                             #[cfg(target_arch = "aarch64")]
4286                             &aml::Memory32Fixed::new(
4287                                 true,
4288                                 arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
4289                                 MMIO_LEN as u32,
4290                             ),
4291                         ]),
4292                     ),
4293                 ],
4294             )
4295             .append_aml_bytes(bytes);
4296         }
4297 
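        // _S5_ describes the soft-off sleep state, allowing the guest to
        // perform an ACPI power-off.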
4298         aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).append_aml_bytes(bytes);
4299 
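        // ACPI power button device (PNP0C0C), used together with the GED
        // notification device (appended at the end of this method) to deliver
        // power button events to the guest.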
4300         aml::Device::new(
4301             "_SB_.PWRB".into(),
4302             vec![
4303                 &aml::Name::new("_HID".into(), &aml::EisaName::new("PNP0C0C")),
4304                 &aml::Name::new("_UID".into(), &aml::ZERO),
4305             ],
4306         )
4307         .append_aml_bytes(bytes);
4308 
4309         self.ged_notification_device
4310             .as_ref()
4311             .unwrap()
4312             .lock()
4313             .unwrap()
4314             .append_aml_bytes(bytes);
4315     }
4316 }
4317 
4318 impl Pausable for DeviceManager {
4319     fn pause(&mut self) -> result::Result<(), MigratableError> {
4320         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4321             if let Some(migratable) = &device_node.migratable {
4322                 migratable.lock().unwrap().pause()?;
4323             }
4324         }
4325         // On AArch64, pausing the device manager must also "pause" the
4326         // GIC, which flushes the GIC pending tables and ITS tables to
4327         // guest RAM.
4328         #[cfg(target_arch = "aarch64")]
4329         {
4330             self.get_interrupt_controller()
4331                 .unwrap()
4332                 .lock()
4333                 .unwrap()
4334                 .pause()?;
4335         };
4336 
4337         Ok(())
4338     }
4339 
4340     fn resume(&mut self) -> result::Result<(), MigratableError> {
4341         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4342             if let Some(migratable) = &device_node.migratable {
4343                 migratable.lock().unwrap().resume()?;
4344             }
4345         }
4346 
4347         Ok(())
4348     }
4349 }
4350 
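// The DeviceManager snapshot aggregates one nested snapshot per migratable
// device in the device tree, plus a data section holding the DeviceManager's
// own state; restore applies that state and then recreates the devices.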
4351 impl Snapshottable for DeviceManager {
4352     fn id(&self) -> String {
4353         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4354     }
4355 
4356     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4357         let mut snapshot = Snapshot::new(DEVICE_MANAGER_SNAPSHOT_ID);
4358 
4359         // We aggregate all the devices' snapshots.
4360         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4361             if let Some(migratable) = &device_node.migratable {
4362                 let device_snapshot = migratable.lock().unwrap().snapshot()?;
4363                 snapshot.add_snapshot(device_snapshot);
4364             }
4365         }
4366 
4367         // Then we store the DeviceManager state.
4368         snapshot.add_data_section(SnapshotDataSection::new_from_state(
4369             DEVICE_MANAGER_SNAPSHOT_ID,
4370             &self.state(),
4371         )?);
4372 
4373         Ok(snapshot)
4374     }
4375 
4376     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
4377         // Let's first restore the DeviceManager.
4378 
4379         self.set_state(&snapshot.to_state(DEVICE_MANAGER_SNAPSHOT_ID)?);
4380 
4381         // Now that DeviceManager is updated with the right states, it's time
4382         // to create the devices based on the configuration.
4383         self.create_devices(None, None, None)
4384             .map_err(|e| MigratableError::Restore(anyhow!("Could not create devices {:?}", e)))?;
4385 
4386         Ok(())
4387     }
4388 }
4389 
4390 impl Transportable for DeviceManager {}
4391 
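// Dirty-page logging and migration hooks fan out to every migratable device in
// the device tree; dirty_log() merges the per-device ranges into a single
// MemoryRangeTable.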
4392 impl Migratable for DeviceManager {
4393     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4394         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4395             if let Some(migratable) = &device_node.migratable {
4396                 migratable.lock().unwrap().start_dirty_log()?;
4397             }
4398         }
4399         Ok(())
4400     }
4401 
4402     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4403         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4404             if let Some(migratable) = &device_node.migratable {
4405                 migratable.lock().unwrap().stop_dirty_log()?;
4406             }
4407         }
4408         Ok(())
4409     }
4410 
4411     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4412         let mut tables = Vec::new();
4413         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4414             if let Some(migratable) = &device_node.migratable {
4415                 tables.push(migratable.lock().unwrap().dirty_log()?);
4416             }
4417         }
4418         Ok(MemoryRangeTable::new_from_tables(tables))
4419     }
4420 
4421     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4422         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4423             if let Some(migratable) = &device_node.migratable {
4424                 migratable.lock().unwrap().start_migration()?;
4425             }
4426         }
4427         Ok(())
4428     }
4429 
4430     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4431         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4432             if let Some(migratable) = &device_node.migratable {
4433                 migratable.lock().unwrap().complete_migration()?;
4434             }
4435         }
4436         Ok(())
4437     }
4438 }
4439 
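// Offsets and sizes of the PCI hotplug register fields within the PHPR MMIO
// window. They must stay in sync with the aml::Field layout declared in
// append_aml_bytes() above (four consecutive little-endian DWORDs: PCIU, PCID,
// B0EJ and PSEG).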
4440 const PCIU_FIELD_OFFSET: u64 = 0;
4441 const PCID_FIELD_OFFSET: u64 = 4;
4442 const B0EJ_FIELD_OFFSET: u64 = 8;
4443 const PSEG_FIELD_OFFSET: u64 = 12;
4444 const PCIU_FIELD_SIZE: usize = 4;
4445 const PCID_FIELD_SIZE: usize = 4;
4446 const B0EJ_FIELD_SIZE: usize = 4;
4447 const PSEG_FIELD_SIZE: usize = 4;
4448 
4449 impl BusDevice for DeviceManager {
4450     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4451         match offset {
4452             PCIU_FIELD_OFFSET => {
4453                 assert!(data.len() == PCIU_FIELD_SIZE);
4454                 data.copy_from_slice(
4455                     &self.pci_segments[self.selected_segment]
4456                         .pci_devices_up
4457                         .to_le_bytes(),
4458                 );
4459                 // Clear the PCIU bitmap
4460                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4461             }
4462             PCID_FIELD_OFFSET => {
4463                 assert!(data.len() == PCID_FIELD_SIZE);
4464                 data.copy_from_slice(
4465                     &self.pci_segments[self.selected_segment]
4466                         .pci_devices_down
4467                         .to_le_bytes(),
4468                 );
4469                 // Clear the PCID bitmap
4470                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4471             }
4472             B0EJ_FIELD_OFFSET => {
4473                 assert!(data.len() == B0EJ_FIELD_SIZE);
4474                 // Always return an empty bitmap since the eject is always
4475                 // taken care of right away during a write access.
4476                 data.fill(0);
4477             }
4478             PSEG_FIELD_OFFSET => {
4479                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4480                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4481             }
4482             _ => error!(
4483                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4484                 base, offset
4485             ),
4486         }
4487 
4488         debug!(
4489             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4490             base, offset, data
4491         )
4492     }
4493 
4494     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4495         match offset {
4496             B0EJ_FIELD_OFFSET => {
4497                 assert!(data.len() == B0EJ_FIELD_SIZE);
4498                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4499                 data_array.copy_from_slice(data);
4500                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4501 
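                // Each set bit identifies a slot to eject on the currently
                // selected segment: e.g. a write of 0b1000 ejects the device
                // in slot 3.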
4502                 while slot_bitmap > 0 {
4503                     let slot_id = slot_bitmap.trailing_zeros();
4504                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4505                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4506                     }
4507                     slot_bitmap &= !(1 << slot_id);
4508                 }
4509             }
4510             PSEG_FIELD_OFFSET => {
4511                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4512                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4513                 data_array.copy_from_slice(data);
4514                 let selected_segment = u32::from_le_bytes(data_array) as usize;
4515                 if selected_segment >= self.pci_segments.len() {
4516                     error!(
4517                         "Segment selection out of range: {} >= {}",
4518                         selected_segment,
4519                         self.pci_segments.len()
4520                     );
4521                     return None;
4522                 }
4523                 self.selected_segment = selected_segment;
4524             }
4525             _ => error!(
4526                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4527                 base, offset
4528             ),
4529         }
4530 
4531         debug!(
4532             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4533             base, offset, data
4534         );
4535 
4536         None
4537     }
4538 }
4539 
4540 impl Drop for DeviceManager {
4541     fn drop(&mut self) {
4542         for handle in self.virtio_devices.drain(..) {
4543             handle.virtio_device.lock().unwrap().shutdown();
4544         }
4545     }
4546 }
4547