xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision 21f05ebb4fb0ddf1f148d9b5329c9259297ed3c7)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use std::collections::{BTreeMap, BTreeSet, HashMap};
13 use std::fs::{File, OpenOptions};
14 use std::io::{self, stdout, IsTerminal, Seek, SeekFrom};
15 use std::num::Wrapping;
16 use std::os::unix::fs::OpenOptionsExt;
17 use std::os::unix::io::{AsRawFd, FromRawFd};
18 use std::path::PathBuf;
19 use std::result;
20 use std::sync::{Arc, Mutex};
21 use std::time::Instant;
22 
23 use acpi_tables::sdt::GenericAddress;
24 use acpi_tables::{aml, Aml};
25 use anyhow::anyhow;
26 #[cfg(target_arch = "x86_64")]
27 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
28 use arch::{layout, NumaNodes};
29 #[cfg(target_arch = "aarch64")]
30 use arch::{DeviceType, MmioDeviceInfo};
31 use block::async_io::DiskFile;
32 use block::fixed_vhd_sync::FixedVhdDiskSync;
33 use block::qcow_sync::QcowDiskSync;
34 use block::raw_async_aio::RawFileDiskAio;
35 use block::raw_sync::RawFileDiskSync;
36 use block::vhdx_sync::VhdxDiskSync;
37 use block::{
38     block_aio_is_supported, block_io_uring_is_supported, detect_image_type, qcow, vhdx, ImageType,
39 };
40 #[cfg(feature = "io_uring")]
41 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk};
42 #[cfg(target_arch = "x86_64")]
43 use devices::debug_console::DebugConsole;
44 #[cfg(target_arch = "aarch64")]
45 use devices::gic;
46 use devices::interrupt_controller::InterruptController;
47 #[cfg(target_arch = "x86_64")]
48 use devices::ioapic;
49 #[cfg(target_arch = "aarch64")]
50 use devices::legacy::Pl011;
51 #[cfg(feature = "pvmemcontrol")]
52 use devices::pvmemcontrol::{PvmemcontrolBusDevice, PvmemcontrolPciDevice};
53 use devices::{interrupt_controller, AcpiNotificationFlags};
54 use hypervisor::IoEventAddress;
55 use libc::{
56     tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE,
57     TCSANOW,
58 };
59 use pci::{
60     DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping,
61     VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError,
62 };
63 use rate_limiter::group::RateLimiterGroup;
64 use seccompiler::SeccompAction;
65 use serde::{Deserialize, Serialize};
66 use tracer::trace_scoped;
67 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd};
68 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator, VirtioTransport};
69 use virtio_devices::vhost_user::VhostUserConfig;
70 use virtio_devices::{
71     AccessPlatformMapping, ActivateError, Endpoint, IommuMapping, VdpaDmaMapping,
72     VirtioMemMappingSource,
73 };
74 use vm_allocator::{AddressAllocator, SystemAllocator};
75 use vm_device::dma_mapping::ExternalDmaMapping;
76 use vm_device::interrupt::{
77     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
78 };
79 use vm_device::{Bus, BusDevice, BusDeviceSync, Resource};
80 use vm_memory::guest_memory::FileOffset;
81 use vm_memory::{Address, GuestAddress, GuestMemoryRegion, GuestUsize, MmapRegion};
82 #[cfg(target_arch = "x86_64")]
83 use vm_memory::{GuestAddressSpace, GuestMemory};
84 use vm_migration::protocol::MemoryRangeTable;
85 use vm_migration::{
86     snapshot_from_id, state_from_id, Migratable, MigratableError, Pausable, Snapshot, SnapshotData,
87     Snapshottable, Transportable,
88 };
89 use vm_virtio::{AccessPlatform, VirtioDeviceType};
90 use vmm_sys_util::eventfd::EventFd;
91 #[cfg(target_arch = "x86_64")]
92 use {devices::debug_console, devices::legacy::Serial};
93 
94 use crate::console_devices::{ConsoleDeviceError, ConsoleInfo, ConsoleOutput};
95 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE};
96 use crate::device_tree::{DeviceNode, DeviceTree};
97 use crate::interrupt::{LegacyUserspaceInterruptManager, MsiInterruptManager};
98 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
99 use crate::pci_segment::PciSegment;
100 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
101 use crate::vm_config::{
102     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
103     VdpaConfig, VhostMode, VmConfig, VsockConfig, DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT,
104 };
105 use crate::{device_node, GuestRegionMmap, PciDeviceInfo, DEVICE_MANAGER_SNAPSHOT_ID};
106 
107 #[cfg(target_arch = "aarch64")]
108 const MMIO_LEN: u64 = 0x1000;
109 
110 // Singleton devices / devices the user cannot name
111 #[cfg(target_arch = "x86_64")]
112 const IOAPIC_DEVICE_NAME: &str = "__ioapic";
113 const SERIAL_DEVICE_NAME: &str = "__serial";
114 #[cfg(target_arch = "x86_64")]
115 const DEBUGCON_DEVICE_NAME: &str = "__debug_console";
116 #[cfg(target_arch = "aarch64")]
117 const GPIO_DEVICE_NAME: &str = "__gpio";
118 const RNG_DEVICE_NAME: &str = "__rng";
119 const IOMMU_DEVICE_NAME: &str = "__iommu";
120 #[cfg(feature = "pvmemcontrol")]
121 const PVMEMCONTROL_DEVICE_NAME: &str = "__pvmemcontrol";
122 const BALLOON_DEVICE_NAME: &str = "__balloon";
123 const CONSOLE_DEVICE_NAME: &str = "__console";
124 const PVPANIC_DEVICE_NAME: &str = "__pvpanic";
125 
126 // Devices that the user may name and for which we generate
127 // identifiers if the user doesn't give one
128 const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
129 const FS_DEVICE_NAME_PREFIX: &str = "_fs";
130 const NET_DEVICE_NAME_PREFIX: &str = "_net";
131 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
132 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
133 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
134 const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
135 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
136 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
137 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
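// Note: generated identifiers are illustrative here, e.g. a first unnamed disk
// would typically get "_disk0" and a second network device "_net1", with the
// counter skipping any id already present in the device tree.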
138 
139 /// Errors associated with device manager
140 #[derive(Debug)]
141 pub enum DeviceManagerError {
142     /// Cannot create EventFd.
143     EventFd(io::Error),
144 
145     /// Cannot open disk path
146     Disk(io::Error),
147 
148     /// Cannot create vhost-user-net device
149     CreateVhostUserNet(virtio_devices::vhost_user::Error),
150 
151     /// Cannot create virtio-blk device
152     CreateVirtioBlock(io::Error),
153 
154     /// Cannot create virtio-net device
155     CreateVirtioNet(virtio_devices::net::Error),
156 
157     /// Cannot create virtio-console device
158     CreateVirtioConsole(io::Error),
159 
160     /// Cannot create virtio-rng device
161     CreateVirtioRng(io::Error),
162 
163     /// Cannot create virtio-fs device
164     CreateVirtioFs(virtio_devices::vhost_user::Error),
165 
166     /// Virtio-fs device was created without a socket.
167     NoVirtioFsSock,
168 
169     /// Cannot create vhost-user-blk device
170     CreateVhostUserBlk(virtio_devices::vhost_user::Error),
171 
172     /// Cannot create virtio-pmem device
173     CreateVirtioPmem(io::Error),
174 
175     /// Cannot create vDPA device
176     CreateVdpa(virtio_devices::vdpa::Error),
177 
178     /// Cannot create virtio-vsock device
179     CreateVirtioVsock(io::Error),
180 
181     /// Cannot create TPM device
182     CreateTpmDevice(anyhow::Error),
183 
184     /// Failed to convert Path to &str for the vDPA device.
185     CreateVdpaConvertPath,
186 
187     /// Failed to convert Path to &str for the virtio-vsock device.
188     CreateVsockConvertPath,
189 
190     /// Cannot create virtio-vsock backend
191     CreateVsockBackend(virtio_devices::vsock::VsockUnixError),
192 
193     /// Cannot create virtio-iommu device
194     CreateVirtioIommu(io::Error),
195 
196     /// Cannot create virtio-balloon device
197     CreateVirtioBalloon(io::Error),
198 
199     /// Cannot create pvmemcontrol device
200     #[cfg(feature = "pvmemcontrol")]
201     CreatePvmemcontrol(io::Error),
202 
203     /// Cannot create virtio-watchdog device
204     CreateVirtioWatchdog(io::Error),
205 
206     /// Failed to parse disk image format
207     DetectImageType(io::Error),
208 
209     /// Cannot open qcow disk path
210     QcowDeviceCreate(qcow::Error),
211 
212     /// Cannot create serial manager
213     CreateSerialManager(SerialManagerError),
214 
215     /// Cannot spawn the serial manager thread
216     SpawnSerialManager(SerialManagerError),
217 
218     /// Cannot open tap interface
219     OpenTap(net_util::TapError),
220 
221     /// Cannot allocate IRQ.
222     AllocateIrq,
223 
224     /// Cannot configure the IRQ.
225     Irq(vmm_sys_util::errno::Error),
226 
227     /// Cannot allocate PCI BARs
228     AllocateBars(pci::PciDeviceError),
229 
230     /// Could not free the BARs associated with a PCI device.
231     FreePciBars(pci::PciDeviceError),
232 
233     /// Cannot register ioevent.
234     RegisterIoevent(anyhow::Error),
235 
236     /// Cannot unregister ioevent.
237     UnRegisterIoevent(anyhow::Error),
238 
239     /// Cannot create virtio device
240     VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),
241 
242     /// Cannot add PCI device
243     AddPciDevice(pci::PciRootError),
244 
245     /// Cannot open persistent memory file
246     PmemFileOpen(io::Error),
247 
248     /// Cannot set persistent memory file size
249     PmemFileSetLen(io::Error),
250 
251     /// Cannot find a memory range for persistent memory
252     PmemRangeAllocation,
253 
254     /// Cannot find a memory range for virtio-fs
255     FsRangeAllocation,
256 
257     /// Error creating serial output file
258     SerialOutputFileOpen(io::Error),
259 
260     #[cfg(target_arch = "x86_64")]
261     /// Error creating debug-console output file
262     DebugconOutputFileOpen(io::Error),
263 
264     /// Error creating console output file
265     ConsoleOutputFileOpen(io::Error),
266 
267     /// Error creating serial pty
268     SerialPtyOpen(io::Error),
269 
270     /// Error creating console pty
271     ConsolePtyOpen(io::Error),
272 
273     /// Error creating debug-console pty
274     DebugconPtyOpen(io::Error),
275 
276     /// Error setting pty raw mode
277     SetPtyRaw(ConsoleDeviceError),
278 
279     /// Error getting pty peer
280     GetPtyPeer(vmm_sys_util::errno::Error),
281 
282     /// Cannot create a VFIO device
283     VfioCreate(vfio_ioctls::VfioError),
284 
285     /// Cannot create a VFIO PCI device
286     VfioPciCreate(pci::VfioPciError),
287 
288     /// Failed to map VFIO MMIO region.
289     VfioMapRegion(pci::VfioPciError),
290 
291     /// Failed to DMA map VFIO device.
292     VfioDmaMap(vfio_ioctls::VfioError),
293 
294     /// Failed to DMA unmap VFIO device.
295     VfioDmaUnmap(pci::VfioPciError),
296 
297     /// Failed to create the passthrough device.
298     CreatePassthroughDevice(anyhow::Error),
299 
300     /// Failed to memory map.
301     Mmap(io::Error),
302 
303     /// Cannot add legacy device to Bus.
304     BusError(vm_device::BusError),
305 
306     /// Failed to allocate IO port
307     AllocateIoPort,
308 
309     /// Failed to allocate MMIO address
310     AllocateMmioAddress,
311 
312     /// Failed to make hotplug notification
313     HotPlugNotification(io::Error),
314 
315     /// Error from a memory manager operation
316     MemoryManager(MemoryManagerError),
317 
318     /// Failed to create new interrupt source group.
319     CreateInterruptGroup(io::Error),
320 
321     /// Failed to update interrupt source group.
322     UpdateInterruptGroup(io::Error),
323 
324     /// Failed to create interrupt controller.
325     CreateInterruptController(interrupt_controller::Error),
326 
327     /// Failed to create a new MmapRegion instance.
328     NewMmapRegion(vm_memory::mmap::MmapRegionError),
329 
330     /// Failed to clone a File.
331     CloneFile(io::Error),
332 
333     /// Failed to create socket file
334     CreateSocketFile(io::Error),
335 
336     /// Failed to spawn the network backend
337     SpawnNetBackend(io::Error),
338 
339     /// Failed to spawn the block backend
340     SpawnBlockBackend(io::Error),
341 
342     /// Missing PCI bus.
343     NoPciBus,
344 
345     /// Could not find an available device name.
346     NoAvailableDeviceName,
347 
348     /// Missing PCI device.
349     MissingPciDevice,
350 
351     /// Failed to remove a PCI device from the PCI bus.
352     RemoveDeviceFromPciBus(pci::PciRootError),
353 
354     /// Failed to remove a bus device from the IO bus.
355     RemoveDeviceFromIoBus(vm_device::BusError),
356 
357     /// Failed to remove a bus device from the MMIO bus.
358     RemoveDeviceFromMmioBus(vm_device::BusError),
359 
360     /// Failed to find the device corresponding to a specific PCI b/d/f.
361     UnknownPciBdf(u32),
362 
363     /// Not allowed to remove this type of device from the VM.
364     RemovalNotAllowed(vm_virtio::VirtioDeviceType),
365 
366     /// Failed to find device corresponding to the given identifier.
367     UnknownDeviceId(String),
368 
369     /// Failed to find an available PCI device ID.
370     NextPciDeviceId(pci::PciRootError),
371 
372     /// Could not reserve the PCI device ID.
373     GetPciDeviceId(pci::PciRootError),
374 
375     /// Could not give the PCI device ID back.
376     PutPciDeviceId(pci::PciRootError),
377 
378     /// No disk path was specified when one was expected
379     NoDiskPath,
380 
381     /// Failed to update guest memory for virtio device.
382     UpdateMemoryForVirtioDevice(virtio_devices::Error),
383 
384     /// Cannot create virtio-mem device
385     CreateVirtioMem(io::Error),
386 
387     /// Cannot find a memory range for virtio-mem memory
388     VirtioMemRangeAllocation,
389 
390     /// Failed to update guest memory for VFIO PCI device.
391     UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),
392 
393     /// Trying to use a directory for pmem but no size specified
394     PmemWithDirectorySizeMissing,
395 
396     /// Trying to use a size that is not a multiple of 2 MiB
397     PmemSizeNotAligned,
398 
399     /// Could not find the node in the device tree.
400     MissingNode,
401 
402     /// Resource was already found.
403     ResourceAlreadyExists,
404 
405     /// Expected resources for virtio-pmem could not be found.
406     MissingVirtioPmemResources,
407 
408     /// Missing PCI b/d/f from the DeviceNode.
409     MissingDeviceNodePciBdf,
410 
411     /// No support for device passthrough
412     NoDevicePassthroughSupport,
413 
414     /// No socket option support for console device
415     NoSocketOptionSupportForConsoleDevice,
416 
417     /// Failed to resize virtio-balloon
418     VirtioBalloonResize(virtio_devices::balloon::Error),
419 
420     /// Missing virtio-balloon, can't proceed as expected.
421     MissingVirtioBalloon,
422 
423     /// Missing virtual IOMMU device
424     MissingVirtualIommu,
425 
426     /// Failed to do power button notification
427     PowerButtonNotification(io::Error),
428 
429     /// Failed to do AArch64 GPIO power button notification
430     #[cfg(target_arch = "aarch64")]
431     AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),
432 
433     /// Failed to set the O_DIRECT flag on the file descriptor
434     SetDirectIo,
435 
436     /// Failed to create FixedVhdDiskAsync
437     CreateFixedVhdDiskAsync(io::Error),
438 
439     /// Failed to create FixedVhdDiskSync
440     CreateFixedVhdDiskSync(io::Error),
441 
442     /// Failed to create QcowDiskSync
443     CreateQcowDiskSync(qcow::Error),
444 
445     /// Failed to create FixedVhdxDiskSync
446     CreateFixedVhdxDiskSync(vhdx::VhdxError),
447 
448     /// Failed to add DMA mapping handler to virtio-mem device.
449     AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
450 
451     /// Failed to remove DMA mapping handler from virtio-mem device.
452     RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
453 
454     /// Failed to create vfio-user client
455     VfioUserCreateClient(vfio_user::Error),
456 
457     /// Failed to create VFIO user device
458     VfioUserCreate(VfioUserPciDeviceError),
459 
460     /// Failed to map region from VFIO user device into guest
461     VfioUserMapRegion(VfioUserPciDeviceError),
462 
463     /// Failed to DMA map VFIO user device.
464     VfioUserDmaMap(VfioUserPciDeviceError),
465 
466     /// Failed to DMA unmap VFIO user device.
467     VfioUserDmaUnmap(VfioUserPciDeviceError),
468 
469     /// Failed to update memory mappings for VFIO user device
470     UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),
471 
472     /// Cannot duplicate file descriptor
473     DupFd(vmm_sys_util::errno::Error),
474 
475     /// Failed to DMA map virtio device.
476     VirtioDmaMap(std::io::Error),
477 
478     /// Failed to DMA unmap virtio device.
479     VirtioDmaUnmap(std::io::Error),
480 
481     /// Cannot hotplug device behind vIOMMU
482     InvalidIommuHotplug,
483 
484     /// Invalid identifier as it is not unique.
485     IdentifierNotUnique(String),
486 
487     /// Invalid identifier
488     InvalidIdentifier(String),
489 
490     /// Error activating virtio device
491     VirtioActivate(ActivateError),
492 
493     /// Failed retrieving device state from snapshot
494     RestoreGetState(MigratableError),
495 
496     /// Cannot create a PvPanic device
497     PvPanicCreate(devices::pvpanic::PvPanicError),
498 
499     /// Cannot create a RateLimiterGroup
500     RateLimiterGroupCreate(rate_limiter::group::Error),
501 
502     /// Cannot start sigwinch listener
503     StartSigwinchListener(std::io::Error),
504 
505     /// Invalid console info
506     InvalidConsoleInfo,
507 
508     /// Invalid console fd
509     InvalidConsoleFd,
510 }
511 
512 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;
513 
514 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;
515 
516 #[derive(Default)]
517 pub struct Console {
518     console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
519 }
520 
521 impl Console {
522     pub fn need_resize(&self) -> bool {
523         self.console_resizer.is_some()
528     }
529 
530     pub fn update_console_size(&self) {
531         if let Some(resizer) = self.console_resizer.as_ref() {
532             resizer.update_console_size()
533         }
534     }
535 }
536 
537 pub(crate) struct AddressManager {
538     pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
539     pub(crate) io_bus: Arc<Bus>,
540     pub(crate) mmio_bus: Arc<Bus>,
541     pub(crate) vm: Arc<dyn hypervisor::Vm>,
542     device_tree: Arc<Mutex<DeviceTree>>,
543     pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
544     pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
545 }
546 
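// Relocating a PCI BAR means releasing the old range from the matching
// allocator, reserving the new one, updating the PIO/MMIO bus mappings and
// patching the device_tree resources so snapshots see the new BAR address.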
547 impl DeviceRelocation for AddressManager {
548     fn move_bar(
549         &self,
550         old_base: u64,
551         new_base: u64,
552         len: u64,
553         pci_dev: &mut dyn PciDevice,
554         region_type: PciBarRegionType,
555     ) -> std::result::Result<(), std::io::Error> {
556         match region_type {
557             PciBarRegionType::IoRegion => {
558                 // Update system allocator
559                 self.allocator
560                     .lock()
561                     .unwrap()
562                     .free_io_addresses(GuestAddress(old_base), len as GuestUsize);
563 
564                 self.allocator
565                     .lock()
566                     .unwrap()
567                     .allocate_io_addresses(Some(GuestAddress(new_base)), len as GuestUsize, None)
568                     .ok_or_else(|| {
569                         io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
570                     })?;
571 
572                 // Update PIO bus
573                 self.io_bus
574                     .update_range(old_base, len, new_base, len)
575                     .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
576             }
577             PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
578                 let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
579                     &self.pci_mmio32_allocators
580                 } else {
581                     &self.pci_mmio64_allocators
582                 };
583 
584                 // Find the specific allocator that this BAR was allocated from and use it for the new one
585                 for allocator in allocators {
586                     let allocator_base = allocator.lock().unwrap().base();
587                     let allocator_end = allocator.lock().unwrap().end();
588 
589                     if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
590                         allocator
591                             .lock()
592                             .unwrap()
593                             .free(GuestAddress(old_base), len as GuestUsize);
594 
595                         allocator
596                             .lock()
597                             .unwrap()
598                             .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
599                             .ok_or_else(|| {
600                                 io::Error::new(
601                                     io::ErrorKind::Other,
602                                     "failed allocating new MMIO range",
603                                 )
604                             })?;
605 
606                         break;
607                     }
608                 }
609 
610                 // Update MMIO bus
611                 self.mmio_bus
612                     .update_range(old_base, len, new_base, len)
613                     .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
614             }
615         }
616 
617         // Update the device_tree resources associated with the device
618         if let Some(id) = pci_dev.id() {
619             if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
620                 let mut resource_updated = false;
621                 for resource in node.resources.iter_mut() {
622                     if let Resource::PciBar { base, type_, .. } = resource {
623                         if PciBarRegionType::from(*type_) == region_type && *base == old_base {
624                             *base = new_base;
625                             resource_updated = true;
626                             break;
627                         }
628                     }
629                 }
630 
631                 if !resource_updated {
632                     return Err(io::Error::new(
633                         io::ErrorKind::Other,
634                         format!(
635                             "Couldn't find a resource with base 0x{old_base:x} for device {id}"
636                         ),
637                     ));
638                 }
639             } else {
640                 return Err(io::Error::new(
641                     io::ErrorKind::Other,
642                     format!("Couldn't find device {id} from device tree"),
643                 ));
644             }
645         }
646 
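        // Virtio devices need extra care when a BAR moves: ioeventfds tied to
        // the virtio config/notification BAR must be re-registered at the new
        // address, and a shared-memory BAR (e.g. the virtio-fs DAX window)
        // must be remapped as a user memory region at its new location.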
647         let any_dev = pci_dev.as_any();
648         if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
649             let bar_addr = virtio_pci_dev.config_bar_addr();
650             if bar_addr == new_base {
651                 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
652                     let io_addr = IoEventAddress::Mmio(addr);
653                     self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
654                         io::Error::new(
655                             io::ErrorKind::Other,
656                             format!("failed to unregister ioevent: {e:?}"),
657                         )
658                     })?;
659                 }
660                 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
661                     let io_addr = IoEventAddress::Mmio(addr);
662                     self.vm
663                         .register_ioevent(event, &io_addr, None)
664                         .map_err(|e| {
665                             io::Error::new(
666                                 io::ErrorKind::Other,
667                                 format!("failed to register ioevent: {e:?}"),
668                             )
669                         })?;
670                 }
671             } else {
672                 let virtio_dev = virtio_pci_dev.virtio_device();
673                 let mut virtio_dev = virtio_dev.lock().unwrap();
674                 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
675                     if shm_regions.addr.raw_value() == old_base {
676                         let mem_region = self.vm.make_user_memory_region(
677                             shm_regions.mem_slot,
678                             old_base,
679                             shm_regions.len,
680                             shm_regions.host_addr,
681                             false,
682                             false,
683                         );
684 
685                         self.vm.remove_user_memory_region(mem_region).map_err(|e| {
686                             io::Error::new(
687                                 io::ErrorKind::Other,
688                                 format!("failed to remove user memory region: {e:?}"),
689                             )
690                         })?;
691 
692                         // Create the new mapping by inserting the new region into KVM.
693                         let mem_region = self.vm.make_user_memory_region(
694                             shm_regions.mem_slot,
695                             new_base,
696                             shm_regions.len,
697                             shm_regions.host_addr,
698                             false,
699                             false,
700                         );
701 
702                         self.vm.create_user_memory_region(mem_region).map_err(|e| {
703                             io::Error::new(
704                                 io::ErrorKind::Other,
705                                 format!("failed to create user memory regions: {e:?}"),
706                             )
707                         })?;
708 
709                         // Update shared memory regions to reflect the new mapping.
710                         shm_regions.addr = GuestAddress(new_base);
711                         virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
712                             io::Error::new(
713                                 io::ErrorKind::Other,
714                                 format!("failed to update shared memory regions: {e:?}"),
715                             )
716                         })?;
717                     }
718                 }
719             }
720         }
721 
722         pci_dev.move_bar(old_base, new_base)
723     }
724 }
725 
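// State serialized into the DeviceManager snapshot: the device tree plus the
// id counter, so that device names generated after a restore remain unique.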
726 #[derive(Serialize, Deserialize)]
727 struct DeviceManagerState {
728     device_tree: DeviceTree,
729     device_id_cnt: Wrapping<usize>,
730 }
731 
732 #[derive(Debug)]
733 pub struct PtyPair {
734     pub main: File,
735     pub path: PathBuf,
736 }
737 
738 impl Clone for PtyPair {
739     fn clone(&self) -> Self {
740         PtyPair {
741             main: self.main.try_clone().unwrap(),
742             path: self.path.clone(),
743         }
744     }
745 }
746 
747 #[derive(Clone)]
748 pub enum PciDeviceHandle {
749     Vfio(Arc<Mutex<VfioPciDevice>>),
750     Virtio(Arc<Mutex<VirtioPciDevice>>),
751     VfioUser(Arc<Mutex<VfioUserPciDevice>>),
752 }
753 
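// Everything needed to expose a virtio device as a virtio-pci device: the
// device itself, whether it should sit behind the virtual IOMMU, its
// identifier, the target PCI segment and an optional external DMA mapping
// handler (for example for a vDPA backend).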
754 #[derive(Clone)]
755 struct MetaVirtioDevice {
756     virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
757     iommu: bool,
758     id: String,
759     pci_segment: u16,
760     dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
761 }
762 
763 #[derive(Default)]
764 pub struct AcpiPlatformAddresses {
765     pub pm_timer_address: Option<GenericAddress>,
766     pub reset_reg_address: Option<GenericAddress>,
767     pub sleep_control_reg_address: Option<GenericAddress>,
768     pub sleep_status_reg_address: Option<GenericAddress>,
769 }
770 
771 #[cfg(all(feature = "mshv", feature = "sev_snp"))]
772 struct SevSnpPageAccessProxy {
773     vm: Arc<dyn hypervisor::Vm>,
774 }
775 
776 #[cfg(all(feature = "mshv", feature = "sev_snp"))]
777 impl std::fmt::Debug for SevSnpPageAccessProxy {
778     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
779         write!(f, "SNP Page access proxy")
780     }
781 }
782 
783 #[cfg(all(feature = "mshv", feature = "sev_snp"))]
784 impl SevSnpPageAccessProxy {
785     fn new(vm: Arc<dyn hypervisor::Vm>) -> SevSnpPageAccessProxy {
786         SevSnpPageAccessProxy { vm }
787     }
788 }
789 
790 #[cfg(all(feature = "mshv", feature = "sev_snp"))]
791 impl AccessPlatform for SevSnpPageAccessProxy {
792     fn translate_gpa(&self, base: u64, _size: u64) -> std::result::Result<u64, std::io::Error> {
793         Ok(base)
794     }
795 
796     fn translate_gva(&self, base: u64, size: u64) -> std::result::Result<u64, std::io::Error> {
797         self.vm
798             .gain_page_access(base, size as u32)
799             .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
800         Ok(base)
801     }
802 }
803 
804 pub struct DeviceManager {
805     // Manage address space related to devices
806     address_manager: Arc<AddressManager>,
807 
808     // Console abstraction
809     console: Arc<Console>,
810 
811     // Serial Manager
812     serial_manager: Option<Arc<SerialManager>>,
813 
814     // Pipe used to trigger console resize (pty foreground status)
815     console_resize_pipe: Option<Arc<File>>,
816 
817     // To restore on exit.
818     original_termios_opt: Arc<Mutex<Option<termios>>>,
819 
820     // Interrupt controller
821     #[cfg(target_arch = "x86_64")]
822     interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
823     #[cfg(target_arch = "aarch64")]
824     interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,
825 
826     // Things to be added to the commandline (e.g. aarch64 early console)
827     #[cfg(target_arch = "aarch64")]
828     cmdline_additions: Vec<String>,
829 
830     // ACPI GED notification device
831     ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,
832 
833     // VM configuration
834     config: Arc<Mutex<VmConfig>>,
835 
836     // Memory Manager
837     memory_manager: Arc<Mutex<MemoryManager>>,
838 
839     // CPU Manager
840     cpu_manager: Arc<Mutex<CpuManager>>,
841 
842     // The virtio devices on the system
843     virtio_devices: Vec<MetaVirtioDevice>,
844 
845     // List of bus devices
846     // Let the DeviceManager keep strong references to the BusDevice devices.
847     // This allows the IO and MMIO buses to be provided with Weak references,
848     // which prevents cyclic dependencies.
849     bus_devices: Vec<Arc<dyn BusDeviceSync>>,
850 
851     // Counter to keep track of the consumed device IDs.
852     device_id_cnt: Wrapping<usize>,
853 
854     pci_segments: Vec<PciSegment>,
855 
856     #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
857     // MSI Interrupt Manager
858     msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
859 
860     #[cfg_attr(feature = "mshv", allow(dead_code))]
861     // Legacy Interrupt Manager
862     legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,
863 
864     // Passthrough device handle
865     passthrough_device: Option<VfioDeviceFd>,
866 
867     // VFIO container
868     // Only one container can be created, therefore it is stored as part of the
869     // DeviceManager to be reused.
870     vfio_container: Option<Arc<VfioContainer>>,
871 
872     // Paravirtualized IOMMU
873     iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
874     iommu_mapping: Option<Arc<IommuMapping>>,
875 
876     // PCI information about devices attached to the paravirtualized IOMMU
877     // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
878     // representing the devices attached to the virtual IOMMU. This is useful
879     // information for filling the ACPI VIOT table.
880     iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,
881 
882     // Tree of devices, representing the dependencies between devices.
883     // Useful for introspection, snapshot and restore.
884     device_tree: Arc<Mutex<DeviceTree>>,
885 
886     // Exit event
887     exit_evt: EventFd,
888     reset_evt: EventFd,
889 
890     #[cfg(target_arch = "aarch64")]
891     id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,
892 
893     // seccomp action
894     seccomp_action: SeccompAction,
895 
896     // List of guest NUMA nodes.
897     numa_nodes: NumaNodes,
898 
899     // Possible handle to the virtio-balloon device
900     balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,
901 
902     // Virtio Device activation EventFd to allow the VMM thread to trigger device
903     // activation and thus start the threads from the VMM thread
904     activate_evt: EventFd,
905 
906     acpi_address: GuestAddress,
907 
908     selected_segment: usize,
909 
910     // Possible handle to the virtio-mem device
911     virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,
912 
913     #[cfg(target_arch = "aarch64")]
914     // GPIO device for AArch64
915     gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,
916 
917     #[cfg(feature = "pvmemcontrol")]
918     pvmemcontrol_devices: Option<(
919         Arc<PvmemcontrolBusDevice>,
920         Arc<Mutex<PvmemcontrolPciDevice>>,
921     )>,
922 
923     // pvpanic device
924     pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,
925 
926     // Flag to force setting the iommu on virtio devices
927     force_iommu: bool,
928 
929     // io_uring availability, cached once detected (None until probed)
930     io_uring_supported: Option<bool>,
931 
932     // aio availability, cached once detected (None until probed)
933     aio_supported: Option<bool>,
934 
935     // List of unique identifiers provided at boot through the configuration.
936     boot_id_list: BTreeSet<String>,
937 
938     // Start time of the VM
939     timestamp: Instant,
940 
941     // Pending activations
942     pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,
943 
944     // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
945     acpi_platform_addresses: AcpiPlatformAddresses,
946 
947     snapshot: Option<Snapshot>,
948 
949     rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,
950 
951     mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
952 }
953 
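// Split [start, end] into per-segment MMIO apertures sized proportionally to
// the given weights, keeping each aperture a multiple of `alignment`.
// Illustrative example (made-up values): with start = 0, end = 2 GiB - 1, two
// segments of weight 1 and a 4 KiB alignment, the per-weight unit is
// (2 GiB) / (4 KiB * 2) * 4 KiB = 1 GiB, so segment 0 covers [0, 1 GiB) and
// segment 1 covers [1 GiB, 2 GiB).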
954 fn create_mmio_allocators(
955     start: u64,
956     end: u64,
957     num_pci_segments: u16,
958     weights: Vec<u32>,
959     alignment: u64,
960 ) -> Vec<Arc<Mutex<AddressAllocator>>> {
961     let total_weight: u32 = weights.iter().sum();
962 
963     // Start each PCI segment mmio range on an aligned boundary
964     let pci_segment_mmio_size = (end - start + 1) / (alignment * total_weight as u64) * alignment;
965 
966     let mut mmio_allocators = vec![];
967     let mut i = 0;
968     for segment_id in 0..num_pci_segments as u64 {
969         let weight = weights[segment_id as usize] as u64;
970         let mmio_start = start + i * pci_segment_mmio_size;
971         let mmio_size = pci_segment_mmio_size * weight;
972         let allocator = Arc::new(Mutex::new(
973             AddressAllocator::new(GuestAddress(mmio_start), mmio_size).unwrap(),
974         ));
975         mmio_allocators.push(allocator);
976         i += weight;
977     }
978 
979     mmio_allocators
980 }
981 
982 impl DeviceManager {
983     #[allow(clippy::too_many_arguments)]
984     pub fn new(
985         io_bus: Arc<Bus>,
986         mmio_bus: Arc<Bus>,
987         vm: Arc<dyn hypervisor::Vm>,
988         config: Arc<Mutex<VmConfig>>,
989         memory_manager: Arc<Mutex<MemoryManager>>,
990         cpu_manager: Arc<Mutex<CpuManager>>,
991         exit_evt: EventFd,
992         reset_evt: EventFd,
993         seccomp_action: SeccompAction,
994         numa_nodes: NumaNodes,
995         activate_evt: &EventFd,
996         force_iommu: bool,
997         boot_id_list: BTreeSet<String>,
998         timestamp: Instant,
999         snapshot: Option<Snapshot>,
1000         dynamic: bool,
1001     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
1002         trace_scoped!("DeviceManager::new");
1003 
1004         let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
1005             let state: DeviceManagerState = snapshot.to_state().unwrap();
1006             (
1007                 Arc::new(Mutex::new(state.device_tree.clone())),
1008                 state.device_id_cnt,
1009             )
1010         } else {
1011             (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
1012         };
1013 
1014         let num_pci_segments =
1015             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
1016                 platform_config.num_pci_segments
1017             } else {
1018                 1
1019             };
1020 
1021         let mut mmio32_aperture_weights: Vec<u32> =
1022             std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
1023                 .take(num_pci_segments.into())
1024                 .collect();
1025         if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
1026             for pci_segment in pci_segments.iter() {
1027                 mmio32_aperture_weights[pci_segment.pci_segment as usize] =
1028                     pci_segment.mmio32_aperture_weight
1029             }
1030         }
1031 
1032         let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0;
1033         let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE;
1034         let pci_mmio32_allocators = create_mmio_allocators(
1035             start_of_mmio32_area,
1036             end_of_mmio32_area,
1037             num_pci_segments,
1038             mmio32_aperture_weights,
1039             4 << 10,
1040         );
1041 
1042         let mut mmio64_aperture_weights: Vec<u32> =
1043             std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
1044                 .take(num_pci_segments.into())
1045                 .collect();
1046         if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
1047             for pci_segment in pci_segments.iter() {
1048                 mmio64_aperture_weights[pci_segment.pci_segment as usize] =
1049                     pci_segment.mmio64_aperture_weight
1050             }
1051         }
1052 
1053         let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0;
1054         let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0;
1055         let pci_mmio64_allocators = create_mmio_allocators(
1056             start_of_mmio64_area,
1057             end_of_mmio64_area,
1058             num_pci_segments,
1059             mmio64_aperture_weights,
1060             4 << 30,
1061         );
1062 
1063         let address_manager = Arc::new(AddressManager {
1064             allocator: memory_manager.lock().unwrap().allocator(),
1065             io_bus,
1066             mmio_bus,
1067             vm: vm.clone(),
1068             device_tree: Arc::clone(&device_tree),
1069             pci_mmio32_allocators,
1070             pci_mmio64_allocators,
1071         });
1072 
1073         // First we create the MSI interrupt manager, the legacy one is created
1074         // later, after the IOAPIC device creation.
1075         // The reason we create the MSI one first is because the IOAPIC needs it,
1076         // and then the legacy interrupt manager needs an IOAPIC. So we're
1077         // handling a linear dependency chain:
1078         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1079         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1080             Arc::new(MsiInterruptManager::new(
1081                 Arc::clone(&address_manager.allocator),
1082                 vm,
1083             ));
1084 
1085         let acpi_address = address_manager
1086             .allocator
1087             .lock()
1088             .unwrap()
1089             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1090             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1091 
1092         let mut pci_irq_slots = [0; 32];
1093         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1094             &address_manager,
1095             &mut pci_irq_slots,
1096         )?;
1097 
1098         let mut pci_segments = vec![PciSegment::new_default_segment(
1099             &address_manager,
1100             Arc::clone(&address_manager.pci_mmio32_allocators[0]),
1101             Arc::clone(&address_manager.pci_mmio64_allocators[0]),
1102             &pci_irq_slots,
1103         )?];
1104 
1105         for i in 1..num_pci_segments as usize {
1106             pci_segments.push(PciSegment::new(
1107                 i as u16,
1108                 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
1109                 &address_manager,
1110                 Arc::clone(&address_manager.pci_mmio32_allocators[i]),
1111                 Arc::clone(&address_manager.pci_mmio64_allocators[i]),
1112                 &pci_irq_slots,
1113             )?);
1114         }
1115 
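        // The CpuManager only needs to be reachable over MMIO when dynamic
        // changes (e.g. CPU hotplug driven through ACPI) are enabled, so this
        // mapping is skipped for static configurations.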
1116         if dynamic {
1117             let acpi_address = address_manager
1118                 .allocator
1119                 .lock()
1120                 .unwrap()
1121                 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
1122                 .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1123 
1124             address_manager
1125                 .mmio_bus
1126                 .insert(
1127                     cpu_manager.clone(),
1128                     acpi_address.0,
1129                     CPU_MANAGER_ACPI_SIZE as u64,
1130                 )
1131                 .map_err(DeviceManagerError::BusError)?;
1132 
1133             cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
1134         }
1135 
1136         let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new();
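        // Create the shared rate limiter groups up front: each configured
        // group gets its own bandwidth/ops token buckets and a polling thread,
        // so that several devices (e.g. disks) can later refer to the same
        // group by id.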
1137         if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() {
1138             for rate_limit_group_cfg in rate_limit_groups_cfg {
1139                 let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config;
1140                 let bw = rate_limit_cfg.bandwidth.unwrap_or_default();
1141                 let ops = rate_limit_cfg.ops.unwrap_or_default();
1142                 let mut rate_limit_group = RateLimiterGroup::new(
1143                     &rate_limit_group_cfg.id,
1144                     bw.size,
1145                     bw.one_time_burst.unwrap_or(0),
1146                     bw.refill_time,
1147                     ops.size,
1148                     ops.one_time_burst.unwrap_or(0),
1149                     ops.refill_time,
1150                 )
1151                 .map_err(DeviceManagerError::RateLimiterGroupCreate)?;
1152 
1153                 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?;
1154 
1155                 rate_limit_group.start_thread(exit_evt).unwrap();
1156                 rate_limit_groups
1157                     .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group));
1158             }
1159         }
1160 
1161         let device_manager = DeviceManager {
1162             address_manager: Arc::clone(&address_manager),
1163             console: Arc::new(Console::default()),
1164             interrupt_controller: None,
1165             #[cfg(target_arch = "aarch64")]
1166             cmdline_additions: Vec::new(),
1167             ged_notification_device: None,
1168             config,
1169             memory_manager,
1170             cpu_manager,
1171             virtio_devices: Vec::new(),
1172             bus_devices: Vec::new(),
1173             device_id_cnt,
1174             msi_interrupt_manager,
1175             legacy_interrupt_manager: None,
1176             passthrough_device: None,
1177             vfio_container: None,
1178             iommu_device: None,
1179             iommu_mapping: None,
1180             iommu_attached_devices: None,
1181             pci_segments,
1182             device_tree,
1183             exit_evt,
1184             reset_evt,
1185             #[cfg(target_arch = "aarch64")]
1186             id_to_dev_info: HashMap::new(),
1187             seccomp_action,
1188             numa_nodes,
1189             balloon: None,
1190             activate_evt: activate_evt
1191                 .try_clone()
1192                 .map_err(DeviceManagerError::EventFd)?,
1193             acpi_address,
1194             selected_segment: 0,
1195             serial_manager: None,
1196             console_resize_pipe: None,
1197             original_termios_opt: Arc::new(Mutex::new(None)),
1198             virtio_mem_devices: Vec::new(),
1199             #[cfg(target_arch = "aarch64")]
1200             gpio_device: None,
1201             #[cfg(feature = "pvmemcontrol")]
1202             pvmemcontrol_devices: None,
1203             pvpanic_device: None,
1204             force_iommu,
1205             io_uring_supported: None,
1206             aio_supported: None,
1207             boot_id_list,
1208             timestamp,
1209             pending_activations: Arc::new(Mutex::new(Vec::default())),
1210             acpi_platform_addresses: AcpiPlatformAddresses::default(),
1211             snapshot,
1212             rate_limit_groups,
1213             mmio_regions: Arc::new(Mutex::new(Vec::new())),
1214         };
1215 
1216         let device_manager = Arc::new(Mutex::new(device_manager));
1217 
1218         address_manager
1219             .mmio_bus
1220             .insert(
1221                 Arc::clone(&device_manager) as Arc<dyn BusDeviceSync>,
1222                 acpi_address.0,
1223                 DEVICE_MANAGER_ACPI_SIZE as u64,
1224             )
1225             .map_err(DeviceManagerError::BusError)?;
1226 
1227         Ok(device_manager)
1228     }
1229 
1230     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1231         self.console_resize_pipe.clone()
1232     }
1233 
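    // Creates the platform devices in dependency order: the interrupt
    // controller first, then the legacy interrupt manager built on top of it,
    // the ACPI and legacy devices, the console devices, and finally the virtio
    // devices, which are then plugged onto their PCI segments.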
1234     pub fn create_devices(
1235         &mut self,
1236         console_info: Option<ConsoleInfo>,
1237         console_resize_pipe: Option<Arc<File>>,
1238         original_termios_opt: Arc<Mutex<Option<termios>>>,
1239     ) -> DeviceManagerResult<()> {
1240         trace_scoped!("create_devices");
1241 
1242         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1243 
1244         let interrupt_controller = self.add_interrupt_controller()?;
1245 
1246         self.cpu_manager
1247             .lock()
1248             .unwrap()
1249             .set_interrupt_controller(interrupt_controller.clone());
1250 
1251         // Now we can create the legacy interrupt manager, which needs the freshly
1252         // formed IOAPIC device.
1253         let legacy_interrupt_manager: Arc<
1254             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1255         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1256             &interrupt_controller,
1257         )));
1258 
1259         {
1260             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1261                 self.address_manager
1262                     .mmio_bus
1263                     .insert(
1264                         Arc::clone(&self.memory_manager) as Arc<dyn BusDeviceSync>,
1265                         acpi_address.0,
1266                         MEMORY_MANAGER_ACPI_SIZE as u64,
1267                     )
1268                     .map_err(DeviceManagerError::BusError)?;
1269             }
1270         }
1271 
1272         #[cfg(target_arch = "x86_64")]
1273         self.add_legacy_devices(
1274             self.reset_evt
1275                 .try_clone()
1276                 .map_err(DeviceManagerError::EventFd)?,
1277         )?;
1278 
1279         #[cfg(target_arch = "aarch64")]
1280         self.add_legacy_devices(&legacy_interrupt_manager)?;
1281 
1282         {
1283             self.ged_notification_device = self.add_acpi_devices(
1284                 &legacy_interrupt_manager,
1285                 self.reset_evt
1286                     .try_clone()
1287                     .map_err(DeviceManagerError::EventFd)?,
1288                 self.exit_evt
1289                     .try_clone()
1290                     .map_err(DeviceManagerError::EventFd)?,
1291             )?;
1292         }
1293 
1294         self.original_termios_opt = original_termios_opt;
1295 
1296         self.console = self.add_console_devices(
1297             &legacy_interrupt_manager,
1298             &mut virtio_devices,
1299             console_info,
1300             console_resize_pipe,
1301         )?;
1302 
1303         if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
1304             let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
1305             self.bus_devices
1306                 .push(Arc::clone(&tpm_dev) as Arc<dyn BusDeviceSync>)
1307         }
1308         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1309 
1310         virtio_devices.append(&mut self.make_virtio_devices()?);
1311 
1312         self.add_pci_devices(virtio_devices.clone())?;
1313 
1314         self.virtio_devices = virtio_devices;
1315 
1316         // Add pvmemcontrol if required
1317         #[cfg(feature = "pvmemcontrol")]
1318         {
1319             if self.config.lock().unwrap().pvmemcontrol.is_some() {
1320                 let (pvmemcontrol_bus_device, pvmemcontrol_pci_device) =
1321                     self.make_pvmemcontrol_device()?;
1322                 self.pvmemcontrol_devices =
1323                     Some((pvmemcontrol_bus_device, pvmemcontrol_pci_device));
1324             }
1325         }
1326 
1327         if self.config.clone().lock().unwrap().pvpanic {
1328             self.pvpanic_device = self.add_pvpanic_device()?;
1329         }
1330 
1331         Ok(())
1332     }
1333 
1334     fn state(&self) -> DeviceManagerState {
1335         DeviceManagerState {
1336             device_tree: self.device_tree.lock().unwrap().clone(),
1337             device_id_cnt: self.device_id_cnt,
1338         }
1339     }
1340 
1341     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1342         #[cfg(target_arch = "aarch64")]
1343         {
1344             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1345             let vgic_config = gic::Gic::create_default_config(vcpus.into());
1346             (
1347                 vgic_config.msi_addr,
1348                 vgic_config.msi_addr + vgic_config.msi_size - 1,
1349             )
1350         }
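        // On x86_64 the MSI doorbell addresses live in the fixed LAPIC window
        // below, which must remain reserved in the guest IOVA space.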
1351         #[cfg(target_arch = "x86_64")]
1352         (0xfee0_0000, 0xfeef_ffff)
1353     }
1354 
1355     #[cfg(target_arch = "aarch64")]
1356     /// Returns information about the devices registered so far.
1357     pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
1358         &self.id_to_dev_info
1359     }
1360 
1361     #[allow(unused_variables)]
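    // Plugs the virtio devices (plus VFIO and vfio-user devices) onto their
    // PCI segments and, when a virtio-iommu is present, records which BDFs sit
    // behind it so the ACPI VIOT table can be generated later.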
1362     fn add_pci_devices(
1363         &mut self,
1364         virtio_devices: Vec<MetaVirtioDevice>,
1365     ) -> DeviceManagerResult<()> {
1366         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1367 
1368         let iommu_device = if self.config.lock().unwrap().iommu {
1369             let (device, mapping) = virtio_devices::Iommu::new(
1370                 iommu_id.clone(),
1371                 self.seccomp_action.clone(),
1372                 self.exit_evt
1373                     .try_clone()
1374                     .map_err(DeviceManagerError::EventFd)?,
1375                 self.get_msi_iova_space(),
1376                 state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
1377                     .map_err(DeviceManagerError::RestoreGetState)?,
1378             )
1379             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1380             let device = Arc::new(Mutex::new(device));
1381             self.iommu_device = Some(Arc::clone(&device));
1382             self.iommu_mapping = Some(mapping);
1383 
1384             // Fill the device tree with a new node. In case of restore, we
1385             // know there is nothing to do, so we can simply override the
1386             // existing entry.
1387             self.device_tree
1388                 .lock()
1389                 .unwrap()
1390                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1391 
1392             Some(device)
1393         } else {
1394             None
1395         };
1396 
1397         let mut iommu_attached_devices = Vec::new();
1398         {
1399             for handle in virtio_devices {
1400                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1401                     self.iommu_mapping.clone()
1402                 } else {
1403                     None
1404                 };
1405 
1406                 let dev_id = self.add_virtio_pci_device(
1407                     handle.virtio_device,
1408                     &mapping,
1409                     handle.id,
1410                     handle.pci_segment,
1411                     handle.dma_handler,
1412                 )?;
1413 
1414                 if handle.iommu {
1415                     iommu_attached_devices.push(dev_id);
1416                 }
1417             }
1418 
1419             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1420             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1421 
1422             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1423             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1424 
1425             // Add all devices from forced iommu segments
1426             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1427                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1428                     for segment in iommu_segments {
1429                         for device in 0..32 {
1430                             let bdf = PciBdf::new(*segment, 0, device, 0);
1431                             if !iommu_attached_devices.contains(&bdf) {
1432                                 iommu_attached_devices.push(bdf);
1433                             }
1434                         }
1435                     }
1436                 }
1437             }
1438 
1439             if let Some(iommu_device) = iommu_device {
1440                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1441                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1442             }
1443         }
1444 
1445         for segment in &self.pci_segments {
1446             #[cfg(target_arch = "x86_64")]
1447             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1448                 self.bus_devices
1449                     .push(Arc::clone(pci_config_io) as Arc<dyn BusDeviceSync>);
1450             }
1451 
1452             self.bus_devices
1453                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<dyn BusDeviceSync>);
1454         }
1455 
1456         Ok(())
1457     }
1458 
1459     #[cfg(target_arch = "aarch64")]
1460     fn add_interrupt_controller(
1461         &mut self,
1462     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1463         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1464             gic::Gic::new(
1465                 self.config.lock().unwrap().cpus.boot_vcpus,
1466                 Arc::clone(&self.msi_interrupt_manager),
1467                 self.address_manager.vm.clone(),
1468             )
1469             .map_err(DeviceManagerError::CreateInterruptController)?,
1470         ));
1471 
1472         self.interrupt_controller = Some(interrupt_controller.clone());
1473 
1474         // Restore the vGIC if we are in the process of restoring from a snapshot
1475         let id = String::from(gic::GIC_SNAPSHOT_ID);
1476         if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
1477             // PMU support is optional. Nothing should be impacted if the PMU initialization fails.
1478             if self
1479                 .cpu_manager
1480                 .lock()
1481                 .unwrap()
1482                 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
1483                 .is_err()
1484             {
1485                 info!("Failed to initialize PMU");
1486             }
1487 
1488             let vgic_state = vgic_snapshot
1489                 .to_state()
1490                 .map_err(DeviceManagerError::RestoreGetState)?;
1491             let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
1492             interrupt_controller
1493                 .lock()
1494                 .unwrap()
1495                 .restore_vgic(vgic_state, &saved_vcpu_states)
1496                 .unwrap();
1497         }
1498 
1499         self.device_tree
1500             .lock()
1501             .unwrap()
1502             .insert(id.clone(), device_node!(id, interrupt_controller));
1503 
1504         Ok(interrupt_controller)
1505     }
1506 
1507     #[cfg(target_arch = "aarch64")]
1508     pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
1509         self.interrupt_controller.as_ref()
1510     }
1511 
1512     #[cfg(target_arch = "x86_64")]
1513     fn add_interrupt_controller(
1514         &mut self,
1515     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1516         let id = String::from(IOAPIC_DEVICE_NAME);
1517 
1518         // Create IOAPIC
1519         let interrupt_controller = Arc::new(Mutex::new(
1520             ioapic::Ioapic::new(
1521                 id.clone(),
1522                 APIC_START,
1523                 Arc::clone(&self.msi_interrupt_manager),
1524                 state_from_id(self.snapshot.as_ref(), id.as_str())
1525                     .map_err(DeviceManagerError::RestoreGetState)?,
1526             )
1527             .map_err(DeviceManagerError::CreateInterruptController)?,
1528         ));
1529 
1530         self.interrupt_controller = Some(interrupt_controller.clone());
1531 
1532         self.address_manager
1533             .mmio_bus
1534             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1535             .map_err(DeviceManagerError::BusError)?;
1536 
1537         self.bus_devices
1538             .push(Arc::clone(&interrupt_controller) as Arc<dyn BusDeviceSync>);
1539 
1540         // Fill the device tree with a new node. In case of restore, we
1541         // know there is nothing to do, so we can simply override the
1542         // existing entry.
1543         self.device_tree
1544             .lock()
1545             .unwrap()
1546             .insert(id.clone(), device_node!(id, interrupt_controller));
1547 
1548         Ok(interrupt_controller)
1549     }
1550 
1551     fn add_acpi_devices(
1552         &mut self,
1553         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1554         reset_evt: EventFd,
1555         exit_evt: EventFd,
1556     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1557         let vcpus_kill_signalled = self
1558             .cpu_manager
1559             .lock()
1560             .unwrap()
1561             .vcpus_kill_signalled()
1562             .clone();
1563         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1564             exit_evt,
1565             reset_evt,
1566             vcpus_kill_signalled,
1567         )));
1568 
1569         self.bus_devices
1570             .push(Arc::clone(&shutdown_device) as Arc<dyn BusDeviceSync>);
1571 
1572         #[cfg(target_arch = "x86_64")]
1573         {
1574             let shutdown_pio_address: u16 = 0x600;
1575 
1576             self.address_manager
1577                 .allocator
1578                 .lock()
1579                 .unwrap()
1580                 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
1581                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1582 
1583             self.address_manager
1584                 .io_bus
1585                 .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
1586                 .map_err(DeviceManagerError::BusError)?;
1587 
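            // The same I/O port is advertised as the ACPI sleep control, sleep
            // status and reset register.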
1588             self.acpi_platform_addresses.sleep_control_reg_address =
1589                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1590             self.acpi_platform_addresses.sleep_status_reg_address =
1591                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1592             self.acpi_platform_addresses.reset_reg_address =
1593                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1594         }
1595 
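        // The ACPI Generic Event Device (GED) notifies the guest of events
        // such as device hotplug through a dedicated IRQ and MMIO region.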
1596         let ged_irq = self
1597             .address_manager
1598             .allocator
1599             .lock()
1600             .unwrap()
1601             .allocate_irq()
1602             .unwrap();
1603         let interrupt_group = interrupt_manager
1604             .create_group(LegacyIrqGroupConfig {
1605                 irq: ged_irq as InterruptIndex,
1606             })
1607             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1608         let ged_address = self
1609             .address_manager
1610             .allocator
1611             .lock()
1612             .unwrap()
1613             .allocate_platform_mmio_addresses(
1614                 None,
1615                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1616                 None,
1617             )
1618             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1619         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1620             interrupt_group,
1621             ged_irq,
1622             ged_address,
1623         )));
1624         self.address_manager
1625             .mmio_bus
1626             .insert(
1627                 ged_device.clone(),
1628                 ged_address.0,
1629                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1630             )
1631             .map_err(DeviceManagerError::BusError)?;
1632         self.bus_devices
1633             .push(Arc::clone(&ged_device) as Arc<dyn BusDeviceSync>);
1634 
1635         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1636 
1637         self.bus_devices
1638             .push(Arc::clone(&pm_timer_device) as Arc<dyn BusDeviceSync>);
1639 
1640         #[cfg(target_arch = "x86_64")]
1641         {
1642             let pm_timer_pio_address: u16 = 0x608;
1643 
1644             self.address_manager
1645                 .allocator
1646                 .lock()
1647                 .unwrap()
1648                 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
1649                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1650 
1651             self.address_manager
1652                 .io_bus
1653                 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
1654                 .map_err(DeviceManagerError::BusError)?;
1655 
1656             self.acpi_platform_addresses.pm_timer_address =
1657                 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
1658         }
1659 
1660         Ok(Some(ged_device))
1661     }
1662 
1663     #[cfg(target_arch = "x86_64")]
1664     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1665         let vcpus_kill_signalled = self
1666             .cpu_manager
1667             .lock()
1668             .unwrap()
1669             .vcpus_kill_signalled()
1670             .clone();
1671         // Add a shutdown device (i8042)
1672         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1673             reset_evt.try_clone().unwrap(),
1674             vcpus_kill_signalled.clone(),
1675         )));
1676 
1677         self.bus_devices
1678             .push(Arc::clone(&i8042) as Arc<dyn BusDeviceSync>);
1679 
1680         self.address_manager
1681             .io_bus
1682             .insert(i8042, 0x61, 0x4)
1683             .map_err(DeviceManagerError::BusError)?;
1684         {
1685             // Add an emulated CMOS device
1686             let mem_size = self
1687                 .memory_manager
1688                 .lock()
1689                 .unwrap()
1690                 .guest_memory()
1691                 .memory()
1692                 .last_addr()
1693                 .0
1694                 + 1;
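            // Report the memory below the 32-bit reserved hole and the memory
            // above the 64-bit RAM start separately, as the CMOS device expects.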
1695             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1696             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1697 
1698             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1699                 mem_below_4g,
1700                 mem_above_4g,
1701                 reset_evt,
1702                 Some(vcpus_kill_signalled),
1703             )));
1704 
1705             self.bus_devices
1706                 .push(Arc::clone(&cmos) as Arc<dyn BusDeviceSync>);
1707 
1708             self.address_manager
1709                 .io_bus
1710                 .insert(cmos, 0x70, 0x2)
1711                 .map_err(DeviceManagerError::BusError)?;
1712 
1713             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1714 
1715             self.bus_devices
1716                 .push(Arc::clone(&fwdebug) as Arc<dyn BusDeviceSync>);
1717 
1718             self.address_manager
1719                 .io_bus
1720                 .insert(fwdebug, 0x402, 0x1)
1721                 .map_err(DeviceManagerError::BusError)?;
1722         }
1723 
1724         // 0x80 debug port
1725         let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
1726         self.bus_devices
1727             .push(Arc::clone(&debug_port) as Arc<dyn BusDeviceSync>);
1728         self.address_manager
1729             .io_bus
1730             .insert(debug_port, 0x80, 0x1)
1731             .map_err(DeviceManagerError::BusError)?;
1732 
1733         Ok(())
1734     }
1735 
1736     #[cfg(target_arch = "aarch64")]
1737     fn add_legacy_devices(
1738         &mut self,
1739         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1740     ) -> DeviceManagerResult<()> {
1741         // Add an RTC device
1742         let rtc_irq = self
1743             .address_manager
1744             .allocator
1745             .lock()
1746             .unwrap()
1747             .allocate_irq()
1748             .unwrap();
1749 
1750         let interrupt_group = interrupt_manager
1751             .create_group(LegacyIrqGroupConfig {
1752                 irq: rtc_irq as InterruptIndex,
1753             })
1754             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1755 
1756         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1757 
1758         self.bus_devices
1759             .push(Arc::clone(&rtc_device) as Arc<dyn BusDeviceSync>);
1760 
1761         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1762 
1763         self.address_manager
1764             .mmio_bus
1765             .insert(rtc_device, addr.0, MMIO_LEN)
1766             .map_err(DeviceManagerError::BusError)?;
1767 
1768         self.id_to_dev_info.insert(
1769             (DeviceType::Rtc, "rtc".to_string()),
1770             MmioDeviceInfo {
1771                 addr: addr.0,
1772                 len: MMIO_LEN,
1773                 irq: rtc_irq,
1774             },
1775         );
1776 
1777         // Add a GPIO device
1778         let id = String::from(GPIO_DEVICE_NAME);
1779         let gpio_irq = self
1780             .address_manager
1781             .allocator
1782             .lock()
1783             .unwrap()
1784             .allocate_irq()
1785             .unwrap();
1786 
1787         let interrupt_group = interrupt_manager
1788             .create_group(LegacyIrqGroupConfig {
1789                 irq: gpio_irq as InterruptIndex,
1790             })
1791             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1792 
1793         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1794             id.clone(),
1795             interrupt_group,
1796             state_from_id(self.snapshot.as_ref(), id.as_str())
1797                 .map_err(DeviceManagerError::RestoreGetState)?,
1798         )));
1799 
1800         self.bus_devices
1801             .push(Arc::clone(&gpio_device) as Arc<dyn BusDeviceSync>);
1802 
1803         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1804 
1805         self.address_manager
1806             .mmio_bus
1807             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1808             .map_err(DeviceManagerError::BusError)?;
1809 
1810         self.gpio_device = Some(gpio_device.clone());
1811 
1812         self.id_to_dev_info.insert(
1813             (DeviceType::Gpio, "gpio".to_string()),
1814             MmioDeviceInfo {
1815                 addr: addr.0,
1816                 len: MMIO_LEN,
1817                 irq: gpio_irq,
1818             },
1819         );
1820 
1821         self.device_tree
1822             .lock()
1823             .unwrap()
1824             .insert(id.clone(), device_node!(id, gpio_device));
1825 
1826         Ok(())
1827     }
1828 
1829     #[cfg(target_arch = "x86_64")]
1830     fn add_debug_console_device(
1831         &mut self,
1832         debug_console_writer: Box<dyn io::Write + Send>,
1833     ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> {
1834         let id = String::from(DEBUGCON_DEVICE_NAME);
1835         let debug_console = Arc::new(Mutex::new(DebugConsole::new(
1836             id.clone(),
1837             debug_console_writer,
1838         )));
1839 
1840         let port = self
1841             .config
1842             .lock()
1843             .unwrap()
1844             .debug_console
1845             .clone()
1846             .iobase
1847             .map(|port| port as u64)
1848             .unwrap_or(debug_console::DEFAULT_PORT);
1849 
1850         self.bus_devices
1851             .push(Arc::clone(&debug_console) as Arc<dyn BusDeviceSync>);
1852 
1853         self.address_manager
1854             .allocator
1855             .lock()
1856             .unwrap()
1857             .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None)
1858             .ok_or(DeviceManagerError::AllocateIoPort)?;
1859 
1860         self.address_manager
1861             .io_bus
1862             .insert(debug_console.clone(), port, 0x1)
1863             .map_err(DeviceManagerError::BusError)?;
1864 
1865         // Fill the device tree with a new node. In case of restore, we
1866         // know there is nothing to do, so we can simply override the
1867         // existing entry.
1868         self.device_tree
1869             .lock()
1870             .unwrap()
1871             .insert(id.clone(), device_node!(id, debug_console));
1872 
1873         Ok(debug_console)
1874     }
1875 
1876     #[cfg(target_arch = "x86_64")]
1877     fn add_serial_device(
1878         &mut self,
1879         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1880         serial_writer: Option<Box<dyn io::Write + Send>>,
1881     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1882         // Serial is tied to IRQ #4
1883         let serial_irq = 4;
1884 
1885         let id = String::from(SERIAL_DEVICE_NAME);
1886 
1887         let interrupt_group = interrupt_manager
1888             .create_group(LegacyIrqGroupConfig {
1889                 irq: serial_irq as InterruptIndex,
1890             })
1891             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1892 
1893         let serial = Arc::new(Mutex::new(Serial::new(
1894             id.clone(),
1895             interrupt_group,
1896             serial_writer,
1897             state_from_id(self.snapshot.as_ref(), id.as_str())
1898                 .map_err(DeviceManagerError::RestoreGetState)?,
1899         )));
1900 
1901         self.bus_devices
1902             .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);
1903 
1904         self.address_manager
1905             .allocator
1906             .lock()
1907             .unwrap()
1908             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1909             .ok_or(DeviceManagerError::AllocateIoPort)?;
1910 
1911         self.address_manager
1912             .io_bus
1913             .insert(serial.clone(), 0x3f8, 0x8)
1914             .map_err(DeviceManagerError::BusError)?;
1915 
1916         // Fill the device tree with a new node. In case of restore, we
1917         // know there is nothing to do, so we can simply override the
1918         // existing entry.
1919         self.device_tree
1920             .lock()
1921             .unwrap()
1922             .insert(id.clone(), device_node!(id, serial));
1923 
1924         Ok(serial)
1925     }
1926 
1927     #[cfg(target_arch = "aarch64")]
1928     fn add_serial_device(
1929         &mut self,
1930         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1931         serial_writer: Option<Box<dyn io::Write + Send>>,
1932     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1933         let id = String::from(SERIAL_DEVICE_NAME);
1934 
1935         let serial_irq = self
1936             .address_manager
1937             .allocator
1938             .lock()
1939             .unwrap()
1940             .allocate_irq()
1941             .unwrap();
1942 
1943         let interrupt_group = interrupt_manager
1944             .create_group(LegacyIrqGroupConfig {
1945                 irq: serial_irq as InterruptIndex,
1946             })
1947             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1948 
1949         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1950             id.clone(),
1951             interrupt_group,
1952             serial_writer,
1953             self.timestamp,
1954             state_from_id(self.snapshot.as_ref(), id.as_str())
1955                 .map_err(DeviceManagerError::RestoreGetState)?,
1956         )));
1957 
1958         self.bus_devices
1959             .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);
1960 
1961         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
1962 
1963         self.address_manager
1964             .mmio_bus
1965             .insert(serial.clone(), addr.0, MMIO_LEN)
1966             .map_err(DeviceManagerError::BusError)?;
1967 
1968         self.id_to_dev_info.insert(
1969             (DeviceType::Serial, DeviceType::Serial.to_string()),
1970             MmioDeviceInfo {
1971                 addr: addr.0,
1972                 len: MMIO_LEN,
1973                 irq: serial_irq,
1974             },
1975         );
1976 
1977         self.cmdline_additions
1978             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1979 
1980         // Fill the device tree with a new node. In case of restore, we
1981         // know there is nothing to do, so we can simply override the
1982         // existing entry.
1983         self.device_tree
1984             .lock()
1985             .unwrap()
1986             .insert(id.clone(), device_node!(id, serial));
1987 
1988         Ok(serial)
1989     }
1990 
1991     fn add_virtio_console_device(
1992         &mut self,
1993         virtio_devices: &mut Vec<MetaVirtioDevice>,
1994         console_fd: ConsoleOutput,
1995         resize_pipe: Option<Arc<File>>,
1996     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
1997         let console_config = self.config.lock().unwrap().console.clone();
1998         let endpoint = match console_fd {
1999             ConsoleOutput::File(file) => Endpoint::File(file),
2000             ConsoleOutput::Pty(file) => {
2001                 self.console_resize_pipe = resize_pipe;
2002                 Endpoint::PtyPair(Arc::new(file.try_clone().unwrap()), file)
2003             }
2004             ConsoleOutput::Tty(stdout) => {
2005                 if stdout.is_terminal() {
2006                     self.console_resize_pipe = resize_pipe;
2007                 }
2008 
2009                 // If stdin is an interactive TTY then we can accept input
2010                 // SAFETY: FFI call. Trivially safe.
2011                 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
2012                     // SAFETY: FFI call to dup. Trivially safe.
2013                     let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
2014                     if stdin == -1 {
2015                         return vmm_sys_util::errno::errno_result()
2016                             .map_err(DeviceManagerError::DupFd);
2017                     }
2018                     // SAFETY: stdin is valid and owned solely by us.
2019                     let stdin = unsafe { File::from_raw_fd(stdin) };
2020                     Endpoint::FilePair(stdout, Arc::new(stdin))
2021                 } else {
2022                     Endpoint::File(stdout)
2023                 }
2024             }
2025             ConsoleOutput::Socket(_) => {
2026                 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
2027             }
2028             ConsoleOutput::Null => Endpoint::Null,
2029             ConsoleOutput::Off => return Ok(None),
2030         };
2031         let id = String::from(CONSOLE_DEVICE_NAME);
2032 
2033         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
2034             id.clone(),
2035             endpoint,
2036             self.console_resize_pipe
2037                 .as_ref()
2038                 .map(|p| p.try_clone().unwrap()),
2039             self.force_iommu | console_config.iommu,
2040             self.seccomp_action.clone(),
2041             self.exit_evt
2042                 .try_clone()
2043                 .map_err(DeviceManagerError::EventFd)?,
2044             state_from_id(self.snapshot.as_ref(), id.as_str())
2045                 .map_err(DeviceManagerError::RestoreGetState)?,
2046         )
2047         .map_err(DeviceManagerError::CreateVirtioConsole)?;
2048         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
2049         virtio_devices.push(MetaVirtioDevice {
2050             virtio_device: Arc::clone(&virtio_console_device)
2051                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2052             iommu: console_config.iommu,
2053             id: id.clone(),
2054             pci_segment: 0,
2055             dma_handler: None,
2056         });
2057 
2058         // Fill the device tree with a new node. In case of restore, we
2059         // know there is nothing to do, so we can simply override the
2060         // existing entry.
2061         self.device_tree
2062             .lock()
2063             .unwrap()
2064             .insert(id.clone(), device_node!(id, virtio_console_device));
2065 
2066         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
2067         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
2068             Some(console_resizer)
2069         } else {
2070             None
2071         })
2072     }
2073 
2074     /// Adds all devices that behave like a console with respect to the VM
2075     /// configuration. This includes:
2076     /// - debug-console
2077     /// - serial-console
2078     /// - virtio-console
2079     fn add_console_devices(
2080         &mut self,
2081         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
2082         virtio_devices: &mut Vec<MetaVirtioDevice>,
2083         console_info: Option<ConsoleInfo>,
2084         console_resize_pipe: Option<Arc<File>>,
2085     ) -> DeviceManagerResult<Arc<Console>> {
2086         let serial_config = self.config.lock().unwrap().serial.clone();
2087         if console_info.is_none() {
2088             return Err(DeviceManagerError::InvalidConsoleInfo);
2089         }
2090 
2091         // SAFETY: console_info is Some, so it's safe to unwrap.
2092         let console_info = console_info.unwrap();
2093 
2094         let serial_writer: Option<Box<dyn io::Write + Send>> = match console_info.serial_main_fd {
2095             ConsoleOutput::File(ref file) | ConsoleOutput::Tty(ref file) => {
2096                 Some(Box::new(Arc::clone(file)))
2097             }
2098             ConsoleOutput::Off
2099             | ConsoleOutput::Null
2100             | ConsoleOutput::Pty(_)
2101             | ConsoleOutput::Socket(_) => None,
2102         };
2103 
2104         if !matches!(console_info.serial_main_fd, ConsoleOutput::Off) {
2105             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
2106             self.serial_manager = match console_info.serial_main_fd {
2107                 ConsoleOutput::Pty(_) | ConsoleOutput::Tty(_) | ConsoleOutput::Socket(_) => {
2108                     let serial_manager = SerialManager::new(
2109                         serial,
2110                         console_info.serial_main_fd,
2111                         serial_config.socket,
2112                     )
2113                     .map_err(DeviceManagerError::CreateSerialManager)?;
2114                     if let Some(mut serial_manager) = serial_manager {
2115                         serial_manager
2116                             .start_thread(
2117                                 self.exit_evt
2118                                     .try_clone()
2119                                     .map_err(DeviceManagerError::EventFd)?,
2120                             )
2121                             .map_err(DeviceManagerError::SpawnSerialManager)?;
2122                         Some(Arc::new(serial_manager))
2123                     } else {
2124                         None
2125                     }
2126                 }
2127                 _ => None,
2128             };
2129         }
2130 
2131         #[cfg(target_arch = "x86_64")]
2132         {
2133             let debug_console_writer: Option<Box<dyn io::Write + Send>> =
2134                 match console_info.debug_main_fd {
2135                     ConsoleOutput::File(file) | ConsoleOutput::Tty(file) => Some(Box::new(file)),
2136                     ConsoleOutput::Off
2137                     | ConsoleOutput::Null
2138                     | ConsoleOutput::Pty(_)
2139                     | ConsoleOutput::Socket(_) => None,
2140                 };
2141             if let Some(writer) = debug_console_writer {
2142                 let _ = self.add_debug_console_device(writer)?;
2143             }
2144         }
2145 
2146         let console_resizer = self.add_virtio_console_device(
2147             virtio_devices,
2148             console_info.console_main_fd,
2149             console_resize_pipe,
2150         )?;
2151 
2152         Ok(Arc::new(Console { console_resizer }))
2153     }
2154 
2155     fn add_tpm_device(
2156         &mut self,
2157         tpm_path: PathBuf,
2158     ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
2159         // Create the TPM device
2160         let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
2161             DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM device: {:?}", e))
2162         })?;
2163         let tpm = Arc::new(Mutex::new(tpm));
2164 
2165         // Add the TPM device to the MMIO bus
2166         self.address_manager
2167             .mmio_bus
2168             .insert(
2169                 tpm.clone(),
2170                 arch::layout::TPM_START.0,
2171                 arch::layout::TPM_SIZE,
2172             )
2173             .map_err(DeviceManagerError::BusError)?;
2174 
2175         Ok(tpm)
2176     }
2177 
2178     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2179         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
2180 
2181         // Create "standard" virtio devices (net/block/rng)
2182         devices.append(&mut self.make_virtio_block_devices()?);
2183         devices.append(&mut self.make_virtio_net_devices()?);
2184         devices.append(&mut self.make_virtio_rng_devices()?);
2185 
2186         // Add virtio-fs if required
2187         devices.append(&mut self.make_virtio_fs_devices()?);
2188 
2189         // Add virtio-pmem if required
2190         devices.append(&mut self.make_virtio_pmem_devices()?);
2191 
2192         // Add virtio-vsock if required
2193         devices.append(&mut self.make_virtio_vsock_devices()?);
2194 
2195         devices.append(&mut self.make_virtio_mem_devices()?);
2196 
2197         // Add virtio-balloon if required
2198         devices.append(&mut self.make_virtio_balloon_devices()?);
2199 
2200         // Add virtio-watchdog device
2201         devices.append(&mut self.make_virtio_watchdog_devices()?);
2202 
2203         // Add vDPA devices if required
2204         devices.append(&mut self.make_vdpa_devices()?);
2205 
2206         Ok(devices)
2207     }
2208 
2209     // Cache whether aio is supported to avoid checking for every block device
2210     fn aio_is_supported(&mut self) -> bool {
2211         if let Some(supported) = self.aio_supported {
2212             return supported;
2213         }
2214 
2215         let supported = block_aio_is_supported();
2216         self.aio_supported = Some(supported);
2217         supported
2218     }
2219 
2220     // Cache whether io_uring is supported to avoid probing for every block device
2221     fn io_uring_is_supported(&mut self) -> bool {
2222         if let Some(supported) = self.io_uring_supported {
2223             return supported;
2224         }
2225 
2226         let supported = block_io_uring_is_supported();
2227         self.io_uring_supported = Some(supported);
2228         supported
2229     }
2230 
2231     fn make_virtio_block_device(
2232         &mut self,
2233         disk_cfg: &mut DiskConfig,
2234     ) -> DeviceManagerResult<MetaVirtioDevice> {
2235         let id = if let Some(id) = &disk_cfg.id {
2236             id.clone()
2237         } else {
2238             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
2239             disk_cfg.id = Some(id.clone());
2240             id
2241         };
2242 
2243         info!("Creating virtio-block device: {:?}", disk_cfg);
2244 
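        // Either hand the disk off to a vhost-user backend, or open the image
        // locally and pick a disk file implementation based on the image type.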
2245         let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
2246             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
2247             let vu_cfg = VhostUserConfig {
2248                 socket,
2249                 num_queues: disk_cfg.num_queues,
2250                 queue_size: disk_cfg.queue_size,
2251             };
2252             let vhost_user_block = Arc::new(Mutex::new(
2253                 match virtio_devices::vhost_user::Blk::new(
2254                     id.clone(),
2255                     vu_cfg,
2256                     self.seccomp_action.clone(),
2257                     self.exit_evt
2258                         .try_clone()
2259                         .map_err(DeviceManagerError::EventFd)?,
2260                     self.force_iommu,
2261                     state_from_id(self.snapshot.as_ref(), id.as_str())
2262                         .map_err(DeviceManagerError::RestoreGetState)?,
2263                 ) {
2264                     Ok(vub_device) => vub_device,
2265                     Err(e) => {
2266                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2267                     }
2268                 },
2269             ));
2270 
2271             (
2272                 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2273                 vhost_user_block as Arc<Mutex<dyn Migratable>>,
2274             )
2275         } else {
2276             let mut options = OpenOptions::new();
2277             options.read(true);
2278             options.write(!disk_cfg.readonly);
2279             if disk_cfg.direct {
2280                 options.custom_flags(libc::O_DIRECT);
2281             }
2282             // Open block device path
2283             let mut file: File = options
2284                 .open(
2285                     disk_cfg
2286                         .path
2287                         .as_ref()
2288                         .ok_or(DeviceManagerError::NoDiskPath)?
2289                         .clone(),
2290                 )
2291                 .map_err(DeviceManagerError::Disk)?;
2292             let image_type =
2293                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2294 
2295             let image = match image_type {
2296                 ImageType::FixedVhd => {
2297                     // Use an asynchronous backend relying on io_uring if the
2298                     // syscalls are supported.
2299                     if cfg!(feature = "io_uring")
2300                         && !disk_cfg.disable_io_uring
2301                         && self.io_uring_is_supported()
2302                     {
2303                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2304 
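                        // cfg!() only evaluates to a boolean; the io_uring-specific
                        // type below must still be gated with #[cfg] so builds
                        // without the feature keep compiling.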
2305                         #[cfg(not(feature = "io_uring"))]
2306                         unreachable!("Checked in if statement above");
2307                         #[cfg(feature = "io_uring")]
2308                         {
2309                             Box::new(
2310                                 FixedVhdDiskAsync::new(file)
2311                                     .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2312                             ) as Box<dyn DiskFile>
2313                         }
2314                     } else {
2315                         info!("Using synchronous fixed VHD disk file");
2316                         Box::new(
2317                             FixedVhdDiskSync::new(file)
2318                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2319                         ) as Box<dyn DiskFile>
2320                     }
2321                 }
2322                 ImageType::Raw => {
2323                     // Use an asynchronous backend relying on io_uring if the
2324                     // syscalls are supported.
2325                     if cfg!(feature = "io_uring")
2326                         && !disk_cfg.disable_io_uring
2327                         && self.io_uring_is_supported()
2328                     {
2329                         info!("Using asynchronous RAW disk file (io_uring)");
2330 
2331                         #[cfg(not(feature = "io_uring"))]
2332                         unreachable!("Checked in if statement above");
2333                         #[cfg(feature = "io_uring")]
2334                         {
2335                             Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2336                         }
2337                     } else if !disk_cfg.disable_aio && self.aio_is_supported() {
2338                         info!("Using asynchronous RAW disk file (aio)");
2339                         Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
2340                     } else {
2341                         info!("Using synchronous RAW disk file");
2342                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2343                     }
2344                 }
2345                 ImageType::Qcow2 => {
2346                     info!("Using synchronous QCOW disk file");
2347                     Box::new(
2348                         QcowDiskSync::new(file, disk_cfg.direct)
2349                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2350                     ) as Box<dyn DiskFile>
2351                 }
2352                 ImageType::Vhdx => {
2353                     info!("Using synchronous VHDX disk file");
2354                     Box::new(
2355                         VhdxDiskSync::new(file)
2356                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2357                     ) as Box<dyn DiskFile>
2358                 }
2359             };
2360 
2361             let rate_limit_group =
2362                 if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
2363                     // Create an anonymous RateLimiterGroup that is dropped when the Disk
2364                     // is dropped.
2365                     let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
2366                     let ops = rate_limiter_cfg.ops.unwrap_or_default();
2367                     let mut rate_limit_group = RateLimiterGroup::new(
2368                         disk_cfg.id.as_ref().unwrap(),
2369                         bw.size,
2370                         bw.one_time_burst.unwrap_or(0),
2371                         bw.refill_time,
2372                         ops.size,
2373                         ops.one_time_burst.unwrap_or(0),
2374                         ops.refill_time,
2375                     )
2376                     .map_err(DeviceManagerError::RateLimiterGroupCreate)?;
2377 
2378                     rate_limit_group
2379                         .start_thread(
2380                             self.exit_evt
2381                                 .try_clone()
2382                                 .map_err(DeviceManagerError::EventFd)?,
2383                         )
2384                         .unwrap();
2385 
2386                     Some(Arc::new(rate_limit_group))
2387                 } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
2388                     self.rate_limit_groups.get(rate_limit_group).cloned()
2389                 } else {
2390                     None
2391                 };
2392 
2393             let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() {
2394                 queue_affinity
2395                     .iter()
2396                     .map(|a| (a.queue_index, a.host_cpus.clone()))
2397                     .collect()
2398             } else {
2399                 BTreeMap::new()
2400             };
2401 
2402             let virtio_block = Arc::new(Mutex::new(
2403                 virtio_devices::Block::new(
2404                     id.clone(),
2405                     image,
2406                     disk_cfg
2407                         .path
2408                         .as_ref()
2409                         .ok_or(DeviceManagerError::NoDiskPath)?
2410                         .clone(),
2411                     disk_cfg.readonly,
2412                     self.force_iommu | disk_cfg.iommu,
2413                     disk_cfg.num_queues,
2414                     disk_cfg.queue_size,
2415                     disk_cfg.serial.clone(),
2416                     self.seccomp_action.clone(),
2417                     rate_limit_group,
2418                     self.exit_evt
2419                         .try_clone()
2420                         .map_err(DeviceManagerError::EventFd)?,
2421                     state_from_id(self.snapshot.as_ref(), id.as_str())
2422                         .map_err(DeviceManagerError::RestoreGetState)?,
2423                     queue_affinity,
2424                 )
2425                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2426             ));
2427 
2428             (
2429                 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2430                 virtio_block as Arc<Mutex<dyn Migratable>>,
2431             )
2432         };
2433 
2434         // Fill the device tree with a new node. In case of restore, we
2435         // know there is nothing to do, so we can simply override the
2436         // existing entry.
2437         self.device_tree
2438             .lock()
2439             .unwrap()
2440             .insert(id.clone(), device_node!(id, migratable_device));
2441 
2442         Ok(MetaVirtioDevice {
2443             virtio_device,
2444             iommu: disk_cfg.iommu,
2445             id,
2446             pci_segment: disk_cfg.pci_segment,
2447             dma_handler: None,
2448         })
2449     }
2450 
2451     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2452         let mut devices = Vec::new();
2453 
2454         let mut block_devices = self.config.lock().unwrap().disks.clone();
2455         if let Some(disk_list_cfg) = &mut block_devices {
2456             for disk_cfg in disk_list_cfg.iter_mut() {
2457                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2458             }
2459         }
2460         self.config.lock().unwrap().disks = block_devices;
2461 
2462         Ok(devices)
2463     }
2464 
2465     fn make_virtio_net_device(
2466         &mut self,
2467         net_cfg: &mut NetConfig,
2468     ) -> DeviceManagerResult<MetaVirtioDevice> {
2469         let id = if let Some(id) = &net_cfg.id {
2470             id.clone()
2471         } else {
2472             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2473             net_cfg.id = Some(id.clone());
2474             id
2475         };
2476         info!("Creating virtio-net device: {:?}", net_cfg);
2477 
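        // Either connect to a vhost-user-net backend, or create a TAP-backed
        // virtio-net device (optionally reusing pre-opened TAP fds).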
2478         let (virtio_device, migratable_device) = if net_cfg.vhost_user {
2479             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2480             let vu_cfg = VhostUserConfig {
2481                 socket,
2482                 num_queues: net_cfg.num_queues,
2483                 queue_size: net_cfg.queue_size,
2484             };
2485             let server = match net_cfg.vhost_mode {
2486                 VhostMode::Client => false,
2487                 VhostMode::Server => true,
2488             };
2489             let vhost_user_net = Arc::new(Mutex::new(
2490                 match virtio_devices::vhost_user::Net::new(
2491                     id.clone(),
2492                     net_cfg.mac,
2493                     net_cfg.mtu,
2494                     vu_cfg,
2495                     server,
2496                     self.seccomp_action.clone(),
2497                     self.exit_evt
2498                         .try_clone()
2499                         .map_err(DeviceManagerError::EventFd)?,
2500                     self.force_iommu,
2501                     state_from_id(self.snapshot.as_ref(), id.as_str())
2502                         .map_err(DeviceManagerError::RestoreGetState)?,
2503                     net_cfg.offload_tso,
2504                     net_cfg.offload_ufo,
2505                     net_cfg.offload_csum,
2506                 ) {
2507                     Ok(vun_device) => vun_device,
2508                     Err(e) => {
2509                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2510                     }
2511                 },
2512             ));
2513 
2514             (
2515                 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2516                 vhost_user_net as Arc<Mutex<dyn Migratable>>,
2517             )
2518         } else {
2519             let state = state_from_id(self.snapshot.as_ref(), id.as_str())
2520                 .map_err(DeviceManagerError::RestoreGetState)?;
2521             let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
2522                 Arc::new(Mutex::new(
2523                     virtio_devices::Net::new(
2524                         id.clone(),
2525                         Some(tap_if_name),
2526                         Some(net_cfg.ip),
2527                         Some(net_cfg.mask),
2528                         Some(net_cfg.mac),
2529                         &mut net_cfg.host_mac,
2530                         net_cfg.mtu,
2531                         self.force_iommu | net_cfg.iommu,
2532                         net_cfg.num_queues,
2533                         net_cfg.queue_size,
2534                         self.seccomp_action.clone(),
2535                         net_cfg.rate_limiter_config,
2536                         self.exit_evt
2537                             .try_clone()
2538                             .map_err(DeviceManagerError::EventFd)?,
2539                         state,
2540                         net_cfg.offload_tso,
2541                         net_cfg.offload_ufo,
2542                         net_cfg.offload_csum,
2543                     )
2544                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2545                 ))
2546             } else if let Some(fds) = &net_cfg.fds {
2547                 let net = virtio_devices::Net::from_tap_fds(
2548                     id.clone(),
2549                     fds,
2550                     Some(net_cfg.mac),
2551                     net_cfg.mtu,
2552                     self.force_iommu | net_cfg.iommu,
2553                     net_cfg.queue_size,
2554                     self.seccomp_action.clone(),
2555                     net_cfg.rate_limiter_config,
2556                     self.exit_evt
2557                         .try_clone()
2558                         .map_err(DeviceManagerError::EventFd)?,
2559                     state,
2560                     net_cfg.offload_tso,
2561                     net_cfg.offload_ufo,
2562                     net_cfg.offload_csum,
2563                 )
2564                 .map_err(DeviceManagerError::CreateVirtioNet)?;
2565 
2566                 // SAFETY: 'fds' are valid because the TAP devices were created successfully
2567                 unsafe {
2568                     self.config.lock().unwrap().add_preserved_fds(fds.clone());
2569                 }
2570 
2571                 Arc::new(Mutex::new(net))
2572             } else {
2573                 Arc::new(Mutex::new(
2574                     virtio_devices::Net::new(
2575                         id.clone(),
2576                         None,
2577                         Some(net_cfg.ip),
2578                         Some(net_cfg.mask),
2579                         Some(net_cfg.mac),
2580                         &mut net_cfg.host_mac,
2581                         net_cfg.mtu,
2582                         self.force_iommu | net_cfg.iommu,
2583                         net_cfg.num_queues,
2584                         net_cfg.queue_size,
2585                         self.seccomp_action.clone(),
2586                         net_cfg.rate_limiter_config,
2587                         self.exit_evt
2588                             .try_clone()
2589                             .map_err(DeviceManagerError::EventFd)?,
2590                         state,
2591                         net_cfg.offload_tso,
2592                         net_cfg.offload_ufo,
2593                         net_cfg.offload_csum,
2594                     )
2595                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2596                 ))
2597             };
2598 
2599             (
2600                 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2601                 virtio_net as Arc<Mutex<dyn Migratable>>,
2602             )
2603         };
2604 
2605         // Fill the device tree with a new node. In case of restore, we
2606         // know there is nothing to do, so we can simply override the
2607         // existing entry.
2608         self.device_tree
2609             .lock()
2610             .unwrap()
2611             .insert(id.clone(), device_node!(id, migratable_device));
2612 
2613         Ok(MetaVirtioDevice {
2614             virtio_device,
2615             iommu: net_cfg.iommu,
2616             id,
2617             pci_segment: net_cfg.pci_segment,
2618             dma_handler: None,
2619         })
2620     }
2621 
2622     /// Add virtio-net and vhost-user-net devices
2623     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2624         let mut devices = Vec::new();
2625         let mut net_devices = self.config.lock().unwrap().net.clone();
2626         if let Some(net_list_cfg) = &mut net_devices {
2627             for net_cfg in net_list_cfg.iter_mut() {
2628                 devices.push(self.make_virtio_net_device(net_cfg)?);
2629             }
2630         }
2631         self.config.lock().unwrap().net = net_devices;
2632 
2633         Ok(devices)
2634     }
2635 
2636     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2637         let mut devices = Vec::new();
2638 
2639         // Add virtio-rng if required
2640         let rng_config = self.config.lock().unwrap().rng.clone();
2641         if let Some(rng_path) = rng_config.src.to_str() {
2642             info!("Creating virtio-rng device: {:?}", rng_config);
2643             let id = String::from(RNG_DEVICE_NAME);
2644 
2645             let virtio_rng_device = Arc::new(Mutex::new(
2646                 virtio_devices::Rng::new(
2647                     id.clone(),
2648                     rng_path,
2649                     self.force_iommu | rng_config.iommu,
2650                     self.seccomp_action.clone(),
2651                     self.exit_evt
2652                         .try_clone()
2653                         .map_err(DeviceManagerError::EventFd)?,
2654                     state_from_id(self.snapshot.as_ref(), id.as_str())
2655                         .map_err(DeviceManagerError::RestoreGetState)?,
2656                 )
2657                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2658             ));
2659             devices.push(MetaVirtioDevice {
2660                 virtio_device: Arc::clone(&virtio_rng_device)
2661                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2662                 iommu: rng_config.iommu,
2663                 id: id.clone(),
2664                 pci_segment: 0,
2665                 dma_handler: None,
2666             });
2667 
2668             // Fill the device tree with a new node. In case of restore, we
2669             // know there is nothing to do, so we can simply override the
2670             // existing entry.
2671             self.device_tree
2672                 .lock()
2673                 .unwrap()
2674                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2675         }
2676 
2677         Ok(devices)
2678     }
2679 
2680     fn make_virtio_fs_device(
2681         &mut self,
2682         fs_cfg: &mut FsConfig,
2683     ) -> DeviceManagerResult<MetaVirtioDevice> {
2684         let id = if let Some(id) = &fs_cfg.id {
2685             id.clone()
2686         } else {
2687             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2688             fs_cfg.id = Some(id.clone());
2689             id
2690         };
2691 
2692         info!("Creating virtio-fs device: {:?}", fs_cfg);
2693 
2694         let mut node = device_node!(id);
2695 
2696         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2697             let virtio_fs_device = Arc::new(Mutex::new(
2698                 virtio_devices::vhost_user::Fs::new(
2699                     id.clone(),
2700                     fs_socket,
2701                     &fs_cfg.tag,
2702                     fs_cfg.num_queues,
2703                     fs_cfg.queue_size,
2704                     None,
2705                     self.seccomp_action.clone(),
2706                     self.exit_evt
2707                         .try_clone()
2708                         .map_err(DeviceManagerError::EventFd)?,
2709                     self.force_iommu,
2710                     state_from_id(self.snapshot.as_ref(), id.as_str())
2711                         .map_err(DeviceManagerError::RestoreGetState)?,
2712                 )
2713                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2714             ));
2715 
2716             // Update the device tree with the migratable device.
2717             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2718             self.device_tree.lock().unwrap().insert(id.clone(), node);
2719 
2720             Ok(MetaVirtioDevice {
2721                 virtio_device: Arc::clone(&virtio_fs_device)
2722                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2723                 iommu: false,
2724                 id,
2725                 pci_segment: fs_cfg.pci_segment,
2726                 dma_handler: None,
2727             })
2728         } else {
2729             Err(DeviceManagerError::NoVirtioFsSock)
2730         }
2731     }
2732 
2733     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2734         let mut devices = Vec::new();
2735 
2736         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2737         if let Some(fs_list_cfg) = &mut fs_devices {
2738             for fs_cfg in fs_list_cfg.iter_mut() {
2739                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2740             }
2741         }
2742         self.config.lock().unwrap().fs = fs_devices;
2743 
2744         Ok(devices)
2745     }
2746 
2747     fn make_virtio_pmem_device(
2748         &mut self,
2749         pmem_cfg: &mut PmemConfig,
2750     ) -> DeviceManagerResult<MetaVirtioDevice> {
2751         let id = if let Some(id) = &pmem_cfg.id {
2752             id.clone()
2753         } else {
2754             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2755             pmem_cfg.id = Some(id.clone());
2756             id
2757         };
2758 
2759         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2760 
2761         let mut node = device_node!(id);
2762 
2763         // Look for the id in the device tree. If it can be found, that means
2764         // the device is being restored, otherwise it's created from scratch.
2765         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2766             info!("Restoring virtio-pmem {} resources", id);
2767 
2768             let mut region_range: Option<(u64, u64)> = None;
2769             for resource in node.resources.iter() {
2770                 match resource {
2771                     Resource::MmioAddressRange { base, size } => {
2772                         if region_range.is_some() {
2773                             return Err(DeviceManagerError::ResourceAlreadyExists);
2774                         }
2775 
2776                         region_range = Some((*base, *size));
2777                     }
2778                     _ => {
2779                         error!("Unexpected resource {:?} for {}", resource, id);
2780                     }
2781                 }
2782             }
2783 
2784             if region_range.is_none() {
2785                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2786             }
2787 
2788             region_range
2789         } else {
2790             None
2791         };
2792 
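        // A directory path means the region is backed by an unnamed temporary
        // file (O_TMPFILE) created in that directory, whose size must come from
        // the configuration.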
2793         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2794             if pmem_cfg.size.is_none() {
2795                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2796             }
2797             (O_TMPFILE, true)
2798         } else {
2799             (0, false)
2800         };
2801 
2802         let mut file = OpenOptions::new()
2803             .read(true)
2804             .write(!pmem_cfg.discard_writes)
2805             .custom_flags(custom_flags)
2806             .open(&pmem_cfg.file)
2807             .map_err(DeviceManagerError::PmemFileOpen)?;
2808 
2809         let size = if let Some(size) = pmem_cfg.size {
2810             if set_len {
2811                 file.set_len(size)
2812                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2813             }
2814             size
2815         } else {
2816             file.seek(SeekFrom::End(0))
2817                 .map_err(DeviceManagerError::PmemFileSetLen)?
2818         };
2819 
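        // The pmem backing size must be a multiple of 2 MiB (0x20_0000).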
2820         if size % 0x20_0000 != 0 {
2821             return Err(DeviceManagerError::PmemSizeNotAligned);
2822         }
2823 
2824         let (region_base, region_size) = if let Some((base, size)) = region_range {
2825             // The memory needs to be 2MiB aligned in order to support
2826             // hugepages.
2827             self.pci_segments[pmem_cfg.pci_segment as usize]
2828                 .mem64_allocator
2829                 .lock()
2830                 .unwrap()
2831                 .allocate(
2832                     Some(GuestAddress(base)),
2833                     size as GuestUsize,
2834                     Some(0x0020_0000),
2835                 )
2836                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2837 
2838             (base, size)
2839         } else {
2840             // The memory needs to be 2MiB aligned in order to support
2841             // hugepages.
2842             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2843                 .mem64_allocator
2844                 .lock()
2845                 .unwrap()
2846                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
2847                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2848 
2849             (base.raw_value(), size)
2850         };
2851 
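             // Map the backing file into the VMM address space. With
             // discard_writes the mapping is private (copy-on-write) so guest
             // writes never reach the file, otherwise it is shared.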
2852         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2853         let mmap_region = MmapRegion::build(
2854             Some(FileOffset::new(cloned_file, 0)),
2855             region_size as usize,
2856             PROT_READ | PROT_WRITE,
2857             MAP_NORESERVE
2858                 | if pmem_cfg.discard_writes {
2859                     MAP_PRIVATE
2860                 } else {
2861                     MAP_SHARED
2862                 },
2863         )
2864         .map_err(DeviceManagerError::NewMmapRegion)?;
2865         let host_addr: u64 = mmap_region.as_ptr() as u64;
2866 
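             // Register the host mapping with the hypervisor so the guest can
             // access the pmem region directly at `region_base`.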
2867         let mem_slot = self
2868             .memory_manager
2869             .lock()
2870             .unwrap()
2871             .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
2872             .map_err(DeviceManagerError::MemoryManager)?;
2873 
2874         let mapping = virtio_devices::UserspaceMapping {
2875             host_addr,
2876             mem_slot,
2877             addr: GuestAddress(region_base),
2878             len: region_size,
2879             mergeable: false,
2880         };
2881 
2882         let virtio_pmem_device = Arc::new(Mutex::new(
2883             virtio_devices::Pmem::new(
2884                 id.clone(),
2885                 file,
2886                 GuestAddress(region_base),
2887                 mapping,
2888                 mmap_region,
2889                 self.force_iommu | pmem_cfg.iommu,
2890                 self.seccomp_action.clone(),
2891                 self.exit_evt
2892                     .try_clone()
2893                     .map_err(DeviceManagerError::EventFd)?,
2894                 state_from_id(self.snapshot.as_ref(), id.as_str())
2895                     .map_err(DeviceManagerError::RestoreGetState)?,
2896             )
2897             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2898         ));
2899 
2900         // Update the device tree with correct resource information and with
2901         // the migratable device.
2902         node.resources.push(Resource::MmioAddressRange {
2903             base: region_base,
2904             size: region_size,
2905         });
2906         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2907         self.device_tree.lock().unwrap().insert(id.clone(), node);
2908 
2909         Ok(MetaVirtioDevice {
2910             virtio_device: Arc::clone(&virtio_pmem_device)
2911                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2912             iommu: pmem_cfg.iommu,
2913             id,
2914             pci_segment: pmem_cfg.pci_segment,
2915             dma_handler: None,
2916         })
2917     }
2918 
2919     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2920         let mut devices = Vec::new();
2921         // Add virtio-pmem if required
2922         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2923         if let Some(pmem_list_cfg) = &mut pmem_devices {
2924             for pmem_cfg in pmem_list_cfg.iter_mut() {
2925                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2926             }
2927         }
2928         self.config.lock().unwrap().pmem = pmem_devices;
2929 
2930         Ok(devices)
2931     }
2932 
2933     fn make_virtio_vsock_device(
2934         &mut self,
2935         vsock_cfg: &mut VsockConfig,
2936     ) -> DeviceManagerResult<MetaVirtioDevice> {
2937         let id = if let Some(id) = &vsock_cfg.id {
2938             id.clone()
2939         } else {
2940             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2941             vsock_cfg.id = Some(id.clone());
2942             id
2943         };
2944 
2945         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2946 
2947         let socket_path = vsock_cfg
2948             .socket
2949             .to_str()
2950             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2951         let backend =
2952             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2953                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2954 
2955         let vsock_device = Arc::new(Mutex::new(
2956             virtio_devices::Vsock::new(
2957                 id.clone(),
2958                 vsock_cfg.cid,
2959                 vsock_cfg.socket.clone(),
2960                 backend,
2961                 self.force_iommu | vsock_cfg.iommu,
2962                 self.seccomp_action.clone(),
2963                 self.exit_evt
2964                     .try_clone()
2965                     .map_err(DeviceManagerError::EventFd)?,
2966                 state_from_id(self.snapshot.as_ref(), id.as_str())
2967                     .map_err(DeviceManagerError::RestoreGetState)?,
2968             )
2969             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2970         ));
2971 
2972         // Fill the device tree with a new node. In case of restore, we
2973         // know there is nothing to do, so we can simply overwrite the
2974         // existing entry.
2975         self.device_tree
2976             .lock()
2977             .unwrap()
2978             .insert(id.clone(), device_node!(id, vsock_device));
2979 
2980         Ok(MetaVirtioDevice {
2981             virtio_device: Arc::clone(&vsock_device)
2982                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2983             iommu: vsock_cfg.iommu,
2984             id,
2985             pci_segment: vsock_cfg.pci_segment,
2986             dma_handler: None,
2987         })
2988     }
2989 
2990     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2991         let mut devices = Vec::new();
2992 
2993         let mut vsock = self.config.lock().unwrap().vsock.clone();
2994         if let Some(ref mut vsock_cfg) = &mut vsock {
2995             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
2996         }
2997         self.config.lock().unwrap().vsock = vsock;
2998 
2999         Ok(devices)
3000     }
3001 
3002     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3003         let mut devices = Vec::new();
3004 
3005         let mm = self.memory_manager.clone();
3006         let mut mm = mm.lock().unwrap();
3007         for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
3008             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
3009                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
3010 
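                     // Resolve the NUMA node associated with this memory zone;
                     // it is passed to the virtio-mem device below.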
3011                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
3012                     .map(|i| i as u16);
3013 
3014                 let virtio_mem_device = Arc::new(Mutex::new(
3015                     virtio_devices::Mem::new(
3016                         memory_zone_id.clone(),
3017                         virtio_mem_zone.region(),
3018                         self.seccomp_action.clone(),
3019                         node_id,
3020                         virtio_mem_zone.hotplugged_size(),
3021                         virtio_mem_zone.hugepages(),
3022                         self.exit_evt
3023                             .try_clone()
3024                             .map_err(DeviceManagerError::EventFd)?,
3025                         virtio_mem_zone.blocks_state().clone(),
3026                         state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
3027                             .map_err(DeviceManagerError::RestoreGetState)?,
3028                     )
3029                     .map_err(DeviceManagerError::CreateVirtioMem)?,
3030                 ));
3031 
3032                 // Update the virtio-mem zone so that it has a handle onto the
3033                 // virtio-mem device, which will be used for triggering a resize
3034                 // if needed.
3035                 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));
3036 
3037                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
3038 
3039                 devices.push(MetaVirtioDevice {
3040                     virtio_device: Arc::clone(&virtio_mem_device)
3041                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3042                     iommu: false,
3043                     id: memory_zone_id.clone(),
3044                     pci_segment: 0,
3045                     dma_handler: None,
3046                 });
3047 
3048                 // Fill the device tree with a new node. In case of restore, we
3049                 // know there is nothing to do, so we can simply overwrite the
3050                 // existing entry.
3051                 self.device_tree.lock().unwrap().insert(
3052                     memory_zone_id.clone(),
3053                     device_node!(memory_zone_id, virtio_mem_device),
3054                 );
3055             }
3056         }
3057 
3058         Ok(devices)
3059     }
3060 
3061     #[cfg(feature = "pvmemcontrol")]
3062     fn make_pvmemcontrol_device(
3063         &mut self,
3064     ) -> DeviceManagerResult<(
3065         Arc<PvmemcontrolBusDevice>,
3066         Arc<Mutex<PvmemcontrolPciDevice>>,
3067     )> {
3068         let id = String::from(PVMEMCONTROL_DEVICE_NAME);
3069         let pci_segment_id = 0x0_u16;
3070 
3071         let (pci_segment_id, pci_device_bdf, resources) =
3072             self.pci_resources(&id, pci_segment_id)?;
3073 
3074         info!("Creating pvmemcontrol device: id = {}", id);
3075         let (pvmemcontrol_pci_device, pvmemcontrol_bus_device) =
3076             devices::pvmemcontrol::PvmemcontrolDevice::make_device(
3077                 id.clone(),
3078                 self.memory_manager.lock().unwrap().guest_memory(),
3079             );
3080 
3081         let pvmemcontrol_pci_device = Arc::new(Mutex::new(pvmemcontrol_pci_device));
3082         let pvmemcontrol_bus_device = Arc::new(pvmemcontrol_bus_device);
3083 
3084         let new_resources = self.add_pci_device(
3085             pvmemcontrol_bus_device.clone(),
3086             pvmemcontrol_pci_device.clone(),
3087             pci_segment_id,
3088             pci_device_bdf,
3089             resources,
3090         )?;
3091 
3092         let mut node = device_node!(id, pvmemcontrol_pci_device);
3093 
3094         node.resources = new_resources;
3095         node.pci_bdf = Some(pci_device_bdf);
3096         node.pci_device_handle = None;
3097 
3098         self.device_tree.lock().unwrap().insert(id, node);
3099 
3100         Ok((pvmemcontrol_bus_device, pvmemcontrol_pci_device))
3101     }
3102 
3103     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3104         let mut devices = Vec::new();
3105 
3106         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
3107             let id = String::from(BALLOON_DEVICE_NAME);
3108             info!("Creating virtio-balloon device: id = {}", id);
3109 
3110             let virtio_balloon_device = Arc::new(Mutex::new(
3111                 virtio_devices::Balloon::new(
3112                     id.clone(),
3113                     balloon_config.size,
3114                     balloon_config.deflate_on_oom,
3115                     balloon_config.free_page_reporting,
3116                     self.seccomp_action.clone(),
3117                     self.exit_evt
3118                         .try_clone()
3119                         .map_err(DeviceManagerError::EventFd)?,
3120                     state_from_id(self.snapshot.as_ref(), id.as_str())
3121                         .map_err(DeviceManagerError::RestoreGetState)?,
3122                 )
3123                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
3124             ));
3125 
3126             self.balloon = Some(virtio_balloon_device.clone());
3127 
3128             devices.push(MetaVirtioDevice {
3129                 virtio_device: Arc::clone(&virtio_balloon_device)
3130                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3131                 iommu: false,
3132                 id: id.clone(),
3133                 pci_segment: 0,
3134                 dma_handler: None,
3135             });
3136 
3137             self.device_tree
3138                 .lock()
3139                 .unwrap()
3140                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
3141         }
3142 
3143         Ok(devices)
3144     }
3145 
3146     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3147         let mut devices = Vec::new();
3148 
3149         if !self.config.lock().unwrap().watchdog {
3150             return Ok(devices);
3151         }
3152 
3153         let id = String::from(WATCHDOG_DEVICE_NAME);
3154         info!("Creating virtio-watchdog device: id = {}", id);
3155 
3156         let virtio_watchdog_device = Arc::new(Mutex::new(
3157             virtio_devices::Watchdog::new(
3158                 id.clone(),
3159                 self.reset_evt.try_clone().unwrap(),
3160                 self.seccomp_action.clone(),
3161                 self.exit_evt
3162                     .try_clone()
3163                     .map_err(DeviceManagerError::EventFd)?,
3164                 state_from_id(self.snapshot.as_ref(), id.as_str())
3165                     .map_err(DeviceManagerError::RestoreGetState)?,
3166             )
3167             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
3168         ));
3169         devices.push(MetaVirtioDevice {
3170             virtio_device: Arc::clone(&virtio_watchdog_device)
3171                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3172             iommu: false,
3173             id: id.clone(),
3174             pci_segment: 0,
3175             dma_handler: None,
3176         });
3177 
3178         self.device_tree
3179             .lock()
3180             .unwrap()
3181             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
3182 
3183         Ok(devices)
3184     }
3185 
3186     fn make_vdpa_device(
3187         &mut self,
3188         vdpa_cfg: &mut VdpaConfig,
3189     ) -> DeviceManagerResult<MetaVirtioDevice> {
3190         let id = if let Some(id) = &vdpa_cfg.id {
3191             id.clone()
3192         } else {
3193             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
3194             vdpa_cfg.id = Some(id.clone());
3195             id
3196         };
3197 
3198         info!("Creating vDPA device: {:?}", vdpa_cfg);
3199 
3200         let device_path = vdpa_cfg
3201             .path
3202             .to_str()
3203             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
3204 
3205         let vdpa_device = Arc::new(Mutex::new(
3206             virtio_devices::Vdpa::new(
3207                 id.clone(),
3208                 device_path,
3209                 self.memory_manager.lock().unwrap().guest_memory(),
3210                 vdpa_cfg.num_queues as u16,
3211                 state_from_id(self.snapshot.as_ref(), id.as_str())
3212                     .map_err(DeviceManagerError::RestoreGetState)?,
3213             )
3214             .map_err(DeviceManagerError::CreateVdpa)?,
3215         ));
3216 
3217         // Create the DMA handler that is required by the vDPA device
3218         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
3219             Arc::clone(&vdpa_device),
3220             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3221         ));
3222 
3223         self.device_tree
3224             .lock()
3225             .unwrap()
3226             .insert(id.clone(), device_node!(id, vdpa_device));
3227 
3228         Ok(MetaVirtioDevice {
3229             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3230             iommu: vdpa_cfg.iommu,
3231             id,
3232             pci_segment: vdpa_cfg.pci_segment,
3233             dma_handler: Some(vdpa_mapping),
3234         })
3235     }
3236 
3237     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3238         let mut devices = Vec::new();
3239         // Add vdpa if required
3240         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
3241         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
3242             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
3243                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
3244             }
3245         }
3246         self.config.lock().unwrap().vdpa = vdpa_devices;
3247 
3248         Ok(devices)
3249     }
3250 
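         /// Find the next free device name for `prefix` by appending a
         /// wrapping counter, giving up only after a full cycle without
         /// finding an unused name.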
3251     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
3252         let start_id = self.device_id_cnt;
3253         loop {
3254             // Generate the temporary name.
3255             let name = format!("{}{}", prefix, self.device_id_cnt);
3256             // Increment the counter.
3257             self.device_id_cnt += Wrapping(1);
3258             // Check if the name is already in use.
3259             if !self.boot_id_list.contains(&name)
3260                 && !self.device_tree.lock().unwrap().contains_key(&name)
3261             {
3262                 return Ok(name);
3263             }
3264 
3265             if self.device_id_cnt == start_id {
3266                 // We went through a full loop and there's nothing else we can
3267                 // do.
3268                 break;
3269             }
3270         }
3271         Err(DeviceManagerError::NoAvailableDeviceName)
3272     }
3273 
3274     fn add_passthrough_device(
3275         &mut self,
3276         device_cfg: &mut DeviceConfig,
3277     ) -> DeviceManagerResult<(PciBdf, String)> {
3278         // If the passthrough device has not been created yet, it is created
3279         // here and stored in the DeviceManager structure for future needs.
3280         if self.passthrough_device.is_none() {
3281             self.passthrough_device = Some(
3282                 self.address_manager
3283                     .vm
3284                     .create_passthrough_device()
3285                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
3286             );
3287         }
3288 
3289         self.add_vfio_device(device_cfg)
3290     }
3291 
3292     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
3293         let passthrough_device = self
3294             .passthrough_device
3295             .as_ref()
3296             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
3297 
3298         let dup = passthrough_device
3299             .try_clone()
3300             .map_err(DeviceManagerError::VfioCreate)?;
3301 
3302         Ok(Arc::new(
3303             VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
3304         ))
3305     }
3306 
3307     fn add_vfio_device(
3308         &mut self,
3309         device_cfg: &mut DeviceConfig,
3310     ) -> DeviceManagerResult<(PciBdf, String)> {
3311         let vfio_name = if let Some(id) = &device_cfg.id {
3312             id.clone()
3313         } else {
3314             let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
3315             device_cfg.id = Some(id.clone());
3316             id
3317         };
3318 
3319         let (pci_segment_id, pci_device_bdf, resources) =
3320             self.pci_resources(&vfio_name, device_cfg.pci_segment)?;
3321 
3322         let mut needs_dma_mapping = false;
3323 
3324         // Here we create a new VFIO container for one of two reasons. Either
3325         // this is the first VFIO device, meaning we need a new VFIO container,
3326         // which will be shared with other VFIO devices. Or the new VFIO device
3327         // is attached to a vIOMMU, meaning we must create a dedicated VFIO
3328         // container. In the vIOMMU use case, we can't put all devices under
3329         // the same VFIO container since we couldn't map/unmap memory for each
3330         // device individually. That's simply because the map/unmap operations
3331         // happen at the VFIO container level.
3332         let vfio_container = if device_cfg.iommu {
3333             let vfio_container = self.create_vfio_container()?;
3334 
3335             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3336                 Arc::clone(&vfio_container),
3337                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3338                 Arc::clone(&self.mmio_regions),
3339             ));
3340 
3341             if let Some(iommu) = &self.iommu_device {
3342                 iommu
3343                     .lock()
3344                     .unwrap()
3345                     .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
3346             } else {
3347                 return Err(DeviceManagerError::MissingVirtualIommu);
3348             }
3349 
3350             vfio_container
3351         } else if let Some(vfio_container) = &self.vfio_container {
3352             Arc::clone(vfio_container)
3353         } else {
3354             let vfio_container = self.create_vfio_container()?;
3355             needs_dma_mapping = true;
3356             self.vfio_container = Some(Arc::clone(&vfio_container));
3357 
3358             vfio_container
3359         };
3360 
3361         let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
3362             .map_err(DeviceManagerError::VfioCreate)?;
3363 
3364         if needs_dma_mapping {
3365             // Register DMA mappings with the IOMMU.
3366             // Do not register virtio-mem regions, as they are handled directly by
3367             // the virtio-mem device itself.
3368             for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3369                 for region in zone.regions() {
3370                     vfio_container
3371                         .vfio_dma_map(
3372                             region.start_addr().raw_value(),
3373                             region.len(),
3374                             region.as_ptr() as u64,
3375                         )
3376                         .map_err(DeviceManagerError::VfioDmaMap)?;
3377                 }
3378             }
3379 
3380             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3381                 Arc::clone(&vfio_container),
3382                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3383                 Arc::clone(&self.mmio_regions),
3384             ));
3385 
3386             for virtio_mem_device in self.virtio_mem_devices.iter() {
3387                 virtio_mem_device
3388                     .lock()
3389                     .unwrap()
3390                     .add_dma_mapping_handler(
3391                         VirtioMemMappingSource::Container,
3392                         vfio_mapping.clone(),
3393                     )
3394                     .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3395             }
3396         }
3397 
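             // Set up the legacy (INTx) interrupt group for this device, based
             // on the IRQ pre-assigned to its slot on this PCI segment.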
3398         let legacy_interrupt_group =
3399             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3400                 Some(
3401                     legacy_interrupt_manager
3402                         .create_group(LegacyIrqGroupConfig {
3403                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3404                                 [pci_device_bdf.device() as usize]
3405                                 as InterruptIndex,
3406                         })
3407                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3408                 )
3409             } else {
3410                 None
3411             };
3412 
3413         let memory_manager = self.memory_manager.clone();
3414 
3415         let vfio_pci_device = VfioPciDevice::new(
3416             vfio_name.clone(),
3417             &self.address_manager.vm,
3418             vfio_device,
3419             vfio_container,
3420             self.msi_interrupt_manager.clone(),
3421             legacy_interrupt_group,
3422             device_cfg.iommu,
3423             pci_device_bdf,
3424             memory_manager.lock().unwrap().memory_slot_allocator(),
3425             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
3426             device_cfg.x_nv_gpudirect_clique,
3427         )
3428         .map_err(DeviceManagerError::VfioPciCreate)?;
3429 
3430         let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));
3431 
3432         let new_resources = self.add_pci_device(
3433             vfio_pci_device.clone(),
3434             vfio_pci_device.clone(),
3435             pci_segment_id,
3436             pci_device_bdf,
3437             resources,
3438         )?;
3439 
3440         vfio_pci_device
3441             .lock()
3442             .unwrap()
3443             .map_mmio_regions()
3444             .map_err(DeviceManagerError::VfioMapRegion)?;
3445 
3446         for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
3447             self.mmio_regions.lock().unwrap().push(mmio_region);
3448         }
3449 
3450         let mut node = device_node!(vfio_name, vfio_pci_device);
3451 
3452         // Update the device tree with correct resource information.
3453         node.resources = new_resources;
3454         node.pci_bdf = Some(pci_device_bdf);
3455         node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));
3456 
3457         self.device_tree
3458             .lock()
3459             .unwrap()
3460             .insert(vfio_name.clone(), node);
3461 
3462         Ok((pci_device_bdf, vfio_name))
3463     }
3464 
3465     fn add_pci_device(
3466         &mut self,
3467         bus_device: Arc<dyn BusDeviceSync>,
3468         pci_device: Arc<Mutex<dyn PciDevice>>,
3469         segment_id: u16,
3470         bdf: PciBdf,
3471         resources: Option<Vec<Resource>>,
3472     ) -> DeviceManagerResult<Vec<Resource>> {
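             // Allocate the device BARs from the 32-bit and 64-bit MMIO
             // allocators of the target PCI segment, reusing any restored
             // resources passed in.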
3473         let bars = pci_device
3474             .lock()
3475             .unwrap()
3476             .allocate_bars(
3477                 &self.address_manager.allocator,
3478                 &mut self.pci_segments[segment_id as usize]
3479                     .mem32_allocator
3480                     .lock()
3481                     .unwrap(),
3482                 &mut self.pci_segments[segment_id as usize]
3483                     .mem64_allocator
3484                     .lock()
3485                     .unwrap(),
3486                 resources,
3487             )
3488             .map_err(DeviceManagerError::AllocateBars)?;
3489 
3490         let mut pci_bus = self.pci_segments[segment_id as usize]
3491             .pci_bus
3492             .lock()
3493             .unwrap();
3494 
3495         pci_bus
3496             .add_device(bdf.device() as u32, pci_device)
3497             .map_err(DeviceManagerError::AddPciDevice)?;
3498 
3499         self.bus_devices.push(Arc::clone(&bus_device));
3500 
3501         pci_bus
3502             .register_mapping(
3503                 bus_device,
3504                 self.address_manager.io_bus.as_ref(),
3505                 self.address_manager.mmio_bus.as_ref(),
3506                 bars.clone(),
3507             )
3508             .map_err(DeviceManagerError::AddPciDevice)?;
3509 
3510         let mut new_resources = Vec::new();
3511         for bar in bars {
3512             new_resources.push(Resource::PciBar {
3513                 index: bar.idx(),
3514                 base: bar.addr(),
3515                 size: bar.size(),
3516                 type_: bar.region_type().into(),
3517                 prefetchable: bar.prefetchable().into(),
3518             });
3519         }
3520 
3521         Ok(new_resources)
3522     }
3523 
3524     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3525         let mut iommu_attached_device_ids = Vec::new();
3526         let mut devices = self.config.lock().unwrap().devices.clone();
3527 
3528         if let Some(device_list_cfg) = &mut devices {
3529             for device_cfg in device_list_cfg.iter_mut() {
3530                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3531                 if device_cfg.iommu && self.iommu_device.is_some() {
3532                     iommu_attached_device_ids.push(device_id);
3533                 }
3534             }
3535         }
3536 
3537         // Update the list of devices
3538         self.config.lock().unwrap().devices = devices;
3539 
3540         Ok(iommu_attached_device_ids)
3541     }
3542 
3543     fn add_vfio_user_device(
3544         &mut self,
3545         device_cfg: &mut UserDeviceConfig,
3546     ) -> DeviceManagerResult<(PciBdf, String)> {
3547         let vfio_user_name = if let Some(id) = &device_cfg.id {
3548             id.clone()
3549         } else {
3550             let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
3551             device_cfg.id = Some(id.clone());
3552             id
3553         };
3554 
3555         let (pci_segment_id, pci_device_bdf, resources) =
3556             self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;
3557 
3558         let legacy_interrupt_group =
3559             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3560                 Some(
3561                     legacy_interrupt_manager
3562                         .create_group(LegacyIrqGroupConfig {
3563                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3564                                 [pci_device_bdf.device() as usize]
3565                                 as InterruptIndex,
3566                         })
3567                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3568                 )
3569             } else {
3570                 None
3571             };
3572 
3573         let client = Arc::new(Mutex::new(
3574             vfio_user::Client::new(&device_cfg.socket)
3575                 .map_err(DeviceManagerError::VfioUserCreateClient)?,
3576         ));
3577 
3578         let memory_manager = self.memory_manager.clone();
3579 
3580         let mut vfio_user_pci_device = VfioUserPciDevice::new(
3581             vfio_user_name.clone(),
3582             &self.address_manager.vm,
3583             client.clone(),
3584             self.msi_interrupt_manager.clone(),
3585             legacy_interrupt_group,
3586             pci_device_bdf,
3587             memory_manager.lock().unwrap().memory_slot_allocator(),
3588             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
3589         )
3590         .map_err(DeviceManagerError::VfioUserCreate)?;
3591 
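             // Create a DMA mapping handler backed by the vfio-user client and
             // register it with every virtio-mem device so that hotplugged
             // memory also gets mapped for this device.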
3592         let memory = self.memory_manager.lock().unwrap().guest_memory();
3593         let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
3594         for virtio_mem_device in self.virtio_mem_devices.iter() {
3595             virtio_mem_device
3596                 .lock()
3597                 .unwrap()
3598                 .add_dma_mapping_handler(
3599                     VirtioMemMappingSource::Device(pci_device_bdf.into()),
3600                     vfio_user_mapping.clone(),
3601                 )
3602                 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3603         }
3604 
3605         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3606             for region in zone.regions() {
3607                 vfio_user_pci_device
3608                     .dma_map(region)
3609                     .map_err(DeviceManagerError::VfioUserDmaMap)?;
3610             }
3611         }
3612 
3613         let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));
3614 
3615         let new_resources = self.add_pci_device(
3616             vfio_user_pci_device.clone(),
3617             vfio_user_pci_device.clone(),
3618             pci_segment_id,
3619             pci_device_bdf,
3620             resources,
3621         )?;
3622 
3623         // Note that 'add_pci_device()' must be called beforehand so that the list of
3624         // MMIO regions is provisioned correctly.
3625         vfio_user_pci_device
3626             .lock()
3627             .unwrap()
3628             .map_mmio_regions()
3629             .map_err(DeviceManagerError::VfioUserMapRegion)?;
3630 
3631         let mut node = device_node!(vfio_user_name, vfio_user_pci_device);
3632 
3633         // Update the device tree with correct resource information.
3634         node.resources = new_resources;
3635         node.pci_bdf = Some(pci_device_bdf);
3636         node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));
3637 
3638         self.device_tree
3639             .lock()
3640             .unwrap()
3641             .insert(vfio_user_name.clone(), node);
3642 
3643         Ok((pci_device_bdf, vfio_user_name))
3644     }
3645 
3646     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3647         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3648 
3649         if let Some(device_list_cfg) = &mut user_devices {
3650             for device_cfg in device_list_cfg.iter_mut() {
3651                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3652             }
3653         }
3654 
3655         // Update the list of devices
3656         self.config.lock().unwrap().user_devices = user_devices;
3657 
3658         Ok(vec![])
3659     }
3660 
3661     fn add_virtio_pci_device(
3662         &mut self,
3663         virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3664         iommu_mapping: &Option<Arc<IommuMapping>>,
3665         virtio_device_id: String,
3666         pci_segment_id: u16,
3667         dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
3668     ) -> DeviceManagerResult<PciBdf> {
3669         let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");
3670 
3671         // Add the new virtio-pci node to the device tree.
3672         let mut node = device_node!(id);
3673         node.children = vec![virtio_device_id.clone()];
3674 
3675         let (pci_segment_id, pci_device_bdf, resources) =
3676             self.pci_resources(&id, pci_segment_id)?;
3677 
3678         // Update the existing virtio node by setting the parent.
3679         if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
3680             node.parent = Some(id.clone());
3681         } else {
3682             return Err(DeviceManagerError::MissingNode);
3683         }
3684 
3685         // Allow one MSI-X vector per queue. We also add 1 to account for
3686         // the dedicated vector used to notify the guest about a virtio
3687         // config change.
3688         let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;
3689 
3690         // Create the AccessPlatform trait object from the IommuMapping
3691         // implementation. This provides address translation for any virtio
3692         // device sitting behind a vIOMMU.
3693         let mut access_platform: Option<Arc<dyn AccessPlatform>> = None;
3694 
3695         if let Some(mapping) = iommu_mapping {
3696             access_platform = Some(Arc::new(AccessPlatformMapping::new(
3697                 pci_device_bdf.into(),
3698                 mapping.clone(),
3699             )));
3700         }
3701 
3702         // If SEV-SNP is enabled, create the AccessPlatform from SevSnpPageAccessProxy.
3703         #[cfg(feature = "sev_snp")]
3704         if self.config.lock().unwrap().is_sev_snp_enabled() {
3705             access_platform = Some(Arc::new(SevSnpPageAccessProxy::new(
3706                 self.address_manager.vm.clone(),
3707             )));
3708         }
3709 
3710         let memory = self.memory_manager.lock().unwrap().guest_memory();
3711 
3712         // Map DMA ranges if a DMA handler is available and if the device is
3713         // not attached to a virtual IOMMU.
3714         if let Some(dma_handler) = &dma_handler {
3715             if iommu_mapping.is_some() {
3716                 if let Some(iommu) = &self.iommu_device {
3717                     iommu
3718                         .lock()
3719                         .unwrap()
3720                         .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
3721                 } else {
3722                     return Err(DeviceManagerError::MissingVirtualIommu);
3723                 }
3724             } else {
3725                 // Let every virtio-mem device handle the DMA map/unmap through the
3726                 // DMA handler provided.
3727                 for virtio_mem_device in self.virtio_mem_devices.iter() {
3728                     virtio_mem_device
3729                         .lock()
3730                         .unwrap()
3731                         .add_dma_mapping_handler(
3732                             VirtioMemMappingSource::Device(pci_device_bdf.into()),
3733                             dma_handler.clone(),
3734                         )
3735                         .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3736                 }
3737 
3738                 // Do not register virtio-mem regions, as they are handled directly by
3739                 // virtio-mem devices.
3740                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3741                     for region in zone.regions() {
3742                         let gpa = region.start_addr().0;
3743                         let size = region.len();
3744                         dma_handler
3745                             .map(gpa, gpa, size)
3746                             .map_err(DeviceManagerError::VirtioDmaMap)?;
3747                     }
3748                 }
3749             }
3750         }
3751 
3752         let device_type = virtio_device.lock().unwrap().device_type();
3753         let virtio_pci_device = Arc::new(Mutex::new(
3754             VirtioPciDevice::new(
3755                 id.clone(),
3756                 memory,
3757                 virtio_device,
3758                 msix_num,
3759                 access_platform,
3760                 &self.msi_interrupt_manager,
3761                 pci_device_bdf.into(),
3762                 self.activate_evt
3763                     .try_clone()
3764                     .map_err(DeviceManagerError::EventFd)?,
3765                 // All device types *except* virtio block devices should be allocated a 64-bit BAR.
3766                 // Block devices should be given a 32-bit BAR so that they are easily accessible
3767                 // to firmware without requiring excessive identity mapping.
3768                 // The exception is when the device is not on the default PCI segment.
3769                 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
3770                 dma_handler,
3771                 self.pending_activations.clone(),
3772                 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
3773             )
3774             .map_err(DeviceManagerError::VirtioDevice)?,
3775         ));
3776 
3777         let new_resources = self.add_pci_device(
3778             virtio_pci_device.clone(),
3779             virtio_pci_device.clone(),
3780             pci_segment_id,
3781             pci_device_bdf,
3782             resources,
3783         )?;
3784 
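             // Register each queue notification address as an ioeventfd so
             // guest notifications are delivered through eventfd signalling
             // instead of being trapped as regular MMIO writes.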
3785         let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3786         for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3787             let io_addr = IoEventAddress::Mmio(addr);
3788             self.address_manager
3789                 .vm
3790                 .register_ioevent(event, &io_addr, None)
3791                 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
3792         }
3793 
3794         // Update the device tree with correct resource information.
3795         node.resources = new_resources;
3796         node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
3797         node.pci_bdf = Some(pci_device_bdf);
3798         node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
3799         self.device_tree.lock().unwrap().insert(id, node);
3800 
3801         Ok(pci_device_bdf)
3802     }
3803 
3804     fn add_pvpanic_device(
3805         &mut self,
3806     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
3807         let id = String::from(PVPANIC_DEVICE_NAME);
3808         let pci_segment_id = 0x0_u16;
3809 
3810         info!("Creating pvpanic device {}", id);
3811 
3812         let (pci_segment_id, pci_device_bdf, resources) =
3813             self.pci_resources(&id, pci_segment_id)?;
3814 
3815         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
3816 
3817         let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
3818             .map_err(DeviceManagerError::PvPanicCreate)?;
3819 
3820         let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));
3821 
3822         let new_resources = self.add_pci_device(
3823             pvpanic_device.clone(),
3824             pvpanic_device.clone(),
3825             pci_segment_id,
3826             pci_device_bdf,
3827             resources,
3828         )?;
3829 
3830         let mut node = device_node!(id, pvpanic_device);
3831 
3832         node.resources = new_resources;
3833         node.pci_bdf = Some(pci_device_bdf);
3834         node.pci_device_handle = None;
3835 
3836         self.device_tree.lock().unwrap().insert(id, node);
3837 
3838         Ok(Some(pvpanic_device))
3839     }
3840 
3841     fn pci_resources(
3842         &self,
3843         id: &str,
3844         pci_segment_id: u16,
3845     ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
3846         // Look for the id in the device tree. If it can be found, that means
3847         // the device is being restored, otherwise it's created from scratch.
3848         let (pci_device_bdf, resources) =
3849             if let Some(node) = self.device_tree.lock().unwrap().get(id) {
3850                 info!("Restoring virtio-pci {} resources", id);
3851                 let pci_device_bdf: PciBdf = node
3852                     .pci_bdf
3853                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3854                 (Some(pci_device_bdf), Some(node.resources.clone()))
3855             } else {
3856                 (None, None)
3857             };
3858 
3859         Ok(if let Some(pci_device_bdf) = pci_device_bdf {
3860             let pci_segment_id = pci_device_bdf.segment();
3861 
3862             self.pci_segments[pci_segment_id as usize]
3863                 .pci_bus
3864                 .lock()
3865                 .unwrap()
3866                 .get_device_id(pci_device_bdf.device() as usize)
3867                 .map_err(DeviceManagerError::GetPciDeviceId)?;
3868 
3869             (pci_segment_id, pci_device_bdf, resources)
3870         } else {
3871             let pci_device_bdf = self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3872 
3873             (pci_segment_id, pci_device_bdf, None)
3874         })
3875     }
3876 
3877     #[cfg(target_arch = "x86_64")]
3878     pub fn io_bus(&self) -> &Arc<Bus> {
3879         &self.address_manager.io_bus
3880     }
3881 
3882     pub fn mmio_bus(&self) -> &Arc<Bus> {
3883         &self.address_manager.mmio_bus
3884     }
3885 
3886     pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
3887         &self.address_manager.allocator
3888     }
3889 
3890     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3891         self.interrupt_controller
3892             .as_ref()
3893             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3894     }
3895 
3896     pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
3897         &self.pci_segments
3898     }
3899 
3900     #[cfg(target_arch = "aarch64")]
3901     pub fn cmdline_additions(&self) -> &[String] {
3902         self.cmdline_additions.as_slice()
3903     }
3904 
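         /// Propagate a newly added guest memory region to every virtio
         /// device, the VFIO container (if any) and all vfio-user devices, so
         /// that their DMA mappings stay in sync with hotplugged memory.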
3905     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3906         for handle in self.virtio_devices.iter() {
3907             handle
3908                 .virtio_device
3909                 .lock()
3910                 .unwrap()
3911                 .add_memory_region(new_region)
3912                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3913 
3914             if let Some(dma_handler) = &handle.dma_handler {
3915                 if !handle.iommu {
3916                     let gpa = new_region.start_addr().0;
3917                     let size = new_region.len();
3918                     dma_handler
3919                         .map(gpa, gpa, size)
3920                         .map_err(DeviceManagerError::VirtioDmaMap)?;
3921                 }
3922             }
3923         }
3924 
3925         // Take care of updating the memory for VFIO PCI devices.
3926         if let Some(vfio_container) = &self.vfio_container {
3927             vfio_container
3928                 .vfio_dma_map(
3929                     new_region.start_addr().raw_value(),
3930                     new_region.len(),
3931                     new_region.as_ptr() as u64,
3932                 )
3933                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3934         }
3935 
3936         // Take care of updating the memory for vfio-user devices.
3937         {
3938             let device_tree = self.device_tree.lock().unwrap();
3939             for pci_device_node in device_tree.pci_devices() {
3940                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
3941                     .pci_device_handle
3942                     .as_ref()
3943                     .ok_or(DeviceManagerError::MissingPciDevice)?
3944                 {
3945                     vfio_user_pci_device
3946                         .lock()
3947                         .unwrap()
3948                         .dma_map(new_region)
3949                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
3950                 }
3951             }
3952         }
3953 
3954         Ok(())
3955     }
3956 
3957     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3958         for mut activator in self.pending_activations.lock().unwrap().drain(..) {
3959             activator
3960                 .activate()
3961                 .map_err(DeviceManagerError::VirtioActivate)?;
3962         }
3963         Ok(())
3964     }
3965 
3966     pub fn notify_hotplug(
3967         &self,
3968         _notification_type: AcpiNotificationFlags,
3969     ) -> DeviceManagerResult<()> {
3970         return self
3971             .ged_notification_device
3972             .as_ref()
3973             .unwrap()
3974             .lock()
3975             .unwrap()
3976             .notify(_notification_type)
3977             .map_err(DeviceManagerError::HotPlugNotification);
3978     }
3979 
3980     pub fn add_device(
3981         &mut self,
3982         device_cfg: &mut DeviceConfig,
3983     ) -> DeviceManagerResult<PciDeviceInfo> {
3984         self.validate_identifier(&device_cfg.id)?;
3985 
3986         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
3987             return Err(DeviceManagerError::InvalidIommuHotplug);
3988         }
3989 
3990         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
3991 
3992         // Update the PCIU bitmap
3993         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3994 
3995         Ok(PciDeviceInfo {
3996             id: device_name,
3997             bdf,
3998         })
3999     }
4000 
4001     pub fn add_user_device(
4002         &mut self,
4003         device_cfg: &mut UserDeviceConfig,
4004     ) -> DeviceManagerResult<PciDeviceInfo> {
4005         self.validate_identifier(&device_cfg.id)?;
4006 
4007         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
4008 
4009         // Update the PCIU bitmap
4010         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4011 
4012         Ok(PciDeviceInfo {
4013             id: device_name,
4014             bdf,
4015         })
4016     }
4017 
4018     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
4019         // The node can be directly a PCI node in case the 'id' refers to a
4020         // VFIO device or a virtio-pci one.
4021         // In case the 'id' refers to a virtio device, we must find the PCI
4022         // node by looking at the parent.
4023         let device_tree = self.device_tree.lock().unwrap();
4024         let node = device_tree
4025             .get(&id)
4026             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
4027 
4028         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
4029             node
4030         } else {
4031             let parent = node
4032                 .parent
4033                 .as_ref()
4034                 .ok_or(DeviceManagerError::MissingNode)?;
4035             device_tree
4036                 .get(parent)
4037                 .ok_or(DeviceManagerError::MissingNode)?
4038         };
4039 
4040         let pci_device_bdf: PciBdf = pci_device_node
4041             .pci_bdf
4042             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
4043         let pci_segment_id = pci_device_bdf.segment();
4044 
4045         let pci_device_handle = pci_device_node
4046             .pci_device_handle
4047             .as_ref()
4048             .ok_or(DeviceManagerError::MissingPciDevice)?;
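             // Only a subset of virtio device types supports hot-unplug; VFIO
             // and vfio-user devices are not restricted here.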
4049         #[allow(irrefutable_let_patterns)]
4050         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
4051             let device_type = VirtioDeviceType::from(
4052                 virtio_pci_device
4053                     .lock()
4054                     .unwrap()
4055                     .virtio_device()
4056                     .lock()
4057                     .unwrap()
4058                     .device_type(),
4059             );
4060             match device_type {
4061                 VirtioDeviceType::Net
4062                 | VirtioDeviceType::Block
4063                 | VirtioDeviceType::Pmem
4064                 | VirtioDeviceType::Fs
4065                 | VirtioDeviceType::Vsock => {}
4066                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
4067             }
4068         }
4069 
4070         // Update the PCID bitmap
4071         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
4072 
4073         Ok(())
4074     }
4075 
4076     pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
4077         info!(
4078             "Ejecting device_id = {} on segment_id={}",
4079             device_id, pci_segment_id
4080         );
4081 
4082         // Convert the device ID into the corresponding b/d/f.
4083         let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);
4084 
4085         // Give the PCI device ID back to the PCI bus.
4086         self.pci_segments[pci_segment_id as usize]
4087             .pci_bus
4088             .lock()
4089             .unwrap()
4090             .put_device_id(device_id as usize)
4091             .map_err(DeviceManagerError::PutPciDeviceId)?;
4092 
4093         let (pci_device_handle, id) = {
4094             // Remove the device from the device tree along with its children.
4095             let mut device_tree = self.device_tree.lock().unwrap();
4096             let pci_device_node = device_tree
4097                 .remove_node_by_pci_bdf(pci_device_bdf)
4098                 .ok_or(DeviceManagerError::MissingPciDevice)?;
4099 
4100             // For VFIO and vfio-user the PCI device id is the id.
4101             // For virtio we overwrite it later as we want the id of the
4102             // underlying device.
4103             let mut id = pci_device_node.id;
4104             let pci_device_handle = pci_device_node
4105                 .pci_device_handle
4106                 .ok_or(DeviceManagerError::MissingPciDevice)?;
4107             if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
4108                 // The virtio-pci device has a single child
4109                 if !pci_device_node.children.is_empty() {
4110                     assert_eq!(pci_device_node.children.len(), 1);
4111                     let child_id = &pci_device_node.children[0];
4112                     id.clone_from(child_id);
4113                 }
4114             }
4115             for child in pci_device_node.children.iter() {
4116                 device_tree.remove(child);
4117             }
4118 
4119             (pci_device_handle, id)
4120         };
4121 
4122         let mut iommu_attached = false;
4123         if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
4124             if iommu_attached_devices.contains(&pci_device_bdf) {
4125                 iommu_attached = true;
4126             }
4127         }
4128 
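             // Tear down transport-specific state (ioeventfds, DMA mappings,
             // tracked MMIO regions) and collect the handles needed to free
             // the BARs and detach the device from the buses below.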
4129         let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
4130             // No need to remove any virtio-mem mapping here as the container outlives all devices
4131             PciDeviceHandle::Vfio(vfio_pci_device) => {
4132                 for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
4133                     self.mmio_regions
4134                         .lock()
4135                         .unwrap()
4136                         .retain(|x| x.start != mmio_region.start)
4137                 }
4138 
4139                 (
4140                     Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
4141                     Arc::clone(&vfio_pci_device) as Arc<dyn BusDeviceSync>,
4142                     None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
4143                     false,
4144                 )
4145             }
4146             PciDeviceHandle::Virtio(virtio_pci_device) => {
4147                 let dev = virtio_pci_device.lock().unwrap();
4148                 let bar_addr = dev.config_bar_addr();
4149                 for (event, addr) in dev.ioeventfds(bar_addr) {
4150                     let io_addr = IoEventAddress::Mmio(addr);
4151                     self.address_manager
4152                         .vm
4153                         .unregister_ioevent(event, &io_addr)
4154                         .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
4155                 }
4156 
4157                 if let Some(dma_handler) = dev.dma_handler() {
4158                     if !iommu_attached {
4159                         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
4160                             for region in zone.regions() {
4161                                 let iova = region.start_addr().0;
4162                                 let size = region.len();
4163                                 dma_handler
4164                                     .unmap(iova, size)
4165                                     .map_err(DeviceManagerError::VirtioDmaUnmap)?;
4166                             }
4167                         }
4168                     }
4169                 }
4170 
4171                 (
4172                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
4173                     Arc::clone(&virtio_pci_device) as Arc<dyn BusDeviceSync>,
4174                     Some(dev.virtio_device()),
4175                     dev.dma_handler().is_some() && !iommu_attached,
4176                 )
4177             }
4178             PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
4179                 let mut dev = vfio_user_pci_device.lock().unwrap();
4180                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
4181                     for region in zone.regions() {
4182                         dev.dma_unmap(region)
4183                             .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
4184                     }
4185                 }
4186 
4187                 (
4188                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
4189                     Arc::clone(&vfio_user_pci_device) as Arc<dyn BusDeviceSync>,
4190                     None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
4191                     true,
4192                 )
4193             }
4194         };
4195 
4196         if remove_dma_handler {
4197             for virtio_mem_device in self.virtio_mem_devices.iter() {
4198                 virtio_mem_device
4199                     .lock()
4200                     .unwrap()
4201                     .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
4202                         pci_device_bdf.into(),
4203                     ))
4204                     .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
4205             }
4206         }
4207 
4208         // Free the allocated BARs
4209         pci_device
4210             .lock()
4211             .unwrap()
4212             .free_bars(
4213                 &mut self.address_manager.allocator.lock().unwrap(),
4214                 &mut self.pci_segments[pci_segment_id as usize]
4215                     .mem32_allocator
4216                     .lock()
4217                     .unwrap(),
4218                 &mut self.pci_segments[pci_segment_id as usize]
4219                     .mem64_allocator
4220                     .lock()
4221                     .unwrap(),
4222             )
4223             .map_err(DeviceManagerError::FreePciBars)?;
4224 
4225         // Remove the device from the PCI bus
4226         self.pci_segments[pci_segment_id as usize]
4227             .pci_bus
4228             .lock()
4229             .unwrap()
4230             .remove_by_device(&pci_device)
4231             .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;
4232 
4233         #[cfg(target_arch = "x86_64")]
4234         // Remove the device from the IO bus
4235         self.io_bus()
4236             .remove_by_device(&bus_device)
4237             .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;
4238 
4239         // Remove the device from the MMIO bus
4240         self.mmio_bus()
4241             .remove_by_device(&bus_device)
4242             .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;
4243 
4244         // Remove the device from the list of BusDevice held by the
4245         // DeviceManager.
4246         self.bus_devices
4247             .retain(|dev| !Arc::ptr_eq(dev, &bus_device));
4248 
4249         // Shutdown and remove the underlying virtio-device if present
4250         if let Some(virtio_device) = virtio_device {
4251             for mapping in virtio_device.lock().unwrap().userspace_mappings() {
4252                 self.memory_manager
4253                     .lock()
4254                     .unwrap()
4255                     .remove_userspace_mapping(
4256                         mapping.addr.raw_value(),
4257                         mapping.len,
4258                         mapping.host_addr,
4259                         mapping.mergeable,
4260                         mapping.mem_slot,
4261                     )
4262                     .map_err(DeviceManagerError::MemoryManager)?;
4263             }
4264 
4265             virtio_device.lock().unwrap().shutdown();
4266 
4267             self.virtio_devices
4268                 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
4269         }
4270 
4271         event!(
4272             "vm",
4273             "device-removed",
4274             "id",
4275             &id,
4276             "bdf",
4277             pci_device_bdf.to_string()
4278         );
4279 
4280         // At this point, the device has been removed from all the lists and
4281         // buses where it was stored. At the end of this function, once
4282         // pci_device, bus_device and virtio_device are released, the actual
4283         // device will be dropped.
4284         Ok(())
4285     }
4286 
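         /// Plug an already-created virtio device into the running VM:
         /// record it in the virtio device list, expose it on the requested
         /// PCI segment and mark it as newly inserted in the PCIU bitmap so
         /// the guest picks it up on the next hotplug notification.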
4287     fn hotplug_virtio_pci_device(
4288         &mut self,
4289         handle: MetaVirtioDevice,
4290     ) -> DeviceManagerResult<PciDeviceInfo> {
4291         // Add the virtio device to the device manager list. This is important
4292         // as the list is used, for instance, to notify virtio devices about
4293         // memory updates.
4294         self.virtio_devices.push(handle.clone());
4295 
4296         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
4297             self.iommu_mapping.clone()
4298         } else {
4299             None
4300         };
4301 
4302         let bdf = self.add_virtio_pci_device(
4303             handle.virtio_device,
4304             &mapping,
4305             handle.id.clone(),
4306             handle.pci_segment,
4307             handle.dma_handler,
4308         )?;
4309 
4310         // Update the PCIU bitmap
4311         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4312 
4313         Ok(PciDeviceInfo { id: handle.id, bdf })
4314     }
4315 
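         /// Return true when the given PCI segment is declared as an IOMMU
         /// segment in the platform configuration.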
4316     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
4317         self.config
4318             .lock()
4319             .as_ref()
4320             .unwrap()
4321             .platform
4322             .as_ref()
4323             .map(|pc| {
4324                 pc.iommu_segments
4325                     .as_ref()
4326                     .map(|v| v.contains(&pci_segment_id))
4327                     .unwrap_or_default()
4328             })
4329             .unwrap_or_default()
4330     }
4331 
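         /// Hotplug a virtio-block device described by `disk_cfg`.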
4332     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
4333         self.validate_identifier(&disk_cfg.id)?;
4334 
4335         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
4336             return Err(DeviceManagerError::InvalidIommuHotplug);
4337         }
4338 
4339         let device = self.make_virtio_block_device(disk_cfg)?;
4340         self.hotplug_virtio_pci_device(device)
4341     }
4342 
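         /// Hotplug a virtio-fs device described by `fs_cfg`.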
4343     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
4344         self.validate_identifier(&fs_cfg.id)?;
4345 
4346         let device = self.make_virtio_fs_device(fs_cfg)?;
4347         self.hotplug_virtio_pci_device(device)
4348     }
4349 
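         /// Hotplug a virtio-pmem device described by `pmem_cfg`.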
4350     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
4351         self.validate_identifier(&pmem_cfg.id)?;
4352 
4353         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
4354             return Err(DeviceManagerError::InvalidIommuHotplug);
4355         }
4356 
4357         let device = self.make_virtio_pmem_device(pmem_cfg)?;
4358         self.hotplug_virtio_pci_device(device)
4359     }
4360 
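         /// Hotplug a virtio-net device described by `net_cfg`.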
4361     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
4362         self.validate_identifier(&net_cfg.id)?;
4363 
4364         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
4365             return Err(DeviceManagerError::InvalidIommuHotplug);
4366         }
4367 
4368         let device = self.make_virtio_net_device(net_cfg)?;
4369         self.hotplug_virtio_pci_device(device)
4370     }
4371 
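         /// Hotplug a vDPA device described by `vdpa_cfg`.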
4372     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
4373         self.validate_identifier(&vdpa_cfg.id)?;
4374 
4375         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
4376             return Err(DeviceManagerError::InvalidIommuHotplug);
4377         }
4378 
4379         let device = self.make_vdpa_device(vdpa_cfg)?;
4380         self.hotplug_virtio_pci_device(device)
4381     }
4382 
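         /// Hotplug a virtio-vsock device described by `vsock_cfg`.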
4383     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4384         self.validate_identifier(&vsock_cfg.id)?;
4385 
4386         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4387             return Err(DeviceManagerError::InvalidIommuHotplug);
4388         }
4389 
4390         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4391         self.hotplug_virtio_pci_device(device)
4392     }
4393 
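         /// Collect the counters exposed by every virtio device, keyed by
         /// device identifier.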
4394     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4395         let mut counters = HashMap::new();
4396 
4397         for handle in &self.virtio_devices {
4398             let virtio_device = handle.virtio_device.lock().unwrap();
4399             if let Some(device_counters) = virtio_device.counters() {
4400                 counters.insert(handle.id.clone(), device_counters.clone());
4401             }
4402         }
4403 
4404         counters
4405     }
4406 
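         /// Ask the virtio-balloon device, if present, to resize to `size`
         /// bytes.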
4407     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4408         if let Some(balloon) = &self.balloon {
4409             return balloon
4410                 .lock()
4411                 .unwrap()
4412                 .resize(size)
4413                 .map_err(DeviceManagerError::VirtioBalloonResize);
4414         }
4415 
4416         warn!("No balloon setup: Can't resize the balloon");
4417         Err(DeviceManagerError::MissingVirtioBalloon)
4418     }
4419 
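         /// Return the actual balloon size in bytes, or 0 when no balloon
         /// device is configured.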
4420     pub fn balloon_size(&self) -> u64 {
4421         if let Some(balloon) = &self.balloon {
4422             return balloon.lock().unwrap().get_actual();
4423         }
4424 
4425         0
4426     }
4427 
4428     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4429         self.device_tree.clone()
4430     }
4431 
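         /// Signal a power button press to the guest through the ACPI GED
         /// notification device.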
4432     #[cfg(target_arch = "x86_64")]
4433     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4434         self.ged_notification_device
4435             .as_ref()
4436             .unwrap()
4437             .lock()
4438             .unwrap()
4439             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4440             .map_err(DeviceManagerError::PowerButtonNotification)
4441     }
4442 
4443     #[cfg(target_arch = "aarch64")]
4444     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4445         // There are two use cases:
4446         // 1. Users will use direct kernel boot with device tree.
4447         // 2. Users will use ACPI+UEFI boot.
4448 
4449         // Trigger a GPIO pin 3 event to satisfy use case 1.
4450         self.gpio_device
4451             .as_ref()
4452             .unwrap()
4453             .lock()
4454             .unwrap()
4455             .trigger_key(3)
4456             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4457         // Trigger a GED power button event to satisfy use case 2.
4458         self
4459             .ged_notification_device
4460             .as_ref()
4461             .unwrap()
4462             .lock()
4463             .unwrap()
4464             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4465             .map_err(DeviceManagerError::PowerButtonNotification)
4466     }
4467 
4468     pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
4469         &self.iommu_attached_devices
4470     }
4471 
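         /// Reject device identifiers that use the reserved `__` prefix or
         /// that already exist in the device tree.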
4472     fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4473         if let Some(id) = id {
4474             if id.starts_with("__") {
4475                 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4476             }
4477 
4478             if self.device_tree.lock().unwrap().contains_key(id) {
4479                 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4480             }
4481         }
4482 
4483         Ok(())
4484     }
4485 
4486     pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
4487         &self.acpi_platform_addresses
4488     }
4489 }
4490 
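     /// Find the NUMA node that contains the given memory zone, if any.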
4491 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4492     for (numa_node_id, numa_node) in numa_nodes.iter() {
4493         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4494             return Some(*numa_node_id);
4495         }
4496     }
4497 
4498     None
4499 }
4500 
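     /// Find the NUMA node that owns the given PCI segment, defaulting to
     /// node 0 when the segment is not assigned to any node.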
4501 fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
4502     for (numa_node_id, numa_node) in numa_nodes.iter() {
4503         if numa_node.pci_segments.contains(&pci_segment_id) {
4504             return *numa_node_id;
4505         }
4506     }
4507 
4508     0
4509 }
4510 
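     /// Helper type whose only purpose is to emit the ACPI TPM2 device node
     /// (HID "MSFT0101") covering the fixed TPM MMIO range.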
4511 struct TpmDevice {}
4512 
4513 impl Aml for TpmDevice {
4514     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4515         aml::Device::new(
4516             "TPM2".into(),
4517             vec![
4518                 &aml::Name::new("_HID".into(), &"MSFT0101"),
4519                 &aml::Name::new("_STA".into(), &(0xF_usize)),
4520                 &aml::Name::new(
4521                     "_CRS".into(),
4522                     &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
4523                         true,
4524                         layout::TPM_START.0 as u32,
4525                         layout::TPM_SIZE as u32,
4526                     )]),
4527                 ),
4528             ],
4529         )
4530         .to_aml_bytes(sink)
4531     }
4532 }
4533 
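     // Emit the AML description of the platform devices owned by the
     // DeviceManager: the PCI hotplug controller, the PCI segments, the
     // motherboard resources, the serial port, the S5 sleep state, the power
     // button, the optional TPM and the GED notification device.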
4534 impl Aml for DeviceManager {
4535     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4536         #[cfg(target_arch = "aarch64")]
4537         use arch::aarch64::DeviceInfoForFdt;
4538 
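             // Build the body of the PSCN method: one PCNT call per PCI
             // segment so that a hotplug notification rescans every segment.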
4539         let mut pci_scan_methods = Vec::new();
4540         for i in 0..self.pci_segments.len() {
4541             pci_scan_methods.push(aml::MethodCall::new(
4542                 format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
4543                 vec![],
4544             ));
4545         }
4546         let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
4547         for method in &pci_scan_methods {
4548             pci_scan_inner.push(method)
4549         }
4550 
4551         // PCI hotplug controller
4552         aml::Device::new(
4553             "_SB_.PHPR".into(),
4554             vec![
4555                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
4556                 &aml::Name::new("_STA".into(), &0x0bu8),
4557                 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
4558                 &aml::Mutex::new("BLCK".into(), 0),
4559                 &aml::Name::new(
4560                     "_CRS".into(),
4561                     &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
4562                         aml::AddressSpaceCacheable::NotCacheable,
4563                         true,
4564                         self.acpi_address.0,
4565                         self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
4566                         None,
4567                     )]),
4568                 ),
4569                 // OpRegion and Fields map MMIO range into individual field values
4570                 &aml::OpRegion::new(
4571                     "PCST".into(),
4572                     aml::OpRegionSpace::SystemMemory,
4573                     &(self.acpi_address.0 as usize),
4574                     &DEVICE_MANAGER_ACPI_SIZE,
4575                 ),
4576                 &aml::Field::new(
4577                     "PCST".into(),
4578                     aml::FieldAccessType::DWord,
4579                     aml::FieldLockRule::NoLock,
4580                     aml::FieldUpdateRule::WriteAsZeroes,
4581                     vec![
4582                         aml::FieldEntry::Named(*b"PCIU", 32),
4583                         aml::FieldEntry::Named(*b"PCID", 32),
4584                         aml::FieldEntry::Named(*b"B0EJ", 32),
4585                         aml::FieldEntry::Named(*b"PSEG", 32),
4586                     ],
4587                 ),
4588                 &aml::Method::new(
4589                     "PCEJ".into(),
4590                     2,
4591                     true,
4592                     vec![
4593                         // Take lock defined above
4594                         &aml::Acquire::new("BLCK".into(), 0xffff),
4595                         // Select the PCI segment (second argument)
4596                         &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
4597                         // Set the bit for the ejected slot (first argument) in the B0EJ field
4598                         &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
4599                         // Release lock
4600                         &aml::Release::new("BLCK".into()),
4601                         // Return 0
4602                         &aml::Return::new(&aml::ZERO),
4603                     ],
4604                 ),
4605                 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
4606             ],
4607         )
4608         .to_aml_bytes(sink);
4609 
4610         for segment in &self.pci_segments {
4611             segment.to_aml_bytes(sink);
4612         }
4613 
4614         let mut mbrd_memory = Vec::new();
4615 
4616         for segment in &self.pci_segments {
4617             mbrd_memory.push(aml::Memory32Fixed::new(
4618                 true,
4619                 segment.mmio_config_address as u32,
4620                 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
4621             ))
4622         }
4623 
4624         let mut mbrd_memory_refs = Vec::new();
4625         for mbrd_memory_ref in &mbrd_memory {
4626             mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
4627         }
4628 
4629         aml::Device::new(
4630             "_SB_.MBRD".into(),
4631             vec![
4632                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
4633                 &aml::Name::new("_UID".into(), &aml::ZERO),
4634                 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
4635             ],
4636         )
4637         .to_aml_bytes(sink);
4638 
4639         // Serial device
4640         #[cfg(target_arch = "x86_64")]
4641         let serial_irq = 4;
4642         #[cfg(target_arch = "aarch64")]
4643         let serial_irq =
4644             if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
4645                 self.get_device_info()
4646                     .clone()
4647                     .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
4648                     .unwrap()
4649                     .irq()
4650             } else {
4651                 // If serial is turned off, use a placeholder IRQ; the COM1 device below is only generated when serial is enabled.
4652                 31
4653             };
4654         if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
4655             aml::Device::new(
4656                 "_SB_.COM1".into(),
4657                 vec![
4658                     &aml::Name::new(
4659                         "_HID".into(),
4660                         #[cfg(target_arch = "x86_64")]
4661                         &aml::EISAName::new("PNP0501"),
4662                         #[cfg(target_arch = "aarch64")]
4663                         &"ARMH0011",
4664                     ),
4665                     &aml::Name::new("_UID".into(), &aml::ZERO),
4666                     &aml::Name::new("_DDN".into(), &"COM1"),
4667                     &aml::Name::new(
4668                         "_CRS".into(),
4669                         &aml::ResourceTemplate::new(vec![
4670                             &aml::Interrupt::new(true, true, false, false, serial_irq),
4671                             #[cfg(target_arch = "x86_64")]
4672                             &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
4673                             #[cfg(target_arch = "aarch64")]
4674                             &aml::Memory32Fixed::new(
4675                                 true,
4676                                 arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
4677                                 MMIO_LEN as u32,
4678                             ),
4679                         ]),
4680                     ),
4681                 ],
4682             )
4683             .to_aml_bytes(sink);
4684         }
4685 
4686         aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);
4687 
4688         aml::Device::new(
4689             "_SB_.PWRB".into(),
4690             vec![
4691                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
4692                 &aml::Name::new("_UID".into(), &aml::ZERO),
4693             ],
4694         )
4695         .to_aml_bytes(sink);
4696 
4697         if self.config.lock().unwrap().tpm.is_some() {
4698             // Add tpm device
4699             TpmDevice {}.to_aml_bytes(sink);
4700         }
4701 
4702         self.ged_notification_device
4703             .as_ref()
4704             .unwrap()
4705             .lock()
4706             .unwrap()
4707             .to_aml_bytes(sink)
4708     }
4709 }
4710 
4711 impl Pausable for DeviceManager {
4712     fn pause(&mut self) -> result::Result<(), MigratableError> {
4713         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4714             if let Some(migratable) = &device_node.migratable {
4715                 migratable.lock().unwrap().pause()?;
4716             }
4717         }
4718         // On AArch64, the pause of device manager needs to trigger
4719         // a "pause" of GIC, which will flush the GIC pending tables
4720         // and ITS tables to guest RAM.
4721         #[cfg(target_arch = "aarch64")]
4722         {
4723             self.get_interrupt_controller()
4724                 .unwrap()
4725                 .lock()
4726                 .unwrap()
4727                 .pause()?;
4728         };
4729 
4730         Ok(())
4731     }
4732 
4733     fn resume(&mut self) -> result::Result<(), MigratableError> {
4734         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4735             if let Some(migratable) = &device_node.migratable {
4736                 migratable.lock().unwrap().resume()?;
4737             }
4738         }
4739 
4740         Ok(())
4741     }
4742 }
4743 
4744 impl Snapshottable for DeviceManager {
4745     fn id(&self) -> String {
4746         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4747     }
4748 
4749     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4750         let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);
4751 
4752         // We aggregate all devices snapshots.
4753         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4754             if let Some(migratable) = &device_node.migratable {
4755                 let mut migratable = migratable.lock().unwrap();
4756                 snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
4757             }
4758         }
4759 
4760         Ok(snapshot)
4761     }
4762 }
4763 
4764 impl Transportable for DeviceManager {}
4765 
4766 impl Migratable for DeviceManager {
4767     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4768         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4769             if let Some(migratable) = &device_node.migratable {
4770                 migratable.lock().unwrap().start_dirty_log()?;
4771             }
4772         }
4773         Ok(())
4774     }
4775 
4776     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4777         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4778             if let Some(migratable) = &device_node.migratable {
4779                 migratable.lock().unwrap().stop_dirty_log()?;
4780             }
4781         }
4782         Ok(())
4783     }
4784 
4785     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4786         let mut tables = Vec::new();
4787         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4788             if let Some(migratable) = &device_node.migratable {
4789                 tables.push(migratable.lock().unwrap().dirty_log()?);
4790             }
4791         }
4792         Ok(MemoryRangeTable::new_from_tables(tables))
4793     }
4794 
4795     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4796         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4797             if let Some(migratable) = &device_node.migratable {
4798                 migratable.lock().unwrap().start_migration()?;
4799             }
4800         }
4801         Ok(())
4802     }
4803 
4804     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4805         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4806             if let Some(migratable) = &device_node.migratable {
4807                 migratable.lock().unwrap().complete_migration()?;
4808             }
4809         }
4810         Ok(())
4811     }
4812 }
4813 
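     // MMIO register layout of the PCI hotplug controller, matching the PCST
     // field definitions emitted in the AML above: PCIU (devices plugged in),
     // PCID (devices requested for eject), B0EJ (eject trigger) and PSEG
     // (currently selected PCI segment).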
4814 const PCIU_FIELD_OFFSET: u64 = 0;
4815 const PCID_FIELD_OFFSET: u64 = 4;
4816 const B0EJ_FIELD_OFFSET: u64 = 8;
4817 const PSEG_FIELD_OFFSET: u64 = 12;
4818 const PCIU_FIELD_SIZE: usize = 4;
4819 const PCID_FIELD_SIZE: usize = 4;
4820 const B0EJ_FIELD_SIZE: usize = 4;
4821 const PSEG_FIELD_SIZE: usize = 4;
4822 
4823 impl BusDevice for DeviceManager {
4824     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4825         match offset {
4826             PCIU_FIELD_OFFSET => {
4827                 assert!(data.len() == PCIU_FIELD_SIZE);
4828                 data.copy_from_slice(
4829                     &self.pci_segments[self.selected_segment]
4830                         .pci_devices_up
4831                         .to_le_bytes(),
4832                 );
4833                 // Clear the PCIU bitmap
4834                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4835             }
4836             PCID_FIELD_OFFSET => {
4837                 assert!(data.len() == PCID_FIELD_SIZE);
4838                 data.copy_from_slice(
4839                     &self.pci_segments[self.selected_segment]
4840                         .pci_devices_down
4841                         .to_le_bytes(),
4842                 );
4843                 // Clear the PCID bitmap
4844                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4845             }
4846             B0EJ_FIELD_OFFSET => {
4847                 assert!(data.len() == B0EJ_FIELD_SIZE);
4848                 // Always return an empty bitmap since the eject is always
4849                 // taken care of right away during a write access.
4850                 data.fill(0);
4851             }
4852             PSEG_FIELD_OFFSET => {
4853                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4854                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4855             }
4856             _ => error!(
4857                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4858                 base, offset
4859             ),
4860         }
4861 
4862         debug!(
4863             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4864             base, offset, data
4865         )
4866     }
4867 
4868     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4869         match offset {
4870             B0EJ_FIELD_OFFSET => {
4871                 assert!(data.len() == B0EJ_FIELD_SIZE);
4872                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4873                 data_array.copy_from_slice(data);
4874                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4875 
4876                 while slot_bitmap > 0 {
4877                     let slot_id = slot_bitmap.trailing_zeros();
4878                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4879                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4880                     }
4881                     slot_bitmap &= !(1 << slot_id);
4882                 }
4883             }
4884             PSEG_FIELD_OFFSET => {
4885                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4886                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4887                 data_array.copy_from_slice(data);
4888                 let selected_segment = u32::from_le_bytes(data_array) as usize;
4889                 if selected_segment >= self.pci_segments.len() {
4890                     error!(
4891                         "Segment selection out of range: {} >= {}",
4892                         selected_segment,
4893                         self.pci_segments.len()
4894                     );
4895                     return None;
4896                 }
4897                 self.selected_segment = selected_segment;
4898             }
4899             _ => error!(
4900                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4901                 base, offset
4902             ),
4903         }
4904 
4905         debug!(
4906             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4907             base, offset, data
4908         );
4909 
4910         None
4911     }
4912 }
4913 
4914 impl Drop for DeviceManager {
4915     fn drop(&mut self) {
4916         // Wake up the DeviceManager threads (mainly virtio device workers),
4917         // to avoid deadlock on waiting for paused/parked worker threads.
4918         if let Err(e) = self.resume() {
4919             error!("Error resuming DeviceManager: {:?}", e);
4920         }
4921 
4922         for handle in self.virtio_devices.drain(..) {
4923             handle.virtio_device.lock().unwrap().shutdown();
4924         }
4925 
4926         if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
4927             // SAFETY: FFI call
4928             let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
4929         }
4930     }
4931 }
4932 
4933 #[cfg(test)]
4934 mod tests {
4935     use super::*;
4936 
4937     #[test]
4938     fn test_create_mmio_allocators() {
4939         let res = create_mmio_allocators(0x100000, 0x400000, 1, vec![1], 4 << 10);
4940         assert_eq!(res.len(), 1);
4941         assert_eq!(
4942             res[0].lock().unwrap().base(),
4943             vm_memory::GuestAddress(0x100000)
4944         );
4945         assert_eq!(
4946             res[0].lock().unwrap().end(),
4947             vm_memory::GuestAddress(0x3fffff)
4948         );
4949 
4950         let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![1, 1], 4 << 10);
4951         assert_eq!(res.len(), 2);
4952         assert_eq!(
4953             res[0].lock().unwrap().base(),
4954             vm_memory::GuestAddress(0x100000)
4955         );
4956         assert_eq!(
4957             res[0].lock().unwrap().end(),
4958             vm_memory::GuestAddress(0x27ffff)
4959         );
4960         assert_eq!(
4961             res[1].lock().unwrap().base(),
4962             vm_memory::GuestAddress(0x280000)
4963         );
4964         assert_eq!(
4965             res[1].lock().unwrap().end(),
4966             vm_memory::GuestAddress(0x3fffff)
4967         );
4968 
4969         let res = create_mmio_allocators(0x100000, 0x400000, 2, vec![2, 1], 4 << 10);
4970         assert_eq!(res.len(), 2);
4971         assert_eq!(
4972             res[0].lock().unwrap().base(),
4973             vm_memory::GuestAddress(0x100000)
4974         );
4975         assert_eq!(
4976             res[0].lock().unwrap().end(),
4977             vm_memory::GuestAddress(0x2fffff)
4978         );
4979         assert_eq!(
4980             res[1].lock().unwrap().base(),
4981             vm_memory::GuestAddress(0x300000)
4982         );
4983         assert_eq!(
4984             res[1].lock().unwrap().end(),
4985             vm_memory::GuestAddress(0x3fffff)
4986         );
4987     }
4988 }
4989