xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision 8803e4a2e7f8e9596b72f81d3c916390e5b10fbd)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use crate::config::{
13     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
14     VdpaConfig, VhostMode, VmConfig, VsockConfig,
15 };
16 use crate::console_devices::{ConsoleDeviceError, ConsoleInfo};
17 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE};
18 use crate::device_tree::{DeviceNode, DeviceTree};
19 use crate::interrupt::LegacyUserspaceInterruptManager;
20 use crate::interrupt::MsiInterruptManager;
21 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
22 use crate::pci_segment::PciSegment;
23 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
24 use crate::vm_config::DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT;
25 use crate::GuestRegionMmap;
26 use crate::PciDeviceInfo;
27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
28 use acpi_tables::sdt::GenericAddress;
29 use acpi_tables::{aml, Aml};
30 use anyhow::anyhow;
31 use arch::layout;
32 #[cfg(target_arch = "x86_64")]
33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
34 use arch::NumaNodes;
35 #[cfg(target_arch = "aarch64")]
36 use arch::{DeviceType, MmioDeviceInfo};
37 use block::{
38     async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type,
39     fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio,
40     raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType,
41 };
42 #[cfg(feature = "io_uring")]
43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk};
44 #[cfg(target_arch = "x86_64")]
45 use devices::debug_console::DebugConsole;
46 #[cfg(target_arch = "aarch64")]
47 use devices::gic;
48 #[cfg(target_arch = "x86_64")]
49 use devices::ioapic;
50 #[cfg(target_arch = "aarch64")]
51 use devices::legacy::Pl011;
52 use devices::{
53     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
54 };
55 use hypervisor::IoEventAddress;
56 use libc::{
57     tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE,
58     TCSANOW,
59 };
60 use pci::{
61     DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping,
62     VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError,
63 };
64 use rate_limiter::group::RateLimiterGroup;
65 use seccompiler::SeccompAction;
66 use serde::{Deserialize, Serialize};
67 use std::collections::{BTreeMap, BTreeSet, HashMap};
68 use std::fs::{File, OpenOptions};
69 use std::io::{self, stdout, Seek, SeekFrom};
70 use std::num::Wrapping;
71 use std::os::fd::RawFd;
72 use std::os::unix::fs::OpenOptionsExt;
73 use std::os::unix::io::{AsRawFd, FromRawFd};
74 use std::path::PathBuf;
75 use std::result;
76 use std::sync::{Arc, Mutex};
77 use std::time::Instant;
78 use tracer::trace_scoped;
79 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd};
80 use virtio_devices::transport::VirtioTransport;
81 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator};
82 use virtio_devices::vhost_user::VhostUserConfig;
83 use virtio_devices::{
84     AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource,
85 };
86 use virtio_devices::{Endpoint, IommuMapping};
87 use vm_allocator::{AddressAllocator, SystemAllocator};
88 use vm_device::dma_mapping::ExternalDmaMapping;
89 use vm_device::interrupt::{
90     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
91 };
92 use vm_device::{Bus, BusDevice, Resource};
93 use vm_memory::guest_memory::FileOffset;
94 use vm_memory::GuestMemoryRegion;
95 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
96 #[cfg(target_arch = "x86_64")]
97 use vm_memory::{GuestAddressSpace, GuestMemory};
98 use vm_migration::{
99     protocol::MemoryRangeTable, snapshot_from_id, state_from_id, Migratable, MigratableError,
100     Pausable, Snapshot, SnapshotData, Snapshottable, Transportable,
101 };
102 use vm_virtio::AccessPlatform;
103 use vm_virtio::VirtioDeviceType;
104 use vmm_sys_util::eventfd::EventFd;
105 #[cfg(target_arch = "x86_64")]
106 use {devices::debug_console, devices::legacy::Serial};
107 
// NOTE(review): presumably the length of the MMIO window given to each
// AArch64 MMIO device — confirm against the users of this constant.
#[cfg(target_arch = "aarch64")]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name.
// These identifiers all start with a double underscore.
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "x86_64")]
const DEBUGCON_DEVICE_NAME: &str = "__debug_console";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
const PVPANIC_DEVICE_NAME: &str = "__pvpanic";
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";

// Devices that the user may name and for which we generate
// identifiers if the user doesn't give one.
// These prefixes all start with a single underscore.
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
137 
/// Errors associated with device manager
///
/// Most variants wrap the underlying error reported by the component that
/// failed; unit variants describe a failure for which no further detail is
/// carried.
#[derive(Debug)]
pub enum DeviceManagerError {
    /// Cannot create EventFd.
    EventFd(io::Error),

    /// Cannot open disk path
    Disk(io::Error),

    /// Cannot create vhost-user-net device
    CreateVhostUserNet(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-blk device
    CreateVirtioBlock(io::Error),

    /// Cannot create virtio-net device
    CreateVirtioNet(virtio_devices::net::Error),

    /// Cannot create virtio-console device
    CreateVirtioConsole(io::Error),

    /// Cannot create virtio-rng device
    CreateVirtioRng(io::Error),

    /// Cannot create virtio-fs device
    CreateVirtioFs(virtio_devices::vhost_user::Error),

    /// Virtio-fs device was created without a socket.
    NoVirtioFsSock,

    /// Cannot create vhost-user-blk device
    CreateVhostUserBlk(virtio_devices::vhost_user::Error),

    /// Cannot create virtio-pmem device
    CreateVirtioPmem(io::Error),

    /// Cannot create vDPA device
    CreateVdpa(virtio_devices::vdpa::Error),

    /// Cannot create virtio-vsock device
    CreateVirtioVsock(io::Error),

    /// Cannot create tpm device
    CreateTpmDevice(anyhow::Error),

    /// Failed to convert Path to &str for the vDPA device.
    CreateVdpaConvertPath,

    /// Failed to convert Path to &str for the virtio-vsock device.
    CreateVsockConvertPath,

    /// Cannot create virtio-vsock backend
    CreateVsockBackend(virtio_devices::vsock::VsockUnixError),

    /// Cannot create virtio-iommu device
    CreateVirtioIommu(io::Error),

    /// Cannot create virtio-balloon device
    CreateVirtioBalloon(io::Error),

    /// Cannot create virtio-watchdog device
    CreateVirtioWatchdog(io::Error),

    /// Failed to parse disk image format
    DetectImageType(io::Error),

    /// Cannot open qcow disk path
    QcowDeviceCreate(qcow::Error),

    /// Cannot create serial manager
    CreateSerialManager(SerialManagerError),

    /// Cannot spawn the serial manager thread
    SpawnSerialManager(SerialManagerError),

    /// Cannot open tap interface
    OpenTap(net_util::TapError),

    /// Cannot allocate IRQ.
    AllocateIrq,

    /// Cannot configure the IRQ.
    Irq(vmm_sys_util::errno::Error),

    /// Cannot allocate PCI BARs
    AllocateBars(pci::PciDeviceError),

    /// Could not free the BARs associated with a PCI device.
    FreePciBars(pci::PciDeviceError),

    /// Cannot register ioevent.
    RegisterIoevent(anyhow::Error),

    /// Cannot unregister ioevent.
    UnRegisterIoevent(anyhow::Error),

    /// Cannot create virtio device
    VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),

    /// Cannot add PCI device
    AddPciDevice(pci::PciRootError),

    /// Cannot open persistent memory file
    PmemFileOpen(io::Error),

    /// Cannot set persistent memory file size
    PmemFileSetLen(io::Error),

    /// Cannot find a memory range for persistent memory
    PmemRangeAllocation,

    /// Cannot find a memory range for virtio-fs
    FsRangeAllocation,

    /// Error creating serial output file
    SerialOutputFileOpen(io::Error),

    /// Error creating debug-console output file
    #[cfg(target_arch = "x86_64")]
    DebugconOutputFileOpen(io::Error),

    /// Error creating console output file
    ConsoleOutputFileOpen(io::Error),

    /// Error creating serial pty
    SerialPtyOpen(io::Error),

    /// Error creating console pty
    ConsolePtyOpen(io::Error),

    /// Error creating debug-console pty
    DebugconPtyOpen(io::Error),

    /// Error setting pty raw mode
    SetPtyRaw(ConsoleDeviceError),

    /// Error getting pty peer
    GetPtyPeer(vmm_sys_util::errno::Error),

    /// Cannot create a VFIO device
    VfioCreate(vfio_ioctls::VfioError),

    /// Cannot create a VFIO PCI device
    VfioPciCreate(pci::VfioPciError),

    /// Failed to map VFIO MMIO region.
    VfioMapRegion(pci::VfioPciError),

    /// Failed to DMA map VFIO device.
    VfioDmaMap(vfio_ioctls::VfioError),

    /// Failed to DMA unmap VFIO device.
    VfioDmaUnmap(pci::VfioPciError),

    /// Failed to create the passthrough device.
    CreatePassthroughDevice(anyhow::Error),

    /// Failed to memory map.
    Mmap(io::Error),

    /// Cannot add legacy device to Bus.
    BusError(vm_device::BusError),

    /// Failed to allocate IO port
    AllocateIoPort,

    /// Failed to allocate MMIO address
    AllocateMmioAddress,

    /// Failed to make hotplug notification
    HotPlugNotification(io::Error),

    /// Error from a memory manager operation
    MemoryManager(MemoryManagerError),

    /// Failed to create new interrupt source group.
    CreateInterruptGroup(io::Error),

    /// Failed to update interrupt source group.
    UpdateInterruptGroup(io::Error),

    /// Failed to create interrupt controller.
    CreateInterruptController(interrupt_controller::Error),

    /// Failed to create a new MmapRegion instance.
    NewMmapRegion(vm_memory::mmap::MmapRegionError),

    /// Failed to clone a File.
    CloneFile(io::Error),

    /// Failed to create socket file
    CreateSocketFile(io::Error),

    /// Failed to spawn the network backend
    SpawnNetBackend(io::Error),

    /// Failed to spawn the block backend
    SpawnBlockBackend(io::Error),

    /// Missing PCI bus.
    NoPciBus,

    /// Could not find an available device name.
    NoAvailableDeviceName,

    /// Missing PCI device.
    MissingPciDevice,

    /// Failed to remove a PCI device from the PCI bus.
    RemoveDeviceFromPciBus(pci::PciRootError),

    /// Failed to remove a bus device from the IO bus.
    RemoveDeviceFromIoBus(vm_device::BusError),

    /// Failed to remove a bus device from the MMIO bus.
    RemoveDeviceFromMmioBus(vm_device::BusError),

    /// Failed to find the device corresponding to a specific PCI b/d/f.
    UnknownPciBdf(u32),

    /// Not allowed to remove this type of device from the VM.
    RemovalNotAllowed(vm_virtio::VirtioDeviceType),

    /// Failed to find device corresponding to the given identifier.
    UnknownDeviceId(String),

    /// Failed to find an available PCI device ID.
    NextPciDeviceId(pci::PciRootError),

    /// Could not reserve the PCI device ID.
    GetPciDeviceId(pci::PciRootError),

    /// Could not give the PCI device ID back.
    PutPciDeviceId(pci::PciRootError),

    /// No disk path was specified when one was expected
    NoDiskPath,

    /// Failed to update guest memory for virtio device.
    UpdateMemoryForVirtioDevice(virtio_devices::Error),

    /// Cannot create virtio-mem device
    CreateVirtioMem(io::Error),

    /// Cannot find a memory range for virtio-mem memory
    VirtioMemRangeAllocation,

    /// Failed to update guest memory for VFIO PCI device.
    UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),

    /// Trying to use a directory for pmem but no size specified
    PmemWithDirectorySizeMissing,

    /// Trying to use a size that is not multiple of 2MiB
    PmemSizeNotAligned,

    /// Could not find the node in the device tree.
    MissingNode,

    /// Resource was already found.
    ResourceAlreadyExists,

    /// Expected resources for virtio-pmem could not be found.
    MissingVirtioPmemResources,

    /// Missing PCI b/d/f from the DeviceNode.
    MissingDeviceNodePciBdf,

    /// No support for device passthrough
    NoDevicePassthroughSupport,

    /// No socket option support for console device
    NoSocketOptionSupportForConsoleDevice,

    /// Failed to resize virtio-balloon
    VirtioBalloonResize(virtio_devices::balloon::Error),

    /// Missing virtio-balloon, can't proceed as expected.
    MissingVirtioBalloon,

    /// Missing virtual IOMMU device
    MissingVirtualIommu,

    /// Failed to do power button notification
    PowerButtonNotification(io::Error),

    /// Failed to do AArch64 GPIO power button notification
    #[cfg(target_arch = "aarch64")]
    AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),

    /// Failed to set O_DIRECT flag to file descriptor
    SetDirectIo,

    /// Failed to create FixedVhdDiskAsync
    CreateFixedVhdDiskAsync(io::Error),

    /// Failed to create FixedVhdDiskSync
    CreateFixedVhdDiskSync(io::Error),

    /// Failed to create QcowDiskSync
    CreateQcowDiskSync(qcow::Error),

    /// Failed to create VhdxDiskSync
    CreateFixedVhdxDiskSync(vhdx::VhdxError),

    /// Failed to add DMA mapping handler to virtio-mem device.
    AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to remove DMA mapping handler from virtio-mem device.
    RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),

    /// Failed to create vfio-user client
    VfioUserCreateClient(vfio_user::Error),

    /// Failed to create VFIO user device
    VfioUserCreate(VfioUserPciDeviceError),

    /// Failed to map region from VFIO user device into guest
    VfioUserMapRegion(VfioUserPciDeviceError),

    /// Failed to DMA map VFIO user device.
    VfioUserDmaMap(VfioUserPciDeviceError),

    /// Failed to DMA unmap VFIO user device.
    VfioUserDmaUnmap(VfioUserPciDeviceError),

    /// Failed to update memory mappings for VFIO user device
    UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),

    /// Cannot duplicate file descriptor
    DupFd(vmm_sys_util::errno::Error),

    /// Failed to DMA map virtio device.
    VirtioDmaMap(std::io::Error),

    /// Failed to DMA unmap virtio device.
    VirtioDmaUnmap(std::io::Error),

    /// Cannot hotplug device behind vIOMMU
    InvalidIommuHotplug,

    /// Invalid identifier as it is not unique.
    IdentifierNotUnique(String),

    /// Invalid identifier
    InvalidIdentifier(String),

    /// Error activating virtio device
    VirtioActivate(ActivateError),

    /// Failed retrieving device state from snapshot
    RestoreGetState(MigratableError),

    /// Cannot create a PvPanic device
    PvPanicCreate(devices::pvpanic::PvPanicError),

    /// Cannot create a RateLimiterGroup
    RateLimiterGroupCreate(rate_limiter::group::Error),

    /// Cannot start sigwinch listener
    StartSigwinchListener(std::io::Error),

    /// Invalid console info
    InvalidConsoleInfo,

    /// Invalid console fd
    InvalidConsoleFd,
}
506 
/// Result type whose error variant is a [`DeviceManagerError`].
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// NOTE(review): presumably the size of the device manager's ACPI-visible
// register window — confirm the unit and usage at the allocation site.
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;
510 
511 #[derive(Default)]
512 pub struct Console {
513     console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
514 }
515 
516 impl Console {
517     pub fn need_resize(&self) -> bool {
518         if let Some(_resizer) = self.console_resizer.as_ref() {
519             return true;
520         }
521 
522         false
523     }
524 
525     pub fn update_console_size(&self) {
526         if let Some(resizer) = self.console_resizer.as_ref() {
527             resizer.update_console_size()
528         }
529     }
530 }
531 
/// State needed to relocate device address ranges.
///
/// Its `DeviceRelocation` implementation uses these fields to move a PCI BAR:
/// the allocators and buses are updated, the device tree resources are
/// rewritten, and the hypervisor VM handle is used to re-register ioevents
/// and user memory regions.
pub(crate) struct AddressManager {
    // System allocator; used for I/O port ranges when an I/O BAR moves.
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    // Port I/O bus (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub(crate) io_bus: Arc<Bus>,
    // MMIO bus.
    pub(crate) mmio_bus: Arc<Bus>,
    // Hypervisor VM handle, used for ioevent and user memory region updates.
    pub(crate) vm: Arc<dyn hypervisor::Vm>,
    // Device tree whose `PciBar` resources are kept in sync with BAR moves.
    device_tree: Arc<Mutex<DeviceTree>>,
    // Allocators searched when a 32-bit memory BAR moves.
    pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
    // Allocators searched when a 64-bit memory BAR moves.
    pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}
542 
543 impl DeviceRelocation for AddressManager {
544     fn move_bar(
545         &self,
546         old_base: u64,
547         new_base: u64,
548         len: u64,
549         pci_dev: &mut dyn PciDevice,
550         region_type: PciBarRegionType,
551     ) -> std::result::Result<(), std::io::Error> {
552         match region_type {
553             PciBarRegionType::IoRegion => {
554                 #[cfg(target_arch = "x86_64")]
555                 {
556                     // Update system allocator
557                     self.allocator
558                         .lock()
559                         .unwrap()
560                         .free_io_addresses(GuestAddress(old_base), len as GuestUsize);
561 
562                     self.allocator
563                         .lock()
564                         .unwrap()
565                         .allocate_io_addresses(
566                             Some(GuestAddress(new_base)),
567                             len as GuestUsize,
568                             None,
569                         )
570                         .ok_or_else(|| {
571                             io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
572                         })?;
573 
574                     // Update PIO bus
575                     self.io_bus
576                         .update_range(old_base, len, new_base, len)
577                         .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
578                 }
579                 #[cfg(target_arch = "aarch64")]
580                 error!("I/O region is not supported");
581             }
582             PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
583                 let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
584                     &self.pci_mmio32_allocators
585                 } else {
586                     &self.pci_mmio64_allocators
587                 };
588 
589                 // Find the specific allocator that this BAR was allocated from and use it for new one
590                 for allocator in allocators {
591                     let allocator_base = allocator.lock().unwrap().base();
592                     let allocator_end = allocator.lock().unwrap().end();
593 
594                     if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
595                         allocator
596                             .lock()
597                             .unwrap()
598                             .free(GuestAddress(old_base), len as GuestUsize);
599 
600                         allocator
601                             .lock()
602                             .unwrap()
603                             .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
604                             .ok_or_else(|| {
605                                 io::Error::new(
606                                     io::ErrorKind::Other,
607                                     "failed allocating new MMIO range",
608                                 )
609                             })?;
610 
611                         break;
612                     }
613                 }
614 
615                 // Update MMIO bus
616                 self.mmio_bus
617                     .update_range(old_base, len, new_base, len)
618                     .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
619             }
620         }
621 
622         // Update the device_tree resources associated with the device
623         if let Some(id) = pci_dev.id() {
624             if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
625                 let mut resource_updated = false;
626                 for resource in node.resources.iter_mut() {
627                     if let Resource::PciBar { base, type_, .. } = resource {
628                         if PciBarRegionType::from(*type_) == region_type && *base == old_base {
629                             *base = new_base;
630                             resource_updated = true;
631                             break;
632                         }
633                     }
634                 }
635 
636                 if !resource_updated {
637                     return Err(io::Error::new(
638                         io::ErrorKind::Other,
639                         format!(
640                             "Couldn't find a resource with base 0x{old_base:x} for device {id}"
641                         ),
642                     ));
643                 }
644             } else {
645                 return Err(io::Error::new(
646                     io::ErrorKind::Other,
647                     format!("Couldn't find device {id} from device tree"),
648                 ));
649             }
650         }
651 
652         let any_dev = pci_dev.as_any();
653         if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
654             let bar_addr = virtio_pci_dev.config_bar_addr();
655             if bar_addr == new_base {
656                 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
657                     let io_addr = IoEventAddress::Mmio(addr);
658                     self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
659                         io::Error::new(
660                             io::ErrorKind::Other,
661                             format!("failed to unregister ioevent: {e:?}"),
662                         )
663                     })?;
664                 }
665                 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
666                     let io_addr = IoEventAddress::Mmio(addr);
667                     self.vm
668                         .register_ioevent(event, &io_addr, None)
669                         .map_err(|e| {
670                             io::Error::new(
671                                 io::ErrorKind::Other,
672                                 format!("failed to register ioevent: {e:?}"),
673                             )
674                         })?;
675                 }
676             } else {
677                 let virtio_dev = virtio_pci_dev.virtio_device();
678                 let mut virtio_dev = virtio_dev.lock().unwrap();
679                 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
680                     if shm_regions.addr.raw_value() == old_base {
681                         let mem_region = self.vm.make_user_memory_region(
682                             shm_regions.mem_slot,
683                             old_base,
684                             shm_regions.len,
685                             shm_regions.host_addr,
686                             false,
687                             false,
688                         );
689 
690                         self.vm.remove_user_memory_region(mem_region).map_err(|e| {
691                             io::Error::new(
692                                 io::ErrorKind::Other,
693                                 format!("failed to remove user memory region: {e:?}"),
694                             )
695                         })?;
696 
697                         // Create new mapping by inserting new region to KVM.
698                         let mem_region = self.vm.make_user_memory_region(
699                             shm_regions.mem_slot,
700                             new_base,
701                             shm_regions.len,
702                             shm_regions.host_addr,
703                             false,
704                             false,
705                         );
706 
707                         self.vm.create_user_memory_region(mem_region).map_err(|e| {
708                             io::Error::new(
709                                 io::ErrorKind::Other,
710                                 format!("failed to create user memory regions: {e:?}"),
711                             )
712                         })?;
713 
714                         // Update shared memory regions to reflect the new mapping.
715                         shm_regions.addr = GuestAddress(new_base);
716                         virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
717                             io::Error::new(
718                                 io::ErrorKind::Other,
719                                 format!("failed to update shared memory regions: {e:?}"),
720                             )
721                         })?;
722                     }
723                 }
724             }
725         }
726 
727         pci_dev.move_bar(old_base, new_base)
728     }
729 }
730 
/// Serializable device manager state: the device tree together with the
/// device ID counter.
// NOTE(review): presumably the payload saved and restored through the
// snapshot machinery — confirm at the Snapshottable implementation.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    device_id_cnt: Wrapping<usize>,
}
736 
/// An open pty file together with a filesystem path.
// NOTE(review): `main` is presumably the main side of the pty and `path` the
// path of its peer — confirm where the pair is created.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}

impl Clone for PtyPair {
    /// Duplicates the file handle and copies the path.
    ///
    /// # Panics
    ///
    /// Panics if the underlying file handle cannot be duplicated.
    fn clone(&self) -> Self {
        let Self { main, path } = self;
        Self {
            main: main.try_clone().unwrap(),
            path: path.to_path_buf(),
        }
    }
}
751 
/// Shared handle to a PCI device, one variant per supported device kind.
///
/// Cloning only clones the inner `Arc`, so clones refer to the same device.
#[derive(Clone)]
pub enum PciDeviceHandle {
    /// VFIO PCI device.
    Vfio(Arc<Mutex<VfioPciDevice>>),
    /// Virtio PCI device.
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    /// vfio-user PCI device.
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}
758 
/// A virtio device bundled with the metadata the device manager keeps for it.
#[derive(Clone)]
struct MetaVirtioDevice {
    // Shared handle to the virtio device itself.
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // NOTE(review): presumably whether the device sits behind the virtual
    // IOMMU — confirm where this flag is consumed.
    iommu: bool,
    // Device identifier.
    id: String,
    // PCI segment number for the device.
    pci_segment: u16,
    // Optional external DMA mapping handler for the device.
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}
767 
/// Addresses of the ACPI platform registers (PM timer, reset, sleep control
/// and sleep status).
///
/// Each address is optional; the default value has none of them set.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}
775 
/// Central state of the device manager: the devices it holds together with
/// the address, interrupt, memory and configuration handles used to manage
/// them.
pub struct DeviceManager {
    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // Pipe used for console resize handling.
    // NOTE(review): the previous comment read "pty foreground status";
    // confirm what is actually carried over this pipe.
    console_resize_pipe: Option<Arc<File>>,

    // To restore on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,

    // Things to be added to the commandline (e.g. aarch64 early console)
    #[cfg(target_arch = "aarch64")]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    // PCI segments
    pci_segments: Vec<PciSegment>,

    // MSI Interrupt Manager
    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    // Legacy Interrupt Manager
    #[cfg_attr(feature = "mshv", allow(dead_code))]
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit and reset events
    exit_evt: EventFd,
    reset_evt: EventFd,

    // MMIO device information, keyed by device type and identifier (AArch64)
    #[cfg(target_arch = "aarch64")]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    // NOTE(review): presumably the guest address of the device manager's
    // ACPI device — confirm where it is allocated.
    acpi_address: GuestAddress,

    // NOTE(review): presumably the index of the PCI segment currently
    // selected for hotplug operations — confirm against its users.
    selected_segment: usize,

    // Handles to the virtio-mem devices
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    // GPIO device for AArch64
    #[cfg(target_arch = "aarch64")]
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // aio availability if detected
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    // Optional snapshot.
    // NOTE(review): presumably the snapshot the manager is restored from — confirm.
    snapshot: Option<Snapshot>,

    // Rate limiter groups, keyed by a string.
    // NOTE(review): presumably the group identifier — confirm.
    rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,

    // Shared list of MMIO regions
    mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
}
919 
920 fn create_mmio_allocators(
921     start: u64,
922     end: u64,
923     num_pci_segments: u16,
924     weights: Vec<u32>,
925     alignment: u64,
926 ) -> Vec<Arc<Mutex<AddressAllocator>>> {
927     let total_weight: u32 = weights.iter().sum();
928 
929     // Start each PCI segment mmio range on an aligned boundary
930     let pci_segment_mmio_size = (end - start + 1) / (alignment * total_weight as u64) * alignment;
931 
932     let mut mmio_allocators = vec![];
933     let mut i = 0;
934     for segment_id in 0..num_pci_segments as u64 {
935         let weight = weights[segment_id as usize] as u64;
936         let mmio_start = start + i * pci_segment_mmio_size;
937         let mmio_size = pci_segment_mmio_size * weight;
938         let allocator = Arc::new(Mutex::new(
939             AddressAllocator::new(GuestAddress(mmio_start), mmio_size).unwrap(),
940         ));
941         mmio_allocators.push(allocator);
942         i += weight;
943     }
944 
945     mmio_allocators
946 }
947 
948 impl DeviceManager {
949     #[allow(clippy::too_many_arguments)]
950     pub fn new(
951         #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
952         mmio_bus: Arc<Bus>,
953         vm: Arc<dyn hypervisor::Vm>,
954         config: Arc<Mutex<VmConfig>>,
955         memory_manager: Arc<Mutex<MemoryManager>>,
956         cpu_manager: Arc<Mutex<CpuManager>>,
957         exit_evt: EventFd,
958         reset_evt: EventFd,
959         seccomp_action: SeccompAction,
960         numa_nodes: NumaNodes,
961         activate_evt: &EventFd,
962         force_iommu: bool,
963         boot_id_list: BTreeSet<String>,
964         timestamp: Instant,
965         snapshot: Option<Snapshot>,
966         dynamic: bool,
967     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
968         trace_scoped!("DeviceManager::new");
969 
970         let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
971             let state: DeviceManagerState = snapshot.to_state().unwrap();
972             (
973                 Arc::new(Mutex::new(state.device_tree.clone())),
974                 state.device_id_cnt,
975             )
976         } else {
977             (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
978         };
979 
980         let num_pci_segments =
981             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
982                 platform_config.num_pci_segments
983             } else {
984                 1
985             };
986 
987         let mut mmio32_aperture_weights: Vec<u32> =
988             std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
989                 .take(num_pci_segments.into())
990                 .collect();
991         if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
992             for pci_segment in pci_segments.iter() {
993                 mmio32_aperture_weights[pci_segment.pci_segment as usize] =
994                     pci_segment.mmio32_aperture_weight
995             }
996         }
997 
998         let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0;
999         let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE;
1000         let pci_mmio32_allocators = create_mmio_allocators(
1001             start_of_mmio32_area,
1002             end_of_mmio32_area,
1003             num_pci_segments,
1004             mmio32_aperture_weights,
1005             4 << 10,
1006         );
1007 
1008         let mut mmio64_aperture_weights: Vec<u32> =
1009             std::iter::repeat(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT)
1010                 .take(num_pci_segments.into())
1011                 .collect();
1012         if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
1013             for pci_segment in pci_segments.iter() {
1014                 mmio64_aperture_weights[pci_segment.pci_segment as usize] =
1015                     pci_segment.mmio64_aperture_weight
1016             }
1017         }
1018 
1019         let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0;
1020         let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0;
1021         let pci_mmio64_allocators = create_mmio_allocators(
1022             start_of_mmio64_area,
1023             end_of_mmio64_area,
1024             num_pci_segments,
1025             mmio64_aperture_weights,
1026             4 << 30,
1027         );
1028 
1029         let address_manager = Arc::new(AddressManager {
1030             allocator: memory_manager.lock().unwrap().allocator(),
1031             #[cfg(target_arch = "x86_64")]
1032             io_bus,
1033             mmio_bus,
1034             vm: vm.clone(),
1035             device_tree: Arc::clone(&device_tree),
1036             pci_mmio32_allocators,
1037             pci_mmio64_allocators,
1038         });
1039 
1040         // First we create the MSI interrupt manager, the legacy one is created
1041         // later, after the IOAPIC device creation.
1042         // The reason we create the MSI one first is because the IOAPIC needs it,
1043         // and then the legacy interrupt manager needs an IOAPIC. So we're
1044         // handling a linear dependency chain:
1045         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1046         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1047             Arc::new(MsiInterruptManager::new(
1048                 Arc::clone(&address_manager.allocator),
1049                 vm,
1050             ));
1051 
1052         let acpi_address = address_manager
1053             .allocator
1054             .lock()
1055             .unwrap()
1056             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1057             .ok_or(DeviceManagerError::AllocateIoPort)?;
1058 
1059         let mut pci_irq_slots = [0; 32];
1060         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1061             &address_manager,
1062             &mut pci_irq_slots,
1063         )?;
1064 
1065         let mut pci_segments = vec![PciSegment::new_default_segment(
1066             &address_manager,
1067             Arc::clone(&address_manager.pci_mmio32_allocators[0]),
1068             Arc::clone(&address_manager.pci_mmio64_allocators[0]),
1069             &pci_irq_slots,
1070         )?];
1071 
1072         for i in 1..num_pci_segments as usize {
1073             pci_segments.push(PciSegment::new(
1074                 i as u16,
1075                 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
1076                 &address_manager,
1077                 Arc::clone(&address_manager.pci_mmio32_allocators[i]),
1078                 Arc::clone(&address_manager.pci_mmio64_allocators[i]),
1079                 &pci_irq_slots,
1080             )?);
1081         }
1082 
1083         if dynamic {
1084             let acpi_address = address_manager
1085                 .allocator
1086                 .lock()
1087                 .unwrap()
1088                 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
1089                 .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1090 
1091             address_manager
1092                 .mmio_bus
1093                 .insert(
1094                     cpu_manager.clone(),
1095                     acpi_address.0,
1096                     CPU_MANAGER_ACPI_SIZE as u64,
1097                 )
1098                 .map_err(DeviceManagerError::BusError)?;
1099 
1100             cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
1101         }
1102 
1103         let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new();
1104         if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() {
1105             for rate_limit_group_cfg in rate_limit_groups_cfg {
1106                 let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config;
1107                 let bw = rate_limit_cfg.bandwidth.unwrap_or_default();
1108                 let ops = rate_limit_cfg.ops.unwrap_or_default();
1109                 let mut rate_limit_group = RateLimiterGroup::new(
1110                     &rate_limit_group_cfg.id,
1111                     bw.size,
1112                     bw.one_time_burst.unwrap_or(0),
1113                     bw.refill_time,
1114                     ops.size,
1115                     ops.one_time_burst.unwrap_or(0),
1116                     ops.refill_time,
1117                 )
1118                 .map_err(DeviceManagerError::RateLimiterGroupCreate)?;
1119 
1120                 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?;
1121 
1122                 rate_limit_group.start_thread(exit_evt).unwrap();
1123                 rate_limit_groups
1124                     .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group));
1125             }
1126         }
1127 
1128         let device_manager = DeviceManager {
1129             address_manager: Arc::clone(&address_manager),
1130             console: Arc::new(Console::default()),
1131             interrupt_controller: None,
1132             #[cfg(target_arch = "aarch64")]
1133             cmdline_additions: Vec::new(),
1134             ged_notification_device: None,
1135             config,
1136             memory_manager,
1137             cpu_manager,
1138             virtio_devices: Vec::new(),
1139             bus_devices: Vec::new(),
1140             device_id_cnt,
1141             msi_interrupt_manager,
1142             legacy_interrupt_manager: None,
1143             passthrough_device: None,
1144             vfio_container: None,
1145             iommu_device: None,
1146             iommu_mapping: None,
1147             iommu_attached_devices: None,
1148             pci_segments,
1149             device_tree,
1150             exit_evt,
1151             reset_evt,
1152             #[cfg(target_arch = "aarch64")]
1153             id_to_dev_info: HashMap::new(),
1154             seccomp_action,
1155             numa_nodes,
1156             balloon: None,
1157             activate_evt: activate_evt
1158                 .try_clone()
1159                 .map_err(DeviceManagerError::EventFd)?,
1160             acpi_address,
1161             selected_segment: 0,
1162             serial_manager: None,
1163             console_resize_pipe: None,
1164             original_termios_opt: Arc::new(Mutex::new(None)),
1165             virtio_mem_devices: Vec::new(),
1166             #[cfg(target_arch = "aarch64")]
1167             gpio_device: None,
1168             pvpanic_device: None,
1169             force_iommu,
1170             io_uring_supported: None,
1171             aio_supported: None,
1172             boot_id_list,
1173             timestamp,
1174             pending_activations: Arc::new(Mutex::new(Vec::default())),
1175             acpi_platform_addresses: AcpiPlatformAddresses::default(),
1176             snapshot,
1177             rate_limit_groups,
1178             mmio_regions: Arc::new(Mutex::new(Vec::new())),
1179         };
1180 
1181         let device_manager = Arc::new(Mutex::new(device_manager));
1182 
1183         address_manager
1184             .mmio_bus
1185             .insert(
1186                 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
1187                 acpi_address.0,
1188                 DEVICE_MANAGER_ACPI_SIZE as u64,
1189             )
1190             .map_err(DeviceManagerError::BusError)?;
1191 
1192         Ok(device_manager)
1193     }
1194 
1195     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1196         self.console_resize_pipe.clone()
1197     }
1198 
1199     pub fn create_devices(
1200         &mut self,
1201         console_info: Option<ConsoleInfo>,
1202         console_resize_pipe: Option<File>,
1203         original_termios_opt: Arc<Mutex<Option<termios>>>,
1204     ) -> DeviceManagerResult<()> {
1205         trace_scoped!("create_devices");
1206 
1207         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1208 
1209         let interrupt_controller = self.add_interrupt_controller()?;
1210 
1211         self.cpu_manager
1212             .lock()
1213             .unwrap()
1214             .set_interrupt_controller(interrupt_controller.clone());
1215 
1216         // Now we can create the legacy interrupt manager, which needs the freshly
1217         // formed IOAPIC device.
1218         let legacy_interrupt_manager: Arc<
1219             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1220         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1221             &interrupt_controller,
1222         )));
1223 
1224         {
1225             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1226                 self.address_manager
1227                     .mmio_bus
1228                     .insert(
1229                         Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
1230                         acpi_address.0,
1231                         MEMORY_MANAGER_ACPI_SIZE as u64,
1232                     )
1233                     .map_err(DeviceManagerError::BusError)?;
1234             }
1235         }
1236 
1237         #[cfg(target_arch = "x86_64")]
1238         self.add_legacy_devices(
1239             self.reset_evt
1240                 .try_clone()
1241                 .map_err(DeviceManagerError::EventFd)?,
1242         )?;
1243 
1244         #[cfg(target_arch = "aarch64")]
1245         self.add_legacy_devices(&legacy_interrupt_manager)?;
1246 
1247         {
1248             self.ged_notification_device = self.add_acpi_devices(
1249                 &legacy_interrupt_manager,
1250                 self.reset_evt
1251                     .try_clone()
1252                     .map_err(DeviceManagerError::EventFd)?,
1253                 self.exit_evt
1254                     .try_clone()
1255                     .map_err(DeviceManagerError::EventFd)?,
1256             )?;
1257         }
1258 
1259         self.original_termios_opt = original_termios_opt;
1260 
1261         self.console = self.add_console_devices(
1262             &legacy_interrupt_manager,
1263             &mut virtio_devices,
1264             console_info,
1265             console_resize_pipe,
1266         )?;
1267 
1268         if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
1269             let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
1270             self.bus_devices
1271                 .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>)
1272         }
1273         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1274 
1275         virtio_devices.append(&mut self.make_virtio_devices()?);
1276 
1277         self.add_pci_devices(virtio_devices.clone())?;
1278 
1279         self.virtio_devices = virtio_devices;
1280 
1281         if self.config.clone().lock().unwrap().pvpanic {
1282             self.pvpanic_device = self.add_pvpanic_device()?;
1283         }
1284 
1285         Ok(())
1286     }
1287 
1288     fn state(&self) -> DeviceManagerState {
1289         DeviceManagerState {
1290             device_tree: self.device_tree.lock().unwrap().clone(),
1291             device_id_cnt: self.device_id_cnt,
1292         }
1293     }
1294 
1295     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1296         #[cfg(target_arch = "aarch64")]
1297         {
1298             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1299             let vgic_config = gic::Gic::create_default_config(vcpus.into());
1300             (
1301                 vgic_config.msi_addr,
1302                 vgic_config.msi_addr + vgic_config.msi_size - 1,
1303             )
1304         }
1305         #[cfg(target_arch = "x86_64")]
1306         (0xfee0_0000, 0xfeef_ffff)
1307     }
1308 
1309     #[cfg(target_arch = "aarch64")]
1310     /// Gets the information of the devices registered up to some point in time.
1311     pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
1312         &self.id_to_dev_info
1313     }
1314 
1315     #[allow(unused_variables)]
1316     fn add_pci_devices(
1317         &mut self,
1318         virtio_devices: Vec<MetaVirtioDevice>,
1319     ) -> DeviceManagerResult<()> {
1320         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1321 
1322         let iommu_device = if self.config.lock().unwrap().iommu {
1323             let (device, mapping) = virtio_devices::Iommu::new(
1324                 iommu_id.clone(),
1325                 self.seccomp_action.clone(),
1326                 self.exit_evt
1327                     .try_clone()
1328                     .map_err(DeviceManagerError::EventFd)?,
1329                 self.get_msi_iova_space(),
1330                 state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
1331                     .map_err(DeviceManagerError::RestoreGetState)?,
1332             )
1333             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1334             let device = Arc::new(Mutex::new(device));
1335             self.iommu_device = Some(Arc::clone(&device));
1336             self.iommu_mapping = Some(mapping);
1337 
1338             // Fill the device tree with a new node. In case of restore, we
1339             // know there is nothing to do, so we can simply override the
1340             // existing entry.
1341             self.device_tree
1342                 .lock()
1343                 .unwrap()
1344                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1345 
1346             Some(device)
1347         } else {
1348             None
1349         };
1350 
1351         let mut iommu_attached_devices = Vec::new();
1352         {
1353             for handle in virtio_devices {
1354                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1355                     self.iommu_mapping.clone()
1356                 } else {
1357                     None
1358                 };
1359 
1360                 let dev_id = self.add_virtio_pci_device(
1361                     handle.virtio_device,
1362                     &mapping,
1363                     handle.id,
1364                     handle.pci_segment,
1365                     handle.dma_handler,
1366                 )?;
1367 
1368                 if handle.iommu {
1369                     iommu_attached_devices.push(dev_id);
1370                 }
1371             }
1372 
1373             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1374             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1375 
1376             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1377             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1378 
1379             // Add all devices from forced iommu segments
1380             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1381                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1382                     for segment in iommu_segments {
1383                         for device in 0..32 {
1384                             let bdf = PciBdf::new(*segment, 0, device, 0);
1385                             if !iommu_attached_devices.contains(&bdf) {
1386                                 iommu_attached_devices.push(bdf);
1387                             }
1388                         }
1389                     }
1390                 }
1391             }
1392 
1393             if let Some(iommu_device) = iommu_device {
1394                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1395                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1396             }
1397         }
1398 
1399         for segment in &self.pci_segments {
1400             #[cfg(target_arch = "x86_64")]
1401             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1402                 self.bus_devices
1403                     .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
1404             }
1405 
1406             self.bus_devices
1407                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
1408         }
1409 
1410         Ok(())
1411     }
1412 
1413     #[cfg(target_arch = "aarch64")]
1414     fn add_interrupt_controller(
1415         &mut self,
1416     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1417         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1418             gic::Gic::new(
1419                 self.config.lock().unwrap().cpus.boot_vcpus,
1420                 Arc::clone(&self.msi_interrupt_manager),
1421                 self.address_manager.vm.clone(),
1422             )
1423             .map_err(DeviceManagerError::CreateInterruptController)?,
1424         ));
1425 
1426         self.interrupt_controller = Some(interrupt_controller.clone());
1427 
1428         // Restore the vGic if this is in the process of restoration
1429         let id = String::from(gic::GIC_SNAPSHOT_ID);
1430         if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
1431             // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
1432             if self
1433                 .cpu_manager
1434                 .lock()
1435                 .unwrap()
1436                 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
1437                 .is_err()
1438             {
1439                 info!("Failed to initialize PMU");
1440             }
1441 
1442             let vgic_state = vgic_snapshot
1443                 .to_state()
1444                 .map_err(DeviceManagerError::RestoreGetState)?;
1445             let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
1446             interrupt_controller
1447                 .lock()
1448                 .unwrap()
1449                 .restore_vgic(vgic_state, &saved_vcpu_states)
1450                 .unwrap();
1451         }
1452 
1453         self.device_tree
1454             .lock()
1455             .unwrap()
1456             .insert(id.clone(), device_node!(id, interrupt_controller));
1457 
1458         Ok(interrupt_controller)
1459     }
1460 
1461     #[cfg(target_arch = "aarch64")]
1462     pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
1463         self.interrupt_controller.as_ref()
1464     }
1465 
1466     #[cfg(target_arch = "x86_64")]
1467     fn add_interrupt_controller(
1468         &mut self,
1469     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1470         let id = String::from(IOAPIC_DEVICE_NAME);
1471 
1472         // Create IOAPIC
1473         let interrupt_controller = Arc::new(Mutex::new(
1474             ioapic::Ioapic::new(
1475                 id.clone(),
1476                 APIC_START,
1477                 Arc::clone(&self.msi_interrupt_manager),
1478                 state_from_id(self.snapshot.as_ref(), id.as_str())
1479                     .map_err(DeviceManagerError::RestoreGetState)?,
1480             )
1481             .map_err(DeviceManagerError::CreateInterruptController)?,
1482         ));
1483 
1484         self.interrupt_controller = Some(interrupt_controller.clone());
1485 
1486         self.address_manager
1487             .mmio_bus
1488             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1489             .map_err(DeviceManagerError::BusError)?;
1490 
1491         self.bus_devices
1492             .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);
1493 
1494         // Fill the device tree with a new node. In case of restore, we
1495         // know there is nothing to do, so we can simply override the
1496         // existing entry.
1497         self.device_tree
1498             .lock()
1499             .unwrap()
1500             .insert(id.clone(), device_node!(id, interrupt_controller));
1501 
1502         Ok(interrupt_controller)
1503     }
1504 
1505     fn add_acpi_devices(
1506         &mut self,
1507         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1508         reset_evt: EventFd,
1509         exit_evt: EventFd,
1510     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1511         let vcpus_kill_signalled = self
1512             .cpu_manager
1513             .lock()
1514             .unwrap()
1515             .vcpus_kill_signalled()
1516             .clone();
1517         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1518             exit_evt,
1519             reset_evt,
1520             vcpus_kill_signalled,
1521         )));
1522 
1523         self.bus_devices
1524             .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);
1525 
1526         #[cfg(target_arch = "x86_64")]
1527         {
1528             let shutdown_pio_address: u16 = 0x600;
1529 
1530             self.address_manager
1531                 .allocator
1532                 .lock()
1533                 .unwrap()
1534                 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
1535                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1536 
1537             self.address_manager
1538                 .io_bus
1539                 .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
1540                 .map_err(DeviceManagerError::BusError)?;
1541 
1542             self.acpi_platform_addresses.sleep_control_reg_address =
1543                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1544             self.acpi_platform_addresses.sleep_status_reg_address =
1545                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1546             self.acpi_platform_addresses.reset_reg_address =
1547                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1548         }
1549 
1550         let ged_irq = self
1551             .address_manager
1552             .allocator
1553             .lock()
1554             .unwrap()
1555             .allocate_irq()
1556             .unwrap();
1557         let interrupt_group = interrupt_manager
1558             .create_group(LegacyIrqGroupConfig {
1559                 irq: ged_irq as InterruptIndex,
1560             })
1561             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1562         let ged_address = self
1563             .address_manager
1564             .allocator
1565             .lock()
1566             .unwrap()
1567             .allocate_platform_mmio_addresses(
1568                 None,
1569                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1570                 None,
1571             )
1572             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1573         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1574             interrupt_group,
1575             ged_irq,
1576             ged_address,
1577         )));
1578         self.address_manager
1579             .mmio_bus
1580             .insert(
1581                 ged_device.clone(),
1582                 ged_address.0,
1583                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1584             )
1585             .map_err(DeviceManagerError::BusError)?;
1586         self.bus_devices
1587             .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);
1588 
1589         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1590 
1591         self.bus_devices
1592             .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);
1593 
1594         #[cfg(target_arch = "x86_64")]
1595         {
1596             let pm_timer_pio_address: u16 = 0x608;
1597 
1598             self.address_manager
1599                 .allocator
1600                 .lock()
1601                 .unwrap()
1602                 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
1603                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1604 
1605             self.address_manager
1606                 .io_bus
1607                 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
1608                 .map_err(DeviceManagerError::BusError)?;
1609 
1610             self.acpi_platform_addresses.pm_timer_address =
1611                 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
1612         }
1613 
1614         Ok(Some(ged_device))
1615     }
1616 
1617     #[cfg(target_arch = "x86_64")]
1618     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1619         let vcpus_kill_signalled = self
1620             .cpu_manager
1621             .lock()
1622             .unwrap()
1623             .vcpus_kill_signalled()
1624             .clone();
1625         // Add a shutdown device (i8042)
1626         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1627             reset_evt.try_clone().unwrap(),
1628             vcpus_kill_signalled.clone(),
1629         )));
1630 
1631         self.bus_devices
1632             .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);
1633 
1634         self.address_manager
1635             .io_bus
1636             .insert(i8042, 0x61, 0x4)
1637             .map_err(DeviceManagerError::BusError)?;
1638         {
1639             // Add a CMOS emulated device
1640             let mem_size = self
1641                 .memory_manager
1642                 .lock()
1643                 .unwrap()
1644                 .guest_memory()
1645                 .memory()
1646                 .last_addr()
1647                 .0
1648                 + 1;
1649             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1650             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1651 
1652             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1653                 mem_below_4g,
1654                 mem_above_4g,
1655                 reset_evt,
1656                 Some(vcpus_kill_signalled),
1657             )));
1658 
1659             self.bus_devices
1660                 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);
1661 
1662             self.address_manager
1663                 .io_bus
1664                 .insert(cmos, 0x70, 0x2)
1665                 .map_err(DeviceManagerError::BusError)?;
1666 
1667             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1668 
1669             self.bus_devices
1670                 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);
1671 
1672             self.address_manager
1673                 .io_bus
1674                 .insert(fwdebug, 0x402, 0x1)
1675                 .map_err(DeviceManagerError::BusError)?;
1676         }
1677 
1678         // 0x80 debug port
1679         let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
1680         self.bus_devices
1681             .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
1682         self.address_manager
1683             .io_bus
1684             .insert(debug_port, 0x80, 0x1)
1685             .map_err(DeviceManagerError::BusError)?;
1686 
1687         Ok(())
1688     }
1689 
1690     #[cfg(target_arch = "aarch64")]
1691     fn add_legacy_devices(
1692         &mut self,
1693         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1694     ) -> DeviceManagerResult<()> {
1695         // Add a RTC device
1696         let rtc_irq = self
1697             .address_manager
1698             .allocator
1699             .lock()
1700             .unwrap()
1701             .allocate_irq()
1702             .unwrap();
1703 
1704         let interrupt_group = interrupt_manager
1705             .create_group(LegacyIrqGroupConfig {
1706                 irq: rtc_irq as InterruptIndex,
1707             })
1708             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1709 
1710         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1711 
1712         self.bus_devices
1713             .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);
1714 
1715         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1716 
1717         self.address_manager
1718             .mmio_bus
1719             .insert(rtc_device, addr.0, MMIO_LEN)
1720             .map_err(DeviceManagerError::BusError)?;
1721 
1722         self.id_to_dev_info.insert(
1723             (DeviceType::Rtc, "rtc".to_string()),
1724             MmioDeviceInfo {
1725                 addr: addr.0,
1726                 len: MMIO_LEN,
1727                 irq: rtc_irq,
1728             },
1729         );
1730 
1731         // Add a GPIO device
1732         let id = String::from(GPIO_DEVICE_NAME);
1733         let gpio_irq = self
1734             .address_manager
1735             .allocator
1736             .lock()
1737             .unwrap()
1738             .allocate_irq()
1739             .unwrap();
1740 
1741         let interrupt_group = interrupt_manager
1742             .create_group(LegacyIrqGroupConfig {
1743                 irq: gpio_irq as InterruptIndex,
1744             })
1745             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1746 
1747         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1748             id.clone(),
1749             interrupt_group,
1750             state_from_id(self.snapshot.as_ref(), id.as_str())
1751                 .map_err(DeviceManagerError::RestoreGetState)?,
1752         )));
1753 
1754         self.bus_devices
1755             .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);
1756 
1757         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1758 
1759         self.address_manager
1760             .mmio_bus
1761             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1762             .map_err(DeviceManagerError::BusError)?;
1763 
1764         self.gpio_device = Some(gpio_device.clone());
1765 
1766         self.id_to_dev_info.insert(
1767             (DeviceType::Gpio, "gpio".to_string()),
1768             MmioDeviceInfo {
1769                 addr: addr.0,
1770                 len: MMIO_LEN,
1771                 irq: gpio_irq,
1772             },
1773         );
1774 
1775         self.device_tree
1776             .lock()
1777             .unwrap()
1778             .insert(id.clone(), device_node!(id, gpio_device));
1779 
1780         Ok(())
1781     }
1782 
1783     #[cfg(target_arch = "x86_64")]
1784     fn add_debug_console_device(
1785         &mut self,
1786         debug_console_writer: Box<dyn io::Write + Send>,
1787     ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> {
1788         let id = String::from(DEBUGCON_DEVICE_NAME);
1789         let debug_console = Arc::new(Mutex::new(DebugConsole::new(
1790             id.clone(),
1791             debug_console_writer,
1792         )));
1793 
1794         let port = self
1795             .config
1796             .lock()
1797             .unwrap()
1798             .debug_console
1799             .clone()
1800             .iobase
1801             .map(|port| port as u64)
1802             .unwrap_or(debug_console::DEFAULT_PORT);
1803 
1804         self.bus_devices
1805             .push(Arc::clone(&debug_console) as Arc<Mutex<dyn BusDevice>>);
1806 
1807         self.address_manager
1808             .allocator
1809             .lock()
1810             .unwrap()
1811             .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None)
1812             .ok_or(DeviceManagerError::AllocateIoPort)?;
1813 
1814         self.address_manager
1815             .io_bus
1816             .insert(debug_console.clone(), port, 0x1)
1817             .map_err(DeviceManagerError::BusError)?;
1818 
1819         // Fill the device tree with a new node. In case of restore, we
1820         // know there is nothing to do, so we can simply override the
1821         // existing entry.
1822         self.device_tree
1823             .lock()
1824             .unwrap()
1825             .insert(id.clone(), device_node!(id, debug_console));
1826 
1827         Ok(debug_console)
1828     }
1829 
1830     #[cfg(target_arch = "x86_64")]
1831     fn add_serial_device(
1832         &mut self,
1833         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1834         serial_writer: Option<Box<dyn io::Write + Send>>,
1835     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1836         // Serial is tied to IRQ #4
1837         let serial_irq = 4;
1838 
1839         let id = String::from(SERIAL_DEVICE_NAME);
1840 
1841         let interrupt_group = interrupt_manager
1842             .create_group(LegacyIrqGroupConfig {
1843                 irq: serial_irq as InterruptIndex,
1844             })
1845             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1846 
1847         let serial = Arc::new(Mutex::new(Serial::new(
1848             id.clone(),
1849             interrupt_group,
1850             serial_writer,
1851             state_from_id(self.snapshot.as_ref(), id.as_str())
1852                 .map_err(DeviceManagerError::RestoreGetState)?,
1853         )));
1854 
1855         self.bus_devices
1856             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1857 
1858         self.address_manager
1859             .allocator
1860             .lock()
1861             .unwrap()
1862             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1863             .ok_or(DeviceManagerError::AllocateIoPort)?;
1864 
1865         self.address_manager
1866             .io_bus
1867             .insert(serial.clone(), 0x3f8, 0x8)
1868             .map_err(DeviceManagerError::BusError)?;
1869 
1870         // Fill the device tree with a new node. In case of restore, we
1871         // know there is nothing to do, so we can simply override the
1872         // existing entry.
1873         self.device_tree
1874             .lock()
1875             .unwrap()
1876             .insert(id.clone(), device_node!(id, serial));
1877 
1878         Ok(serial)
1879     }
1880 
1881     #[cfg(target_arch = "aarch64")]
1882     fn add_serial_device(
1883         &mut self,
1884         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1885         serial_writer: Option<Box<dyn io::Write + Send>>,
1886     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1887         let id = String::from(SERIAL_DEVICE_NAME);
1888 
1889         let serial_irq = self
1890             .address_manager
1891             .allocator
1892             .lock()
1893             .unwrap()
1894             .allocate_irq()
1895             .unwrap();
1896 
1897         let interrupt_group = interrupt_manager
1898             .create_group(LegacyIrqGroupConfig {
1899                 irq: serial_irq as InterruptIndex,
1900             })
1901             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1902 
1903         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1904             id.clone(),
1905             interrupt_group,
1906             serial_writer,
1907             self.timestamp,
1908             state_from_id(self.snapshot.as_ref(), id.as_str())
1909                 .map_err(DeviceManagerError::RestoreGetState)?,
1910         )));
1911 
1912         self.bus_devices
1913             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1914 
1915         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
1916 
1917         self.address_manager
1918             .mmio_bus
1919             .insert(serial.clone(), addr.0, MMIO_LEN)
1920             .map_err(DeviceManagerError::BusError)?;
1921 
1922         self.id_to_dev_info.insert(
1923             (DeviceType::Serial, DeviceType::Serial.to_string()),
1924             MmioDeviceInfo {
1925                 addr: addr.0,
1926                 len: MMIO_LEN,
1927                 irq: serial_irq,
1928             },
1929         );
1930 
1931         self.cmdline_additions
1932             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1933 
1934         // Fill the device tree with a new node. In case of restore, we
1935         // know there is nothing to do, so we can simply override the
1936         // existing entry.
1937         self.device_tree
1938             .lock()
1939             .unwrap()
1940             .insert(id.clone(), device_node!(id, serial));
1941 
1942         Ok(serial)
1943     }
1944 
1945     fn add_virtio_console_device(
1946         &mut self,
1947         virtio_devices: &mut Vec<MetaVirtioDevice>,
1948         console_fd: Option<RawFd>,
1949         resize_pipe: Option<File>,
1950     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
1951         let console_config = self.config.lock().unwrap().console.clone();
1952         let endpoint = match console_config.mode {
1953             ConsoleOutputMode::File => {
1954                 if let Some(file_fd) = console_fd {
1955                     // SAFETY: file_fd is guaranteed to be a valid fd from
1956                     // pre_create_console_devices() in vmm/src/console_devices.rs
1957                     Endpoint::File(unsafe { File::from_raw_fd(file_fd) })
1958                 } else {
1959                     return Err(DeviceManagerError::InvalidConsoleFd);
1960                 }
1961             }
1962             ConsoleOutputMode::Pty => {
1963                 if let Some(pty_fd) = console_fd {
1964                     // SAFETY: pty_fd is guaranteed to be a valid fd from
1965                     // pre_create_console_devices() in vmm/src/console_devices.rs
1966                     let file = unsafe { File::from_raw_fd(pty_fd) };
1967                     self.console_resize_pipe = resize_pipe.map(Arc::new);
1968                     Endpoint::PtyPair(file.try_clone().unwrap(), file)
1969                 } else {
1970                     return Err(DeviceManagerError::InvalidConsoleFd);
1971                 }
1972             }
1973             ConsoleOutputMode::Tty => {
1974                 if let Some(tty_fd) = console_fd {
1975                     // SAFETY: tty_fd is guaranteed to be a valid fd from
1976                     // pre_create_console_devices() in vmm/src/console_devices.rs
1977                     let stdout = unsafe { File::from_raw_fd(tty_fd) };
1978                     // If an interactive TTY then we can accept input
1979                     // SAFETY: FFI call. Trivially safe.
1980                     if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
1981                         // SAFETY: FFI call to dup. Trivially safe.
1982                         let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
1983                         if stdin == -1 {
1984                             return vmm_sys_util::errno::errno_result()
1985                                 .map_err(DeviceManagerError::DupFd);
1986                         }
1987                         // SAFETY: stdin is valid and owned solely by us.
1988                         let stdin = unsafe { File::from_raw_fd(stdin) };
1989                         Endpoint::FilePair(stdout, stdin)
1990                     } else {
1991                         Endpoint::File(stdout)
1992                     }
1993                 } else {
1994                     return Err(DeviceManagerError::InvalidConsoleFd);
1995                 }
1996             }
1997             ConsoleOutputMode::Socket => {
1998                 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
1999             }
2000             ConsoleOutputMode::Null => Endpoint::Null,
2001             ConsoleOutputMode::Off => return Ok(None),
2002         };
2003         let id = String::from(CONSOLE_DEVICE_NAME);
2004 
2005         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
2006             id.clone(),
2007             endpoint,
2008             self.console_resize_pipe
2009                 .as_ref()
2010                 .map(|p| p.try_clone().unwrap()),
2011             self.force_iommu | console_config.iommu,
2012             self.seccomp_action.clone(),
2013             self.exit_evt
2014                 .try_clone()
2015                 .map_err(DeviceManagerError::EventFd)?,
2016             state_from_id(self.snapshot.as_ref(), id.as_str())
2017                 .map_err(DeviceManagerError::RestoreGetState)?,
2018         )
2019         .map_err(DeviceManagerError::CreateVirtioConsole)?;
2020         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
2021         virtio_devices.push(MetaVirtioDevice {
2022             virtio_device: Arc::clone(&virtio_console_device)
2023                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2024             iommu: console_config.iommu,
2025             id: id.clone(),
2026             pci_segment: 0,
2027             dma_handler: None,
2028         });
2029 
2030         // Fill the device tree with a new node. In case of restore, we
2031         // know there is nothing to do, so we can simply override the
2032         // existing entry.
2033         self.device_tree
2034             .lock()
2035             .unwrap()
2036             .insert(id.clone(), device_node!(id, virtio_console_device));
2037 
2038         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
2039         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
2040             Some(console_resizer)
2041         } else {
2042             None
2043         })
2044     }
2045 
2046     /// Adds all devices that behave like a console with respect to the VM
2047     /// configuration. This includes:
2048     /// - debug-console
2049     /// - serial-console
2050     /// - virtio-console
2051     fn add_console_devices(
2052         &mut self,
2053         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
2054         virtio_devices: &mut Vec<MetaVirtioDevice>,
2055         console_info: Option<ConsoleInfo>,
2056         console_resize_pipe: Option<File>,
2057     ) -> DeviceManagerResult<Arc<Console>> {
2058         let serial_config = self.config.lock().unwrap().serial.clone();
2059         if console_info.is_none() {
2060             return Err(DeviceManagerError::InvalidConsoleInfo);
2061         }
2062 
2063         // SAFETY: console_info is Some, so it's safe to unwrap.
2064         let console_info = console_info.unwrap();
2065         let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
2066             ConsoleOutputMode::File | ConsoleOutputMode::Tty => {
2067                 if console_info.serial_main_fd.is_none() {
2068                     return Err(DeviceManagerError::InvalidConsoleInfo);
2069                 }
2070                 // SAFETY: serial_main_fd is Some, so it's safe to unwrap.
2071                 // SAFETY: serial_main_fd is guaranteed to be a valid fd from
2072                 // pre_create_console_devices() in vmm/src/console_devices.rs
2073                 Some(Box::new(unsafe {
2074                     File::from_raw_fd(console_info.serial_main_fd.unwrap())
2075                 }))
2076             }
2077             ConsoleOutputMode::Off
2078             | ConsoleOutputMode::Null
2079             | ConsoleOutputMode::Pty
2080             | ConsoleOutputMode::Socket => None,
2081         };
2082         if serial_config.mode != ConsoleOutputMode::Off {
2083             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
2084             self.serial_manager = match serial_config.mode {
2085                 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty | ConsoleOutputMode::Socket => {
2086                     let serial_manager = SerialManager::new(
2087                         serial,
2088                         console_info.serial_main_fd,
2089                         serial_config.mode,
2090                         serial_config.socket,
2091                     )
2092                     .map_err(DeviceManagerError::CreateSerialManager)?;
2093                     if let Some(mut serial_manager) = serial_manager {
2094                         serial_manager
2095                             .start_thread(
2096                                 self.exit_evt
2097                                     .try_clone()
2098                                     .map_err(DeviceManagerError::EventFd)?,
2099                             )
2100                             .map_err(DeviceManagerError::SpawnSerialManager)?;
2101                         Some(Arc::new(serial_manager))
2102                     } else {
2103                         None
2104                     }
2105                 }
2106                 _ => None,
2107             };
2108         }
2109 
2110         #[cfg(target_arch = "x86_64")]
2111         {
2112             let debug_console_config = self.config.lock().unwrap().debug_console.clone();
2113             let debug_console_writer: Option<Box<dyn io::Write + Send>> =
2114                 match debug_console_config.mode {
2115                     ConsoleOutputMode::File | ConsoleOutputMode::Tty => {
2116                         if console_info.debug_main_fd.is_none() {
2117                             return Err(DeviceManagerError::InvalidConsoleInfo);
2118                         }
2119                         // SAFETY: debug_main_fd is Some, so it's safe to unwrap.
2120                         // SAFETY: debug_main_fd is guaranteed to be a valid fd from
2121                         // pre_create_console_devices() in vmm/src/console_devices.rs
2122                         Some(Box::new(unsafe {
2123                             File::from_raw_fd(console_info.debug_main_fd.unwrap())
2124                         }))
2125                     }
2126                     ConsoleOutputMode::Off
2127                     | ConsoleOutputMode::Null
2128                     | ConsoleOutputMode::Pty
2129                     | ConsoleOutputMode::Socket => None,
2130                 };
2131             if let Some(writer) = debug_console_writer {
2132                 let _ = self.add_debug_console_device(writer)?;
2133             }
2134         }
2135 
2136         let console_resizer = self.add_virtio_console_device(
2137             virtio_devices,
2138             console_info.console_main_fd,
2139             console_resize_pipe,
2140         )?;
2141 
2142         Ok(Arc::new(Console { console_resizer }))
2143     }
2144 
2145     fn add_tpm_device(
2146         &mut self,
2147         tpm_path: PathBuf,
2148     ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
2149         // Create TPM Device
2150         let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
2151             DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
2152         })?;
2153         let tpm = Arc::new(Mutex::new(tpm));
2154 
2155         // Add TPM Device to mmio
2156         self.address_manager
2157             .mmio_bus
2158             .insert(
2159                 tpm.clone(),
2160                 arch::layout::TPM_START.0,
2161                 arch::layout::TPM_SIZE,
2162             )
2163             .map_err(DeviceManagerError::BusError)?;
2164 
2165         Ok(tpm)
2166     }
2167 
2168     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2169         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
2170 
2171         // Create "standard" virtio devices (net/block/rng)
2172         devices.append(&mut self.make_virtio_block_devices()?);
2173         devices.append(&mut self.make_virtio_net_devices()?);
2174         devices.append(&mut self.make_virtio_rng_devices()?);
2175 
2176         // Add virtio-fs if required
2177         devices.append(&mut self.make_virtio_fs_devices()?);
2178 
2179         // Add virtio-pmem if required
2180         devices.append(&mut self.make_virtio_pmem_devices()?);
2181 
2182         // Add virtio-vsock if required
2183         devices.append(&mut self.make_virtio_vsock_devices()?);
2184 
2185         devices.append(&mut self.make_virtio_mem_devices()?);
2186 
2187         // Add virtio-balloon if required
2188         devices.append(&mut self.make_virtio_balloon_devices()?);
2189 
2190         // Add virtio-watchdog device
2191         devices.append(&mut self.make_virtio_watchdog_devices()?);
2192 
2193         // Add vDPA devices if required
2194         devices.append(&mut self.make_vdpa_devices()?);
2195 
2196         Ok(devices)
2197     }
2198 
2199     // Cache whether aio is supported to avoid checking for very block device
2200     fn aio_is_supported(&mut self) -> bool {
2201         if let Some(supported) = self.aio_supported {
2202             return supported;
2203         }
2204 
2205         let supported = block_aio_is_supported();
2206         self.aio_supported = Some(supported);
2207         supported
2208     }
2209 
2210     // Cache whether io_uring is supported to avoid probing for very block device
2211     fn io_uring_is_supported(&mut self) -> bool {
2212         if let Some(supported) = self.io_uring_supported {
2213             return supported;
2214         }
2215 
2216         let supported = block_io_uring_is_supported();
2217         self.io_uring_supported = Some(supported);
2218         supported
2219     }
2220 
2221     fn make_virtio_block_device(
2222         &mut self,
2223         disk_cfg: &mut DiskConfig,
2224     ) -> DeviceManagerResult<MetaVirtioDevice> {
2225         let id = if let Some(id) = &disk_cfg.id {
2226             id.clone()
2227         } else {
2228             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
2229             disk_cfg.id = Some(id.clone());
2230             id
2231         };
2232 
2233         info!("Creating virtio-block device: {:?}", disk_cfg);
2234 
2235         let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
2236             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
2237             let vu_cfg = VhostUserConfig {
2238                 socket,
2239                 num_queues: disk_cfg.num_queues,
2240                 queue_size: disk_cfg.queue_size,
2241             };
2242             let vhost_user_block = Arc::new(Mutex::new(
2243                 match virtio_devices::vhost_user::Blk::new(
2244                     id.clone(),
2245                     vu_cfg,
2246                     self.seccomp_action.clone(),
2247                     self.exit_evt
2248                         .try_clone()
2249                         .map_err(DeviceManagerError::EventFd)?,
2250                     self.force_iommu,
2251                     state_from_id(self.snapshot.as_ref(), id.as_str())
2252                         .map_err(DeviceManagerError::RestoreGetState)?,
2253                 ) {
2254                     Ok(vub_device) => vub_device,
2255                     Err(e) => {
2256                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2257                     }
2258                 },
2259             ));
2260 
2261             (
2262                 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2263                 vhost_user_block as Arc<Mutex<dyn Migratable>>,
2264             )
2265         } else {
2266             let mut options = OpenOptions::new();
2267             options.read(true);
2268             options.write(!disk_cfg.readonly);
2269             if disk_cfg.direct {
2270                 options.custom_flags(libc::O_DIRECT);
2271             }
2272             // Open block device path
2273             let mut file: File = options
2274                 .open(
2275                     disk_cfg
2276                         .path
2277                         .as_ref()
2278                         .ok_or(DeviceManagerError::NoDiskPath)?
2279                         .clone(),
2280                 )
2281                 .map_err(DeviceManagerError::Disk)?;
2282             let image_type =
2283                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2284 
2285             let image = match image_type {
2286                 ImageType::FixedVhd => {
2287                     // Use asynchronous backend relying on io_uring if the
2288                     // syscalls are supported.
2289                     if cfg!(feature = "io_uring")
2290                         && !disk_cfg.disable_io_uring
2291                         && self.io_uring_is_supported()
2292                     {
2293                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2294 
2295                         #[cfg(not(feature = "io_uring"))]
2296                         unreachable!("Checked in if statement above");
2297                         #[cfg(feature = "io_uring")]
2298                         {
2299                             Box::new(
2300                                 FixedVhdDiskAsync::new(file)
2301                                     .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2302                             ) as Box<dyn DiskFile>
2303                         }
2304                     } else {
2305                         info!("Using synchronous fixed VHD disk file");
2306                         Box::new(
2307                             FixedVhdDiskSync::new(file)
2308                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2309                         ) as Box<dyn DiskFile>
2310                     }
2311                 }
2312                 ImageType::Raw => {
2313                     // Use asynchronous backend relying on io_uring if the
2314                     // syscalls are supported.
2315                     if cfg!(feature = "io_uring")
2316                         && !disk_cfg.disable_io_uring
2317                         && self.io_uring_is_supported()
2318                     {
2319                         info!("Using asynchronous RAW disk file (io_uring)");
2320 
2321                         #[cfg(not(feature = "io_uring"))]
2322                         unreachable!("Checked in if statement above");
2323                         #[cfg(feature = "io_uring")]
2324                         {
2325                             Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2326                         }
2327                     } else if !disk_cfg.disable_aio && self.aio_is_supported() {
2328                         info!("Using asynchronous RAW disk file (aio)");
2329                         Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
2330                     } else {
2331                         info!("Using synchronous RAW disk file");
2332                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2333                     }
2334                 }
2335                 ImageType::Qcow2 => {
2336                     info!("Using synchronous QCOW disk file");
2337                     Box::new(
2338                         QcowDiskSync::new(file, disk_cfg.direct)
2339                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2340                     ) as Box<dyn DiskFile>
2341                 }
2342                 ImageType::Vhdx => {
2343                     info!("Using synchronous VHDX disk file");
2344                     Box::new(
2345                         VhdxDiskSync::new(file)
2346                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2347                     ) as Box<dyn DiskFile>
2348                 }
2349             };
2350 
2351             let rate_limit_group =
2352                 if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
2353                     // Create an anonymous RateLimiterGroup that is dropped when the Disk
2354                     // is dropped.
2355                     let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
2356                     let ops = rate_limiter_cfg.ops.unwrap_or_default();
2357                     let mut rate_limit_group = RateLimiterGroup::new(
2358                         disk_cfg.id.as_ref().unwrap(),
2359                         bw.size,
2360                         bw.one_time_burst.unwrap_or(0),
2361                         bw.refill_time,
2362                         ops.size,
2363                         ops.one_time_burst.unwrap_or(0),
2364                         ops.refill_time,
2365                     )
2366                     .map_err(DeviceManagerError::RateLimiterGroupCreate)?;
2367 
2368                     rate_limit_group
2369                         .start_thread(
2370                             self.exit_evt
2371                                 .try_clone()
2372                                 .map_err(DeviceManagerError::EventFd)?,
2373                         )
2374                         .unwrap();
2375 
2376                     Some(Arc::new(rate_limit_group))
2377                 } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
2378                     self.rate_limit_groups.get(rate_limit_group).cloned()
2379                 } else {
2380                     None
2381                 };
2382 
2383             let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() {
2384                 queue_affinity
2385                     .iter()
2386                     .map(|a| (a.queue_index, a.host_cpus.clone()))
2387                     .collect()
2388             } else {
2389                 BTreeMap::new()
2390             };
2391 
2392             let virtio_block = Arc::new(Mutex::new(
2393                 virtio_devices::Block::new(
2394                     id.clone(),
2395                     image,
2396                     disk_cfg
2397                         .path
2398                         .as_ref()
2399                         .ok_or(DeviceManagerError::NoDiskPath)?
2400                         .clone(),
2401                     disk_cfg.readonly,
2402                     self.force_iommu | disk_cfg.iommu,
2403                     disk_cfg.num_queues,
2404                     disk_cfg.queue_size,
2405                     disk_cfg.serial.clone(),
2406                     self.seccomp_action.clone(),
2407                     rate_limit_group,
2408                     self.exit_evt
2409                         .try_clone()
2410                         .map_err(DeviceManagerError::EventFd)?,
2411                     state_from_id(self.snapshot.as_ref(), id.as_str())
2412                         .map_err(DeviceManagerError::RestoreGetState)?,
2413                     queue_affinity,
2414                 )
2415                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2416             ));
2417 
2418             (
2419                 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2420                 virtio_block as Arc<Mutex<dyn Migratable>>,
2421             )
2422         };
2423 
2424         // Fill the device tree with a new node. In case of restore, we
2425         // know there is nothing to do, so we can simply override the
2426         // existing entry.
2427         self.device_tree
2428             .lock()
2429             .unwrap()
2430             .insert(id.clone(), device_node!(id, migratable_device));
2431 
2432         Ok(MetaVirtioDevice {
2433             virtio_device,
2434             iommu: disk_cfg.iommu,
2435             id,
2436             pci_segment: disk_cfg.pci_segment,
2437             dma_handler: None,
2438         })
2439     }
2440 
2441     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2442         let mut devices = Vec::new();
2443 
2444         let mut block_devices = self.config.lock().unwrap().disks.clone();
2445         if let Some(disk_list_cfg) = &mut block_devices {
2446             for disk_cfg in disk_list_cfg.iter_mut() {
2447                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2448             }
2449         }
2450         self.config.lock().unwrap().disks = block_devices;
2451 
2452         Ok(devices)
2453     }
2454 
2455     fn make_virtio_net_device(
2456         &mut self,
2457         net_cfg: &mut NetConfig,
2458     ) -> DeviceManagerResult<MetaVirtioDevice> {
2459         let id = if let Some(id) = &net_cfg.id {
2460             id.clone()
2461         } else {
2462             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2463             net_cfg.id = Some(id.clone());
2464             id
2465         };
2466         info!("Creating virtio-net device: {:?}", net_cfg);
2467 
2468         let (virtio_device, migratable_device) = if net_cfg.vhost_user {
2469             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2470             let vu_cfg = VhostUserConfig {
2471                 socket,
2472                 num_queues: net_cfg.num_queues,
2473                 queue_size: net_cfg.queue_size,
2474             };
2475             let server = match net_cfg.vhost_mode {
2476                 VhostMode::Client => false,
2477                 VhostMode::Server => true,
2478             };
2479             let vhost_user_net = Arc::new(Mutex::new(
2480                 match virtio_devices::vhost_user::Net::new(
2481                     id.clone(),
2482                     net_cfg.mac,
2483                     net_cfg.mtu,
2484                     vu_cfg,
2485                     server,
2486                     self.seccomp_action.clone(),
2487                     self.exit_evt
2488                         .try_clone()
2489                         .map_err(DeviceManagerError::EventFd)?,
2490                     self.force_iommu,
2491                     state_from_id(self.snapshot.as_ref(), id.as_str())
2492                         .map_err(DeviceManagerError::RestoreGetState)?,
2493                     net_cfg.offload_tso,
2494                     net_cfg.offload_ufo,
2495                     net_cfg.offload_csum,
2496                 ) {
2497                     Ok(vun_device) => vun_device,
2498                     Err(e) => {
2499                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2500                     }
2501                 },
2502             ));
2503 
2504             (
2505                 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2506                 vhost_user_net as Arc<Mutex<dyn Migratable>>,
2507             )
2508         } else {
2509             let state = state_from_id(self.snapshot.as_ref(), id.as_str())
2510                 .map_err(DeviceManagerError::RestoreGetState)?;
2511             let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
2512                 Arc::new(Mutex::new(
2513                     virtio_devices::Net::new(
2514                         id.clone(),
2515                         Some(tap_if_name),
2516                         Some(net_cfg.ip),
2517                         Some(net_cfg.mask),
2518                         Some(net_cfg.mac),
2519                         &mut net_cfg.host_mac,
2520                         net_cfg.mtu,
2521                         self.force_iommu | net_cfg.iommu,
2522                         net_cfg.num_queues,
2523                         net_cfg.queue_size,
2524                         self.seccomp_action.clone(),
2525                         net_cfg.rate_limiter_config,
2526                         self.exit_evt
2527                             .try_clone()
2528                             .map_err(DeviceManagerError::EventFd)?,
2529                         state,
2530                         net_cfg.offload_tso,
2531                         net_cfg.offload_ufo,
2532                         net_cfg.offload_csum,
2533                     )
2534                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2535                 ))
2536             } else if let Some(fds) = &net_cfg.fds {
2537                 let net = virtio_devices::Net::from_tap_fds(
2538                     id.clone(),
2539                     fds,
2540                     Some(net_cfg.mac),
2541                     net_cfg.mtu,
2542                     self.force_iommu | net_cfg.iommu,
2543                     net_cfg.queue_size,
2544                     self.seccomp_action.clone(),
2545                     net_cfg.rate_limiter_config,
2546                     self.exit_evt
2547                         .try_clone()
2548                         .map_err(DeviceManagerError::EventFd)?,
2549                     state,
2550                     net_cfg.offload_tso,
2551                     net_cfg.offload_ufo,
2552                     net_cfg.offload_csum,
2553                 )
2554                 .map_err(DeviceManagerError::CreateVirtioNet)?;
2555 
2556                 // SAFETY: 'fds' are valid because TAP devices are created successfully
2557                 unsafe {
2558                     self.config.lock().unwrap().add_preserved_fds(fds.clone());
2559                 }
2560 
2561                 Arc::new(Mutex::new(net))
2562             } else {
2563                 Arc::new(Mutex::new(
2564                     virtio_devices::Net::new(
2565                         id.clone(),
2566                         None,
2567                         Some(net_cfg.ip),
2568                         Some(net_cfg.mask),
2569                         Some(net_cfg.mac),
2570                         &mut net_cfg.host_mac,
2571                         net_cfg.mtu,
2572                         self.force_iommu | net_cfg.iommu,
2573                         net_cfg.num_queues,
2574                         net_cfg.queue_size,
2575                         self.seccomp_action.clone(),
2576                         net_cfg.rate_limiter_config,
2577                         self.exit_evt
2578                             .try_clone()
2579                             .map_err(DeviceManagerError::EventFd)?,
2580                         state,
2581                         net_cfg.offload_tso,
2582                         net_cfg.offload_ufo,
2583                         net_cfg.offload_csum,
2584                     )
2585                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2586                 ))
2587             };
2588 
2589             (
2590                 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2591                 virtio_net as Arc<Mutex<dyn Migratable>>,
2592             )
2593         };
2594 
2595         // Fill the device tree with a new node. In case of restore, we
2596         // know there is nothing to do, so we can simply override the
2597         // existing entry.
2598         self.device_tree
2599             .lock()
2600             .unwrap()
2601             .insert(id.clone(), device_node!(id, migratable_device));
2602 
2603         Ok(MetaVirtioDevice {
2604             virtio_device,
2605             iommu: net_cfg.iommu,
2606             id,
2607             pci_segment: net_cfg.pci_segment,
2608             dma_handler: None,
2609         })
2610     }
2611 
2612     /// Add virto-net and vhost-user-net devices
2613     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2614         let mut devices = Vec::new();
2615         let mut net_devices = self.config.lock().unwrap().net.clone();
2616         if let Some(net_list_cfg) = &mut net_devices {
2617             for net_cfg in net_list_cfg.iter_mut() {
2618                 devices.push(self.make_virtio_net_device(net_cfg)?);
2619             }
2620         }
2621         self.config.lock().unwrap().net = net_devices;
2622 
2623         Ok(devices)
2624     }
2625 
2626     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2627         let mut devices = Vec::new();
2628 
2629         // Add virtio-rng if required
2630         let rng_config = self.config.lock().unwrap().rng.clone();
2631         if let Some(rng_path) = rng_config.src.to_str() {
2632             info!("Creating virtio-rng device: {:?}", rng_config);
2633             let id = String::from(RNG_DEVICE_NAME);
2634 
2635             let virtio_rng_device = Arc::new(Mutex::new(
2636                 virtio_devices::Rng::new(
2637                     id.clone(),
2638                     rng_path,
2639                     self.force_iommu | rng_config.iommu,
2640                     self.seccomp_action.clone(),
2641                     self.exit_evt
2642                         .try_clone()
2643                         .map_err(DeviceManagerError::EventFd)?,
2644                     state_from_id(self.snapshot.as_ref(), id.as_str())
2645                         .map_err(DeviceManagerError::RestoreGetState)?,
2646                 )
2647                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2648             ));
2649             devices.push(MetaVirtioDevice {
2650                 virtio_device: Arc::clone(&virtio_rng_device)
2651                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2652                 iommu: rng_config.iommu,
2653                 id: id.clone(),
2654                 pci_segment: 0,
2655                 dma_handler: None,
2656             });
2657 
2658             // Fill the device tree with a new node. In case of restore, we
2659             // know there is nothing to do, so we can simply override the
2660             // existing entry.
2661             self.device_tree
2662                 .lock()
2663                 .unwrap()
2664                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2665         }
2666 
2667         Ok(devices)
2668     }
2669 
2670     fn make_virtio_fs_device(
2671         &mut self,
2672         fs_cfg: &mut FsConfig,
2673     ) -> DeviceManagerResult<MetaVirtioDevice> {
2674         let id = if let Some(id) = &fs_cfg.id {
2675             id.clone()
2676         } else {
2677             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2678             fs_cfg.id = Some(id.clone());
2679             id
2680         };
2681 
2682         info!("Creating virtio-fs device: {:?}", fs_cfg);
2683 
2684         let mut node = device_node!(id);
2685 
2686         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2687             let virtio_fs_device = Arc::new(Mutex::new(
2688                 virtio_devices::vhost_user::Fs::new(
2689                     id.clone(),
2690                     fs_socket,
2691                     &fs_cfg.tag,
2692                     fs_cfg.num_queues,
2693                     fs_cfg.queue_size,
2694                     None,
2695                     self.seccomp_action.clone(),
2696                     self.exit_evt
2697                         .try_clone()
2698                         .map_err(DeviceManagerError::EventFd)?,
2699                     self.force_iommu,
2700                     state_from_id(self.snapshot.as_ref(), id.as_str())
2701                         .map_err(DeviceManagerError::RestoreGetState)?,
2702                 )
2703                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2704             ));
2705 
2706             // Update the device tree with the migratable device.
2707             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2708             self.device_tree.lock().unwrap().insert(id.clone(), node);
2709 
2710             Ok(MetaVirtioDevice {
2711                 virtio_device: Arc::clone(&virtio_fs_device)
2712                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2713                 iommu: false,
2714                 id,
2715                 pci_segment: fs_cfg.pci_segment,
2716                 dma_handler: None,
2717             })
2718         } else {
2719             Err(DeviceManagerError::NoVirtioFsSock)
2720         }
2721     }
2722 
2723     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2724         let mut devices = Vec::new();
2725 
2726         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2727         if let Some(fs_list_cfg) = &mut fs_devices {
2728             for fs_cfg in fs_list_cfg.iter_mut() {
2729                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2730             }
2731         }
2732         self.config.lock().unwrap().fs = fs_devices;
2733 
2734         Ok(devices)
2735     }
2736 
2737     fn make_virtio_pmem_device(
2738         &mut self,
2739         pmem_cfg: &mut PmemConfig,
2740     ) -> DeviceManagerResult<MetaVirtioDevice> {
2741         let id = if let Some(id) = &pmem_cfg.id {
2742             id.clone()
2743         } else {
2744             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2745             pmem_cfg.id = Some(id.clone());
2746             id
2747         };
2748 
2749         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2750 
2751         let mut node = device_node!(id);
2752 
2753         // Look for the id in the device tree. If it can be found, that means
2754         // the device is being restored, otherwise it's created from scratch.
2755         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2756             info!("Restoring virtio-pmem {} resources", id);
2757 
2758             let mut region_range: Option<(u64, u64)> = None;
2759             for resource in node.resources.iter() {
2760                 match resource {
2761                     Resource::MmioAddressRange { base, size } => {
2762                         if region_range.is_some() {
2763                             return Err(DeviceManagerError::ResourceAlreadyExists);
2764                         }
2765 
2766                         region_range = Some((*base, *size));
2767                     }
2768                     _ => {
2769                         error!("Unexpected resource {:?} for {}", resource, id);
2770                     }
2771                 }
2772             }
2773 
2774             if region_range.is_none() {
2775                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2776             }
2777 
2778             region_range
2779         } else {
2780             None
2781         };
2782 
2783         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2784             if pmem_cfg.size.is_none() {
2785                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2786             }
2787             (O_TMPFILE, true)
2788         } else {
2789             (0, false)
2790         };
2791 
2792         let mut file = OpenOptions::new()
2793             .read(true)
2794             .write(!pmem_cfg.discard_writes)
2795             .custom_flags(custom_flags)
2796             .open(&pmem_cfg.file)
2797             .map_err(DeviceManagerError::PmemFileOpen)?;
2798 
2799         let size = if let Some(size) = pmem_cfg.size {
2800             if set_len {
2801                 file.set_len(size)
2802                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2803             }
2804             size
2805         } else {
2806             file.seek(SeekFrom::End(0))
2807                 .map_err(DeviceManagerError::PmemFileSetLen)?
2808         };
2809 
2810         if size % 0x20_0000 != 0 {
2811             return Err(DeviceManagerError::PmemSizeNotAligned);
2812         }
2813 
2814         let (region_base, region_size) = if let Some((base, size)) = region_range {
2815             // The memory needs to be 2MiB aligned in order to support
2816             // hugepages.
2817             self.pci_segments[pmem_cfg.pci_segment as usize]
2818                 .mem64_allocator
2819                 .lock()
2820                 .unwrap()
2821                 .allocate(
2822                     Some(GuestAddress(base)),
2823                     size as GuestUsize,
2824                     Some(0x0020_0000),
2825                 )
2826                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2827 
2828             (base, size)
2829         } else {
2830             // The memory needs to be 2MiB aligned in order to support
2831             // hugepages.
2832             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2833                 .mem64_allocator
2834                 .lock()
2835                 .unwrap()
2836                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
2837                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2838 
2839             (base.raw_value(), size)
2840         };
2841 
2842         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2843         let mmap_region = MmapRegion::build(
2844             Some(FileOffset::new(cloned_file, 0)),
2845             region_size as usize,
2846             PROT_READ | PROT_WRITE,
2847             MAP_NORESERVE
2848                 | if pmem_cfg.discard_writes {
2849                     MAP_PRIVATE
2850                 } else {
2851                     MAP_SHARED
2852                 },
2853         )
2854         .map_err(DeviceManagerError::NewMmapRegion)?;
2855         let host_addr: u64 = mmap_region.as_ptr() as u64;
2856 
2857         let mem_slot = self
2858             .memory_manager
2859             .lock()
2860             .unwrap()
2861             .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
2862             .map_err(DeviceManagerError::MemoryManager)?;
2863 
2864         let mapping = virtio_devices::UserspaceMapping {
2865             host_addr,
2866             mem_slot,
2867             addr: GuestAddress(region_base),
2868             len: region_size,
2869             mergeable: false,
2870         };
2871 
2872         let virtio_pmem_device = Arc::new(Mutex::new(
2873             virtio_devices::Pmem::new(
2874                 id.clone(),
2875                 file,
2876                 GuestAddress(region_base),
2877                 mapping,
2878                 mmap_region,
2879                 self.force_iommu | pmem_cfg.iommu,
2880                 self.seccomp_action.clone(),
2881                 self.exit_evt
2882                     .try_clone()
2883                     .map_err(DeviceManagerError::EventFd)?,
2884                 state_from_id(self.snapshot.as_ref(), id.as_str())
2885                     .map_err(DeviceManagerError::RestoreGetState)?,
2886             )
2887             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2888         ));
2889 
2890         // Update the device tree with correct resource information and with
2891         // the migratable device.
2892         node.resources.push(Resource::MmioAddressRange {
2893             base: region_base,
2894             size: region_size,
2895         });
2896         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2897         self.device_tree.lock().unwrap().insert(id.clone(), node);
2898 
2899         Ok(MetaVirtioDevice {
2900             virtio_device: Arc::clone(&virtio_pmem_device)
2901                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2902             iommu: pmem_cfg.iommu,
2903             id,
2904             pci_segment: pmem_cfg.pci_segment,
2905             dma_handler: None,
2906         })
2907     }
2908 
2909     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2910         let mut devices = Vec::new();
2911         // Add virtio-pmem if required
2912         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2913         if let Some(pmem_list_cfg) = &mut pmem_devices {
2914             for pmem_cfg in pmem_list_cfg.iter_mut() {
2915                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2916             }
2917         }
2918         self.config.lock().unwrap().pmem = pmem_devices;
2919 
2920         Ok(devices)
2921     }
2922 
2923     fn make_virtio_vsock_device(
2924         &mut self,
2925         vsock_cfg: &mut VsockConfig,
2926     ) -> DeviceManagerResult<MetaVirtioDevice> {
2927         let id = if let Some(id) = &vsock_cfg.id {
2928             id.clone()
2929         } else {
2930             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2931             vsock_cfg.id = Some(id.clone());
2932             id
2933         };
2934 
2935         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2936 
2937         let socket_path = vsock_cfg
2938             .socket
2939             .to_str()
2940             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2941         let backend =
2942             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2943                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2944 
2945         let vsock_device = Arc::new(Mutex::new(
2946             virtio_devices::Vsock::new(
2947                 id.clone(),
2948                 vsock_cfg.cid,
2949                 vsock_cfg.socket.clone(),
2950                 backend,
2951                 self.force_iommu | vsock_cfg.iommu,
2952                 self.seccomp_action.clone(),
2953                 self.exit_evt
2954                     .try_clone()
2955                     .map_err(DeviceManagerError::EventFd)?,
2956                 state_from_id(self.snapshot.as_ref(), id.as_str())
2957                     .map_err(DeviceManagerError::RestoreGetState)?,
2958             )
2959             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2960         ));
2961 
2962         // Fill the device tree with a new node. In case of restore, we
2963         // know there is nothing to do, so we can simply override the
2964         // existing entry.
2965         self.device_tree
2966             .lock()
2967             .unwrap()
2968             .insert(id.clone(), device_node!(id, vsock_device));
2969 
2970         Ok(MetaVirtioDevice {
2971             virtio_device: Arc::clone(&vsock_device)
2972                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2973             iommu: vsock_cfg.iommu,
2974             id,
2975             pci_segment: vsock_cfg.pci_segment,
2976             dma_handler: None,
2977         })
2978     }
2979 
2980     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2981         let mut devices = Vec::new();
2982 
2983         let mut vsock = self.config.lock().unwrap().vsock.clone();
2984         if let Some(ref mut vsock_cfg) = &mut vsock {
2985             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
2986         }
2987         self.config.lock().unwrap().vsock = vsock;
2988 
2989         Ok(devices)
2990     }
2991 
2992     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2993         let mut devices = Vec::new();
2994 
2995         let mm = self.memory_manager.clone();
2996         let mut mm = mm.lock().unwrap();
2997         for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
2998             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
2999                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
3000 
3001                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
3002                     .map(|i| i as u16);
3003 
3004                 let virtio_mem_device = Arc::new(Mutex::new(
3005                     virtio_devices::Mem::new(
3006                         memory_zone_id.clone(),
3007                         virtio_mem_zone.region(),
3008                         self.seccomp_action.clone(),
3009                         node_id,
3010                         virtio_mem_zone.hotplugged_size(),
3011                         virtio_mem_zone.hugepages(),
3012                         self.exit_evt
3013                             .try_clone()
3014                             .map_err(DeviceManagerError::EventFd)?,
3015                         virtio_mem_zone.blocks_state().clone(),
3016                         state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
3017                             .map_err(DeviceManagerError::RestoreGetState)?,
3018                     )
3019                     .map_err(DeviceManagerError::CreateVirtioMem)?,
3020                 ));
3021 
3022                 // Update the virtio-mem zone so that it has a handle onto the
3023                 // virtio-mem device, which will be used for triggering a resize
3024                 // if needed.
3025                 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));
3026 
3027                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
3028 
3029                 devices.push(MetaVirtioDevice {
3030                     virtio_device: Arc::clone(&virtio_mem_device)
3031                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3032                     iommu: false,
3033                     id: memory_zone_id.clone(),
3034                     pci_segment: 0,
3035                     dma_handler: None,
3036                 });
3037 
3038                 // Fill the device tree with a new node. In case of restore, we
3039                 // know there is nothing to do, so we can simply override the
3040                 // existing entry.
3041                 self.device_tree.lock().unwrap().insert(
3042                     memory_zone_id.clone(),
3043                     device_node!(memory_zone_id, virtio_mem_device),
3044                 );
3045             }
3046         }
3047 
3048         Ok(devices)
3049     }
3050 
3051     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3052         let mut devices = Vec::new();
3053 
3054         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
3055             let id = String::from(BALLOON_DEVICE_NAME);
3056             info!("Creating virtio-balloon device: id = {}", id);
3057 
3058             let virtio_balloon_device = Arc::new(Mutex::new(
3059                 virtio_devices::Balloon::new(
3060                     id.clone(),
3061                     balloon_config.size,
3062                     balloon_config.deflate_on_oom,
3063                     balloon_config.free_page_reporting,
3064                     self.seccomp_action.clone(),
3065                     self.exit_evt
3066                         .try_clone()
3067                         .map_err(DeviceManagerError::EventFd)?,
3068                     state_from_id(self.snapshot.as_ref(), id.as_str())
3069                         .map_err(DeviceManagerError::RestoreGetState)?,
3070                 )
3071                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
3072             ));
3073 
3074             self.balloon = Some(virtio_balloon_device.clone());
3075 
3076             devices.push(MetaVirtioDevice {
3077                 virtio_device: Arc::clone(&virtio_balloon_device)
3078                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3079                 iommu: false,
3080                 id: id.clone(),
3081                 pci_segment: 0,
3082                 dma_handler: None,
3083             });
3084 
3085             self.device_tree
3086                 .lock()
3087                 .unwrap()
3088                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
3089         }
3090 
3091         Ok(devices)
3092     }
3093 
3094     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3095         let mut devices = Vec::new();
3096 
3097         if !self.config.lock().unwrap().watchdog {
3098             return Ok(devices);
3099         }
3100 
3101         let id = String::from(WATCHDOG_DEVICE_NAME);
3102         info!("Creating virtio-watchdog device: id = {}", id);
3103 
3104         let virtio_watchdog_device = Arc::new(Mutex::new(
3105             virtio_devices::Watchdog::new(
3106                 id.clone(),
3107                 self.reset_evt.try_clone().unwrap(),
3108                 self.seccomp_action.clone(),
3109                 self.exit_evt
3110                     .try_clone()
3111                     .map_err(DeviceManagerError::EventFd)?,
3112                 state_from_id(self.snapshot.as_ref(), id.as_str())
3113                     .map_err(DeviceManagerError::RestoreGetState)?,
3114             )
3115             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
3116         ));
3117         devices.push(MetaVirtioDevice {
3118             virtio_device: Arc::clone(&virtio_watchdog_device)
3119                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3120             iommu: false,
3121             id: id.clone(),
3122             pci_segment: 0,
3123             dma_handler: None,
3124         });
3125 
3126         self.device_tree
3127             .lock()
3128             .unwrap()
3129             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
3130 
3131         Ok(devices)
3132     }
3133 
3134     fn make_vdpa_device(
3135         &mut self,
3136         vdpa_cfg: &mut VdpaConfig,
3137     ) -> DeviceManagerResult<MetaVirtioDevice> {
3138         let id = if let Some(id) = &vdpa_cfg.id {
3139             id.clone()
3140         } else {
3141             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
3142             vdpa_cfg.id = Some(id.clone());
3143             id
3144         };
3145 
3146         info!("Creating vDPA device: {:?}", vdpa_cfg);
3147 
3148         let device_path = vdpa_cfg
3149             .path
3150             .to_str()
3151             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
3152 
3153         let vdpa_device = Arc::new(Mutex::new(
3154             virtio_devices::Vdpa::new(
3155                 id.clone(),
3156                 device_path,
3157                 self.memory_manager.lock().unwrap().guest_memory(),
3158                 vdpa_cfg.num_queues as u16,
3159                 state_from_id(self.snapshot.as_ref(), id.as_str())
3160                     .map_err(DeviceManagerError::RestoreGetState)?,
3161             )
3162             .map_err(DeviceManagerError::CreateVdpa)?,
3163         ));
3164 
3165         // Create the DMA handler that is required by the vDPA device
3166         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
3167             Arc::clone(&vdpa_device),
3168             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3169         ));
3170 
3171         self.device_tree
3172             .lock()
3173             .unwrap()
3174             .insert(id.clone(), device_node!(id, vdpa_device));
3175 
3176         Ok(MetaVirtioDevice {
3177             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3178             iommu: vdpa_cfg.iommu,
3179             id,
3180             pci_segment: vdpa_cfg.pci_segment,
3181             dma_handler: Some(vdpa_mapping),
3182         })
3183     }
3184 
3185     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3186         let mut devices = Vec::new();
3187         // Add vdpa if required
3188         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
3189         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
3190             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
3191                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
3192             }
3193         }
3194         self.config.lock().unwrap().vdpa = vdpa_devices;
3195 
3196         Ok(devices)
3197     }
3198 
3199     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
3200         let start_id = self.device_id_cnt;
3201         loop {
3202             // Generate the temporary name.
3203             let name = format!("{}{}", prefix, self.device_id_cnt);
3204             // Increment the counter.
3205             self.device_id_cnt += Wrapping(1);
3206             // Check if the name is already in use.
3207             if !self.boot_id_list.contains(&name)
3208                 && !self.device_tree.lock().unwrap().contains_key(&name)
3209             {
3210                 return Ok(name);
3211             }
3212 
3213             if self.device_id_cnt == start_id {
3214                 // We went through a full loop and there's nothing else we can
3215                 // do.
3216                 break;
3217             }
3218         }
3219         Err(DeviceManagerError::NoAvailableDeviceName)
3220     }
3221 
3222     fn add_passthrough_device(
3223         &mut self,
3224         device_cfg: &mut DeviceConfig,
3225     ) -> DeviceManagerResult<(PciBdf, String)> {
3226         // If the passthrough device has not been created yet, it is created
3227         // here and stored in the DeviceManager structure for future needs.
3228         if self.passthrough_device.is_none() {
3229             self.passthrough_device = Some(
3230                 self.address_manager
3231                     .vm
3232                     .create_passthrough_device()
3233                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
3234             );
3235         }
3236 
3237         self.add_vfio_device(device_cfg)
3238     }
3239 
3240     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
3241         let passthrough_device = self
3242             .passthrough_device
3243             .as_ref()
3244             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
3245 
3246         let dup = passthrough_device
3247             .try_clone()
3248             .map_err(DeviceManagerError::VfioCreate)?;
3249 
3250         Ok(Arc::new(
3251             VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
3252         ))
3253     }
3254 
    /// Creates a VFIO PCI device from `device_cfg` and plugs it onto its PCI
    /// segment.
    ///
    /// When `device_cfg.id` is `None`, a name is generated from
    /// `VFIO_DEVICE_NAME_PREFIX` and written back into `device_cfg`. On success
    /// the device is registered in the device tree and the function returns the
    /// BDF assigned to the device together with its name.
    ///
    /// # Errors
    ///
    /// Returns `MissingVirtualIommu` when `device_cfg.iommu` is set but no
    /// `iommu_device` is present. Failures from container/device creation, DMA
    /// mapping, interrupt group creation, PCI registration and MMIO region
    /// mapping are propagated with their matching `DeviceManagerError` variant.
    fn add_vfio_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // Use the configured id, or generate one and persist it in the config.
        let vfio_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

        // Only set when this call creates the shared (non-vIOMMU) container,
        // in which case guest memory still has to be mapped into it below.
        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't let all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device. That's simply because the map/unmap operations happen at the
        // VFIO container level.
        let vfio_container = if device_cfg.iommu {
            let vfio_container = self.create_vfio_container()?;

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            // Hand the mapping handler to the virtual IOMMU, keyed by the
            // device's BDF.
            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }

            vfio_container
        } else if let Some(vfio_container) = &self.vfio_container {
            // Reuse the shared container created by an earlier device.
            Arc::clone(vfio_container)
        } else {
            let vfio_container = self.create_vfio_container()?;
            needs_dma_mapping = true;
            self.vfio_container = Some(Arc::clone(&vfio_container));

            vfio_container
        };

        let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
            .map_err(DeviceManagerError::VfioCreate)?;

        if needs_dma_mapping {
            // Map every region of every memory zone into the newly created
            // shared VFIO container.
            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem device itself.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    vfio_container
                        .vfio_dma_map(
                            region.start_addr().raw_value(),
                            region.len(),
                            region.as_ptr() as u64,
                        )
                        .map_err(DeviceManagerError::VfioDmaMap)?;
                }
            }

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            // Register the container-level handler with each virtio-mem device.
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Container,
                        vfio_mapping.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }
        }

        // A legacy interrupt group is only created when a legacy interrupt
        // manager exists; the IRQ is looked up from the segment's slot table
        // using the device number of the BDF.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        // Cloned so the memory-slot allocation closure below can own a handle
        // to the memory manager.
        let memory_manager = self.memory_manager.clone();

        let vfio_pci_device = VfioPciDevice::new(
            vfio_name.clone(),
            &self.address_manager.vm,
            vfio_device,
            vfio_container,
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            device_cfg.iommu,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
            device_cfg.x_nv_gpudirect_clique,
        )
        .map_err(DeviceManagerError::VfioPciCreate)?;

        let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

        // The same object is passed as both the bus device and the PCI device.
        let new_resources = self.add_pci_device(
            vfio_pci_device.clone(),
            vfio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        vfio_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioMapRegion)?;

        // Record the device's MMIO regions in the list shared with the
        // `VfioDmaMapping` handlers created above.
        for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
            self.mmio_regions.lock().unwrap().push(mmio_region);
        }

        let mut node = device_node!(vfio_name, vfio_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_name.clone(), node);

        Ok((pci_device_bdf, vfio_name))
    }
3412 
    /// Allocates the BARs of `pci_device`, adds it to the PCI bus of segment
    /// `segment_id` at device number `bdf.device()`, and registers the BAR
    /// mappings for `bus_device` on the I/O (x86_64 only) and MMIO buses.
    ///
    /// `resources` is forwarded unchanged to `allocate_bars`; callers in this
    /// file pass the value returned by `pci_resources`, which is `Some` when
    /// the device id was already present in the device tree.
    ///
    /// Returns one `Resource::PciBar` entry per allocated BAR.
    ///
    /// # Errors
    ///
    /// `AllocateBars` if BAR allocation fails, `AddPciDevice` if adding the
    /// device to the bus or registering its mappings fails.
    fn add_pci_device(
        &mut self,
        bus_device: Arc<Mutex<dyn BusDevice>>,
        pci_device: Arc<Mutex<dyn PciDevice>>,
        segment_id: u16,
        bdf: PciBdf,
        resources: Option<Vec<Resource>>,
    ) -> DeviceManagerResult<Vec<Resource>> {
        // Both per-segment allocators are locked for the duration of the
        // `allocate_bars` call.
        let bars = pci_device
            .lock()
            .unwrap()
            .allocate_bars(
                &self.address_manager.allocator,
                &mut self.pci_segments[segment_id as usize]
                    .mem32_allocator
                    .lock()
                    .unwrap(),
                &mut self.pci_segments[segment_id as usize]
                    .mem64_allocator
                    .lock()
                    .unwrap(),
                resources,
            )
            .map_err(DeviceManagerError::AllocateBars)?;

        // The segment's PCI bus stays locked until the end of the function.
        let mut pci_bus = self.pci_segments[segment_id as usize]
            .pci_bus
            .lock()
            .unwrap();

        pci_bus
            .add_device(bdf.device() as u32, pci_device)
            .map_err(DeviceManagerError::AddPciDevice)?;

        self.bus_devices.push(Arc::clone(&bus_device));

        pci_bus
            .register_mapping(
                bus_device,
                #[cfg(target_arch = "x86_64")]
                self.address_manager.io_bus.as_ref(),
                self.address_manager.mmio_bus.as_ref(),
                bars.clone(),
            )
            .map_err(DeviceManagerError::AddPciDevice)?;

        // Describe each allocated BAR as a resource for the device tree.
        let mut new_resources = Vec::new();
        for bar in bars {
            new_resources.push(Resource::PciBar {
                index: bar.idx(),
                base: bar.addr(),
                size: bar.size(),
                type_: bar.region_type().into(),
                prefetchable: bar.prefetchable().into(),
            });
        }

        Ok(new_resources)
    }
3472 
3473     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3474         let mut iommu_attached_device_ids = Vec::new();
3475         let mut devices = self.config.lock().unwrap().devices.clone();
3476 
3477         if let Some(device_list_cfg) = &mut devices {
3478             for device_cfg in device_list_cfg.iter_mut() {
3479                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3480                 if device_cfg.iommu && self.iommu_device.is_some() {
3481                     iommu_attached_device_ids.push(device_id);
3482                 }
3483             }
3484         }
3485 
3486         // Update the list of devices
3487         self.config.lock().unwrap().devices = devices;
3488 
3489         Ok(iommu_attached_device_ids)
3490     }
3491 
    /// Creates a vfio-user PCI device from `device_cfg` and plugs it onto its
    /// PCI segment.
    ///
    /// When `device_cfg.id` is `None`, a name is generated from
    /// `VFIO_USER_DEVICE_NAME_PREFIX` and written back into `device_cfg`. On
    /// success the device is registered in the device tree and the function
    /// returns the BDF assigned to the device together with its name.
    ///
    /// # Errors
    ///
    /// Failures from interrupt group creation, client creation
    /// (`VfioUserCreateClient`), device creation (`VfioUserCreate`), DMA
    /// mapping, PCI registration and MMIO region mapping are propagated.
    fn add_vfio_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // Use the configured id, or generate one and persist it in the config.
        let vfio_user_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;

        // A legacy interrupt group is only created when a legacy interrupt
        // manager exists; the IRQ is looked up from the segment's slot table
        // using the device number of the BDF.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        // The client is shared between the PCI device and the DMA mapping
        // handler created below.
        let client = Arc::new(Mutex::new(
            vfio_user::Client::new(&device_cfg.socket)
                .map_err(DeviceManagerError::VfioUserCreateClient)?,
        ));

        // Cloned so the memory-slot allocation closure below can own a handle
        // to the memory manager.
        let memory_manager = self.memory_manager.clone();

        let mut vfio_user_pci_device = VfioUserPciDevice::new(
            vfio_user_name.clone(),
            &self.address_manager.vm,
            client.clone(),
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            pci_device_bdf,
            Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
        )
        .map_err(DeviceManagerError::VfioUserCreate)?;

        // Register a per-device DMA mapping handler with each virtio-mem
        // device.
        let memory = self.memory_manager.lock().unwrap().guest_memory();
        let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Device(pci_device_bdf.into()),
                    vfio_user_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }

        // DMA-map every region of every memory zone through the device.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_user_pci_device
                    .dma_map(region)
                    .map_err(DeviceManagerError::VfioUserDmaMap)?;
            }
        }

        let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

        // The same object is passed as both the bus device and the PCI device.
        let new_resources = self.add_pci_device(
            vfio_user_pci_device.clone(),
            vfio_user_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Note it is required to call 'add_pci_device()' in advance to have the list of
        // mmio regions provisioned correctly
        vfio_user_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioUserMapRegion)?;

        let mut node = device_node!(vfio_user_name, vfio_user_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_user_name.clone(), node);

        Ok((pci_device_bdf, vfio_user_name))
    }
3594 
3595     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3596         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3597 
3598         if let Some(device_list_cfg) = &mut user_devices {
3599             for device_cfg in device_list_cfg.iter_mut() {
3600                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3601             }
3602         }
3603 
3604         // Update the list of devices
3605         self.config.lock().unwrap().user_devices = user_devices;
3606 
3607         Ok(vec![])
3608     }
3609 
    /// Wraps `virtio_device` into a virtio-pci transport device and plugs it
    /// onto PCI segment `pci_segment_id`.
    ///
    /// The transport device gets the id
    /// `"{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}"` and is recorded in
    /// the device tree as the parent of the existing `virtio_device_id` node.
    /// When `iommu_mapping` is `Some`, an `AccessPlatformMapping` built from it
    /// is passed to the transport device. When `dma_handler` is `Some`, it is
    /// either registered with the virtual IOMMU (if `iommu_mapping` is `Some`)
    /// or used to map guest memory directly.
    ///
    /// Returns the BDF assigned to the device.
    ///
    /// # Errors
    ///
    /// `MissingNode` if `virtio_device_id` is not in the device tree,
    /// `MissingVirtualIommu` if a DMA handler must be attached to a virtual
    /// IOMMU that does not exist; other failures are propagated with their
    /// matching `DeviceManagerError` variant.
    fn add_virtio_pci_device(
        &mut self,
        virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu_mapping: &Option<Arc<IommuMapping>>,
        virtio_device_id: String,
        pci_segment_id: u16,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> DeviceManagerResult<PciBdf> {
        let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");

        // Add the new virtio-pci node to the device tree.
        let mut node = device_node!(id);
        node.children = vec![virtio_device_id.clone()];

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        // Update the existing virtio node by setting the parent.
        if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
            node.parent = Some(id.clone());
        } else {
            return Err(DeviceManagerError::MissingNode);
        }

        // Allows support for one MSI-X vector per queue. It also adds 1
        // as we need to take into account the dedicated vector to notify
        // about a virtio config change.
        let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;

        // Create the AccessPlatform trait from the implementation IommuMapping.
        // This will provide address translation for any virtio device sitting
        // behind a vIOMMU.
        let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
        {
            Some(Arc::new(AccessPlatformMapping::new(
                pci_device_bdf.into(),
                mapping.clone(),
            )))
        } else {
            None
        };

        let memory = self.memory_manager.lock().unwrap().guest_memory();

        // If a DMA handler is available, either register it with the virtual
        // IOMMU (device attached to a vIOMMU) or use it to map the guest
        // memory ranges directly (device not attached to a vIOMMU).
        if let Some(dma_handler) = &dma_handler {
            if iommu_mapping.is_some() {
                if let Some(iommu) = &self.iommu_device {
                    iommu
                        .lock()
                        .unwrap()
                        .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
                } else {
                    return Err(DeviceManagerError::MissingVirtualIommu);
                }
            } else {
                // Let every virtio-mem device handle the DMA map/unmap through the
                // DMA handler provided.
                for virtio_mem_device in self.virtio_mem_devices.iter() {
                    virtio_mem_device
                        .lock()
                        .unwrap()
                        .add_dma_mapping_handler(
                            VirtioMemMappingSource::Device(pci_device_bdf.into()),
                            dma_handler.clone(),
                        )
                        .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
                }

                // Do not register virtio-mem regions, as they are handled directly by
                // virtio-mem devices.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        // The region's guest address is passed for both of the
                        // first two arguments of `map`.
                        let gpa = region.start_addr().0;
                        let size = region.len();
                        dma_handler
                            .map(gpa, gpa, size)
                            .map_err(DeviceManagerError::VirtioDmaMap)?;
                    }
                }
            }
        }

        let device_type = virtio_device.lock().unwrap().device_type();
        let virtio_pci_device = Arc::new(Mutex::new(
            VirtioPciDevice::new(
                id.clone(),
                memory,
                virtio_device,
                msix_num,
                access_platform,
                &self.msi_interrupt_manager,
                pci_device_bdf.into(),
                self.activate_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                // All device types *except* virtio block devices should be allocated a 64-bit bar
                // The block devices should be given a 32-bit BAR so that they are easily accessible
                // to firmware without requiring excessive identity mapping.
                // The exception being if not on the default PCI segment.
                pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
                dma_handler,
                self.pending_activations.clone(),
                vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
            )
            .map_err(DeviceManagerError::VirtioDevice)?,
        ));

        // The same object is passed as both the bus device and the PCI device.
        let new_resources = self.add_pci_device(
            virtio_pci_device.clone(),
            virtio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Register an MMIO ioevent with the VM for each (event, address) pair
        // reported by the device for its configuration BAR.
        let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
        for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
            let io_addr = IoEventAddress::Mmio(addr);
            self.address_manager
                .vm
                .register_ioevent(event, &io_addr, None)
                .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
        }

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
        self.device_tree.lock().unwrap().insert(id, node);

        Ok(pci_device_bdf)
    }
3745 
3746     fn add_pvpanic_device(
3747         &mut self,
3748     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
3749         let id = String::from(PVPANIC_DEVICE_NAME);
3750         let pci_segment_id = 0x0_u16;
3751 
3752         info!("Creating pvpanic device {}", id);
3753 
3754         let (pci_segment_id, pci_device_bdf, resources) =
3755             self.pci_resources(&id, pci_segment_id)?;
3756 
3757         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
3758 
3759         let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
3760             .map_err(DeviceManagerError::PvPanicCreate)?;
3761 
3762         let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));
3763 
3764         let new_resources = self.add_pci_device(
3765             pvpanic_device.clone(),
3766             pvpanic_device.clone(),
3767             pci_segment_id,
3768             pci_device_bdf,
3769             resources,
3770         )?;
3771 
3772         let mut node = device_node!(id, pvpanic_device);
3773 
3774         node.resources = new_resources;
3775         node.pci_bdf = Some(pci_device_bdf);
3776         node.pci_device_handle = None;
3777 
3778         self.device_tree.lock().unwrap().insert(id, node);
3779 
3780         Ok(Some(pvpanic_device))
3781     }
3782 
3783     fn pci_resources(
3784         &self,
3785         id: &str,
3786         pci_segment_id: u16,
3787     ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
3788         // Look for the id in the device tree. If it can be found, that means
3789         // the device is being restored, otherwise it's created from scratch.
3790         Ok(
3791             if let Some(node) = self.device_tree.lock().unwrap().get(id) {
3792                 info!("Restoring virtio-pci {} resources", id);
3793                 let pci_device_bdf: PciBdf = node
3794                     .pci_bdf
3795                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3796                 let pci_segment_id = pci_device_bdf.segment();
3797 
3798                 self.pci_segments[pci_segment_id as usize]
3799                     .pci_bus
3800                     .lock()
3801                     .unwrap()
3802                     .get_device_id(pci_device_bdf.device() as usize)
3803                     .map_err(DeviceManagerError::GetPciDeviceId)?;
3804 
3805                 (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
3806             } else {
3807                 let pci_device_bdf =
3808                     self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3809 
3810                 (pci_segment_id, pci_device_bdf, None)
3811             },
3812         )
3813     }
3814 
    /// Returns the I/O bus held by the address manager (x86_64 only).
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }
3819 
    /// Returns the MMIO bus held by the address manager.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }
3823 
    /// Returns the system allocator held by the address manager.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }
3827 
3828     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3829         self.interrupt_controller
3830             .as_ref()
3831             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3832     }
3833 
    /// Returns the PCI segments owned by this device manager.
    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }
3837 
    /// Returns the console handle stored in this device manager.
    pub fn console(&self) -> &Arc<Console> {
        &self.console
    }
3841 
    /// Returns the stored `cmdline_additions` strings as a slice (aarch64 only).
    #[cfg(target_arch = "aarch64")]
    pub fn cmdline_additions(&self) -> &[String] {
        self.cmdline_additions.as_slice()
    }
3846 
3847     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3848         for handle in self.virtio_devices.iter() {
3849             handle
3850                 .virtio_device
3851                 .lock()
3852                 .unwrap()
3853                 .add_memory_region(new_region)
3854                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3855 
3856             if let Some(dma_handler) = &handle.dma_handler {
3857                 if !handle.iommu {
3858                     let gpa = new_region.start_addr().0;
3859                     let size = new_region.len();
3860                     dma_handler
3861                         .map(gpa, gpa, size)
3862                         .map_err(DeviceManagerError::VirtioDmaMap)?;
3863                 }
3864             }
3865         }
3866 
3867         // Take care of updating the memory for VFIO PCI devices.
3868         if let Some(vfio_container) = &self.vfio_container {
3869             vfio_container
3870                 .vfio_dma_map(
3871                     new_region.start_addr().raw_value(),
3872                     new_region.len(),
3873                     new_region.as_ptr() as u64,
3874                 )
3875                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3876         }
3877 
3878         // Take care of updating the memory for vfio-user devices.
3879         {
3880             let device_tree = self.device_tree.lock().unwrap();
3881             for pci_device_node in device_tree.pci_devices() {
3882                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
3883                     .pci_device_handle
3884                     .as_ref()
3885                     .ok_or(DeviceManagerError::MissingPciDevice)?
3886                 {
3887                     vfio_user_pci_device
3888                         .lock()
3889                         .unwrap()
3890                         .dma_map(new_region)
3891                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
3892                 }
3893             }
3894         }
3895 
3896         Ok(())
3897     }
3898 
3899     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3900         for mut activator in self.pending_activations.lock().unwrap().drain(..) {
3901             activator
3902                 .activate()
3903                 .map_err(DeviceManagerError::VirtioActivate)?;
3904         }
3905         Ok(())
3906     }
3907 
3908     pub fn notify_hotplug(
3909         &self,
3910         _notification_type: AcpiNotificationFlags,
3911     ) -> DeviceManagerResult<()> {
3912         return self
3913             .ged_notification_device
3914             .as_ref()
3915             .unwrap()
3916             .lock()
3917             .unwrap()
3918             .notify(_notification_type)
3919             .map_err(DeviceManagerError::HotPlugNotification);
3920     }
3921 
3922     pub fn add_device(
3923         &mut self,
3924         device_cfg: &mut DeviceConfig,
3925     ) -> DeviceManagerResult<PciDeviceInfo> {
3926         self.validate_identifier(&device_cfg.id)?;
3927 
3928         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
3929             return Err(DeviceManagerError::InvalidIommuHotplug);
3930         }
3931 
3932         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
3933 
3934         // Update the PCIU bitmap
3935         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3936 
3937         Ok(PciDeviceInfo {
3938             id: device_name,
3939             bdf,
3940         })
3941     }
3942 
3943     pub fn add_user_device(
3944         &mut self,
3945         device_cfg: &mut UserDeviceConfig,
3946     ) -> DeviceManagerResult<PciDeviceInfo> {
3947         self.validate_identifier(&device_cfg.id)?;
3948 
3949         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
3950 
3951         // Update the PCIU bitmap
3952         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3953 
3954         Ok(PciDeviceInfo {
3955             id: device_name,
3956             bdf,
3957         })
3958     }
3959 
3960     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
3961         // The node can be directly a PCI node in case the 'id' refers to a
3962         // VFIO device or a virtio-pci one.
3963         // In case the 'id' refers to a virtio device, we must find the PCI
3964         // node by looking at the parent.
3965         let device_tree = self.device_tree.lock().unwrap();
3966         let node = device_tree
3967             .get(&id)
3968             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
3969 
3970         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
3971             node
3972         } else {
3973             let parent = node
3974                 .parent
3975                 .as_ref()
3976                 .ok_or(DeviceManagerError::MissingNode)?;
3977             device_tree
3978                 .get(parent)
3979                 .ok_or(DeviceManagerError::MissingNode)?
3980         };
3981 
3982         let pci_device_bdf: PciBdf = pci_device_node
3983             .pci_bdf
3984             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3985         let pci_segment_id = pci_device_bdf.segment();
3986 
3987         let pci_device_handle = pci_device_node
3988             .pci_device_handle
3989             .as_ref()
3990             .ok_or(DeviceManagerError::MissingPciDevice)?;
3991         #[allow(irrefutable_let_patterns)]
3992         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
3993             let device_type = VirtioDeviceType::from(
3994                 virtio_pci_device
3995                     .lock()
3996                     .unwrap()
3997                     .virtio_device()
3998                     .lock()
3999                     .unwrap()
4000                     .device_type(),
4001             );
4002             match device_type {
4003                 VirtioDeviceType::Net
4004                 | VirtioDeviceType::Block
4005                 | VirtioDeviceType::Pmem
4006                 | VirtioDeviceType::Fs
4007                 | VirtioDeviceType::Vsock => {}
4008                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
4009             }
4010         }
4011 
4012         // Update the PCID bitmap
4013         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
4014 
4015         Ok(())
4016     }
4017 
4018     pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
4019         info!(
4020             "Ejecting device_id = {} on segment_id={}",
4021             device_id, pci_segment_id
4022         );
4023 
4024         // Convert the device ID into the corresponding b/d/f.
4025         let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);
4026 
4027         // Give the PCI device ID back to the PCI bus.
4028         self.pci_segments[pci_segment_id as usize]
4029             .pci_bus
4030             .lock()
4031             .unwrap()
4032             .put_device_id(device_id as usize)
4033             .map_err(DeviceManagerError::PutPciDeviceId)?;
4034 
4035         // Remove the device from the device tree along with its children.
4036         let mut device_tree = self.device_tree.lock().unwrap();
4037         let pci_device_node = device_tree
4038             .remove_node_by_pci_bdf(pci_device_bdf)
4039             .ok_or(DeviceManagerError::MissingPciDevice)?;
4040 
4041         // For VFIO and vfio-user the PCI device id is the id.
4042         // For virtio we overwrite it later as we want the id of the
4043         // underlying device.
4044         let mut id = pci_device_node.id;
4045         let pci_device_handle = pci_device_node
4046             .pci_device_handle
4047             .ok_or(DeviceManagerError::MissingPciDevice)?;
4048         if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
4049             // The virtio-pci device has a single child
4050             if !pci_device_node.children.is_empty() {
4051                 assert_eq!(pci_device_node.children.len(), 1);
4052                 let child_id = &pci_device_node.children[0];
4053                 id.clone_from(child_id);
4054             }
4055         }
4056         for child in pci_device_node.children.iter() {
4057             device_tree.remove(child);
4058         }
4059 
4060         let mut iommu_attached = false;
4061         if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
4062             if iommu_attached_devices.contains(&pci_device_bdf) {
4063                 iommu_attached = true;
4064             }
4065         }
4066 
4067         let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
4068             // No need to remove any virtio-mem mapping here as the container outlives all devices
4069             PciDeviceHandle::Vfio(vfio_pci_device) => {
4070                 for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
4071                     self.mmio_regions
4072                         .lock()
4073                         .unwrap()
4074                         .retain(|x| x.start != mmio_region.start)
4075                 }
4076 
4077                 (
4078                     Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
4079                     Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
4080                     None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
4081                     false,
4082                 )
4083             }
4084             PciDeviceHandle::Virtio(virtio_pci_device) => {
4085                 let dev = virtio_pci_device.lock().unwrap();
4086                 let bar_addr = dev.config_bar_addr();
4087                 for (event, addr) in dev.ioeventfds(bar_addr) {
4088                     let io_addr = IoEventAddress::Mmio(addr);
4089                     self.address_manager
4090                         .vm
4091                         .unregister_ioevent(event, &io_addr)
4092                         .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
4093                 }
4094 
4095                 if let Some(dma_handler) = dev.dma_handler() {
4096                     if !iommu_attached {
4097                         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
4098                             for region in zone.regions() {
4099                                 let iova = region.start_addr().0;
4100                                 let size = region.len();
4101                                 dma_handler
4102                                     .unmap(iova, size)
4103                                     .map_err(DeviceManagerError::VirtioDmaUnmap)?;
4104                             }
4105                         }
4106                     }
4107                 }
4108 
4109                 (
4110                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
4111                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
4112                     Some(dev.virtio_device()),
4113                     dev.dma_handler().is_some() && !iommu_attached,
4114                 )
4115             }
4116             PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
4117                 let mut dev = vfio_user_pci_device.lock().unwrap();
4118                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
4119                     for region in zone.regions() {
4120                         dev.dma_unmap(region)
4121                             .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
4122                     }
4123                 }
4124 
4125                 (
4126                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
4127                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
4128                     None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
4129                     true,
4130                 )
4131             }
4132         };
4133 
4134         if remove_dma_handler {
4135             for virtio_mem_device in self.virtio_mem_devices.iter() {
4136                 virtio_mem_device
4137                     .lock()
4138                     .unwrap()
4139                     .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
4140                         pci_device_bdf.into(),
4141                     ))
4142                     .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
4143             }
4144         }
4145 
4146         // Free the allocated BARs
4147         pci_device
4148             .lock()
4149             .unwrap()
4150             .free_bars(
4151                 &mut self.address_manager.allocator.lock().unwrap(),
4152                 &mut self.pci_segments[pci_segment_id as usize]
4153                     .mem32_allocator
4154                     .lock()
4155                     .unwrap(),
4156                 &mut self.pci_segments[pci_segment_id as usize]
4157                     .mem64_allocator
4158                     .lock()
4159                     .unwrap(),
4160             )
4161             .map_err(DeviceManagerError::FreePciBars)?;
4162 
4163         // Remove the device from the PCI bus
4164         self.pci_segments[pci_segment_id as usize]
4165             .pci_bus
4166             .lock()
4167             .unwrap()
4168             .remove_by_device(&pci_device)
4169             .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;
4170 
4171         #[cfg(target_arch = "x86_64")]
4172         // Remove the device from the IO bus
4173         self.io_bus()
4174             .remove_by_device(&bus_device)
4175             .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;
4176 
4177         // Remove the device from the MMIO bus
4178         self.mmio_bus()
4179             .remove_by_device(&bus_device)
4180             .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;
4181 
4182         // Remove the device from the list of BusDevice held by the
4183         // DeviceManager.
4184         self.bus_devices
4185             .retain(|dev| !Arc::ptr_eq(dev, &bus_device));
4186 
4187         // Shutdown and remove the underlying virtio-device if present
4188         if let Some(virtio_device) = virtio_device {
4189             for mapping in virtio_device.lock().unwrap().userspace_mappings() {
4190                 self.memory_manager
4191                     .lock()
4192                     .unwrap()
4193                     .remove_userspace_mapping(
4194                         mapping.addr.raw_value(),
4195                         mapping.len,
4196                         mapping.host_addr,
4197                         mapping.mergeable,
4198                         mapping.mem_slot,
4199                     )
4200                     .map_err(DeviceManagerError::MemoryManager)?;
4201             }
4202 
4203             virtio_device.lock().unwrap().shutdown();
4204 
4205             self.virtio_devices
4206                 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
4207         }
4208 
4209         event!(
4210             "vm",
4211             "device-removed",
4212             "id",
4213             &id,
4214             "bdf",
4215             pci_device_bdf.to_string()
4216         );
4217 
4218         // At this point, the device has been removed from all the list and
4219         // buses where it was stored. At the end of this function, after
4220         // any_device, bus_device and pci_device are released, the actual
4221         // device will be dropped.
4222         Ok(())
4223     }
4224 
4225     fn hotplug_virtio_pci_device(
4226         &mut self,
4227         handle: MetaVirtioDevice,
4228     ) -> DeviceManagerResult<PciDeviceInfo> {
4229         // Add the virtio device to the device manager list. This is important
4230         // as the list is used to notify virtio devices about memory updates
4231         // for instance.
4232         self.virtio_devices.push(handle.clone());
4233 
4234         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
4235             self.iommu_mapping.clone()
4236         } else {
4237             None
4238         };
4239 
4240         let bdf = self.add_virtio_pci_device(
4241             handle.virtio_device,
4242             &mapping,
4243             handle.id.clone(),
4244             handle.pci_segment,
4245             handle.dma_handler,
4246         )?;
4247 
4248         // Update the PCIU bitmap
4249         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4250 
4251         Ok(PciDeviceInfo { id: handle.id, bdf })
4252     }
4253 
4254     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
4255         self.config
4256             .lock()
4257             .as_ref()
4258             .unwrap()
4259             .platform
4260             .as_ref()
4261             .map(|pc| {
4262                 pc.iommu_segments
4263                     .as_ref()
4264                     .map(|v| v.contains(&pci_segment_id))
4265                     .unwrap_or_default()
4266             })
4267             .unwrap_or_default()
4268     }
4269 
4270     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
4271         self.validate_identifier(&disk_cfg.id)?;
4272 
4273         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
4274             return Err(DeviceManagerError::InvalidIommuHotplug);
4275         }
4276 
4277         let device = self.make_virtio_block_device(disk_cfg)?;
4278         self.hotplug_virtio_pci_device(device)
4279     }
4280 
4281     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
4282         self.validate_identifier(&fs_cfg.id)?;
4283 
4284         let device = self.make_virtio_fs_device(fs_cfg)?;
4285         self.hotplug_virtio_pci_device(device)
4286     }
4287 
4288     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
4289         self.validate_identifier(&pmem_cfg.id)?;
4290 
4291         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
4292             return Err(DeviceManagerError::InvalidIommuHotplug);
4293         }
4294 
4295         let device = self.make_virtio_pmem_device(pmem_cfg)?;
4296         self.hotplug_virtio_pci_device(device)
4297     }
4298 
4299     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
4300         self.validate_identifier(&net_cfg.id)?;
4301 
4302         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
4303             return Err(DeviceManagerError::InvalidIommuHotplug);
4304         }
4305 
4306         let device = self.make_virtio_net_device(net_cfg)?;
4307         self.hotplug_virtio_pci_device(device)
4308     }
4309 
4310     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
4311         self.validate_identifier(&vdpa_cfg.id)?;
4312 
4313         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
4314             return Err(DeviceManagerError::InvalidIommuHotplug);
4315         }
4316 
4317         let device = self.make_vdpa_device(vdpa_cfg)?;
4318         self.hotplug_virtio_pci_device(device)
4319     }
4320 
4321     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4322         self.validate_identifier(&vsock_cfg.id)?;
4323 
4324         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4325             return Err(DeviceManagerError::InvalidIommuHotplug);
4326         }
4327 
4328         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4329         self.hotplug_virtio_pci_device(device)
4330     }
4331 
4332     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4333         let mut counters = HashMap::new();
4334 
4335         for handle in &self.virtio_devices {
4336             let virtio_device = handle.virtio_device.lock().unwrap();
4337             if let Some(device_counters) = virtio_device.counters() {
4338                 counters.insert(handle.id.clone(), device_counters.clone());
4339             }
4340         }
4341 
4342         counters
4343     }
4344 
4345     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4346         if let Some(balloon) = &self.balloon {
4347             return balloon
4348                 .lock()
4349                 .unwrap()
4350                 .resize(size)
4351                 .map_err(DeviceManagerError::VirtioBalloonResize);
4352         }
4353 
4354         warn!("No balloon setup: Can't resize the balloon");
4355         Err(DeviceManagerError::MissingVirtioBalloon)
4356     }
4357 
4358     pub fn balloon_size(&self) -> u64 {
4359         if let Some(balloon) = &self.balloon {
4360             return balloon.lock().unwrap().get_actual();
4361         }
4362 
4363         0
4364     }
4365 
4366     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4367         self.device_tree.clone()
4368     }
4369 
4370     #[cfg(target_arch = "x86_64")]
4371     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4372         self.ged_notification_device
4373             .as_ref()
4374             .unwrap()
4375             .lock()
4376             .unwrap()
4377             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4378             .map_err(DeviceManagerError::PowerButtonNotification)
4379     }
4380 
4381     #[cfg(target_arch = "aarch64")]
4382     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4383         // There are two use cases:
4384         // 1. Users will use direct kernel boot with device tree.
4385         // 2. Users will use ACPI+UEFI boot.
4386 
4387         // Trigger a GPIO pin 3 event to satisfy use case 1.
4388         self.gpio_device
4389             .as_ref()
4390             .unwrap()
4391             .lock()
4392             .unwrap()
4393             .trigger_key(3)
4394             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4395         // Trigger a GED power button event to satisfy use case 2.
4396         return self
4397             .ged_notification_device
4398             .as_ref()
4399             .unwrap()
4400             .lock()
4401             .unwrap()
4402             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4403             .map_err(DeviceManagerError::PowerButtonNotification);
4404     }
4405 
4406     pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
4407         &self.iommu_attached_devices
4408     }
4409 
4410     fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4411         if let Some(id) = id {
4412             if id.starts_with("__") {
4413                 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4414             }
4415 
4416             if self.device_tree.lock().unwrap().contains_key(id) {
4417                 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4418             }
4419         }
4420 
4421         Ok(())
4422     }
4423 
4424     pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
4425         &self.acpi_platform_addresses
4426     }
4427 }
4428 
4429 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4430     for (numa_node_id, numa_node) in numa_nodes.iter() {
4431         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4432             return Some(*numa_node_id);
4433         }
4434     }
4435 
4436     None
4437 }
4438 
4439 fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
4440     for (numa_node_id, numa_node) in numa_nodes.iter() {
4441         if numa_node.pci_segments.contains(&pci_segment_id) {
4442             return *numa_node_id;
4443         }
4444     }
4445 
4446     0
4447 }
4448 
4449 struct TpmDevice {}
4450 
4451 impl Aml for TpmDevice {
4452     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4453         aml::Device::new(
4454             "TPM2".into(),
4455             vec![
4456                 &aml::Name::new("_HID".into(), &"MSFT0101"),
4457                 &aml::Name::new("_STA".into(), &(0xF_usize)),
4458                 &aml::Name::new(
4459                     "_CRS".into(),
4460                     &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
4461                         true,
4462                         layout::TPM_START.0 as u32,
4463                         layout::TPM_SIZE as u32,
4464                     )]),
4465                 ),
4466             ],
4467         )
4468         .to_aml_bytes(sink)
4469     }
4470 }
4471 
4472 impl Aml for DeviceManager {
4473     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4474         #[cfg(target_arch = "aarch64")]
4475         use arch::aarch64::DeviceInfoForFdt;
4476 
4477         let mut pci_scan_methods = Vec::new();
4478         for i in 0..self.pci_segments.len() {
4479             pci_scan_methods.push(aml::MethodCall::new(
4480                 format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
4481                 vec![],
4482             ));
4483         }
4484         let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
4485         for method in &pci_scan_methods {
4486             pci_scan_inner.push(method)
4487         }
4488 
4489         // PCI hotplug controller
4490         aml::Device::new(
4491             "_SB_.PHPR".into(),
4492             vec![
4493                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
4494                 &aml::Name::new("_STA".into(), &0x0bu8),
4495                 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
4496                 &aml::Mutex::new("BLCK".into(), 0),
4497                 &aml::Name::new(
4498                     "_CRS".into(),
4499                     &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
4500                         aml::AddressSpaceCacheable::NotCacheable,
4501                         true,
4502                         self.acpi_address.0,
4503                         self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
4504                         None,
4505                     )]),
4506                 ),
4507                 // OpRegion and Fields map MMIO range into individual field values
4508                 &aml::OpRegion::new(
4509                     "PCST".into(),
4510                     aml::OpRegionSpace::SystemMemory,
4511                     &(self.acpi_address.0 as usize),
4512                     &DEVICE_MANAGER_ACPI_SIZE,
4513                 ),
4514                 &aml::Field::new(
4515                     "PCST".into(),
4516                     aml::FieldAccessType::DWord,
4517                     aml::FieldLockRule::NoLock,
4518                     aml::FieldUpdateRule::WriteAsZeroes,
4519                     vec![
4520                         aml::FieldEntry::Named(*b"PCIU", 32),
4521                         aml::FieldEntry::Named(*b"PCID", 32),
4522                         aml::FieldEntry::Named(*b"B0EJ", 32),
4523                         aml::FieldEntry::Named(*b"PSEG", 32),
4524                     ],
4525                 ),
4526                 &aml::Method::new(
4527                     "PCEJ".into(),
4528                     2,
4529                     true,
4530                     vec![
4531                         // Take lock defined above
4532                         &aml::Acquire::new("BLCK".into(), 0xffff),
4533                         // Choose the current segment
4534                         &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
4535                         // Write PCI bus number (in first argument) to I/O port via field
4536                         &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
4537                         // Release lock
4538                         &aml::Release::new("BLCK".into()),
4539                         // Return 0
4540                         &aml::Return::new(&aml::ZERO),
4541                     ],
4542                 ),
4543                 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
4544             ],
4545         )
4546         .to_aml_bytes(sink);
4547 
4548         for segment in &self.pci_segments {
4549             segment.to_aml_bytes(sink);
4550         }
4551 
4552         let mut mbrd_memory = Vec::new();
4553 
4554         for segment in &self.pci_segments {
4555             mbrd_memory.push(aml::Memory32Fixed::new(
4556                 true,
4557                 segment.mmio_config_address as u32,
4558                 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
4559             ))
4560         }
4561 
4562         let mut mbrd_memory_refs = Vec::new();
4563         for mbrd_memory_ref in &mbrd_memory {
4564             mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
4565         }
4566 
4567         aml::Device::new(
4568             "_SB_.MBRD".into(),
4569             vec![
4570                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
4571                 &aml::Name::new("_UID".into(), &aml::ZERO),
4572                 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
4573             ],
4574         )
4575         .to_aml_bytes(sink);
4576 
4577         // Serial device
4578         #[cfg(target_arch = "x86_64")]
4579         let serial_irq = 4;
4580         #[cfg(target_arch = "aarch64")]
4581         let serial_irq =
4582             if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
4583                 self.get_device_info()
4584                     .clone()
4585                     .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
4586                     .unwrap()
4587                     .irq()
4588             } else {
4589                 // If serial is turned off, add a fake device with invalid irq.
4590                 31
4591             };
4592         if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
4593             aml::Device::new(
4594                 "_SB_.COM1".into(),
4595                 vec![
4596                     &aml::Name::new(
4597                         "_HID".into(),
4598                         #[cfg(target_arch = "x86_64")]
4599                         &aml::EISAName::new("PNP0501"),
4600                         #[cfg(target_arch = "aarch64")]
4601                         &"ARMH0011",
4602                     ),
4603                     &aml::Name::new("_UID".into(), &aml::ZERO),
4604                     &aml::Name::new("_DDN".into(), &"COM1"),
4605                     &aml::Name::new(
4606                         "_CRS".into(),
4607                         &aml::ResourceTemplate::new(vec![
4608                             &aml::Interrupt::new(true, true, false, false, serial_irq),
4609                             #[cfg(target_arch = "x86_64")]
4610                             &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
4611                             #[cfg(target_arch = "aarch64")]
4612                             &aml::Memory32Fixed::new(
4613                                 true,
4614                                 arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
4615                                 MMIO_LEN as u32,
4616                             ),
4617                         ]),
4618                     ),
4619                 ],
4620             )
4621             .to_aml_bytes(sink);
4622         }
4623 
4624         aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);
4625 
4626         aml::Device::new(
4627             "_SB_.PWRB".into(),
4628             vec![
4629                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
4630                 &aml::Name::new("_UID".into(), &aml::ZERO),
4631             ],
4632         )
4633         .to_aml_bytes(sink);
4634 
4635         if self.config.lock().unwrap().tpm.is_some() {
4636             // Add tpm device
4637             TpmDevice {}.to_aml_bytes(sink);
4638         }
4639 
4640         self.ged_notification_device
4641             .as_ref()
4642             .unwrap()
4643             .lock()
4644             .unwrap()
4645             .to_aml_bytes(sink)
4646     }
4647 }
4648 
4649 impl Pausable for DeviceManager {
4650     fn pause(&mut self) -> result::Result<(), MigratableError> {
4651         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4652             if let Some(migratable) = &device_node.migratable {
4653                 migratable.lock().unwrap().pause()?;
4654             }
4655         }
4656         // On AArch64, the pause of device manager needs to trigger
4657         // a "pause" of GIC, which will flush the GIC pending tables
4658         // and ITS tables to guest RAM.
4659         #[cfg(target_arch = "aarch64")]
4660         {
4661             self.get_interrupt_controller()
4662                 .unwrap()
4663                 .lock()
4664                 .unwrap()
4665                 .pause()?;
4666         };
4667 
4668         Ok(())
4669     }
4670 
4671     fn resume(&mut self) -> result::Result<(), MigratableError> {
4672         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4673             if let Some(migratable) = &device_node.migratable {
4674                 migratable.lock().unwrap().resume()?;
4675             }
4676         }
4677 
4678         Ok(())
4679     }
4680 }
4681 
4682 impl Snapshottable for DeviceManager {
4683     fn id(&self) -> String {
4684         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4685     }
4686 
4687     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4688         let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);
4689 
4690         // We aggregate all devices snapshots.
4691         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4692             if let Some(migratable) = &device_node.migratable {
4693                 let mut migratable = migratable.lock().unwrap();
4694                 snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
4695             }
4696         }
4697 
4698         Ok(snapshot)
4699     }
4700 }
4701 
// The empty body means no `Transportable` method is overridden here: the
// device manager relies entirely on whatever the trait provides by default.
impl Transportable for DeviceManager {}
4703 
4704 impl Migratable for DeviceManager {
4705     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4706         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4707             if let Some(migratable) = &device_node.migratable {
4708                 migratable.lock().unwrap().start_dirty_log()?;
4709             }
4710         }
4711         Ok(())
4712     }
4713 
4714     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4715         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4716             if let Some(migratable) = &device_node.migratable {
4717                 migratable.lock().unwrap().stop_dirty_log()?;
4718             }
4719         }
4720         Ok(())
4721     }
4722 
4723     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4724         let mut tables = Vec::new();
4725         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4726             if let Some(migratable) = &device_node.migratable {
4727                 tables.push(migratable.lock().unwrap().dirty_log()?);
4728             }
4729         }
4730         Ok(MemoryRangeTable::new_from_tables(tables))
4731     }
4732 
4733     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4734         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4735             if let Some(migratable) = &device_node.migratable {
4736                 migratable.lock().unwrap().start_migration()?;
4737             }
4738         }
4739         Ok(())
4740     }
4741 
4742     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4743         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4744             if let Some(migratable) = &device_node.migratable {
4745                 migratable.lock().unwrap().complete_migration()?;
4746             }
4747         }
4748         Ok(())
4749     }
4750 }
4751 
// Layout of the register block served by the `BusDevice` implementation of
// `DeviceManager`. Each register is 32 bits wide; the registers are laid out
// back to back.

/// PCIU: bitmap of devices that came up on the selected segment.
const PCIU_FIELD_OFFSET: u64 = 0;
const PCIU_FIELD_SIZE: usize = std::mem::size_of::<u32>();

/// PCID: bitmap of devices that went down on the selected segment.
const PCID_FIELD_OFFSET: u64 = 4;
const PCID_FIELD_SIZE: usize = std::mem::size_of::<u32>();

/// B0EJ: bitmap of slots to eject (acted upon on write, reads back as zero).
const B0EJ_FIELD_OFFSET: u64 = 8;
const B0EJ_FIELD_SIZE: usize = std::mem::size_of::<u32>();

/// PSEG: index of the PCI segment the other registers operate on.
const PSEG_FIELD_OFFSET: u64 = 12;
const PSEG_FIELD_SIZE: usize = std::mem::size_of::<u32>();
4760 
4761 impl BusDevice for DeviceManager {
4762     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4763         match offset {
4764             PCIU_FIELD_OFFSET => {
4765                 assert!(data.len() == PCIU_FIELD_SIZE);
4766                 data.copy_from_slice(
4767                     &self.pci_segments[self.selected_segment]
4768                         .pci_devices_up
4769                         .to_le_bytes(),
4770                 );
4771                 // Clear the PCIU bitmap
4772                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4773             }
4774             PCID_FIELD_OFFSET => {
4775                 assert!(data.len() == PCID_FIELD_SIZE);
4776                 data.copy_from_slice(
4777                     &self.pci_segments[self.selected_segment]
4778                         .pci_devices_down
4779                         .to_le_bytes(),
4780                 );
4781                 // Clear the PCID bitmap
4782                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4783             }
4784             B0EJ_FIELD_OFFSET => {
4785                 assert!(data.len() == B0EJ_FIELD_SIZE);
4786                 // Always return an empty bitmap since the eject is always
4787                 // taken care of right away during a write access.
4788                 data.fill(0);
4789             }
4790             PSEG_FIELD_OFFSET => {
4791                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4792                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4793             }
4794             _ => error!(
4795                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4796                 base, offset
4797             ),
4798         }
4799 
4800         debug!(
4801             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4802             base, offset, data
4803         )
4804     }
4805 
4806     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4807         match offset {
4808             B0EJ_FIELD_OFFSET => {
4809                 assert!(data.len() == B0EJ_FIELD_SIZE);
4810                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4811                 data_array.copy_from_slice(data);
4812                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4813 
4814                 while slot_bitmap > 0 {
4815                     let slot_id = slot_bitmap.trailing_zeros();
4816                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4817                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4818                     }
4819                     slot_bitmap &= !(1 << slot_id);
4820                 }
4821             }
4822             PSEG_FIELD_OFFSET => {
4823                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4824                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4825                 data_array.copy_from_slice(data);
4826                 let selected_segment = u32::from_le_bytes(data_array) as usize;
4827                 if selected_segment >= self.pci_segments.len() {
4828                     error!(
4829                         "Segment selection out of range: {} >= {}",
4830                         selected_segment,
4831                         self.pci_segments.len()
4832                     );
4833                     return None;
4834                 }
4835                 self.selected_segment = selected_segment;
4836             }
4837             _ => error!(
4838                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4839                 base, offset
4840             ),
4841         }
4842 
4843         debug!(
4844             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4845             base, offset, data
4846         );
4847 
4848         None
4849     }
4850 }
4851 
4852 impl Drop for DeviceManager {
4853     fn drop(&mut self) {
4854         // Wake up the DeviceManager threads (mainly virtio device workers),
4855         // to avoid deadlock on waiting for paused/parked worker threads.
4856         if let Err(e) = self.resume() {
4857             error!("Error resuming DeviceManager: {:?}", e);
4858         }
4859 
4860         for handle in self.virtio_devices.drain(..) {
4861             handle.virtio_device.lock().unwrap().shutdown();
4862         }
4863 
4864         if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
4865             // SAFETY: FFI call
4866             let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
4867         }
4868     }
4869 }
4870 
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks how `create_mmio_allocators` splits the range
    /// `0x100000..0x400000` between segments for several weight sets.
    #[test]
    fn test_create_mmio_allocators() {
        // Each case: (number of segments, per-segment weights,
        // expected inclusive (base, end) of each resulting allocator).
        let cases = vec![
            // A single segment receives the whole range.
            (1, vec![1], vec![(0x100000, 0x3fffff)]),
            // Two equally weighted segments split the range in half.
            (
                2,
                vec![1, 1],
                vec![(0x100000, 0x27ffff), (0x280000, 0x3fffff)],
            ),
            // A 2:1 weighting gives the first segment two thirds.
            (
                2,
                vec![2, 1],
                vec![(0x100000, 0x2fffff), (0x300000, 0x3fffff)],
            ),
        ];

        for (segment_count, weights, expected_ranges) in cases {
            let res = create_mmio_allocators(0x100000, 0x400000, segment_count, weights, 4 << 10);
            assert_eq!(res.len(), expected_ranges.len());

            for (allocator, (base, end)) in res.iter().zip(expected_ranges) {
                let allocator = allocator.lock().unwrap();
                assert_eq!(allocator.base(), vm_memory::GuestAddress(base));
                assert_eq!(allocator.end(), vm_memory::GuestAddress(end));
            }
        }
    }
}
4927