xref: /cloud-hypervisor/vmm/src/device_manager.rs (revision 3ce0fef7fd546467398c914dbc74d8542e45cf6f)
1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11 
12 use crate::config::{
13     ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
14     VdpaConfig, VhostMode, VmConfig, VsockConfig,
15 };
16 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE};
17 use crate::device_tree::{DeviceNode, DeviceTree};
18 use crate::interrupt::LegacyUserspaceInterruptManager;
19 use crate::interrupt::MsiInterruptManager;
20 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
21 use crate::pci_segment::PciSegment;
22 use crate::seccomp_filters::{get_seccomp_filter, Thread};
23 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
24 use crate::sigwinch_listener::start_sigwinch_listener;
25 use crate::GuestRegionMmap;
26 use crate::PciDeviceInfo;
27 use crate::{device_node, DEVICE_MANAGER_SNAPSHOT_ID};
28 use acpi_tables::sdt::GenericAddress;
29 use acpi_tables::{aml, Aml};
30 use anyhow::anyhow;
31 use arch::layout;
32 #[cfg(target_arch = "x86_64")]
33 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
34 use arch::NumaNodes;
35 #[cfg(target_arch = "aarch64")]
36 use arch::{DeviceType, MmioDeviceInfo};
37 use block::{
38     async_io::DiskFile, block_aio_is_supported, block_io_uring_is_supported, detect_image_type,
39     fixed_vhd_sync::FixedVhdDiskSync, qcow, qcow_sync::QcowDiskSync, raw_async_aio::RawFileDiskAio,
40     raw_sync::RawFileDiskSync, vhdx, vhdx_sync::VhdxDiskSync, ImageType,
41 };
42 #[cfg(feature = "io_uring")]
43 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk};
44 #[cfg(target_arch = "aarch64")]
45 use devices::gic;
46 #[cfg(target_arch = "x86_64")]
47 use devices::ioapic;
48 #[cfg(target_arch = "aarch64")]
49 use devices::legacy::Pl011;
50 #[cfg(target_arch = "x86_64")]
51 use devices::legacy::Serial;
52 use devices::{
53     interrupt_controller, interrupt_controller::InterruptController, AcpiNotificationFlags,
54 };
55 use hypervisor::{HypervisorType, IoEventAddress};
56 use libc::{
57     cfmakeraw, isatty, tcgetattr, tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED,
58     O_TMPFILE, PROT_READ, PROT_WRITE, TCSANOW,
59 };
60 use pci::{
61     DeviceRelocation, PciBarRegionType, PciBdf, PciDevice, VfioPciDevice, VfioUserDmaMapping,
62     VfioUserPciDevice, VfioUserPciDeviceError,
63 };
64 use rate_limiter::group::RateLimiterGroup;
65 use seccompiler::SeccompAction;
66 use serde::{Deserialize, Serialize};
67 use std::collections::{BTreeSet, HashMap};
68 use std::fs::{read_link, File, OpenOptions};
69 use std::io::{self, stdout, Seek, SeekFrom};
70 use std::mem::zeroed;
71 use std::num::Wrapping;
72 use std::os::unix::fs::OpenOptionsExt;
73 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
74 use std::path::PathBuf;
75 use std::result;
76 use std::sync::{Arc, Mutex};
77 use std::time::Instant;
78 use tracer::trace_scoped;
79 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd};
80 use virtio_devices::transport::VirtioTransport;
81 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator};
82 use virtio_devices::vhost_user::VhostUserConfig;
83 use virtio_devices::{
84     AccessPlatformMapping, ActivateError, VdpaDmaMapping, VirtioMemMappingSource,
85 };
86 use virtio_devices::{Endpoint, IommuMapping};
87 use vm_allocator::{AddressAllocator, SystemAllocator};
88 use vm_device::dma_mapping::vfio::VfioDmaMapping;
89 use vm_device::dma_mapping::ExternalDmaMapping;
90 use vm_device::interrupt::{
91     InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
92 };
93 use vm_device::{Bus, BusDevice, Resource};
94 use vm_memory::guest_memory::FileOffset;
95 use vm_memory::GuestMemoryRegion;
96 use vm_memory::{Address, GuestAddress, GuestUsize, MmapRegion};
97 #[cfg(target_arch = "x86_64")]
98 use vm_memory::{GuestAddressSpace, GuestMemory};
99 use vm_migration::{
100     protocol::MemoryRangeTable, snapshot_from_id, versioned_state_from_id, Migratable,
101     MigratableError, Pausable, Snapshot, SnapshotData, Snapshottable, Transportable,
102 };
103 use vm_virtio::AccessPlatform;
104 use vm_virtio::VirtioDeviceType;
105 use vmm_sys_util::eventfd::EventFd;
106 
107 #[cfg(target_arch = "aarch64")]
108 const MMIO_LEN: u64 = 0x1000;
109 
110 // Singleton devices / devices the user cannot name
111 #[cfg(target_arch = "x86_64")]
112 const IOAPIC_DEVICE_NAME: &str = "__ioapic";
113 const SERIAL_DEVICE_NAME: &str = "__serial";
114 #[cfg(target_arch = "aarch64")]
115 const GPIO_DEVICE_NAME: &str = "__gpio";
116 const RNG_DEVICE_NAME: &str = "__rng";
117 const IOMMU_DEVICE_NAME: &str = "__iommu";
118 const BALLOON_DEVICE_NAME: &str = "__balloon";
119 const CONSOLE_DEVICE_NAME: &str = "__console";
120 const PVPANIC_DEVICE_NAME: &str = "__pvpanic";
121 
122 // Devices that the user may name and for which we generate
123 // identifiers if the user doesn't give one
124 const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
125 const FS_DEVICE_NAME_PREFIX: &str = "_fs";
126 const NET_DEVICE_NAME_PREFIX: &str = "_net";
127 const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
128 const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
129 const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
130 const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
131 const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
132 const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
133 const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
134 
135 /// Errors associated with device manager
136 #[derive(Debug)]
137 pub enum DeviceManagerError {
138     /// Cannot create EventFd.
139     EventFd(io::Error),
140 
141     /// Cannot open disk path
142     Disk(io::Error),
143 
144     /// Cannot create vhost-user-net device
145     CreateVhostUserNet(virtio_devices::vhost_user::Error),
146 
147     /// Cannot create virtio-blk device
148     CreateVirtioBlock(io::Error),
149 
150     /// Cannot create virtio-net device
151     CreateVirtioNet(virtio_devices::net::Error),
152 
153     /// Cannot create virtio-console device
154     CreateVirtioConsole(io::Error),
155 
156     /// Cannot create virtio-rng device
157     CreateVirtioRng(io::Error),
158 
159     /// Cannot create virtio-fs device
160     CreateVirtioFs(virtio_devices::vhost_user::Error),
161 
162     /// Virtio-fs device was created without a socket.
163     NoVirtioFsSock,
164 
165     /// Cannot create vhost-user-blk device
166     CreateVhostUserBlk(virtio_devices::vhost_user::Error),
167 
168     /// Cannot create virtio-pmem device
169     CreateVirtioPmem(io::Error),
170 
171     /// Cannot create vDPA device
172     CreateVdpa(virtio_devices::vdpa::Error),
173 
174     /// Cannot create virtio-vsock device
175     CreateVirtioVsock(io::Error),
176 
177     /// Cannot create tpm device
178     CreateTpmDevice(anyhow::Error),
179 
180     /// Failed to convert Path to &str for the vDPA device.
181     CreateVdpaConvertPath,
182 
183     /// Failed to convert Path to &str for the virtio-vsock device.
184     CreateVsockConvertPath,
185 
186     /// Cannot create virtio-vsock backend
187     CreateVsockBackend(virtio_devices::vsock::VsockUnixError),
188 
189     /// Cannot create virtio-iommu device
190     CreateVirtioIommu(io::Error),
191 
192     /// Cannot create virtio-balloon device
193     CreateVirtioBalloon(io::Error),
194 
195     /// Cannot create virtio-watchdog device
196     CreateVirtioWatchdog(io::Error),
197 
198     /// Failed to parse disk image format
199     DetectImageType(io::Error),
200 
201     /// Cannot open qcow disk path
202     QcowDeviceCreate(qcow::Error),
203 
204     /// Cannot create serial manager
205     CreateSerialManager(SerialManagerError),
206 
207     /// Cannot spawn the serial manager thread
208     SpawnSerialManager(SerialManagerError),
209 
210     /// Cannot open tap interface
211     OpenTap(net_util::TapError),
212 
213     /// Cannot allocate IRQ.
214     AllocateIrq,
215 
216     /// Cannot configure the IRQ.
217     Irq(vmm_sys_util::errno::Error),
218 
219     /// Cannot allocate PCI BARs
220     AllocateBars(pci::PciDeviceError),
221 
222     /// Could not free the BARs associated with a PCI device.
223     FreePciBars(pci::PciDeviceError),
224 
225     /// Cannot register ioevent.
226     RegisterIoevent(anyhow::Error),
227 
228     /// Cannot unregister ioevent.
229     UnRegisterIoevent(anyhow::Error),
230 
231     /// Cannot create virtio device
232     VirtioDevice(virtio_devices::transport::VirtioPciDeviceError),
233 
234     /// Cannot add PCI device
235     AddPciDevice(pci::PciRootError),
236 
237     /// Cannot open persistent memory file
238     PmemFileOpen(io::Error),
239 
240     /// Cannot set persistent memory file size
241     PmemFileSetLen(io::Error),
242 
243     /// Cannot find a memory range for persistent memory
244     PmemRangeAllocation,
245 
246     /// Cannot find a memory range for virtio-fs
247     FsRangeAllocation,
248 
249     /// Error creating serial output file
250     SerialOutputFileOpen(io::Error),
251 
252     /// Error creating console output file
253     ConsoleOutputFileOpen(io::Error),
254 
255     /// Error creating serial pty
256     SerialPtyOpen(io::Error),
257 
258     /// Error creating console pty
259     ConsolePtyOpen(io::Error),
260 
261     /// Error setting pty raw mode
262     SetPtyRaw(vmm_sys_util::errno::Error),
263 
264     /// Error getting pty peer
265     GetPtyPeer(vmm_sys_util::errno::Error),
266 
267     /// Cannot create a VFIO device
268     VfioCreate(vfio_ioctls::VfioError),
269 
270     /// Cannot create a VFIO PCI device
271     VfioPciCreate(pci::VfioPciError),
272 
273     /// Failed to map VFIO MMIO region.
274     VfioMapRegion(pci::VfioPciError),
275 
276     /// Failed to DMA map VFIO device.
277     VfioDmaMap(vfio_ioctls::VfioError),
278 
279     /// Failed to DMA unmap VFIO device.
280     VfioDmaUnmap(pci::VfioPciError),
281 
282     /// Failed to create the passthrough device.
283     CreatePassthroughDevice(anyhow::Error),
284 
285     /// Failed to memory map.
286     Mmap(io::Error),
287 
288     /// Cannot add legacy device to Bus.
289     BusError(vm_device::BusError),
290 
291     /// Failed to allocate IO port
292     AllocateIoPort,
293 
294     /// Failed to allocate MMIO address
295     AllocateMmioAddress,
296 
297     /// Failed to make hotplug notification
298     HotPlugNotification(io::Error),
299 
300     /// Error from a memory manager operation
301     MemoryManager(MemoryManagerError),
302 
303     /// Failed to create new interrupt source group.
304     CreateInterruptGroup(io::Error),
305 
306     /// Failed to update interrupt source group.
307     UpdateInterruptGroup(io::Error),
308 
309     /// Failed to create interrupt controller.
310     CreateInterruptController(interrupt_controller::Error),
311 
312     /// Failed to create a new MmapRegion instance.
313     NewMmapRegion(vm_memory::mmap::MmapRegionError),
314 
315     /// Failed to clone a File.
316     CloneFile(io::Error),
317 
318     /// Failed to create socket file
319     CreateSocketFile(io::Error),
320 
321     /// Failed to spawn the network backend
322     SpawnNetBackend(io::Error),
323 
324     /// Failed to spawn the block backend
325     SpawnBlockBackend(io::Error),
326 
327     /// Missing PCI bus.
328     NoPciBus,
329 
330     /// Could not find an available device name.
331     NoAvailableDeviceName,
332 
333     /// Missing PCI device.
334     MissingPciDevice,
335 
336     /// Failed to remove a PCI device from the PCI bus.
337     RemoveDeviceFromPciBus(pci::PciRootError),
338 
339     /// Failed to remove a bus device from the IO bus.
340     RemoveDeviceFromIoBus(vm_device::BusError),
341 
342     /// Failed to remove a bus device from the MMIO bus.
343     RemoveDeviceFromMmioBus(vm_device::BusError),
344 
345     /// Failed to find the device corresponding to a specific PCI b/d/f.
346     UnknownPciBdf(u32),
347 
348     /// Not allowed to remove this type of device from the VM.
349     RemovalNotAllowed(vm_virtio::VirtioDeviceType),
350 
351     /// Failed to find device corresponding to the given identifier.
352     UnknownDeviceId(String),
353 
354     /// Failed to find an available PCI device ID.
355     NextPciDeviceId(pci::PciRootError),
356 
357     /// Could not reserve the PCI device ID.
358     GetPciDeviceId(pci::PciRootError),
359 
360     /// Could not give the PCI device ID back.
361     PutPciDeviceId(pci::PciRootError),
362 
363     /// No disk path was specified when one was expected
364     NoDiskPath,
365 
366     /// Failed to update guest memory for virtio device.
367     UpdateMemoryForVirtioDevice(virtio_devices::Error),
368 
369     /// Cannot create virtio-mem device
370     CreateVirtioMem(io::Error),
371 
372     /// Cannot find a memory range for virtio-mem memory
373     VirtioMemRangeAllocation,
374 
375     /// Failed to update guest memory for VFIO PCI device.
376     UpdateMemoryForVfioPciDevice(vfio_ioctls::VfioError),
377 
378     /// Trying to use a directory for pmem but no size specified
379     PmemWithDirectorySizeMissing,
380 
381     /// Trying to use a size that is not multiple of 2MiB
382     PmemSizeNotAligned,
383 
384     /// Could not find the node in the device tree.
385     MissingNode,
386 
387     /// Resource was already found.
388     ResourceAlreadyExists,
389 
390     /// Expected resources for virtio-pmem could not be found.
391     MissingVirtioPmemResources,
392 
393     /// Missing PCI b/d/f from the DeviceNode.
394     MissingDeviceNodePciBdf,
395 
396     /// No support for device passthrough
397     NoDevicePassthroughSupport,
398 
399     /// No socket option support for console device
400     NoSocketOptionSupportForConsoleDevice,
401 
402     /// Failed to resize virtio-balloon
403     VirtioBalloonResize(virtio_devices::balloon::Error),
404 
405     /// Missing virtio-balloon, can't proceed as expected.
406     MissingVirtioBalloon,
407 
408     /// Missing virtual IOMMU device
409     MissingVirtualIommu,
410 
411     /// Failed to do power button notification
412     PowerButtonNotification(io::Error),
413 
414     /// Failed to do AArch64 GPIO power button notification
415     #[cfg(target_arch = "aarch64")]
416     AArch64PowerButtonNotification(devices::legacy::GpioDeviceError),
417 
418     /// Failed to set O_DIRECT flag to file descriptor
419     SetDirectIo,
420 
421     /// Failed to create FixedVhdDiskAsync
422     CreateFixedVhdDiskAsync(io::Error),
423 
424     /// Failed to create FixedVhdDiskSync
425     CreateFixedVhdDiskSync(io::Error),
426 
427     /// Failed to create QcowDiskSync
428     CreateQcowDiskSync(qcow::Error),
429 
430     /// Failed to create FixedVhdxDiskSync
431     CreateFixedVhdxDiskSync(vhdx::VhdxError),
432 
433     /// Failed to add DMA mapping handler to virtio-mem device.
434     AddDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
435 
436     /// Failed to remove DMA mapping handler from virtio-mem device.
437     RemoveDmaMappingHandlerVirtioMem(virtio_devices::mem::Error),
438 
439     /// Failed to create vfio-user client
440     VfioUserCreateClient(vfio_user::Error),
441 
442     /// Failed to create VFIO user device
443     VfioUserCreate(VfioUserPciDeviceError),
444 
445     /// Failed to map region from VFIO user device into guest
446     VfioUserMapRegion(VfioUserPciDeviceError),
447 
448     /// Failed to DMA map VFIO user device.
449     VfioUserDmaMap(VfioUserPciDeviceError),
450 
451     /// Failed to DMA unmap VFIO user device.
452     VfioUserDmaUnmap(VfioUserPciDeviceError),
453 
454     /// Failed to update memory mappings for VFIO user device
455     UpdateMemoryForVfioUserPciDevice(VfioUserPciDeviceError),
456 
457     /// Cannot duplicate file descriptor
458     DupFd(vmm_sys_util::errno::Error),
459 
460     /// Failed to DMA map virtio device.
461     VirtioDmaMap(std::io::Error),
462 
463     /// Failed to DMA unmap virtio device.
464     VirtioDmaUnmap(std::io::Error),
465 
466     /// Cannot hotplug device behind vIOMMU
467     InvalidIommuHotplug,
468 
469     /// Invalid identifier as it is not unique.
470     IdentifierNotUnique(String),
471 
472     /// Invalid identifier
473     InvalidIdentifier(String),
474 
475     /// Error activating virtio device
476     VirtioActivate(ActivateError),
477 
478     /// Failed retrieving device state from snapshot
479     RestoreGetState(MigratableError),
480 
481     /// Cannot create a PvPanic device
482     PvPanicCreate(devices::pvpanic::PvPanicError),
483 
484     /// Cannot create a RateLimiterGroup
485     RateLimiterGroupCreate(rate_limiter::group::Error),
486 }
487 
488 pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;
489 
490 const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;
491 
492 const TIOCSPTLCK: libc::c_int = 0x4004_5431;
493 const TIOCGTPEER: libc::c_int = 0x5441;
494 
495 pub fn create_pty() -> io::Result<(File, File, PathBuf)> {
496     // Try to use /dev/pts/ptmx first then fall back to /dev/ptmx
497     // This is done to try and use the devpts filesystem that
498     // could be available for use in the process's namespace first.
499     // Ideally these are all the same file though but different
500     // kernels could have things setup differently.
501     // See https://www.kernel.org/doc/Documentation/filesystems/devpts.txt
502     // for further details.
503 
504     let custom_flags = libc::O_NONBLOCK;
505     let main = match OpenOptions::new()
506         .read(true)
507         .write(true)
508         .custom_flags(custom_flags)
509         .open("/dev/pts/ptmx")
510     {
511         Ok(f) => f,
512         _ => OpenOptions::new()
513             .read(true)
514             .write(true)
515             .custom_flags(custom_flags)
516             .open("/dev/ptmx")?,
517     };
518     let mut unlock: libc::c_ulong = 0;
519     // SAFETY: FFI call into libc, trivially safe
520     unsafe { libc::ioctl(main.as_raw_fd(), TIOCSPTLCK as _, &mut unlock) };
521 
522     // SAFETY: FFI call into libc, trivially safe
523     let sub_fd = unsafe {
524         libc::ioctl(
525             main.as_raw_fd(),
526             TIOCGTPEER as _,
527             libc::O_NOCTTY | libc::O_RDWR,
528         )
529     };
530     if sub_fd == -1 {
531         return vmm_sys_util::errno::errno_result().map_err(|e| e.into());
532     }
533 
534     let proc_path = PathBuf::from(format!("/proc/self/fd/{sub_fd}"));
535     let path = read_link(proc_path)?;
536 
537     // SAFETY: sub_fd is checked to be valid before being wrapped in File
538     Ok((main, unsafe { File::from_raw_fd(sub_fd) }, path))
539 }
540 
541 #[derive(Default)]
542 pub struct Console {
543     console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
544 }
545 
546 impl Console {
547     pub fn need_resize(&self) -> bool {
548         if let Some(_resizer) = self.console_resizer.as_ref() {
549             return true;
550         }
551 
552         false
553     }
554 
555     pub fn update_console_size(&self) {
556         if let Some(resizer) = self.console_resizer.as_ref() {
557             resizer.update_console_size()
558         }
559     }
560 }
561 
562 pub(crate) struct AddressManager {
563     pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
564     #[cfg(target_arch = "x86_64")]
565     pub(crate) io_bus: Arc<Bus>,
566     pub(crate) mmio_bus: Arc<Bus>,
567     pub(crate) vm: Arc<dyn hypervisor::Vm>,
568     device_tree: Arc<Mutex<DeviceTree>>,
569     pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
570     pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
571 }
572 
573 impl DeviceRelocation for AddressManager {
574     fn move_bar(
575         &self,
576         old_base: u64,
577         new_base: u64,
578         len: u64,
579         pci_dev: &mut dyn PciDevice,
580         region_type: PciBarRegionType,
581     ) -> std::result::Result<(), std::io::Error> {
582         match region_type {
583             PciBarRegionType::IoRegion => {
584                 #[cfg(target_arch = "x86_64")]
585                 {
586                     // Update system allocator
587                     self.allocator
588                         .lock()
589                         .unwrap()
590                         .free_io_addresses(GuestAddress(old_base), len as GuestUsize);
591 
592                     self.allocator
593                         .lock()
594                         .unwrap()
595                         .allocate_io_addresses(
596                             Some(GuestAddress(new_base)),
597                             len as GuestUsize,
598                             None,
599                         )
600                         .ok_or_else(|| {
601                             io::Error::new(io::ErrorKind::Other, "failed allocating new IO range")
602                         })?;
603 
604                     // Update PIO bus
605                     self.io_bus
606                         .update_range(old_base, len, new_base, len)
607                         .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
608                 }
609                 #[cfg(target_arch = "aarch64")]
610                 error!("I/O region is not supported");
611             }
612             PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
613                 let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
614                     &self.pci_mmio32_allocators
615                 } else {
616                     &self.pci_mmio64_allocators
617                 };
618 
619                 // Find the specific allocator that this BAR was allocated from and use it for new one
620                 for allocator in allocators {
621                     let allocator_base = allocator.lock().unwrap().base();
622                     let allocator_end = allocator.lock().unwrap().end();
623 
624                     if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
625                         allocator
626                             .lock()
627                             .unwrap()
628                             .free(GuestAddress(old_base), len as GuestUsize);
629 
630                         allocator
631                             .lock()
632                             .unwrap()
633                             .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
634                             .ok_or_else(|| {
635                                 io::Error::new(
636                                     io::ErrorKind::Other,
637                                     "failed allocating new MMIO range",
638                                 )
639                             })?;
640 
641                         break;
642                     }
643                 }
644 
645                 // Update MMIO bus
646                 self.mmio_bus
647                     .update_range(old_base, len, new_base, len)
648                     .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
649             }
650         }
651 
652         // Update the device_tree resources associated with the device
653         if let Some(id) = pci_dev.id() {
654             if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
655                 let mut resource_updated = false;
656                 for resource in node.resources.iter_mut() {
657                     if let Resource::PciBar { base, type_, .. } = resource {
658                         if PciBarRegionType::from(*type_) == region_type && *base == old_base {
659                             *base = new_base;
660                             resource_updated = true;
661                             break;
662                         }
663                     }
664                 }
665 
666                 if !resource_updated {
667                     return Err(io::Error::new(
668                         io::ErrorKind::Other,
669                         format!(
670                             "Couldn't find a resource with base 0x{old_base:x} for device {id}"
671                         ),
672                     ));
673                 }
674             } else {
675                 return Err(io::Error::new(
676                     io::ErrorKind::Other,
677                     format!("Couldn't find device {id} from device tree"),
678                 ));
679             }
680         }
681 
682         let any_dev = pci_dev.as_any();
683         if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
684             let bar_addr = virtio_pci_dev.config_bar_addr();
685             if bar_addr == new_base {
686                 for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
687                     let io_addr = IoEventAddress::Mmio(addr);
688                     self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
689                         io::Error::new(
690                             io::ErrorKind::Other,
691                             format!("failed to unregister ioevent: {e:?}"),
692                         )
693                     })?;
694                 }
695                 for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
696                     let io_addr = IoEventAddress::Mmio(addr);
697                     self.vm
698                         .register_ioevent(event, &io_addr, None)
699                         .map_err(|e| {
700                             io::Error::new(
701                                 io::ErrorKind::Other,
702                                 format!("failed to register ioevent: {e:?}"),
703                             )
704                         })?;
705                 }
706             } else {
707                 let virtio_dev = virtio_pci_dev.virtio_device();
708                 let mut virtio_dev = virtio_dev.lock().unwrap();
709                 if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
710                     if shm_regions.addr.raw_value() == old_base {
711                         let mem_region = self.vm.make_user_memory_region(
712                             shm_regions.mem_slot,
713                             old_base,
714                             shm_regions.len,
715                             shm_regions.host_addr,
716                             false,
717                             false,
718                         );
719 
720                         self.vm.remove_user_memory_region(mem_region).map_err(|e| {
721                             io::Error::new(
722                                 io::ErrorKind::Other,
723                                 format!("failed to remove user memory region: {e:?}"),
724                             )
725                         })?;
726 
727                         // Create new mapping by inserting new region to KVM.
728                         let mem_region = self.vm.make_user_memory_region(
729                             shm_regions.mem_slot,
730                             new_base,
731                             shm_regions.len,
732                             shm_regions.host_addr,
733                             false,
734                             false,
735                         );
736 
737                         self.vm.create_user_memory_region(mem_region).map_err(|e| {
738                             io::Error::new(
739                                 io::ErrorKind::Other,
740                                 format!("failed to create user memory regions: {e:?}"),
741                             )
742                         })?;
743 
744                         // Update shared memory regions to reflect the new mapping.
745                         shm_regions.addr = GuestAddress(new_base);
746                         virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
747                             io::Error::new(
748                                 io::ErrorKind::Other,
749                                 format!("failed to update shared memory regions: {e:?}"),
750                             )
751                         })?;
752                     }
753                 }
754             }
755         }
756 
757         pci_dev.move_bar(old_base, new_base)
758     }
759 }
760 
761 #[derive(Serialize, Deserialize)]
762 struct DeviceManagerState {
763     device_tree: DeviceTree,
764     device_id_cnt: Wrapping<usize>,
765 }
766 
767 #[derive(Debug)]
768 pub struct PtyPair {
769     pub main: File,
770     pub path: PathBuf,
771 }
772 
773 impl Clone for PtyPair {
774     fn clone(&self) -> Self {
775         PtyPair {
776             main: self.main.try_clone().unwrap(),
777             path: self.path.clone(),
778         }
779     }
780 }
781 
782 #[derive(Clone)]
783 pub enum PciDeviceHandle {
784     Vfio(Arc<Mutex<VfioPciDevice>>),
785     Virtio(Arc<Mutex<VirtioPciDevice>>),
786     VfioUser(Arc<Mutex<VfioUserPciDevice>>),
787 }
788 
789 #[derive(Clone)]
790 struct MetaVirtioDevice {
791     virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
792     iommu: bool,
793     id: String,
794     pci_segment: u16,
795     dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
796 }
797 
798 #[derive(Default)]
799 pub struct AcpiPlatformAddresses {
800     pub pm_timer_address: Option<GenericAddress>,
801     pub reset_reg_address: Option<GenericAddress>,
802     pub sleep_control_reg_address: Option<GenericAddress>,
803     pub sleep_status_reg_address: Option<GenericAddress>,
804 }
805 
806 pub struct DeviceManager {
807     // The underlying hypervisor
808     hypervisor_type: HypervisorType,
809 
810     // Manage address space related to devices
811     address_manager: Arc<AddressManager>,
812 
813     // Console abstraction
814     console: Arc<Console>,
815 
816     // console PTY
817     console_pty: Option<Arc<Mutex<PtyPair>>>,
818 
819     // serial PTY
820     serial_pty: Option<Arc<Mutex<PtyPair>>>,
821 
822     // Serial Manager
823     serial_manager: Option<Arc<SerialManager>>,
824 
825     // pty foreground status,
826     console_resize_pipe: Option<Arc<File>>,
827 
828     // To restore on exit.
829     original_termios_opt: Arc<Mutex<Option<termios>>>,
830 
831     // Interrupt controller
832     #[cfg(target_arch = "x86_64")]
833     interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
834     #[cfg(target_arch = "aarch64")]
835     interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,
836 
837     // Things to be added to the commandline (e.g. aarch64 early console)
838     #[cfg(target_arch = "aarch64")]
839     cmdline_additions: Vec<String>,
840 
841     // ACPI GED notification device
842     ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,
843 
844     // VM configuration
845     config: Arc<Mutex<VmConfig>>,
846 
847     // Memory Manager
848     memory_manager: Arc<Mutex<MemoryManager>>,
849 
850     // CPU Manager
851     cpu_manager: Arc<Mutex<CpuManager>>,
852 
853     // The virtio devices on the system
854     virtio_devices: Vec<MetaVirtioDevice>,
855 
856     // List of bus devices
857     // Let the DeviceManager keep strong references to the BusDevice devices.
858     // This allows the IO and MMIO buses to be provided with Weak references,
859     // which prevents cyclic dependencies.
860     bus_devices: Vec<Arc<Mutex<dyn BusDevice>>>,
861 
862     // Counter to keep track of the consumed device IDs.
863     device_id_cnt: Wrapping<usize>,
864 
865     pci_segments: Vec<PciSegment>,
866 
867     #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
868     // MSI Interrupt Manager
869     msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,
870 
871     #[cfg_attr(feature = "mshv", allow(dead_code))]
872     // Legacy Interrupt Manager
873     legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,
874 
875     // Passthrough device handle
876     passthrough_device: Option<VfioDeviceFd>,
877 
878     // VFIO container
879     // Only one container can be created, therefore it is stored as part of the
880     // DeviceManager to be reused.
881     vfio_container: Option<Arc<VfioContainer>>,
882 
883     // Paravirtualized IOMMU
884     iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
885     iommu_mapping: Option<Arc<IommuMapping>>,
886 
887     // PCI information about devices attached to the paravirtualized IOMMU
888     // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
889     // representing the devices attached to the virtual IOMMU. This is useful
890     // information for filling the ACPI VIOT table.
891     iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,
892 
893     // Tree of devices, representing the dependencies between devices.
894     // Useful for introspection, snapshot and restore.
895     device_tree: Arc<Mutex<DeviceTree>>,
896 
897     // Exit event
898     exit_evt: EventFd,
899     reset_evt: EventFd,
900 
901     #[cfg(target_arch = "aarch64")]
902     id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,
903 
904     // seccomp action
905     seccomp_action: SeccompAction,
906 
907     // List of guest NUMA nodes.
908     numa_nodes: NumaNodes,
909 
910     // Possible handle to the virtio-balloon device
911     balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,
912 
913     // Virtio Device activation EventFd to allow the VMM thread to trigger device
914     // activation and thus start the threads from the VMM thread
915     activate_evt: EventFd,
916 
917     acpi_address: GuestAddress,
918 
919     selected_segment: usize,
920 
921     // Possible handle to the virtio-mem device
922     virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,
923 
924     #[cfg(target_arch = "aarch64")]
925     // GPIO device for AArch64
926     gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,
927 
928     // pvpanic device
929     pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,
930 
931     // Flag to force setting the iommu on virtio devices
932     force_iommu: bool,
933 
934     // io_uring availability if detected
935     io_uring_supported: Option<bool>,
936 
937     // aio availability if detected
938     aio_supported: Option<bool>,
939 
940     // List of unique identifiers provided at boot through the configuration.
941     boot_id_list: BTreeSet<String>,
942 
943     // Start time of the VM
944     timestamp: Instant,
945 
946     // Pending activations
947     pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,
948 
949     // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
950     acpi_platform_addresses: AcpiPlatformAddresses,
951 
952     snapshot: Option<Snapshot>,
953 
954     rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,
955 }
956 
957 impl DeviceManager {
958     #[allow(clippy::too_many_arguments)]
959     pub fn new(
960         #[cfg(target_arch = "x86_64")] io_bus: Arc<Bus>,
961         mmio_bus: Arc<Bus>,
962         hypervisor_type: HypervisorType,
963         vm: Arc<dyn hypervisor::Vm>,
964         config: Arc<Mutex<VmConfig>>,
965         memory_manager: Arc<Mutex<MemoryManager>>,
966         cpu_manager: Arc<Mutex<CpuManager>>,
967         exit_evt: EventFd,
968         reset_evt: EventFd,
969         seccomp_action: SeccompAction,
970         numa_nodes: NumaNodes,
971         activate_evt: &EventFd,
972         force_iommu: bool,
973         boot_id_list: BTreeSet<String>,
974         timestamp: Instant,
975         snapshot: Option<Snapshot>,
976         dynamic: bool,
977     ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
978         trace_scoped!("DeviceManager::new");
979 
980         let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
981             let state: DeviceManagerState = snapshot.to_state().unwrap();
982             (
983                 Arc::new(Mutex::new(state.device_tree.clone())),
984                 state.device_id_cnt,
985             )
986         } else {
987             (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
988         };
989 
990         let num_pci_segments =
991             if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
992                 platform_config.num_pci_segments
993             } else {
994                 1
995             };
996 
997         let create_mmio_allocators = |start, end, num_pci_segments, alignment| {
998             // Start each PCI segment mmio range on an aligned boundary
999             let pci_segment_mmio_size =
1000                 (end - start + 1) / (alignment * num_pci_segments as u64) * alignment;
1001 
1002             let mut mmio_allocators = vec![];
1003             for i in 0..num_pci_segments as u64 {
1004                 let mmio_start = start + i * pci_segment_mmio_size;
1005                 let allocator = Arc::new(Mutex::new(
1006                     AddressAllocator::new(GuestAddress(mmio_start), pci_segment_mmio_size).unwrap(),
1007                 ));
1008                 mmio_allocators.push(allocator)
1009             }
1010 
1011             mmio_allocators
1012         };
1013 
1014         let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0;
1015         let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE;
1016         let pci_mmio32_allocators = create_mmio_allocators(
1017             start_of_mmio32_area,
1018             end_of_mmio32_area,
1019             num_pci_segments,
1020             4 << 10,
1021         );
1022 
1023         let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0;
1024         let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0;
1025         let pci_mmio64_allocators = create_mmio_allocators(
1026             start_of_mmio64_area,
1027             end_of_mmio64_area,
1028             num_pci_segments,
1029             4 << 30,
1030         );
1031 
1032         let address_manager = Arc::new(AddressManager {
1033             allocator: memory_manager.lock().unwrap().allocator(),
1034             #[cfg(target_arch = "x86_64")]
1035             io_bus,
1036             mmio_bus,
1037             vm: vm.clone(),
1038             device_tree: Arc::clone(&device_tree),
1039             pci_mmio32_allocators,
1040             pci_mmio64_allocators,
1041         });
1042 
1043         // First we create the MSI interrupt manager, the legacy one is created
1044         // later, after the IOAPIC device creation.
1045         // The reason we create the MSI one first is because the IOAPIC needs it,
1046         // and then the legacy interrupt manager needs an IOAPIC. So we're
1047         // handling a linear dependency chain:
1048         // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1049         let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1050             Arc::new(MsiInterruptManager::new(
1051                 Arc::clone(&address_manager.allocator),
1052                 vm,
1053             ));
1054 
1055         let acpi_address = address_manager
1056             .allocator
1057             .lock()
1058             .unwrap()
1059             .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1060             .ok_or(DeviceManagerError::AllocateIoPort)?;
1061 
1062         let mut pci_irq_slots = [0; 32];
1063         PciSegment::reserve_legacy_interrupts_for_pci_devices(
1064             &address_manager,
1065             &mut pci_irq_slots,
1066         )?;
1067 
1068         let mut pci_segments = vec![PciSegment::new_default_segment(
1069             &address_manager,
1070             Arc::clone(&address_manager.pci_mmio32_allocators[0]),
1071             Arc::clone(&address_manager.pci_mmio64_allocators[0]),
1072             &pci_irq_slots,
1073         )?];
1074 
1075         for i in 1..num_pci_segments as usize {
1076             pci_segments.push(PciSegment::new(
1077                 i as u16,
1078                 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
1079                 &address_manager,
1080                 Arc::clone(&address_manager.pci_mmio32_allocators[i]),
1081                 Arc::clone(&address_manager.pci_mmio64_allocators[i]),
1082                 &pci_irq_slots,
1083             )?);
1084         }
1085 
1086         if dynamic {
1087             let acpi_address = address_manager
1088                 .allocator
1089                 .lock()
1090                 .unwrap()
1091                 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
1092                 .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1093 
1094             address_manager
1095                 .mmio_bus
1096                 .insert(
1097                     cpu_manager.clone(),
1098                     acpi_address.0,
1099                     CPU_MANAGER_ACPI_SIZE as u64,
1100                 )
1101                 .map_err(DeviceManagerError::BusError)?;
1102 
1103             cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
1104         }
1105 
1106         let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new();
1107         if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() {
1108             for rate_limit_group_cfg in rate_limit_groups_cfg {
1109                 let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config;
1110                 let bw = rate_limit_cfg.bandwidth.unwrap_or_default();
1111                 let ops = rate_limit_cfg.ops.unwrap_or_default();
1112                 let mut rate_limit_group = RateLimiterGroup::new(
1113                     &rate_limit_group_cfg.id,
1114                     bw.size,
1115                     bw.one_time_burst.unwrap_or(0),
1116                     bw.refill_time,
1117                     ops.size,
1118                     ops.one_time_burst.unwrap_or(0),
1119                     ops.refill_time,
1120                 )
1121                 .map_err(DeviceManagerError::RateLimiterGroupCreate)?;
1122 
1123                 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?;
1124 
1125                 rate_limit_group.start_thread(exit_evt).unwrap();
1126                 rate_limit_groups
1127                     .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group));
1128             }
1129         }
1130 
1131         let device_manager = DeviceManager {
1132             hypervisor_type,
1133             address_manager: Arc::clone(&address_manager),
1134             console: Arc::new(Console::default()),
1135             interrupt_controller: None,
1136             #[cfg(target_arch = "aarch64")]
1137             cmdline_additions: Vec::new(),
1138             ged_notification_device: None,
1139             config,
1140             memory_manager,
1141             cpu_manager,
1142             virtio_devices: Vec::new(),
1143             bus_devices: Vec::new(),
1144             device_id_cnt,
1145             msi_interrupt_manager,
1146             legacy_interrupt_manager: None,
1147             passthrough_device: None,
1148             vfio_container: None,
1149             iommu_device: None,
1150             iommu_mapping: None,
1151             iommu_attached_devices: None,
1152             pci_segments,
1153             device_tree,
1154             exit_evt,
1155             reset_evt,
1156             #[cfg(target_arch = "aarch64")]
1157             id_to_dev_info: HashMap::new(),
1158             seccomp_action,
1159             numa_nodes,
1160             balloon: None,
1161             activate_evt: activate_evt
1162                 .try_clone()
1163                 .map_err(DeviceManagerError::EventFd)?,
1164             acpi_address,
1165             selected_segment: 0,
1166             serial_pty: None,
1167             serial_manager: None,
1168             console_pty: None,
1169             console_resize_pipe: None,
1170             original_termios_opt: Arc::new(Mutex::new(None)),
1171             virtio_mem_devices: Vec::new(),
1172             #[cfg(target_arch = "aarch64")]
1173             gpio_device: None,
1174             pvpanic_device: None,
1175             force_iommu,
1176             io_uring_supported: None,
1177             aio_supported: None,
1178             boot_id_list,
1179             timestamp,
1180             pending_activations: Arc::new(Mutex::new(Vec::default())),
1181             acpi_platform_addresses: AcpiPlatformAddresses::default(),
1182             snapshot,
1183             rate_limit_groups,
1184         };
1185 
1186         let device_manager = Arc::new(Mutex::new(device_manager));
1187 
1188         address_manager
1189             .mmio_bus
1190             .insert(
1191                 Arc::clone(&device_manager) as Arc<Mutex<dyn BusDevice>>,
1192                 acpi_address.0,
1193                 DEVICE_MANAGER_ACPI_SIZE as u64,
1194             )
1195             .map_err(DeviceManagerError::BusError)?;
1196 
1197         Ok(device_manager)
1198     }
1199 
1200     pub fn serial_pty(&self) -> Option<PtyPair> {
1201         self.serial_pty
1202             .as_ref()
1203             .map(|pty| pty.lock().unwrap().clone())
1204     }
1205 
1206     pub fn console_pty(&self) -> Option<PtyPair> {
1207         self.console_pty
1208             .as_ref()
1209             .map(|pty| pty.lock().unwrap().clone())
1210     }
1211 
1212     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1213         self.console_resize_pipe.as_ref().map(Arc::clone)
1214     }
1215 
1216     pub fn create_devices(
1217         &mut self,
1218         serial_pty: Option<PtyPair>,
1219         console_pty: Option<PtyPair>,
1220         console_resize_pipe: Option<File>,
1221         original_termios_opt: Arc<Mutex<Option<termios>>>,
1222     ) -> DeviceManagerResult<()> {
1223         trace_scoped!("create_devices");
1224 
1225         let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();
1226 
1227         let interrupt_controller = self.add_interrupt_controller()?;
1228 
1229         self.cpu_manager
1230             .lock()
1231             .unwrap()
1232             .set_interrupt_controller(interrupt_controller.clone());
1233 
1234         // Now we can create the legacy interrupt manager, which needs the freshly
1235         // formed IOAPIC device.
1236         let legacy_interrupt_manager: Arc<
1237             dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
1238         > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
1239             &interrupt_controller,
1240         )));
1241 
1242         {
1243             if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
1244                 self.address_manager
1245                     .mmio_bus
1246                     .insert(
1247                         Arc::clone(&self.memory_manager) as Arc<Mutex<dyn BusDevice>>,
1248                         acpi_address.0,
1249                         MEMORY_MANAGER_ACPI_SIZE as u64,
1250                     )
1251                     .map_err(DeviceManagerError::BusError)?;
1252             }
1253         }
1254 
1255         #[cfg(target_arch = "x86_64")]
1256         self.add_legacy_devices(
1257             self.reset_evt
1258                 .try_clone()
1259                 .map_err(DeviceManagerError::EventFd)?,
1260         )?;
1261 
1262         #[cfg(target_arch = "aarch64")]
1263         self.add_legacy_devices(&legacy_interrupt_manager)?;
1264 
1265         {
1266             self.ged_notification_device = self.add_acpi_devices(
1267                 &legacy_interrupt_manager,
1268                 self.reset_evt
1269                     .try_clone()
1270                     .map_err(DeviceManagerError::EventFd)?,
1271                 self.exit_evt
1272                     .try_clone()
1273                     .map_err(DeviceManagerError::EventFd)?,
1274             )?;
1275         }
1276 
1277         self.original_termios_opt = original_termios_opt;
1278 
1279         self.console = self.add_console_device(
1280             &legacy_interrupt_manager,
1281             &mut virtio_devices,
1282             serial_pty,
1283             console_pty,
1284             console_resize_pipe,
1285         )?;
1286 
1287         if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
1288             let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
1289             self.bus_devices
1290                 .push(Arc::clone(&tpm_dev) as Arc<Mutex<dyn BusDevice>>)
1291         }
1292         self.legacy_interrupt_manager = Some(legacy_interrupt_manager);
1293 
1294         virtio_devices.append(&mut self.make_virtio_devices()?);
1295 
1296         self.add_pci_devices(virtio_devices.clone())?;
1297 
1298         self.virtio_devices = virtio_devices;
1299 
1300         if self.config.clone().lock().unwrap().pvpanic {
1301             self.pvpanic_device = self.add_pvpanic_device()?;
1302         }
1303 
1304         Ok(())
1305     }
1306 
1307     fn state(&self) -> DeviceManagerState {
1308         DeviceManagerState {
1309             device_tree: self.device_tree.lock().unwrap().clone(),
1310             device_id_cnt: self.device_id_cnt,
1311         }
1312     }
1313 
1314     fn get_msi_iova_space(&mut self) -> (u64, u64) {
1315         #[cfg(target_arch = "aarch64")]
1316         {
1317             let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
1318             let vgic_config = gic::Gic::create_default_config(vcpus.into());
1319             (
1320                 vgic_config.msi_addr,
1321                 vgic_config.msi_addr + vgic_config.msi_size - 1,
1322             )
1323         }
1324         #[cfg(target_arch = "x86_64")]
1325         (0xfee0_0000, 0xfeef_ffff)
1326     }
1327 
1328     #[cfg(target_arch = "aarch64")]
1329     /// Gets the information of the devices registered up to some point in time.
1330     pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
1331         &self.id_to_dev_info
1332     }
1333 
1334     #[allow(unused_variables)]
1335     fn add_pci_devices(
1336         &mut self,
1337         virtio_devices: Vec<MetaVirtioDevice>,
1338     ) -> DeviceManagerResult<()> {
1339         let iommu_id = String::from(IOMMU_DEVICE_NAME);
1340 
1341         let iommu_device = if self.config.lock().unwrap().iommu {
1342             let (device, mapping) = virtio_devices::Iommu::new(
1343                 iommu_id.clone(),
1344                 self.seccomp_action.clone(),
1345                 self.exit_evt
1346                     .try_clone()
1347                     .map_err(DeviceManagerError::EventFd)?,
1348                 self.get_msi_iova_space(),
1349                 versioned_state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
1350                     .map_err(DeviceManagerError::RestoreGetState)?,
1351             )
1352             .map_err(DeviceManagerError::CreateVirtioIommu)?;
1353             let device = Arc::new(Mutex::new(device));
1354             self.iommu_device = Some(Arc::clone(&device));
1355             self.iommu_mapping = Some(mapping);
1356 
1357             // Fill the device tree with a new node. In case of restore, we
1358             // know there is nothing to do, so we can simply override the
1359             // existing entry.
1360             self.device_tree
1361                 .lock()
1362                 .unwrap()
1363                 .insert(iommu_id.clone(), device_node!(iommu_id, device));
1364 
1365             Some(device)
1366         } else {
1367             None
1368         };
1369 
1370         let mut iommu_attached_devices = Vec::new();
1371         {
1372             for handle in virtio_devices {
1373                 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
1374                     self.iommu_mapping.clone()
1375                 } else {
1376                     None
1377                 };
1378 
1379                 let dev_id = self.add_virtio_pci_device(
1380                     handle.virtio_device,
1381                     &mapping,
1382                     handle.id,
1383                     handle.pci_segment,
1384                     handle.dma_handler,
1385                 )?;
1386 
1387                 if handle.iommu {
1388                     iommu_attached_devices.push(dev_id);
1389                 }
1390             }
1391 
1392             let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
1393             iommu_attached_devices.append(&mut vfio_iommu_device_ids);
1394 
1395             let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
1396             iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);
1397 
1398             // Add all devices from forced iommu segments
1399             if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
1400                 if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
1401                     for segment in iommu_segments {
1402                         for device in 0..32 {
1403                             let bdf = PciBdf::new(*segment, 0, device, 0);
1404                             if !iommu_attached_devices.contains(&bdf) {
1405                                 iommu_attached_devices.push(bdf);
1406                             }
1407                         }
1408                     }
1409                 }
1410             }
1411 
1412             if let Some(iommu_device) = iommu_device {
1413                 let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
1414                 self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
1415             }
1416         }
1417 
1418         for segment in &self.pci_segments {
1419             #[cfg(target_arch = "x86_64")]
1420             if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
1421                 self.bus_devices
1422                     .push(Arc::clone(pci_config_io) as Arc<Mutex<dyn BusDevice>>);
1423             }
1424 
1425             self.bus_devices
1426                 .push(Arc::clone(&segment.pci_config_mmio) as Arc<Mutex<dyn BusDevice>>);
1427         }
1428 
1429         Ok(())
1430     }
1431 
1432     #[cfg(target_arch = "aarch64")]
1433     fn add_interrupt_controller(
1434         &mut self,
1435     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1436         let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
1437             gic::Gic::new(
1438                 self.config.lock().unwrap().cpus.boot_vcpus,
1439                 Arc::clone(&self.msi_interrupt_manager),
1440                 self.address_manager.vm.clone(),
1441             )
1442             .map_err(DeviceManagerError::CreateInterruptController)?,
1443         ));
1444 
1445         self.interrupt_controller = Some(interrupt_controller.clone());
1446 
1447         // Restore the vGic if this is in the process of restoration
1448         let id = String::from(gic::GIC_SNAPSHOT_ID);
1449         if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
1450             // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
1451             if self
1452                 .cpu_manager
1453                 .lock()
1454                 .unwrap()
1455                 .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
1456                 .is_err()
1457             {
1458                 info!("Failed to initialize PMU");
1459             }
1460 
1461             let vgic_state = vgic_snapshot
1462                 .to_state()
1463                 .map_err(DeviceManagerError::RestoreGetState)?;
1464             let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
1465             interrupt_controller
1466                 .lock()
1467                 .unwrap()
1468                 .restore_vgic(vgic_state, &saved_vcpu_states)
1469                 .unwrap();
1470         }
1471 
1472         self.device_tree
1473             .lock()
1474             .unwrap()
1475             .insert(id.clone(), device_node!(id, interrupt_controller));
1476 
1477         Ok(interrupt_controller)
1478     }
1479 
1480     #[cfg(target_arch = "aarch64")]
1481     pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
1482         self.interrupt_controller.as_ref()
1483     }
1484 
1485     #[cfg(target_arch = "x86_64")]
1486     fn add_interrupt_controller(
1487         &mut self,
1488     ) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
1489         let id = String::from(IOAPIC_DEVICE_NAME);
1490 
1491         // Create IOAPIC
1492         let interrupt_controller = Arc::new(Mutex::new(
1493             ioapic::Ioapic::new(
1494                 id.clone(),
1495                 APIC_START,
1496                 Arc::clone(&self.msi_interrupt_manager),
1497                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1498                     .map_err(DeviceManagerError::RestoreGetState)?,
1499             )
1500             .map_err(DeviceManagerError::CreateInterruptController)?,
1501         ));
1502 
1503         self.interrupt_controller = Some(interrupt_controller.clone());
1504 
1505         self.address_manager
1506             .mmio_bus
1507             .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
1508             .map_err(DeviceManagerError::BusError)?;
1509 
1510         self.bus_devices
1511             .push(Arc::clone(&interrupt_controller) as Arc<Mutex<dyn BusDevice>>);
1512 
1513         // Fill the device tree with a new node. In case of restore, we
1514         // know there is nothing to do, so we can simply override the
1515         // existing entry.
1516         self.device_tree
1517             .lock()
1518             .unwrap()
1519             .insert(id.clone(), device_node!(id, interrupt_controller));
1520 
1521         Ok(interrupt_controller)
1522     }
1523 
1524     fn add_acpi_devices(
1525         &mut self,
1526         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1527         reset_evt: EventFd,
1528         exit_evt: EventFd,
1529     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
1530         let vcpus_kill_signalled = self
1531             .cpu_manager
1532             .lock()
1533             .unwrap()
1534             .vcpus_kill_signalled()
1535             .clone();
1536         let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
1537             exit_evt,
1538             reset_evt,
1539             vcpus_kill_signalled,
1540         )));
1541 
1542         self.bus_devices
1543             .push(Arc::clone(&shutdown_device) as Arc<Mutex<dyn BusDevice>>);
1544 
1545         #[cfg(target_arch = "x86_64")]
1546         {
1547             let shutdown_pio_address: u16 = 0x600;
1548 
1549             self.address_manager
1550                 .allocator
1551                 .lock()
1552                 .unwrap()
1553                 .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
1554                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1555 
1556             self.address_manager
1557                 .io_bus
1558                 .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
1559                 .map_err(DeviceManagerError::BusError)?;
1560 
1561             self.acpi_platform_addresses.sleep_control_reg_address =
1562                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1563             self.acpi_platform_addresses.sleep_status_reg_address =
1564                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1565             self.acpi_platform_addresses.reset_reg_address =
1566                 Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
1567         }
1568 
1569         let ged_irq = self
1570             .address_manager
1571             .allocator
1572             .lock()
1573             .unwrap()
1574             .allocate_irq()
1575             .unwrap();
1576         let interrupt_group = interrupt_manager
1577             .create_group(LegacyIrqGroupConfig {
1578                 irq: ged_irq as InterruptIndex,
1579             })
1580             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1581         let ged_address = self
1582             .address_manager
1583             .allocator
1584             .lock()
1585             .unwrap()
1586             .allocate_platform_mmio_addresses(
1587                 None,
1588                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1589                 None,
1590             )
1591             .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1592         let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
1593             interrupt_group,
1594             ged_irq,
1595             ged_address,
1596         )));
1597         self.address_manager
1598             .mmio_bus
1599             .insert(
1600                 ged_device.clone(),
1601                 ged_address.0,
1602                 devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
1603             )
1604             .map_err(DeviceManagerError::BusError)?;
1605         self.bus_devices
1606             .push(Arc::clone(&ged_device) as Arc<Mutex<dyn BusDevice>>);
1607 
1608         let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));
1609 
1610         self.bus_devices
1611             .push(Arc::clone(&pm_timer_device) as Arc<Mutex<dyn BusDevice>>);
1612 
1613         #[cfg(target_arch = "x86_64")]
1614         {
1615             let pm_timer_pio_address: u16 = 0x608;
1616 
1617             self.address_manager
1618                 .allocator
1619                 .lock()
1620                 .unwrap()
1621                 .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
1622                 .ok_or(DeviceManagerError::AllocateIoPort)?;
1623 
1624             self.address_manager
1625                 .io_bus
1626                 .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
1627                 .map_err(DeviceManagerError::BusError)?;
1628 
1629             self.acpi_platform_addresses.pm_timer_address =
1630                 Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
1631         }
1632 
1633         Ok(Some(ged_device))
1634     }
1635 
1636     #[cfg(target_arch = "x86_64")]
1637     fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
1638         let vcpus_kill_signalled = self
1639             .cpu_manager
1640             .lock()
1641             .unwrap()
1642             .vcpus_kill_signalled()
1643             .clone();
1644         // Add a shutdown device (i8042)
1645         let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
1646             reset_evt.try_clone().unwrap(),
1647             vcpus_kill_signalled.clone(),
1648         )));
1649 
1650         self.bus_devices
1651             .push(Arc::clone(&i8042) as Arc<Mutex<dyn BusDevice>>);
1652 
1653         self.address_manager
1654             .io_bus
1655             .insert(i8042, 0x61, 0x4)
1656             .map_err(DeviceManagerError::BusError)?;
1657         {
1658             // Add a CMOS emulated device
1659             let mem_size = self
1660                 .memory_manager
1661                 .lock()
1662                 .unwrap()
1663                 .guest_memory()
1664                 .memory()
1665                 .last_addr()
1666                 .0
1667                 + 1;
1668             let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
1669             let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);
1670 
1671             let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
1672                 mem_below_4g,
1673                 mem_above_4g,
1674                 reset_evt,
1675                 Some(vcpus_kill_signalled),
1676             )));
1677 
1678             self.bus_devices
1679                 .push(Arc::clone(&cmos) as Arc<Mutex<dyn BusDevice>>);
1680 
1681             self.address_manager
1682                 .io_bus
1683                 .insert(cmos, 0x70, 0x2)
1684                 .map_err(DeviceManagerError::BusError)?;
1685 
1686             let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));
1687 
1688             self.bus_devices
1689                 .push(Arc::clone(&fwdebug) as Arc<Mutex<dyn BusDevice>>);
1690 
1691             self.address_manager
1692                 .io_bus
1693                 .insert(fwdebug, 0x402, 0x1)
1694                 .map_err(DeviceManagerError::BusError)?;
1695         }
1696 
1697         // 0x80 debug port
1698         let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
1699         self.bus_devices
1700             .push(Arc::clone(&debug_port) as Arc<Mutex<dyn BusDevice>>);
1701         self.address_manager
1702             .io_bus
1703             .insert(debug_port, 0x80, 0x1)
1704             .map_err(DeviceManagerError::BusError)?;
1705 
1706         Ok(())
1707     }
1708 
1709     #[cfg(target_arch = "aarch64")]
1710     fn add_legacy_devices(
1711         &mut self,
1712         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1713     ) -> DeviceManagerResult<()> {
1714         // Add a RTC device
1715         let rtc_irq = self
1716             .address_manager
1717             .allocator
1718             .lock()
1719             .unwrap()
1720             .allocate_irq()
1721             .unwrap();
1722 
1723         let interrupt_group = interrupt_manager
1724             .create_group(LegacyIrqGroupConfig {
1725                 irq: rtc_irq as InterruptIndex,
1726             })
1727             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1728 
1729         let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));
1730 
1731         self.bus_devices
1732             .push(Arc::clone(&rtc_device) as Arc<Mutex<dyn BusDevice>>);
1733 
1734         let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;
1735 
1736         self.address_manager
1737             .mmio_bus
1738             .insert(rtc_device, addr.0, MMIO_LEN)
1739             .map_err(DeviceManagerError::BusError)?;
1740 
1741         self.id_to_dev_info.insert(
1742             (DeviceType::Rtc, "rtc".to_string()),
1743             MmioDeviceInfo {
1744                 addr: addr.0,
1745                 len: MMIO_LEN,
1746                 irq: rtc_irq,
1747             },
1748         );
1749 
1750         // Add a GPIO device
1751         let id = String::from(GPIO_DEVICE_NAME);
1752         let gpio_irq = self
1753             .address_manager
1754             .allocator
1755             .lock()
1756             .unwrap()
1757             .allocate_irq()
1758             .unwrap();
1759 
1760         let interrupt_group = interrupt_manager
1761             .create_group(LegacyIrqGroupConfig {
1762                 irq: gpio_irq as InterruptIndex,
1763             })
1764             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1765 
1766         let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
1767             id.clone(),
1768             interrupt_group,
1769             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1770                 .map_err(DeviceManagerError::RestoreGetState)?,
1771         )));
1772 
1773         self.bus_devices
1774             .push(Arc::clone(&gpio_device) as Arc<Mutex<dyn BusDevice>>);
1775 
1776         let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;
1777 
1778         self.address_manager
1779             .mmio_bus
1780             .insert(gpio_device.clone(), addr.0, MMIO_LEN)
1781             .map_err(DeviceManagerError::BusError)?;
1782 
1783         self.gpio_device = Some(gpio_device.clone());
1784 
1785         self.id_to_dev_info.insert(
1786             (DeviceType::Gpio, "gpio".to_string()),
1787             MmioDeviceInfo {
1788                 addr: addr.0,
1789                 len: MMIO_LEN,
1790                 irq: gpio_irq,
1791             },
1792         );
1793 
1794         self.device_tree
1795             .lock()
1796             .unwrap()
1797             .insert(id.clone(), device_node!(id, gpio_device));
1798 
1799         Ok(())
1800     }
1801 
1802     #[cfg(target_arch = "x86_64")]
1803     fn add_serial_device(
1804         &mut self,
1805         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1806         serial_writer: Option<Box<dyn io::Write + Send>>,
1807     ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
1808         // Serial is tied to IRQ #4
1809         let serial_irq = 4;
1810 
1811         let id = String::from(SERIAL_DEVICE_NAME);
1812 
1813         let interrupt_group = interrupt_manager
1814             .create_group(LegacyIrqGroupConfig {
1815                 irq: serial_irq as InterruptIndex,
1816             })
1817             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1818 
1819         let serial = Arc::new(Mutex::new(Serial::new(
1820             id.clone(),
1821             interrupt_group,
1822             serial_writer,
1823             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1824                 .map_err(DeviceManagerError::RestoreGetState)?,
1825         )));
1826 
1827         self.bus_devices
1828             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1829 
1830         self.address_manager
1831             .allocator
1832             .lock()
1833             .unwrap()
1834             .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
1835             .ok_or(DeviceManagerError::AllocateIoPort)?;
1836 
1837         self.address_manager
1838             .io_bus
1839             .insert(serial.clone(), 0x3f8, 0x8)
1840             .map_err(DeviceManagerError::BusError)?;
1841 
1842         // Fill the device tree with a new node. In case of restore, we
1843         // know there is nothing to do, so we can simply override the
1844         // existing entry.
1845         self.device_tree
1846             .lock()
1847             .unwrap()
1848             .insert(id.clone(), device_node!(id, serial));
1849 
1850         Ok(serial)
1851     }
1852 
1853     #[cfg(target_arch = "aarch64")]
1854     fn add_serial_device(
1855         &mut self,
1856         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
1857         serial_writer: Option<Box<dyn io::Write + Send>>,
1858     ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
1859         let id = String::from(SERIAL_DEVICE_NAME);
1860 
1861         let serial_irq = self
1862             .address_manager
1863             .allocator
1864             .lock()
1865             .unwrap()
1866             .allocate_irq()
1867             .unwrap();
1868 
1869         let interrupt_group = interrupt_manager
1870             .create_group(LegacyIrqGroupConfig {
1871                 irq: serial_irq as InterruptIndex,
1872             })
1873             .map_err(DeviceManagerError::CreateInterruptGroup)?;
1874 
1875         let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
1876             id.clone(),
1877             interrupt_group,
1878             serial_writer,
1879             self.timestamp,
1880             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
1881                 .map_err(DeviceManagerError::RestoreGetState)?,
1882         )));
1883 
1884         self.bus_devices
1885             .push(Arc::clone(&serial) as Arc<Mutex<dyn BusDevice>>);
1886 
1887         let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;
1888 
1889         self.address_manager
1890             .mmio_bus
1891             .insert(serial.clone(), addr.0, MMIO_LEN)
1892             .map_err(DeviceManagerError::BusError)?;
1893 
1894         self.id_to_dev_info.insert(
1895             (DeviceType::Serial, DeviceType::Serial.to_string()),
1896             MmioDeviceInfo {
1897                 addr: addr.0,
1898                 len: MMIO_LEN,
1899                 irq: serial_irq,
1900             },
1901         );
1902 
1903         self.cmdline_additions
1904             .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));
1905 
1906         // Fill the device tree with a new node. In case of restore, we
1907         // know there is nothing to do, so we can simply override the
1908         // existing entry.
1909         self.device_tree
1910             .lock()
1911             .unwrap()
1912             .insert(id.clone(), device_node!(id, serial));
1913 
1914         Ok(serial)
1915     }
1916 
1917     fn modify_mode<F: FnOnce(&mut termios)>(
1918         &mut self,
1919         fd: RawFd,
1920         f: F,
1921     ) -> vmm_sys_util::errno::Result<()> {
1922         // SAFETY: safe because we check the return value of isatty.
1923         if unsafe { isatty(fd) } != 1 {
1924             return Ok(());
1925         }
1926 
1927         // SAFETY: The following pair are safe because termios gets totally overwritten by tcgetattr
1928         // and we check the return result.
1929         let mut termios: termios = unsafe { zeroed() };
1930         // SAFETY: see above
1931         let ret = unsafe { tcgetattr(fd, &mut termios as *mut _) };
1932         if ret < 0 {
1933             return vmm_sys_util::errno::errno_result();
1934         }
1935         let mut original_termios_opt = self.original_termios_opt.lock().unwrap();
1936         if original_termios_opt.is_none() {
1937             *original_termios_opt = Some(termios);
1938         }
1939         f(&mut termios);
1940         // SAFETY: Safe because the syscall will only read the extent of termios and we check
1941         // the return result.
1942         let ret = unsafe { tcsetattr(fd, TCSANOW, &termios as *const _) };
1943         if ret < 0 {
1944             return vmm_sys_util::errno::errno_result();
1945         }
1946 
1947         Ok(())
1948     }
1949 
1950     fn set_raw_mode(&mut self, f: &dyn AsRawFd) -> vmm_sys_util::errno::Result<()> {
1951         // SAFETY: FFI call. Variable t is guaranteed to be a valid termios from modify_mode.
1952         self.modify_mode(f.as_raw_fd(), |t| unsafe { cfmakeraw(t) })
1953     }
1954 
1955     fn listen_for_sigwinch_on_tty(&mut self, pty_sub: File) -> std::io::Result<()> {
1956         let seccomp_filter = get_seccomp_filter(
1957             &self.seccomp_action,
1958             Thread::PtyForeground,
1959             self.hypervisor_type,
1960         )
1961         .unwrap();
1962 
1963         self.console_resize_pipe =
1964             Some(Arc::new(start_sigwinch_listener(seccomp_filter, pty_sub)?));
1965 
1966         Ok(())
1967     }
1968 
1969     fn add_virtio_console_device(
1970         &mut self,
1971         virtio_devices: &mut Vec<MetaVirtioDevice>,
1972         console_pty: Option<PtyPair>,
1973         resize_pipe: Option<File>,
1974     ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
1975         let console_config = self.config.lock().unwrap().console.clone();
1976         let endpoint = match console_config.mode {
1977             ConsoleOutputMode::File => {
1978                 let file = File::create(console_config.file.as_ref().unwrap())
1979                     .map_err(DeviceManagerError::ConsoleOutputFileOpen)?;
1980                 Endpoint::File(file)
1981             }
1982             ConsoleOutputMode::Pty => {
1983                 if let Some(pty) = console_pty {
1984                     self.config.lock().unwrap().console.file = Some(pty.path.clone());
1985                     let file = pty.main.try_clone().unwrap();
1986                     self.console_pty = Some(Arc::new(Mutex::new(pty)));
1987                     self.console_resize_pipe = resize_pipe.map(Arc::new);
1988                     Endpoint::PtyPair(file.try_clone().unwrap(), file)
1989                 } else {
1990                     let (main, sub, path) =
1991                         create_pty().map_err(DeviceManagerError::ConsolePtyOpen)?;
1992                     self.set_raw_mode(&sub)
1993                         .map_err(DeviceManagerError::SetPtyRaw)?;
1994                     self.config.lock().unwrap().console.file = Some(path.clone());
1995                     let file = main.try_clone().unwrap();
1996                     assert!(resize_pipe.is_none());
1997                     self.listen_for_sigwinch_on_tty(sub).unwrap();
1998                     self.console_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
1999                     Endpoint::PtyPair(file.try_clone().unwrap(), file)
2000                 }
2001             }
2002             ConsoleOutputMode::Tty => {
2003                 // Duplicating the file descriptors like this is needed as otherwise
2004                 // they will be closed on a reboot and the numbers reused
2005 
2006                 // SAFETY: FFI call to dup. Trivially safe.
2007                 let stdout = unsafe { libc::dup(libc::STDOUT_FILENO) };
2008                 if stdout == -1 {
2009                     return vmm_sys_util::errno::errno_result().map_err(DeviceManagerError::DupFd);
2010                 }
2011                 // SAFETY: stdout is valid and owned solely by us.
2012                 let stdout = unsafe { File::from_raw_fd(stdout) };
2013 
2014                 // Make sure stdout is in raw mode, if it's a terminal.
2015                 let _ = self.set_raw_mode(&stdout);
2016 
2017                 // SAFETY: FFI call. Trivially safe.
2018                 if unsafe { libc::isatty(libc::STDOUT_FILENO) } == 1 {
2019                     self.listen_for_sigwinch_on_tty(stdout.try_clone().unwrap())
2020                         .unwrap();
2021                 }
2022 
2023                 // If an interactive TTY then we can accept input
2024                 // SAFETY: FFI call. Trivially safe.
2025                 if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
2026                     // SAFETY: FFI call to dup. Trivially safe.
2027                     let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
2028                     if stdin == -1 {
2029                         return vmm_sys_util::errno::errno_result()
2030                             .map_err(DeviceManagerError::DupFd);
2031                     }
2032                     // SAFETY: stdin is valid and owned solely by us.
2033                     let stdin = unsafe { File::from_raw_fd(stdin) };
2034 
2035                     Endpoint::FilePair(stdout, stdin)
2036                 } else {
2037                     Endpoint::File(stdout)
2038                 }
2039             }
2040             ConsoleOutputMode::Socket => {
2041                 return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
2042             }
2043             ConsoleOutputMode::Null => Endpoint::Null,
2044             ConsoleOutputMode::Off => return Ok(None),
2045         };
2046         let id = String::from(CONSOLE_DEVICE_NAME);
2047 
2048         let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
2049             id.clone(),
2050             endpoint,
2051             self.console_resize_pipe
2052                 .as_ref()
2053                 .map(|p| p.try_clone().unwrap()),
2054             self.force_iommu | console_config.iommu,
2055             self.seccomp_action.clone(),
2056             self.exit_evt
2057                 .try_clone()
2058                 .map_err(DeviceManagerError::EventFd)?,
2059             versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2060                 .map_err(DeviceManagerError::RestoreGetState)?,
2061         )
2062         .map_err(DeviceManagerError::CreateVirtioConsole)?;
2063         let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
2064         virtio_devices.push(MetaVirtioDevice {
2065             virtio_device: Arc::clone(&virtio_console_device)
2066                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2067             iommu: console_config.iommu,
2068             id: id.clone(),
2069             pci_segment: 0,
2070             dma_handler: None,
2071         });
2072 
2073         // Fill the device tree with a new node. In case of restore, we
2074         // know there is nothing to do, so we can simply override the
2075         // existing entry.
2076         self.device_tree
2077             .lock()
2078             .unwrap()
2079             .insert(id.clone(), device_node!(id, virtio_console_device));
2080 
2081         // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
2082         Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
2083             Some(console_resizer)
2084         } else {
2085             None
2086         })
2087     }
2088 
2089     fn add_console_device(
2090         &mut self,
2091         interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
2092         virtio_devices: &mut Vec<MetaVirtioDevice>,
2093         serial_pty: Option<PtyPair>,
2094         console_pty: Option<PtyPair>,
2095         console_resize_pipe: Option<File>,
2096     ) -> DeviceManagerResult<Arc<Console>> {
2097         let serial_config = self.config.lock().unwrap().serial.clone();
2098         let serial_writer: Option<Box<dyn io::Write + Send>> = match serial_config.mode {
2099             ConsoleOutputMode::File => Some(Box::new(
2100                 File::create(serial_config.file.as_ref().unwrap())
2101                     .map_err(DeviceManagerError::SerialOutputFileOpen)?,
2102             )),
2103             ConsoleOutputMode::Pty => {
2104                 if let Some(pty) = serial_pty {
2105                     self.config.lock().unwrap().serial.file = Some(pty.path.clone());
2106                     self.serial_pty = Some(Arc::new(Mutex::new(pty)));
2107                 } else {
2108                     let (main, sub, path) =
2109                         create_pty().map_err(DeviceManagerError::SerialPtyOpen)?;
2110                     self.set_raw_mode(&sub)
2111                         .map_err(DeviceManagerError::SetPtyRaw)?;
2112                     self.config.lock().unwrap().serial.file = Some(path.clone());
2113                     self.serial_pty = Some(Arc::new(Mutex::new(PtyPair { main, path })));
2114                 }
2115                 None
2116             }
2117             ConsoleOutputMode::Tty => {
2118                 let out = stdout();
2119                 let _ = self.set_raw_mode(&out);
2120                 Some(Box::new(out))
2121             }
2122             ConsoleOutputMode::Off | ConsoleOutputMode::Null | ConsoleOutputMode::Socket => None,
2123         };
2124         if serial_config.mode != ConsoleOutputMode::Off {
2125             let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
2126             self.serial_manager = match serial_config.mode {
2127                 ConsoleOutputMode::Pty | ConsoleOutputMode::Tty | ConsoleOutputMode::Socket => {
2128                     let serial_manager = SerialManager::new(
2129                         serial,
2130                         self.serial_pty.clone(),
2131                         serial_config.mode,
2132                         serial_config.socket,
2133                     )
2134                     .map_err(DeviceManagerError::CreateSerialManager)?;
2135                     if let Some(mut serial_manager) = serial_manager {
2136                         serial_manager
2137                             .start_thread(
2138                                 self.exit_evt
2139                                     .try_clone()
2140                                     .map_err(DeviceManagerError::EventFd)?,
2141                             )
2142                             .map_err(DeviceManagerError::SpawnSerialManager)?;
2143                         Some(Arc::new(serial_manager))
2144                     } else {
2145                         None
2146                     }
2147                 }
2148                 _ => None,
2149             };
2150         }
2151 
2152         let console_resizer =
2153             self.add_virtio_console_device(virtio_devices, console_pty, console_resize_pipe)?;
2154 
2155         Ok(Arc::new(Console { console_resizer }))
2156     }
2157 
2158     fn add_tpm_device(
2159         &mut self,
2160         tpm_path: PathBuf,
2161     ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
2162         // Create TPM Device
2163         let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
2164             DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
2165         })?;
2166         let tpm = Arc::new(Mutex::new(tpm));
2167 
2168         // Add TPM Device to mmio
2169         self.address_manager
2170             .mmio_bus
2171             .insert(
2172                 tpm.clone(),
2173                 arch::layout::TPM_START.0,
2174                 arch::layout::TPM_SIZE,
2175             )
2176             .map_err(DeviceManagerError::BusError)?;
2177 
2178         Ok(tpm)
2179     }
2180 
2181     fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2182         let mut devices: Vec<MetaVirtioDevice> = Vec::new();
2183 
2184         // Create "standard" virtio devices (net/block/rng)
2185         devices.append(&mut self.make_virtio_block_devices()?);
2186         devices.append(&mut self.make_virtio_net_devices()?);
2187         devices.append(&mut self.make_virtio_rng_devices()?);
2188 
2189         // Add virtio-fs if required
2190         devices.append(&mut self.make_virtio_fs_devices()?);
2191 
2192         // Add virtio-pmem if required
2193         devices.append(&mut self.make_virtio_pmem_devices()?);
2194 
2195         // Add virtio-vsock if required
2196         devices.append(&mut self.make_virtio_vsock_devices()?);
2197 
2198         devices.append(&mut self.make_virtio_mem_devices()?);
2199 
2200         // Add virtio-balloon if required
2201         devices.append(&mut self.make_virtio_balloon_devices()?);
2202 
2203         // Add virtio-watchdog device
2204         devices.append(&mut self.make_virtio_watchdog_devices()?);
2205 
2206         // Add vDPA devices if required
2207         devices.append(&mut self.make_vdpa_devices()?);
2208 
2209         Ok(devices)
2210     }
2211 
2212     // Cache whether aio is supported to avoid checking for very block device
2213     fn aio_is_supported(&mut self) -> bool {
2214         if let Some(supported) = self.aio_supported {
2215             return supported;
2216         }
2217 
2218         let supported = block_aio_is_supported();
2219         self.aio_supported = Some(supported);
2220         supported
2221     }
2222 
2223     // Cache whether io_uring is supported to avoid probing for very block device
2224     fn io_uring_is_supported(&mut self) -> bool {
2225         if let Some(supported) = self.io_uring_supported {
2226             return supported;
2227         }
2228 
2229         let supported = block_io_uring_is_supported();
2230         self.io_uring_supported = Some(supported);
2231         supported
2232     }
2233 
2234     fn make_virtio_block_device(
2235         &mut self,
2236         disk_cfg: &mut DiskConfig,
2237     ) -> DeviceManagerResult<MetaVirtioDevice> {
2238         let id = if let Some(id) = &disk_cfg.id {
2239             id.clone()
2240         } else {
2241             let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
2242             disk_cfg.id = Some(id.clone());
2243             id
2244         };
2245 
2246         info!("Creating virtio-block device: {:?}", disk_cfg);
2247 
2248         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
2249 
2250         let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
2251             let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
2252             let vu_cfg = VhostUserConfig {
2253                 socket,
2254                 num_queues: disk_cfg.num_queues,
2255                 queue_size: disk_cfg.queue_size,
2256             };
2257             let vhost_user_block = Arc::new(Mutex::new(
2258                 match virtio_devices::vhost_user::Blk::new(
2259                     id.clone(),
2260                     vu_cfg,
2261                     self.seccomp_action.clone(),
2262                     self.exit_evt
2263                         .try_clone()
2264                         .map_err(DeviceManagerError::EventFd)?,
2265                     self.force_iommu,
2266                     snapshot
2267                         .map(|s| s.to_versioned_state())
2268                         .transpose()
2269                         .map_err(DeviceManagerError::RestoreGetState)?,
2270                 ) {
2271                     Ok(vub_device) => vub_device,
2272                     Err(e) => {
2273                         return Err(DeviceManagerError::CreateVhostUserBlk(e));
2274                     }
2275                 },
2276             ));
2277 
2278             (
2279                 Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2280                 vhost_user_block as Arc<Mutex<dyn Migratable>>,
2281             )
2282         } else {
2283             let mut options = OpenOptions::new();
2284             options.read(true);
2285             options.write(!disk_cfg.readonly);
2286             if disk_cfg.direct {
2287                 options.custom_flags(libc::O_DIRECT);
2288             }
2289             // Open block device path
2290             let mut file: File = options
2291                 .open(
2292                     disk_cfg
2293                         .path
2294                         .as_ref()
2295                         .ok_or(DeviceManagerError::NoDiskPath)?
2296                         .clone(),
2297                 )
2298                 .map_err(DeviceManagerError::Disk)?;
2299             let image_type =
2300                 detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;
2301 
2302             let image = match image_type {
2303                 ImageType::FixedVhd => {
2304                     // Use asynchronous backend relying on io_uring if the
2305                     // syscalls are supported.
2306                     if cfg!(feature = "io_uring")
2307                         && !disk_cfg.disable_io_uring
2308                         && self.io_uring_is_supported()
2309                     {
2310                         info!("Using asynchronous fixed VHD disk file (io_uring)");
2311 
2312                         #[cfg(not(feature = "io_uring"))]
2313                         unreachable!("Checked in if statement above");
2314                         #[cfg(feature = "io_uring")]
2315                         {
2316                             Box::new(
2317                                 FixedVhdDiskAsync::new(file)
2318                                     .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
2319                             ) as Box<dyn DiskFile>
2320                         }
2321                     } else {
2322                         info!("Using synchronous fixed VHD disk file");
2323                         Box::new(
2324                             FixedVhdDiskSync::new(file)
2325                                 .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
2326                         ) as Box<dyn DiskFile>
2327                     }
2328                 }
2329                 ImageType::Raw => {
2330                     // Use asynchronous backend relying on io_uring if the
2331                     // syscalls are supported.
2332                     if cfg!(feature = "io_uring")
2333                         && !disk_cfg.disable_io_uring
2334                         && self.io_uring_is_supported()
2335                     {
2336                         info!("Using asynchronous RAW disk file (io_uring)");
2337 
2338                         #[cfg(not(feature = "io_uring"))]
2339                         unreachable!("Checked in if statement above");
2340                         #[cfg(feature = "io_uring")]
2341                         {
2342                             Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
2343                         }
2344                     } else if !disk_cfg.disable_aio && self.aio_is_supported() {
2345                         info!("Using asynchronous RAW disk file (aio)");
2346                         Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
2347                     } else {
2348                         info!("Using synchronous RAW disk file");
2349                         Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
2350                     }
2351                 }
2352                 ImageType::Qcow2 => {
2353                     info!("Using synchronous QCOW disk file");
2354                     Box::new(
2355                         QcowDiskSync::new(file, disk_cfg.direct)
2356                             .map_err(DeviceManagerError::CreateQcowDiskSync)?,
2357                     ) as Box<dyn DiskFile>
2358                 }
2359                 ImageType::Vhdx => {
2360                     info!("Using synchronous VHDX disk file");
2361                     Box::new(
2362                         VhdxDiskSync::new(file)
2363                             .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
2364                     ) as Box<dyn DiskFile>
2365                 }
2366             };
2367 
2368             let rate_limit_group =
2369                 if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
2370                     // Create an anonymous RateLimiterGroup that is dropped when the Disk
2371                     // is dropped.
2372                     let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
2373                     let ops = rate_limiter_cfg.ops.unwrap_or_default();
2374                     let mut rate_limit_group = RateLimiterGroup::new(
2375                         disk_cfg.id.as_ref().unwrap(),
2376                         bw.size,
2377                         bw.one_time_burst.unwrap_or(0),
2378                         bw.refill_time,
2379                         ops.size,
2380                         ops.one_time_burst.unwrap_or(0),
2381                         ops.refill_time,
2382                     )
2383                     .map_err(DeviceManagerError::RateLimiterGroupCreate)?;
2384 
2385                     rate_limit_group
2386                         .start_thread(
2387                             self.exit_evt
2388                                 .try_clone()
2389                                 .map_err(DeviceManagerError::EventFd)?,
2390                         )
2391                         .unwrap();
2392 
2393                     Some(Arc::new(rate_limit_group))
2394                 } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
2395                     self.rate_limit_groups.get(rate_limit_group).cloned()
2396                 } else {
2397                     None
2398                 };
2399 
2400             let virtio_block = Arc::new(Mutex::new(
2401                 virtio_devices::Block::new(
2402                     id.clone(),
2403                     image,
2404                     disk_cfg
2405                         .path
2406                         .as_ref()
2407                         .ok_or(DeviceManagerError::NoDiskPath)?
2408                         .clone(),
2409                     disk_cfg.readonly,
2410                     self.force_iommu | disk_cfg.iommu,
2411                     disk_cfg.num_queues,
2412                     disk_cfg.queue_size,
2413                     disk_cfg.serial.clone(),
2414                     self.seccomp_action.clone(),
2415                     rate_limit_group,
2416                     self.exit_evt
2417                         .try_clone()
2418                         .map_err(DeviceManagerError::EventFd)?,
2419                     snapshot
2420                         .map(|s| s.to_versioned_state())
2421                         .transpose()
2422                         .map_err(DeviceManagerError::RestoreGetState)?,
2423                 )
2424                 .map_err(DeviceManagerError::CreateVirtioBlock)?,
2425             ));
2426 
2427             (
2428                 Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2429                 virtio_block as Arc<Mutex<dyn Migratable>>,
2430             )
2431         };
2432 
2433         // Fill the device tree with a new node. In case of restore, we
2434         // know there is nothing to do, so we can simply override the
2435         // existing entry.
2436         self.device_tree
2437             .lock()
2438             .unwrap()
2439             .insert(id.clone(), device_node!(id, migratable_device));
2440 
2441         Ok(MetaVirtioDevice {
2442             virtio_device,
2443             iommu: disk_cfg.iommu,
2444             id,
2445             pci_segment: disk_cfg.pci_segment,
2446             dma_handler: None,
2447         })
2448     }
2449 
2450     fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2451         let mut devices = Vec::new();
2452 
2453         let mut block_devices = self.config.lock().unwrap().disks.clone();
2454         if let Some(disk_list_cfg) = &mut block_devices {
2455             for disk_cfg in disk_list_cfg.iter_mut() {
2456                 devices.push(self.make_virtio_block_device(disk_cfg)?);
2457             }
2458         }
2459         self.config.lock().unwrap().disks = block_devices;
2460 
2461         Ok(devices)
2462     }
2463 
2464     fn make_virtio_net_device(
2465         &mut self,
2466         net_cfg: &mut NetConfig,
2467     ) -> DeviceManagerResult<MetaVirtioDevice> {
2468         let id = if let Some(id) = &net_cfg.id {
2469             id.clone()
2470         } else {
2471             let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
2472             net_cfg.id = Some(id.clone());
2473             id
2474         };
2475         info!("Creating virtio-net device: {:?}", net_cfg);
2476 
2477         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
2478 
2479         let (virtio_device, migratable_device) = if net_cfg.vhost_user {
2480             let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
2481             let vu_cfg = VhostUserConfig {
2482                 socket,
2483                 num_queues: net_cfg.num_queues,
2484                 queue_size: net_cfg.queue_size,
2485             };
2486             let server = match net_cfg.vhost_mode {
2487                 VhostMode::Client => false,
2488                 VhostMode::Server => true,
2489             };
2490             let vhost_user_net = Arc::new(Mutex::new(
2491                 match virtio_devices::vhost_user::Net::new(
2492                     id.clone(),
2493                     net_cfg.mac,
2494                     net_cfg.mtu,
2495                     vu_cfg,
2496                     server,
2497                     self.seccomp_action.clone(),
2498                     self.exit_evt
2499                         .try_clone()
2500                         .map_err(DeviceManagerError::EventFd)?,
2501                     self.force_iommu,
2502                     snapshot
2503                         .map(|s| s.to_versioned_state())
2504                         .transpose()
2505                         .map_err(DeviceManagerError::RestoreGetState)?,
2506                     net_cfg.offload_tso,
2507                     net_cfg.offload_ufo,
2508                     net_cfg.offload_csum,
2509                 ) {
2510                     Ok(vun_device) => vun_device,
2511                     Err(e) => {
2512                         return Err(DeviceManagerError::CreateVhostUserNet(e));
2513                     }
2514                 },
2515             ));
2516 
2517             (
2518                 Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2519                 vhost_user_net as Arc<Mutex<dyn Migratable>>,
2520             )
2521         } else {
2522             let state = snapshot
2523                 .map(|s| s.to_versioned_state())
2524                 .transpose()
2525                 .map_err(DeviceManagerError::RestoreGetState)?;
2526 
2527             let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
2528                 Arc::new(Mutex::new(
2529                     virtio_devices::Net::new(
2530                         id.clone(),
2531                         Some(tap_if_name),
2532                         Some(net_cfg.ip),
2533                         Some(net_cfg.mask),
2534                         Some(net_cfg.mac),
2535                         &mut net_cfg.host_mac,
2536                         net_cfg.mtu,
2537                         self.force_iommu | net_cfg.iommu,
2538                         net_cfg.num_queues,
2539                         net_cfg.queue_size,
2540                         self.seccomp_action.clone(),
2541                         net_cfg.rate_limiter_config,
2542                         self.exit_evt
2543                             .try_clone()
2544                             .map_err(DeviceManagerError::EventFd)?,
2545                         state,
2546                         net_cfg.offload_tso,
2547                         net_cfg.offload_ufo,
2548                         net_cfg.offload_csum,
2549                     )
2550                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2551                 ))
2552             } else if let Some(fds) = &net_cfg.fds {
2553                 let net = virtio_devices::Net::from_tap_fds(
2554                     id.clone(),
2555                     fds,
2556                     Some(net_cfg.mac),
2557                     net_cfg.mtu,
2558                     self.force_iommu | net_cfg.iommu,
2559                     net_cfg.queue_size,
2560                     self.seccomp_action.clone(),
2561                     net_cfg.rate_limiter_config,
2562                     self.exit_evt
2563                         .try_clone()
2564                         .map_err(DeviceManagerError::EventFd)?,
2565                     state,
2566                     net_cfg.offload_tso,
2567                     net_cfg.offload_ufo,
2568                     net_cfg.offload_csum,
2569                 )
2570                 .map_err(DeviceManagerError::CreateVirtioNet)?;
2571 
2572                 // SAFETY: 'fds' are valid because TAP devices are created successfully
2573                 unsafe {
2574                     self.config.lock().unwrap().add_preserved_fds(fds.clone());
2575                 }
2576 
2577                 Arc::new(Mutex::new(net))
2578             } else {
2579                 Arc::new(Mutex::new(
2580                     virtio_devices::Net::new(
2581                         id.clone(),
2582                         None,
2583                         Some(net_cfg.ip),
2584                         Some(net_cfg.mask),
2585                         Some(net_cfg.mac),
2586                         &mut net_cfg.host_mac,
2587                         net_cfg.mtu,
2588                         self.force_iommu | net_cfg.iommu,
2589                         net_cfg.num_queues,
2590                         net_cfg.queue_size,
2591                         self.seccomp_action.clone(),
2592                         net_cfg.rate_limiter_config,
2593                         self.exit_evt
2594                             .try_clone()
2595                             .map_err(DeviceManagerError::EventFd)?,
2596                         state,
2597                         net_cfg.offload_tso,
2598                         net_cfg.offload_ufo,
2599                         net_cfg.offload_csum,
2600                     )
2601                     .map_err(DeviceManagerError::CreateVirtioNet)?,
2602                 ))
2603             };
2604 
2605             (
2606                 Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2607                 virtio_net as Arc<Mutex<dyn Migratable>>,
2608             )
2609         };
2610 
2611         // Fill the device tree with a new node. In case of restore, we
2612         // know there is nothing to do, so we can simply override the
2613         // existing entry.
2614         self.device_tree
2615             .lock()
2616             .unwrap()
2617             .insert(id.clone(), device_node!(id, migratable_device));
2618 
2619         Ok(MetaVirtioDevice {
2620             virtio_device,
2621             iommu: net_cfg.iommu,
2622             id,
2623             pci_segment: net_cfg.pci_segment,
2624             dma_handler: None,
2625         })
2626     }
2627 
2628     /// Add virto-net and vhost-user-net devices
2629     fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2630         let mut devices = Vec::new();
2631         let mut net_devices = self.config.lock().unwrap().net.clone();
2632         if let Some(net_list_cfg) = &mut net_devices {
2633             for net_cfg in net_list_cfg.iter_mut() {
2634                 devices.push(self.make_virtio_net_device(net_cfg)?);
2635             }
2636         }
2637         self.config.lock().unwrap().net = net_devices;
2638 
2639         Ok(devices)
2640     }
2641 
2642     fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2643         let mut devices = Vec::new();
2644 
2645         // Add virtio-rng if required
2646         let rng_config = self.config.lock().unwrap().rng.clone();
2647         if let Some(rng_path) = rng_config.src.to_str() {
2648             info!("Creating virtio-rng device: {:?}", rng_config);
2649             let id = String::from(RNG_DEVICE_NAME);
2650 
2651             let virtio_rng_device = Arc::new(Mutex::new(
2652                 virtio_devices::Rng::new(
2653                     id.clone(),
2654                     rng_path,
2655                     self.force_iommu | rng_config.iommu,
2656                     self.seccomp_action.clone(),
2657                     self.exit_evt
2658                         .try_clone()
2659                         .map_err(DeviceManagerError::EventFd)?,
2660                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2661                         .map_err(DeviceManagerError::RestoreGetState)?,
2662                 )
2663                 .map_err(DeviceManagerError::CreateVirtioRng)?,
2664             ));
2665             devices.push(MetaVirtioDevice {
2666                 virtio_device: Arc::clone(&virtio_rng_device)
2667                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2668                 iommu: rng_config.iommu,
2669                 id: id.clone(),
2670                 pci_segment: 0,
2671                 dma_handler: None,
2672             });
2673 
2674             // Fill the device tree with a new node. In case of restore, we
2675             // know there is nothing to do, so we can simply override the
2676             // existing entry.
2677             self.device_tree
2678                 .lock()
2679                 .unwrap()
2680                 .insert(id.clone(), device_node!(id, virtio_rng_device));
2681         }
2682 
2683         Ok(devices)
2684     }
2685 
2686     fn make_virtio_fs_device(
2687         &mut self,
2688         fs_cfg: &mut FsConfig,
2689     ) -> DeviceManagerResult<MetaVirtioDevice> {
2690         let id = if let Some(id) = &fs_cfg.id {
2691             id.clone()
2692         } else {
2693             let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
2694             fs_cfg.id = Some(id.clone());
2695             id
2696         };
2697 
2698         info!("Creating virtio-fs device: {:?}", fs_cfg);
2699 
2700         let mut node = device_node!(id);
2701 
2702         if let Some(fs_socket) = fs_cfg.socket.to_str() {
2703             let virtio_fs_device = Arc::new(Mutex::new(
2704                 virtio_devices::vhost_user::Fs::new(
2705                     id.clone(),
2706                     fs_socket,
2707                     &fs_cfg.tag,
2708                     fs_cfg.num_queues,
2709                     fs_cfg.queue_size,
2710                     None,
2711                     self.seccomp_action.clone(),
2712                     self.exit_evt
2713                         .try_clone()
2714                         .map_err(DeviceManagerError::EventFd)?,
2715                     self.force_iommu,
2716                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2717                         .map_err(DeviceManagerError::RestoreGetState)?,
2718                 )
2719                 .map_err(DeviceManagerError::CreateVirtioFs)?,
2720             ));
2721 
2722             // Update the device tree with the migratable device.
2723             node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
2724             self.device_tree.lock().unwrap().insert(id.clone(), node);
2725 
2726             Ok(MetaVirtioDevice {
2727                 virtio_device: Arc::clone(&virtio_fs_device)
2728                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2729                 iommu: false,
2730                 id,
2731                 pci_segment: fs_cfg.pci_segment,
2732                 dma_handler: None,
2733             })
2734         } else {
2735             Err(DeviceManagerError::NoVirtioFsSock)
2736         }
2737     }
2738 
2739     fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2740         let mut devices = Vec::new();
2741 
2742         let mut fs_devices = self.config.lock().unwrap().fs.clone();
2743         if let Some(fs_list_cfg) = &mut fs_devices {
2744             for fs_cfg in fs_list_cfg.iter_mut() {
2745                 devices.push(self.make_virtio_fs_device(fs_cfg)?);
2746             }
2747         }
2748         self.config.lock().unwrap().fs = fs_devices;
2749 
2750         Ok(devices)
2751     }
2752 
2753     fn make_virtio_pmem_device(
2754         &mut self,
2755         pmem_cfg: &mut PmemConfig,
2756     ) -> DeviceManagerResult<MetaVirtioDevice> {
2757         let id = if let Some(id) = &pmem_cfg.id {
2758             id.clone()
2759         } else {
2760             let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
2761             pmem_cfg.id = Some(id.clone());
2762             id
2763         };
2764 
2765         info!("Creating virtio-pmem device: {:?}", pmem_cfg);
2766 
2767         let mut node = device_node!(id);
2768 
2769         // Look for the id in the device tree. If it can be found, that means
2770         // the device is being restored, otherwise it's created from scratch.
2771         let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
2772             info!("Restoring virtio-pmem {} resources", id);
2773 
2774             let mut region_range: Option<(u64, u64)> = None;
2775             for resource in node.resources.iter() {
2776                 match resource {
2777                     Resource::MmioAddressRange { base, size } => {
2778                         if region_range.is_some() {
2779                             return Err(DeviceManagerError::ResourceAlreadyExists);
2780                         }
2781 
2782                         region_range = Some((*base, *size));
2783                     }
2784                     _ => {
2785                         error!("Unexpected resource {:?} for {}", resource, id);
2786                     }
2787                 }
2788             }
2789 
2790             if region_range.is_none() {
2791                 return Err(DeviceManagerError::MissingVirtioPmemResources);
2792             }
2793 
2794             region_range
2795         } else {
2796             None
2797         };
2798 
2799         let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
2800             if pmem_cfg.size.is_none() {
2801                 return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
2802             }
2803             (O_TMPFILE, true)
2804         } else {
2805             (0, false)
2806         };
2807 
2808         let mut file = OpenOptions::new()
2809             .read(true)
2810             .write(!pmem_cfg.discard_writes)
2811             .custom_flags(custom_flags)
2812             .open(&pmem_cfg.file)
2813             .map_err(DeviceManagerError::PmemFileOpen)?;
2814 
2815         let size = if let Some(size) = pmem_cfg.size {
2816             if set_len {
2817                 file.set_len(size)
2818                     .map_err(DeviceManagerError::PmemFileSetLen)?;
2819             }
2820             size
2821         } else {
2822             file.seek(SeekFrom::End(0))
2823                 .map_err(DeviceManagerError::PmemFileSetLen)?
2824         };
2825 
2826         if size % 0x20_0000 != 0 {
2827             return Err(DeviceManagerError::PmemSizeNotAligned);
2828         }
2829 
2830         let (region_base, region_size) = if let Some((base, size)) = region_range {
2831             // The memory needs to be 2MiB aligned in order to support
2832             // hugepages.
2833             self.pci_segments[pmem_cfg.pci_segment as usize]
2834                 .mem64_allocator
2835                 .lock()
2836                 .unwrap()
2837                 .allocate(
2838                     Some(GuestAddress(base)),
2839                     size as GuestUsize,
2840                     Some(0x0020_0000),
2841                 )
2842                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2843 
2844             (base, size)
2845         } else {
2846             // The memory needs to be 2MiB aligned in order to support
2847             // hugepages.
2848             let base = self.pci_segments[pmem_cfg.pci_segment as usize]
2849                 .mem64_allocator
2850                 .lock()
2851                 .unwrap()
2852                 .allocate(None, size as GuestUsize, Some(0x0020_0000))
2853                 .ok_or(DeviceManagerError::PmemRangeAllocation)?;
2854 
2855             (base.raw_value(), size)
2856         };
2857 
2858         let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
2859         let mmap_region = MmapRegion::build(
2860             Some(FileOffset::new(cloned_file, 0)),
2861             region_size as usize,
2862             PROT_READ | PROT_WRITE,
2863             MAP_NORESERVE
2864                 | if pmem_cfg.discard_writes {
2865                     MAP_PRIVATE
2866                 } else {
2867                     MAP_SHARED
2868                 },
2869         )
2870         .map_err(DeviceManagerError::NewMmapRegion)?;
2871         let host_addr: u64 = mmap_region.as_ptr() as u64;
2872 
2873         let mem_slot = self
2874             .memory_manager
2875             .lock()
2876             .unwrap()
2877             .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
2878             .map_err(DeviceManagerError::MemoryManager)?;
2879 
2880         let mapping = virtio_devices::UserspaceMapping {
2881             host_addr,
2882             mem_slot,
2883             addr: GuestAddress(region_base),
2884             len: region_size,
2885             mergeable: false,
2886         };
2887 
2888         let virtio_pmem_device = Arc::new(Mutex::new(
2889             virtio_devices::Pmem::new(
2890                 id.clone(),
2891                 file,
2892                 GuestAddress(region_base),
2893                 mapping,
2894                 mmap_region,
2895                 self.force_iommu | pmem_cfg.iommu,
2896                 self.seccomp_action.clone(),
2897                 self.exit_evt
2898                     .try_clone()
2899                     .map_err(DeviceManagerError::EventFd)?,
2900                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2901                     .map_err(DeviceManagerError::RestoreGetState)?,
2902             )
2903             .map_err(DeviceManagerError::CreateVirtioPmem)?,
2904         ));
2905 
2906         // Update the device tree with correct resource information and with
2907         // the migratable device.
2908         node.resources.push(Resource::MmioAddressRange {
2909             base: region_base,
2910             size: region_size,
2911         });
2912         node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
2913         self.device_tree.lock().unwrap().insert(id.clone(), node);
2914 
2915         Ok(MetaVirtioDevice {
2916             virtio_device: Arc::clone(&virtio_pmem_device)
2917                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2918             iommu: pmem_cfg.iommu,
2919             id,
2920             pci_segment: pmem_cfg.pci_segment,
2921             dma_handler: None,
2922         })
2923     }
2924 
2925     fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2926         let mut devices = Vec::new();
2927         // Add virtio-pmem if required
2928         let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
2929         if let Some(pmem_list_cfg) = &mut pmem_devices {
2930             for pmem_cfg in pmem_list_cfg.iter_mut() {
2931                 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
2932             }
2933         }
2934         self.config.lock().unwrap().pmem = pmem_devices;
2935 
2936         Ok(devices)
2937     }
2938 
2939     fn make_virtio_vsock_device(
2940         &mut self,
2941         vsock_cfg: &mut VsockConfig,
2942     ) -> DeviceManagerResult<MetaVirtioDevice> {
2943         let id = if let Some(id) = &vsock_cfg.id {
2944             id.clone()
2945         } else {
2946             let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
2947             vsock_cfg.id = Some(id.clone());
2948             id
2949         };
2950 
2951         info!("Creating virtio-vsock device: {:?}", vsock_cfg);
2952 
2953         let socket_path = vsock_cfg
2954             .socket
2955             .to_str()
2956             .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
2957         let backend =
2958             virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
2959                 .map_err(DeviceManagerError::CreateVsockBackend)?;
2960 
2961         let vsock_device = Arc::new(Mutex::new(
2962             virtio_devices::Vsock::new(
2963                 id.clone(),
2964                 vsock_cfg.cid,
2965                 vsock_cfg.socket.clone(),
2966                 backend,
2967                 self.force_iommu | vsock_cfg.iommu,
2968                 self.seccomp_action.clone(),
2969                 self.exit_evt
2970                     .try_clone()
2971                     .map_err(DeviceManagerError::EventFd)?,
2972                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
2973                     .map_err(DeviceManagerError::RestoreGetState)?,
2974             )
2975             .map_err(DeviceManagerError::CreateVirtioVsock)?,
2976         ));
2977 
2978         // Fill the device tree with a new node. In case of restore, we
2979         // know there is nothing to do, so we can simply override the
2980         // existing entry.
2981         self.device_tree
2982             .lock()
2983             .unwrap()
2984             .insert(id.clone(), device_node!(id, vsock_device));
2985 
2986         Ok(MetaVirtioDevice {
2987             virtio_device: Arc::clone(&vsock_device)
2988                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
2989             iommu: vsock_cfg.iommu,
2990             id,
2991             pci_segment: vsock_cfg.pci_segment,
2992             dma_handler: None,
2993         })
2994     }
2995 
2996     fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2997         let mut devices = Vec::new();
2998 
2999         let mut vsock = self.config.lock().unwrap().vsock.clone();
3000         if let Some(ref mut vsock_cfg) = &mut vsock {
3001             devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
3002         }
3003         self.config.lock().unwrap().vsock = vsock;
3004 
3005         Ok(devices)
3006     }
3007 
3008     fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3009         let mut devices = Vec::new();
3010 
3011         let mm = self.memory_manager.clone();
3012         let mut mm = mm.lock().unwrap();
3013         for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
3014             if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
3015                 info!("Creating virtio-mem device: id = {}", memory_zone_id);
3016 
3017                 let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
3018                     .map(|i| i as u16);
3019 
3020                 let virtio_mem_device = Arc::new(Mutex::new(
3021                     virtio_devices::Mem::new(
3022                         memory_zone_id.clone(),
3023                         virtio_mem_zone.region(),
3024                         self.seccomp_action.clone(),
3025                         node_id,
3026                         virtio_mem_zone.hotplugged_size(),
3027                         virtio_mem_zone.hugepages(),
3028                         self.exit_evt
3029                             .try_clone()
3030                             .map_err(DeviceManagerError::EventFd)?,
3031                         virtio_mem_zone.blocks_state().clone(),
3032                         versioned_state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
3033                             .map_err(DeviceManagerError::RestoreGetState)?,
3034                     )
3035                     .map_err(DeviceManagerError::CreateVirtioMem)?,
3036                 ));
3037 
3038                 // Update the virtio-mem zone so that it has a handle onto the
3039                 // virtio-mem device, which will be used for triggering a resize
3040                 // if needed.
3041                 virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));
3042 
3043                 self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));
3044 
3045                 devices.push(MetaVirtioDevice {
3046                     virtio_device: Arc::clone(&virtio_mem_device)
3047                         as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3048                     iommu: false,
3049                     id: memory_zone_id.clone(),
3050                     pci_segment: 0,
3051                     dma_handler: None,
3052                 });
3053 
3054                 // Fill the device tree with a new node. In case of restore, we
3055                 // know there is nothing to do, so we can simply override the
3056                 // existing entry.
3057                 self.device_tree.lock().unwrap().insert(
3058                     memory_zone_id.clone(),
3059                     device_node!(memory_zone_id, virtio_mem_device),
3060                 );
3061             }
3062         }
3063 
3064         Ok(devices)
3065     }
3066 
3067     fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3068         let mut devices = Vec::new();
3069 
3070         if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
3071             let id = String::from(BALLOON_DEVICE_NAME);
3072             info!("Creating virtio-balloon device: id = {}", id);
3073 
3074             let virtio_balloon_device = Arc::new(Mutex::new(
3075                 virtio_devices::Balloon::new(
3076                     id.clone(),
3077                     balloon_config.size,
3078                     balloon_config.deflate_on_oom,
3079                     balloon_config.free_page_reporting,
3080                     self.seccomp_action.clone(),
3081                     self.exit_evt
3082                         .try_clone()
3083                         .map_err(DeviceManagerError::EventFd)?,
3084                     versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
3085                         .map_err(DeviceManagerError::RestoreGetState)?,
3086                 )
3087                 .map_err(DeviceManagerError::CreateVirtioBalloon)?,
3088             ));
3089 
3090             self.balloon = Some(virtio_balloon_device.clone());
3091 
3092             devices.push(MetaVirtioDevice {
3093                 virtio_device: Arc::clone(&virtio_balloon_device)
3094                     as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3095                 iommu: false,
3096                 id: id.clone(),
3097                 pci_segment: 0,
3098                 dma_handler: None,
3099             });
3100 
3101             self.device_tree
3102                 .lock()
3103                 .unwrap()
3104                 .insert(id.clone(), device_node!(id, virtio_balloon_device));
3105         }
3106 
3107         Ok(devices)
3108     }
3109 
3110     fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3111         let mut devices = Vec::new();
3112 
3113         if !self.config.lock().unwrap().watchdog {
3114             return Ok(devices);
3115         }
3116 
3117         let id = String::from(WATCHDOG_DEVICE_NAME);
3118         info!("Creating virtio-watchdog device: id = {}", id);
3119 
3120         let virtio_watchdog_device = Arc::new(Mutex::new(
3121             virtio_devices::Watchdog::new(
3122                 id.clone(),
3123                 self.reset_evt.try_clone().unwrap(),
3124                 self.seccomp_action.clone(),
3125                 self.exit_evt
3126                     .try_clone()
3127                     .map_err(DeviceManagerError::EventFd)?,
3128                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
3129                     .map_err(DeviceManagerError::RestoreGetState)?,
3130             )
3131             .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
3132         ));
3133         devices.push(MetaVirtioDevice {
3134             virtio_device: Arc::clone(&virtio_watchdog_device)
3135                 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3136             iommu: false,
3137             id: id.clone(),
3138             pci_segment: 0,
3139             dma_handler: None,
3140         });
3141 
3142         self.device_tree
3143             .lock()
3144             .unwrap()
3145             .insert(id.clone(), device_node!(id, virtio_watchdog_device));
3146 
3147         Ok(devices)
3148     }
3149 
3150     fn make_vdpa_device(
3151         &mut self,
3152         vdpa_cfg: &mut VdpaConfig,
3153     ) -> DeviceManagerResult<MetaVirtioDevice> {
3154         let id = if let Some(id) = &vdpa_cfg.id {
3155             id.clone()
3156         } else {
3157             let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
3158             vdpa_cfg.id = Some(id.clone());
3159             id
3160         };
3161 
3162         info!("Creating vDPA device: {:?}", vdpa_cfg);
3163 
3164         let device_path = vdpa_cfg
3165             .path
3166             .to_str()
3167             .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;
3168 
3169         let vdpa_device = Arc::new(Mutex::new(
3170             virtio_devices::Vdpa::new(
3171                 id.clone(),
3172                 device_path,
3173                 self.memory_manager.lock().unwrap().guest_memory(),
3174                 vdpa_cfg.num_queues as u16,
3175                 versioned_state_from_id(self.snapshot.as_ref(), id.as_str())
3176                     .map_err(DeviceManagerError::RestoreGetState)?,
3177             )
3178             .map_err(DeviceManagerError::CreateVdpa)?,
3179         ));
3180 
3181         // Create the DMA handler that is required by the vDPA device
3182         let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
3183             Arc::clone(&vdpa_device),
3184             Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3185         ));
3186 
3187         self.device_tree
3188             .lock()
3189             .unwrap()
3190             .insert(id.clone(), device_node!(id, vdpa_device));
3191 
3192         Ok(MetaVirtioDevice {
3193             virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3194             iommu: vdpa_cfg.iommu,
3195             id,
3196             pci_segment: vdpa_cfg.pci_segment,
3197             dma_handler: Some(vdpa_mapping),
3198         })
3199     }
3200 
3201     fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3202         let mut devices = Vec::new();
3203         // Add vdpa if required
3204         let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
3205         if let Some(vdpa_list_cfg) = &mut vdpa_devices {
3206             for vdpa_cfg in vdpa_list_cfg.iter_mut() {
3207                 devices.push(self.make_vdpa_device(vdpa_cfg)?);
3208             }
3209         }
3210         self.config.lock().unwrap().vdpa = vdpa_devices;
3211 
3212         Ok(devices)
3213     }
3214 
3215     fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
3216         let start_id = self.device_id_cnt;
3217         loop {
3218             // Generate the temporary name.
3219             let name = format!("{}{}", prefix, self.device_id_cnt);
3220             // Increment the counter.
3221             self.device_id_cnt += Wrapping(1);
3222             // Check if the name is already in use.
3223             if !self.boot_id_list.contains(&name)
3224                 && !self.device_tree.lock().unwrap().contains_key(&name)
3225             {
3226                 return Ok(name);
3227             }
3228 
3229             if self.device_id_cnt == start_id {
3230                 // We went through a full loop and there's nothing else we can
3231                 // do.
3232                 break;
3233             }
3234         }
3235         Err(DeviceManagerError::NoAvailableDeviceName)
3236     }
3237 
3238     fn add_passthrough_device(
3239         &mut self,
3240         device_cfg: &mut DeviceConfig,
3241     ) -> DeviceManagerResult<(PciBdf, String)> {
3242         // If the passthrough device has not been created yet, it is created
3243         // here and stored in the DeviceManager structure for future needs.
3244         if self.passthrough_device.is_none() {
3245             self.passthrough_device = Some(
3246                 self.address_manager
3247                     .vm
3248                     .create_passthrough_device()
3249                     .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
3250             );
3251         }
3252 
3253         self.add_vfio_device(device_cfg)
3254     }
3255 
3256     fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
3257         let passthrough_device = self
3258             .passthrough_device
3259             .as_ref()
3260             .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
3261 
3262         let dup = passthrough_device
3263             .try_clone()
3264             .map_err(DeviceManagerError::VfioCreate)?;
3265 
3266         Ok(Arc::new(
3267             VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
3268         ))
3269     }
3270 
3271     fn add_vfio_device(
3272         &mut self,
3273         device_cfg: &mut DeviceConfig,
3274     ) -> DeviceManagerResult<(PciBdf, String)> {
3275         let vfio_name = if let Some(id) = &device_cfg.id {
3276             id.clone()
3277         } else {
3278             let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
3279             device_cfg.id = Some(id.clone());
3280             id
3281         };
3282 
3283         let (pci_segment_id, pci_device_bdf, resources) =
3284             self.pci_resources(&vfio_name, device_cfg.pci_segment)?;
3285 
3286         let mut needs_dma_mapping = false;
3287 
3288         // Here we create a new VFIO container for two reasons. Either this is
3289         // the first VFIO device, meaning we need a new VFIO container, which
3290         // will be shared with other VFIO devices. Or the new VFIO device is
3291         // attached to a vIOMMU, meaning we must create a dedicated VFIO
3292         // container. In the vIOMMU use case, we can't let all devices under
3293         // the same VFIO container since we couldn't map/unmap memory for each
3294         // device. That's simply because the map/unmap operations happen at the
3295         // VFIO container level.
3296         let vfio_container = if device_cfg.iommu {
3297             let vfio_container = self.create_vfio_container()?;
3298 
3299             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3300                 Arc::clone(&vfio_container),
3301                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3302             ));
3303 
3304             if let Some(iommu) = &self.iommu_device {
3305                 iommu
3306                     .lock()
3307                     .unwrap()
3308                     .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
3309             } else {
3310                 return Err(DeviceManagerError::MissingVirtualIommu);
3311             }
3312 
3313             vfio_container
3314         } else if let Some(vfio_container) = &self.vfio_container {
3315             Arc::clone(vfio_container)
3316         } else {
3317             let vfio_container = self.create_vfio_container()?;
3318             needs_dma_mapping = true;
3319             self.vfio_container = Some(Arc::clone(&vfio_container));
3320 
3321             vfio_container
3322         };
3323 
3324         let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
3325             .map_err(DeviceManagerError::VfioCreate)?;
3326 
3327         if needs_dma_mapping {
3328             // Register DMA mapping in IOMMU.
3329             // Do not register virtio-mem regions, as they are handled directly by
3330             // virtio-mem device itself.
3331             for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3332                 for region in zone.regions() {
3333                     vfio_container
3334                         .vfio_dma_map(
3335                             region.start_addr().raw_value(),
3336                             region.len(),
3337                             region.as_ptr() as u64,
3338                         )
3339                         .map_err(DeviceManagerError::VfioDmaMap)?;
3340                 }
3341             }
3342 
3343             let vfio_mapping = Arc::new(VfioDmaMapping::new(
3344                 Arc::clone(&vfio_container),
3345                 Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
3346             ));
3347 
3348             for virtio_mem_device in self.virtio_mem_devices.iter() {
3349                 virtio_mem_device
3350                     .lock()
3351                     .unwrap()
3352                     .add_dma_mapping_handler(
3353                         VirtioMemMappingSource::Container,
3354                         vfio_mapping.clone(),
3355                     )
3356                     .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3357             }
3358         }
3359 
3360         let legacy_interrupt_group =
3361             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3362                 Some(
3363                     legacy_interrupt_manager
3364                         .create_group(LegacyIrqGroupConfig {
3365                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3366                                 [pci_device_bdf.device() as usize]
3367                                 as InterruptIndex,
3368                         })
3369                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3370                 )
3371             } else {
3372                 None
3373             };
3374 
3375         let memory_manager = self.memory_manager.clone();
3376 
3377         let vfio_pci_device = VfioPciDevice::new(
3378             vfio_name.clone(),
3379             &self.address_manager.vm,
3380             vfio_device,
3381             vfio_container,
3382             self.msi_interrupt_manager.clone(),
3383             legacy_interrupt_group,
3384             device_cfg.iommu,
3385             pci_device_bdf,
3386             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3387             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
3388         )
3389         .map_err(DeviceManagerError::VfioPciCreate)?;
3390 
3391         let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));
3392 
3393         let new_resources = self.add_pci_device(
3394             vfio_pci_device.clone(),
3395             vfio_pci_device.clone(),
3396             pci_segment_id,
3397             pci_device_bdf,
3398             resources,
3399         )?;
3400 
3401         vfio_pci_device
3402             .lock()
3403             .unwrap()
3404             .map_mmio_regions()
3405             .map_err(DeviceManagerError::VfioMapRegion)?;
3406 
3407         let mut node = device_node!(vfio_name, vfio_pci_device);
3408 
3409         // Update the device tree with correct resource information.
3410         node.resources = new_resources;
3411         node.pci_bdf = Some(pci_device_bdf);
3412         node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));
3413 
3414         self.device_tree
3415             .lock()
3416             .unwrap()
3417             .insert(vfio_name.clone(), node);
3418 
3419         Ok((pci_device_bdf, vfio_name))
3420     }
3421 
3422     fn add_pci_device(
3423         &mut self,
3424         bus_device: Arc<Mutex<dyn BusDevice>>,
3425         pci_device: Arc<Mutex<dyn PciDevice>>,
3426         segment_id: u16,
3427         bdf: PciBdf,
3428         resources: Option<Vec<Resource>>,
3429     ) -> DeviceManagerResult<Vec<Resource>> {
3430         let bars = pci_device
3431             .lock()
3432             .unwrap()
3433             .allocate_bars(
3434                 &self.address_manager.allocator,
3435                 &mut self.pci_segments[segment_id as usize]
3436                     .mem32_allocator
3437                     .lock()
3438                     .unwrap(),
3439                 &mut self.pci_segments[segment_id as usize]
3440                     .mem64_allocator
3441                     .lock()
3442                     .unwrap(),
3443                 resources,
3444             )
3445             .map_err(DeviceManagerError::AllocateBars)?;
3446 
3447         let mut pci_bus = self.pci_segments[segment_id as usize]
3448             .pci_bus
3449             .lock()
3450             .unwrap();
3451 
3452         pci_bus
3453             .add_device(bdf.device() as u32, pci_device)
3454             .map_err(DeviceManagerError::AddPciDevice)?;
3455 
3456         self.bus_devices.push(Arc::clone(&bus_device));
3457 
3458         pci_bus
3459             .register_mapping(
3460                 bus_device,
3461                 #[cfg(target_arch = "x86_64")]
3462                 self.address_manager.io_bus.as_ref(),
3463                 self.address_manager.mmio_bus.as_ref(),
3464                 bars.clone(),
3465             )
3466             .map_err(DeviceManagerError::AddPciDevice)?;
3467 
3468         let mut new_resources = Vec::new();
3469         for bar in bars {
3470             new_resources.push(Resource::PciBar {
3471                 index: bar.idx(),
3472                 base: bar.addr(),
3473                 size: bar.size(),
3474                 type_: bar.region_type().into(),
3475                 prefetchable: bar.prefetchable().into(),
3476             });
3477         }
3478 
3479         Ok(new_resources)
3480     }
3481 
3482     fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3483         let mut iommu_attached_device_ids = Vec::new();
3484         let mut devices = self.config.lock().unwrap().devices.clone();
3485 
3486         if let Some(device_list_cfg) = &mut devices {
3487             for device_cfg in device_list_cfg.iter_mut() {
3488                 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3489                 if device_cfg.iommu && self.iommu_device.is_some() {
3490                     iommu_attached_device_ids.push(device_id);
3491                 }
3492             }
3493         }
3494 
3495         // Update the list of devices
3496         self.config.lock().unwrap().devices = devices;
3497 
3498         Ok(iommu_attached_device_ids)
3499     }
3500 
3501     fn add_vfio_user_device(
3502         &mut self,
3503         device_cfg: &mut UserDeviceConfig,
3504     ) -> DeviceManagerResult<(PciBdf, String)> {
3505         let vfio_user_name = if let Some(id) = &device_cfg.id {
3506             id.clone()
3507         } else {
3508             let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
3509             device_cfg.id = Some(id.clone());
3510             id
3511         };
3512 
3513         let (pci_segment_id, pci_device_bdf, resources) =
3514             self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;
3515 
3516         let legacy_interrupt_group =
3517             if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
3518                 Some(
3519                     legacy_interrupt_manager
3520                         .create_group(LegacyIrqGroupConfig {
3521                             irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
3522                                 [pci_device_bdf.device() as usize]
3523                                 as InterruptIndex,
3524                         })
3525                         .map_err(DeviceManagerError::CreateInterruptGroup)?,
3526                 )
3527             } else {
3528                 None
3529             };
3530 
3531         let client = Arc::new(Mutex::new(
3532             vfio_user::Client::new(&device_cfg.socket)
3533                 .map_err(DeviceManagerError::VfioUserCreateClient)?,
3534         ));
3535 
3536         let memory_manager = self.memory_manager.clone();
3537 
3538         let mut vfio_user_pci_device = VfioUserPciDevice::new(
3539             vfio_user_name.clone(),
3540             &self.address_manager.vm,
3541             client.clone(),
3542             self.msi_interrupt_manager.clone(),
3543             legacy_interrupt_group,
3544             pci_device_bdf,
3545             Arc::new(move || memory_manager.lock().unwrap().allocate_memory_slot()),
3546             vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
3547         )
3548         .map_err(DeviceManagerError::VfioUserCreate)?;
3549 
3550         let memory = self.memory_manager.lock().unwrap().guest_memory();
3551         let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
3552         for virtio_mem_device in self.virtio_mem_devices.iter() {
3553             virtio_mem_device
3554                 .lock()
3555                 .unwrap()
3556                 .add_dma_mapping_handler(
3557                     VirtioMemMappingSource::Device(pci_device_bdf.into()),
3558                     vfio_user_mapping.clone(),
3559                 )
3560                 .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3561         }
3562 
3563         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3564             for region in zone.regions() {
3565                 vfio_user_pci_device
3566                     .dma_map(region)
3567                     .map_err(DeviceManagerError::VfioUserDmaMap)?;
3568             }
3569         }
3570 
3571         let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));
3572 
3573         let new_resources = self.add_pci_device(
3574             vfio_user_pci_device.clone(),
3575             vfio_user_pci_device.clone(),
3576             pci_segment_id,
3577             pci_device_bdf,
3578             resources,
3579         )?;
3580 
3581         // Note it is required to call 'add_pci_device()' in advance to have the list of
3582         // mmio regions provisioned correctly
3583         vfio_user_pci_device
3584             .lock()
3585             .unwrap()
3586             .map_mmio_regions()
3587             .map_err(DeviceManagerError::VfioUserMapRegion)?;
3588 
3589         let mut node = device_node!(vfio_user_name, vfio_user_pci_device);
3590 
3591         // Update the device tree with correct resource information.
3592         node.resources = new_resources;
3593         node.pci_bdf = Some(pci_device_bdf);
3594         node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));
3595 
3596         self.device_tree
3597             .lock()
3598             .unwrap()
3599             .insert(vfio_user_name.clone(), node);
3600 
3601         Ok((pci_device_bdf, vfio_user_name))
3602     }
3603 
3604     fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3605         let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3606 
3607         if let Some(device_list_cfg) = &mut user_devices {
3608             for device_cfg in device_list_cfg.iter_mut() {
3609                 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3610             }
3611         }
3612 
3613         // Update the list of devices
3614         self.config.lock().unwrap().user_devices = user_devices;
3615 
3616         Ok(vec![])
3617     }
3618 
3619     fn add_virtio_pci_device(
3620         &mut self,
3621         virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3622         iommu_mapping: &Option<Arc<IommuMapping>>,
3623         virtio_device_id: String,
3624         pci_segment_id: u16,
3625         dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
3626     ) -> DeviceManagerResult<PciBdf> {
3627         let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");
3628 
3629         // Add the new virtio-pci node to the device tree.
3630         let mut node = device_node!(id);
3631         node.children = vec![virtio_device_id.clone()];
3632 
3633         let (pci_segment_id, pci_device_bdf, resources) =
3634             self.pci_resources(&id, pci_segment_id)?;
3635 
3636         // Update the existing virtio node by setting the parent.
3637         if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
3638             node.parent = Some(id.clone());
3639         } else {
3640             return Err(DeviceManagerError::MissingNode);
3641         }
3642 
3643         // Allows support for one MSI-X vector per queue. It also adds 1
3644         // as we need to take into account the dedicated vector to notify
3645         // about a virtio config change.
3646         let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;
3647 
3648         // Create the AccessPlatform trait from the implementation IommuMapping.
3649         // This will provide address translation for any virtio device sitting
3650         // behind a vIOMMU.
3651         let access_platform: Option<Arc<dyn AccessPlatform>> = if let Some(mapping) = iommu_mapping
3652         {
3653             Some(Arc::new(AccessPlatformMapping::new(
3654                 pci_device_bdf.into(),
3655                 mapping.clone(),
3656             )))
3657         } else {
3658             None
3659         };
3660 
3661         let memory = self.memory_manager.lock().unwrap().guest_memory();
3662 
3663         // Map DMA ranges if a DMA handler is available and if the device is
3664         // not attached to a virtual IOMMU.
3665         if let Some(dma_handler) = &dma_handler {
3666             if iommu_mapping.is_some() {
3667                 if let Some(iommu) = &self.iommu_device {
3668                     iommu
3669                         .lock()
3670                         .unwrap()
3671                         .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
3672                 } else {
3673                     return Err(DeviceManagerError::MissingVirtualIommu);
3674                 }
3675             } else {
3676                 // Let every virtio-mem device handle the DMA map/unmap through the
3677                 // DMA handler provided.
3678                 for virtio_mem_device in self.virtio_mem_devices.iter() {
3679                     virtio_mem_device
3680                         .lock()
3681                         .unwrap()
3682                         .add_dma_mapping_handler(
3683                             VirtioMemMappingSource::Device(pci_device_bdf.into()),
3684                             dma_handler.clone(),
3685                         )
3686                         .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
3687                 }
3688 
3689                 // Do not register virtio-mem regions, as they are handled directly by
3690                 // virtio-mem devices.
3691                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
3692                     for region in zone.regions() {
3693                         let gpa = region.start_addr().0;
3694                         let size = region.len();
3695                         dma_handler
3696                             .map(gpa, gpa, size)
3697                             .map_err(DeviceManagerError::VirtioDmaMap)?;
3698                     }
3699                 }
3700             }
3701         }
3702 
3703         let device_type = virtio_device.lock().unwrap().device_type();
3704         let virtio_pci_device = Arc::new(Mutex::new(
3705             VirtioPciDevice::new(
3706                 id.clone(),
3707                 memory,
3708                 virtio_device,
3709                 msix_num,
3710                 access_platform,
3711                 &self.msi_interrupt_manager,
3712                 pci_device_bdf.into(),
3713                 self.activate_evt
3714                     .try_clone()
3715                     .map_err(DeviceManagerError::EventFd)?,
3716                 // All device types *except* virtio block devices should be allocated a 64-bit bar
3717                 // The block devices should be given a 32-bit BAR so that they are easily accessible
3718                 // to firmware without requiring excessive identity mapping.
3719                 // The exception being if not on the default PCI segment.
3720                 pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
3721                 dma_handler,
3722                 self.pending_activations.clone(),
3723                 vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
3724             )
3725             .map_err(DeviceManagerError::VirtioDevice)?,
3726         ));
3727 
3728         let new_resources = self.add_pci_device(
3729             virtio_pci_device.clone(),
3730             virtio_pci_device.clone(),
3731             pci_segment_id,
3732             pci_device_bdf,
3733             resources,
3734         )?;
3735 
3736         let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
3737         for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
3738             let io_addr = IoEventAddress::Mmio(addr);
3739             self.address_manager
3740                 .vm
3741                 .register_ioevent(event, &io_addr, None)
3742                 .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
3743         }
3744 
3745         // Update the device tree with correct resource information.
3746         node.resources = new_resources;
3747         node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
3748         node.pci_bdf = Some(pci_device_bdf);
3749         node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
3750         self.device_tree.lock().unwrap().insert(id, node);
3751 
3752         Ok(pci_device_bdf)
3753     }
3754 
3755     fn add_pvpanic_device(
3756         &mut self,
3757     ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
3758         let id = String::from(PVPANIC_DEVICE_NAME);
3759         let pci_segment_id = 0x0_u16;
3760 
3761         info!("Creating pvpanic device {}", id);
3762 
3763         let (pci_segment_id, pci_device_bdf, resources) =
3764             self.pci_resources(&id, pci_segment_id)?;
3765 
3766         let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
3767 
3768         let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
3769             .map_err(DeviceManagerError::PvPanicCreate)?;
3770 
3771         let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));
3772 
3773         let new_resources = self.add_pci_device(
3774             pvpanic_device.clone(),
3775             pvpanic_device.clone(),
3776             pci_segment_id,
3777             pci_device_bdf,
3778             resources,
3779         )?;
3780 
3781         let mut node = device_node!(id, pvpanic_device);
3782 
3783         node.resources = new_resources;
3784         node.pci_bdf = Some(pci_device_bdf);
3785         node.pci_device_handle = None;
3786 
3787         self.device_tree.lock().unwrap().insert(id, node);
3788 
3789         Ok(Some(pvpanic_device))
3790     }
3791 
3792     fn pci_resources(
3793         &self,
3794         id: &str,
3795         pci_segment_id: u16,
3796     ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
3797         // Look for the id in the device tree. If it can be found, that means
3798         // the device is being restored, otherwise it's created from scratch.
3799         Ok(
3800             if let Some(node) = self.device_tree.lock().unwrap().get(id) {
3801                 info!("Restoring virtio-pci {} resources", id);
3802                 let pci_device_bdf: PciBdf = node
3803                     .pci_bdf
3804                     .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3805                 let pci_segment_id = pci_device_bdf.segment();
3806 
3807                 self.pci_segments[pci_segment_id as usize]
3808                     .pci_bus
3809                     .lock()
3810                     .unwrap()
3811                     .get_device_id(pci_device_bdf.device() as usize)
3812                     .map_err(DeviceManagerError::GetPciDeviceId)?;
3813 
3814                 (pci_segment_id, pci_device_bdf, Some(node.resources.clone()))
3815             } else {
3816                 let pci_device_bdf =
3817                     self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
3818 
3819                 (pci_segment_id, pci_device_bdf, None)
3820             },
3821         )
3822     }
3823 
3824     #[cfg(target_arch = "x86_64")]
3825     pub fn io_bus(&self) -> &Arc<Bus> {
3826         &self.address_manager.io_bus
3827     }
3828 
3829     pub fn mmio_bus(&self) -> &Arc<Bus> {
3830         &self.address_manager.mmio_bus
3831     }
3832 
3833     pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
3834         &self.address_manager.allocator
3835     }
3836 
3837     pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
3838         self.interrupt_controller
3839             .as_ref()
3840             .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
3841     }
3842 
3843     pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
3844         &self.pci_segments
3845     }
3846 
3847     pub fn console(&self) -> &Arc<Console> {
3848         &self.console
3849     }
3850 
3851     #[cfg(target_arch = "aarch64")]
3852     pub fn cmdline_additions(&self) -> &[String] {
3853         self.cmdline_additions.as_slice()
3854     }
3855 
3856     pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
3857         for handle in self.virtio_devices.iter() {
3858             handle
3859                 .virtio_device
3860                 .lock()
3861                 .unwrap()
3862                 .add_memory_region(new_region)
3863                 .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;
3864 
3865             if let Some(dma_handler) = &handle.dma_handler {
3866                 if !handle.iommu {
3867                     let gpa = new_region.start_addr().0;
3868                     let size = new_region.len();
3869                     dma_handler
3870                         .map(gpa, gpa, size)
3871                         .map_err(DeviceManagerError::VirtioDmaMap)?;
3872                 }
3873             }
3874         }
3875 
3876         // Take care of updating the memory for VFIO PCI devices.
3877         if let Some(vfio_container) = &self.vfio_container {
3878             vfio_container
3879                 .vfio_dma_map(
3880                     new_region.start_addr().raw_value(),
3881                     new_region.len(),
3882                     new_region.as_ptr() as u64,
3883                 )
3884                 .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
3885         }
3886 
3887         // Take care of updating the memory for vfio-user devices.
3888         {
3889             let device_tree = self.device_tree.lock().unwrap();
3890             for pci_device_node in device_tree.pci_devices() {
3891                 if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
3892                     .pci_device_handle
3893                     .as_ref()
3894                     .ok_or(DeviceManagerError::MissingPciDevice)?
3895                 {
3896                     vfio_user_pci_device
3897                         .lock()
3898                         .unwrap()
3899                         .dma_map(new_region)
3900                         .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
3901                 }
3902             }
3903         }
3904 
3905         Ok(())
3906     }
3907 
3908     pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
3909         for mut activator in self.pending_activations.lock().unwrap().drain(..) {
3910             activator
3911                 .activate()
3912                 .map_err(DeviceManagerError::VirtioActivate)?;
3913         }
3914         Ok(())
3915     }
3916 
3917     pub fn notify_hotplug(
3918         &self,
3919         _notification_type: AcpiNotificationFlags,
3920     ) -> DeviceManagerResult<()> {
3921         return self
3922             .ged_notification_device
3923             .as_ref()
3924             .unwrap()
3925             .lock()
3926             .unwrap()
3927             .notify(_notification_type)
3928             .map_err(DeviceManagerError::HotPlugNotification);
3929     }
3930 
3931     pub fn add_device(
3932         &mut self,
3933         device_cfg: &mut DeviceConfig,
3934     ) -> DeviceManagerResult<PciDeviceInfo> {
3935         self.validate_identifier(&device_cfg.id)?;
3936 
3937         if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
3938             return Err(DeviceManagerError::InvalidIommuHotplug);
3939         }
3940 
3941         let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
3942 
3943         // Update the PCIU bitmap
3944         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3945 
3946         Ok(PciDeviceInfo {
3947             id: device_name,
3948             bdf,
3949         })
3950     }
3951 
3952     pub fn add_user_device(
3953         &mut self,
3954         device_cfg: &mut UserDeviceConfig,
3955     ) -> DeviceManagerResult<PciDeviceInfo> {
3956         self.validate_identifier(&device_cfg.id)?;
3957 
3958         let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
3959 
3960         // Update the PCIU bitmap
3961         self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
3962 
3963         Ok(PciDeviceInfo {
3964             id: device_name,
3965             bdf,
3966         })
3967     }
3968 
3969     pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
3970         // The node can be directly a PCI node in case the 'id' refers to a
3971         // VFIO device or a virtio-pci one.
3972         // In case the 'id' refers to a virtio device, we must find the PCI
3973         // node by looking at the parent.
3974         let device_tree = self.device_tree.lock().unwrap();
3975         let node = device_tree
3976             .get(&id)
3977             .ok_or(DeviceManagerError::UnknownDeviceId(id))?;
3978 
3979         let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
3980             node
3981         } else {
3982             let parent = node
3983                 .parent
3984                 .as_ref()
3985                 .ok_or(DeviceManagerError::MissingNode)?;
3986             device_tree
3987                 .get(parent)
3988                 .ok_or(DeviceManagerError::MissingNode)?
3989         };
3990 
3991         let pci_device_bdf: PciBdf = pci_device_node
3992             .pci_bdf
3993             .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
3994         let pci_segment_id = pci_device_bdf.segment();
3995 
3996         let pci_device_handle = pci_device_node
3997             .pci_device_handle
3998             .as_ref()
3999             .ok_or(DeviceManagerError::MissingPciDevice)?;
4000         #[allow(irrefutable_let_patterns)]
4001         if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
4002             let device_type = VirtioDeviceType::from(
4003                 virtio_pci_device
4004                     .lock()
4005                     .unwrap()
4006                     .virtio_device()
4007                     .lock()
4008                     .unwrap()
4009                     .device_type(),
4010             );
4011             match device_type {
4012                 VirtioDeviceType::Net
4013                 | VirtioDeviceType::Block
4014                 | VirtioDeviceType::Pmem
4015                 | VirtioDeviceType::Fs
4016                 | VirtioDeviceType::Vsock => {}
4017                 _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
4018             }
4019         }
4020 
4021         // Update the PCID bitmap
4022         self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();
4023 
4024         Ok(())
4025     }
4026 
4027     pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
4028         info!(
4029             "Ejecting device_id = {} on segment_id={}",
4030             device_id, pci_segment_id
4031         );
4032 
4033         // Convert the device ID into the corresponding b/d/f.
4034         let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);
4035 
4036         // Give the PCI device ID back to the PCI bus.
4037         self.pci_segments[pci_segment_id as usize]
4038             .pci_bus
4039             .lock()
4040             .unwrap()
4041             .put_device_id(device_id as usize)
4042             .map_err(DeviceManagerError::PutPciDeviceId)?;
4043 
4044         // Remove the device from the device tree along with its children.
4045         let mut device_tree = self.device_tree.lock().unwrap();
4046         let pci_device_node = device_tree
4047             .remove_node_by_pci_bdf(pci_device_bdf)
4048             .ok_or(DeviceManagerError::MissingPciDevice)?;
4049 
4050         // For VFIO and vfio-user the PCI device id is the id.
4051         // For virtio we overwrite it later as we want the id of the
4052         // underlying device.
4053         let mut id = pci_device_node.id;
4054         let pci_device_handle = pci_device_node
4055             .pci_device_handle
4056             .ok_or(DeviceManagerError::MissingPciDevice)?;
4057         if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
4058             // The virtio-pci device has a single child
4059             if !pci_device_node.children.is_empty() {
4060                 assert_eq!(pci_device_node.children.len(), 1);
4061                 let child_id = &pci_device_node.children[0];
4062                 id = child_id.clone();
4063             }
4064         }
4065         for child in pci_device_node.children.iter() {
4066             device_tree.remove(child);
4067         }
4068 
4069         let mut iommu_attached = false;
4070         if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
4071             if iommu_attached_devices.contains(&pci_device_bdf) {
4072                 iommu_attached = true;
4073             }
4074         }
4075 
4076         let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
4077             // No need to remove any virtio-mem mapping here as the container outlives all devices
4078             PciDeviceHandle::Vfio(vfio_pci_device) => (
4079                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
4080                 Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn BusDevice>>,
4081                 None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
4082                 false,
4083             ),
4084             PciDeviceHandle::Virtio(virtio_pci_device) => {
4085                 let dev = virtio_pci_device.lock().unwrap();
4086                 let bar_addr = dev.config_bar_addr();
4087                 for (event, addr) in dev.ioeventfds(bar_addr) {
4088                     let io_addr = IoEventAddress::Mmio(addr);
4089                     self.address_manager
4090                         .vm
4091                         .unregister_ioevent(event, &io_addr)
4092                         .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
4093                 }
4094 
4095                 if let Some(dma_handler) = dev.dma_handler() {
4096                     if !iommu_attached {
4097                         for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
4098                             for region in zone.regions() {
4099                                 let iova = region.start_addr().0;
4100                                 let size = region.len();
4101                                 dma_handler
4102                                     .unmap(iova, size)
4103                                     .map_err(DeviceManagerError::VirtioDmaUnmap)?;
4104                             }
4105                         }
4106                     }
4107                 }
4108 
4109                 (
4110                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
4111                     Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn BusDevice>>,
4112                     Some(dev.virtio_device()),
4113                     dev.dma_handler().is_some() && !iommu_attached,
4114                 )
4115             }
4116             PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
4117                 let mut dev = vfio_user_pci_device.lock().unwrap();
4118                 for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
4119                     for region in zone.regions() {
4120                         dev.dma_unmap(region)
4121                             .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
4122                     }
4123                 }
4124 
4125                 (
4126                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
4127                     Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn BusDevice>>,
4128                     None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
4129                     true,
4130                 )
4131             }
4132         };
4133 
4134         if remove_dma_handler {
4135             for virtio_mem_device in self.virtio_mem_devices.iter() {
4136                 virtio_mem_device
4137                     .lock()
4138                     .unwrap()
4139                     .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
4140                         pci_device_bdf.into(),
4141                     ))
4142                     .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
4143             }
4144         }
4145 
4146         // Free the allocated BARs
4147         pci_device
4148             .lock()
4149             .unwrap()
4150             .free_bars(
4151                 &mut self.address_manager.allocator.lock().unwrap(),
4152                 &mut self.pci_segments[pci_segment_id as usize]
4153                     .mem32_allocator
4154                     .lock()
4155                     .unwrap(),
4156                 &mut self.pci_segments[pci_segment_id as usize]
4157                     .mem64_allocator
4158                     .lock()
4159                     .unwrap(),
4160             )
4161             .map_err(DeviceManagerError::FreePciBars)?;
4162 
4163         // Remove the device from the PCI bus
4164         self.pci_segments[pci_segment_id as usize]
4165             .pci_bus
4166             .lock()
4167             .unwrap()
4168             .remove_by_device(&pci_device)
4169             .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;
4170 
4171         #[cfg(target_arch = "x86_64")]
4172         // Remove the device from the IO bus
4173         self.io_bus()
4174             .remove_by_device(&bus_device)
4175             .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;
4176 
4177         // Remove the device from the MMIO bus
4178         self.mmio_bus()
4179             .remove_by_device(&bus_device)
4180             .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;
4181 
4182         // Remove the device from the list of BusDevice held by the
4183         // DeviceManager.
4184         self.bus_devices
4185             .retain(|dev| !Arc::ptr_eq(dev, &bus_device));
4186 
4187         // Shutdown and remove the underlying virtio-device if present
4188         if let Some(virtio_device) = virtio_device {
4189             for mapping in virtio_device.lock().unwrap().userspace_mappings() {
4190                 self.memory_manager
4191                     .lock()
4192                     .unwrap()
4193                     .remove_userspace_mapping(
4194                         mapping.addr.raw_value(),
4195                         mapping.len,
4196                         mapping.host_addr,
4197                         mapping.mergeable,
4198                         mapping.mem_slot,
4199                     )
4200                     .map_err(DeviceManagerError::MemoryManager)?;
4201             }
4202 
4203             virtio_device.lock().unwrap().shutdown();
4204 
4205             self.virtio_devices
4206                 .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
4207         }
4208 
4209         event!(
4210             "vm",
4211             "device-removed",
4212             "id",
4213             &id,
4214             "bdf",
4215             pci_device_bdf.to_string()
4216         );
4217 
4218         // At this point, the device has been removed from all the list and
4219         // buses where it was stored. At the end of this function, after
4220         // any_device, bus_device and pci_device are released, the actual
4221         // device will be dropped.
4222         Ok(())
4223     }
4224 
4225     fn hotplug_virtio_pci_device(
4226         &mut self,
4227         handle: MetaVirtioDevice,
4228     ) -> DeviceManagerResult<PciDeviceInfo> {
4229         // Add the virtio device to the device manager list. This is important
4230         // as the list is used to notify virtio devices about memory updates
4231         // for instance.
4232         self.virtio_devices.push(handle.clone());
4233 
4234         let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
4235             self.iommu_mapping.clone()
4236         } else {
4237             None
4238         };
4239 
4240         let bdf = self.add_virtio_pci_device(
4241             handle.virtio_device,
4242             &mapping,
4243             handle.id.clone(),
4244             handle.pci_segment,
4245             handle.dma_handler,
4246         )?;
4247 
4248         // Update the PCIU bitmap
4249         self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4250 
4251         Ok(PciDeviceInfo { id: handle.id, bdf })
4252     }
4253 
4254     fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
4255         self.config
4256             .lock()
4257             .as_ref()
4258             .unwrap()
4259             .platform
4260             .as_ref()
4261             .map(|pc| {
4262                 pc.iommu_segments
4263                     .as_ref()
4264                     .map(|v| v.contains(&pci_segment_id))
4265                     .unwrap_or_default()
4266             })
4267             .unwrap_or_default()
4268     }
4269 
4270     pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
4271         self.validate_identifier(&disk_cfg.id)?;
4272 
4273         if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
4274             return Err(DeviceManagerError::InvalidIommuHotplug);
4275         }
4276 
4277         let device = self.make_virtio_block_device(disk_cfg)?;
4278         self.hotplug_virtio_pci_device(device)
4279     }
4280 
4281     pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
4282         self.validate_identifier(&fs_cfg.id)?;
4283 
4284         let device = self.make_virtio_fs_device(fs_cfg)?;
4285         self.hotplug_virtio_pci_device(device)
4286     }
4287 
4288     pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
4289         self.validate_identifier(&pmem_cfg.id)?;
4290 
4291         if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
4292             return Err(DeviceManagerError::InvalidIommuHotplug);
4293         }
4294 
4295         let device = self.make_virtio_pmem_device(pmem_cfg)?;
4296         self.hotplug_virtio_pci_device(device)
4297     }
4298 
4299     pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
4300         self.validate_identifier(&net_cfg.id)?;
4301 
4302         if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
4303             return Err(DeviceManagerError::InvalidIommuHotplug);
4304         }
4305 
4306         let device = self.make_virtio_net_device(net_cfg)?;
4307         self.hotplug_virtio_pci_device(device)
4308     }
4309 
4310     pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
4311         self.validate_identifier(&vdpa_cfg.id)?;
4312 
4313         if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
4314             return Err(DeviceManagerError::InvalidIommuHotplug);
4315         }
4316 
4317         let device = self.make_vdpa_device(vdpa_cfg)?;
4318         self.hotplug_virtio_pci_device(device)
4319     }
4320 
4321     pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4322         self.validate_identifier(&vsock_cfg.id)?;
4323 
4324         if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4325             return Err(DeviceManagerError::InvalidIommuHotplug);
4326         }
4327 
4328         let device = self.make_virtio_vsock_device(vsock_cfg)?;
4329         self.hotplug_virtio_pci_device(device)
4330     }
4331 
4332     pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4333         let mut counters = HashMap::new();
4334 
4335         for handle in &self.virtio_devices {
4336             let virtio_device = handle.virtio_device.lock().unwrap();
4337             if let Some(device_counters) = virtio_device.counters() {
4338                 counters.insert(handle.id.clone(), device_counters.clone());
4339             }
4340         }
4341 
4342         counters
4343     }
4344 
4345     pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4346         if let Some(balloon) = &self.balloon {
4347             return balloon
4348                 .lock()
4349                 .unwrap()
4350                 .resize(size)
4351                 .map_err(DeviceManagerError::VirtioBalloonResize);
4352         }
4353 
4354         warn!("No balloon setup: Can't resize the balloon");
4355         Err(DeviceManagerError::MissingVirtioBalloon)
4356     }
4357 
4358     pub fn balloon_size(&self) -> u64 {
4359         if let Some(balloon) = &self.balloon {
4360             return balloon.lock().unwrap().get_actual();
4361         }
4362 
4363         0
4364     }
4365 
4366     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4367         self.device_tree.clone()
4368     }
4369 
4370     #[cfg(target_arch = "x86_64")]
4371     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4372         self.ged_notification_device
4373             .as_ref()
4374             .unwrap()
4375             .lock()
4376             .unwrap()
4377             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4378             .map_err(DeviceManagerError::PowerButtonNotification)
4379     }
4380 
4381     #[cfg(target_arch = "aarch64")]
4382     pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4383         // There are two use cases:
4384         // 1. Users will use direct kernel boot with device tree.
4385         // 2. Users will use ACPI+UEFI boot.
4386 
4387         // Trigger a GPIO pin 3 event to satisfy use case 1.
4388         self.gpio_device
4389             .as_ref()
4390             .unwrap()
4391             .lock()
4392             .unwrap()
4393             .trigger_key(3)
4394             .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4395         // Trigger a GED power button event to satisfy use case 2.
4396         return self
4397             .ged_notification_device
4398             .as_ref()
4399             .unwrap()
4400             .lock()
4401             .unwrap()
4402             .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4403             .map_err(DeviceManagerError::PowerButtonNotification);
4404     }
4405 
4406     pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
4407         &self.iommu_attached_devices
4408     }
4409 
4410     fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4411         if let Some(id) = id {
4412             if id.starts_with("__") {
4413                 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4414             }
4415 
4416             if self.device_tree.lock().unwrap().contains_key(id) {
4417                 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4418             }
4419         }
4420 
4421         Ok(())
4422     }
4423 
4424     pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
4425         &self.acpi_platform_addresses
4426     }
4427 }
4428 
4429 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4430     for (numa_node_id, numa_node) in numa_nodes.iter() {
4431         if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4432             return Some(*numa_node_id);
4433         }
4434     }
4435 
4436     None
4437 }
4438 
4439 fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
4440     for (numa_node_id, numa_node) in numa_nodes.iter() {
4441         if numa_node.pci_segments.contains(&pci_segment_id) {
4442             return *numa_node_id;
4443         }
4444     }
4445 
4446     0
4447 }
4448 
4449 struct TpmDevice {}
4450 
4451 impl Aml for TpmDevice {
4452     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4453         aml::Device::new(
4454             "TPM2".into(),
4455             vec![
4456                 &aml::Name::new("_HID".into(), &"MSFT0101"),
4457                 &aml::Name::new("_STA".into(), &(0xF_usize)),
4458                 &aml::Name::new(
4459                     "_CRS".into(),
4460                     &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
4461                         true,
4462                         layout::TPM_START.0 as u32,
4463                         layout::TPM_SIZE as u32,
4464                     )]),
4465                 ),
4466             ],
4467         )
4468         .to_aml_bytes(sink)
4469     }
4470 }
4471 
4472 impl Aml for DeviceManager {
4473     fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
4474         #[cfg(target_arch = "aarch64")]
4475         use arch::aarch64::DeviceInfoForFdt;
4476 
4477         let mut pci_scan_methods = Vec::new();
4478         for i in 0..self.pci_segments.len() {
4479             pci_scan_methods.push(aml::MethodCall::new(
4480                 format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
4481                 vec![],
4482             ));
4483         }
4484         let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
4485         for method in &pci_scan_methods {
4486             pci_scan_inner.push(method)
4487         }
4488 
4489         // PCI hotplug controller
4490         aml::Device::new(
4491             "_SB_.PHPR".into(),
4492             vec![
4493                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
4494                 &aml::Name::new("_STA".into(), &0x0bu8),
4495                 &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
4496                 &aml::Mutex::new("BLCK".into(), 0),
4497                 &aml::Name::new(
4498                     "_CRS".into(),
4499                     &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
4500                         aml::AddressSpaceCacheable::NotCacheable,
4501                         true,
4502                         self.acpi_address.0,
4503                         self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
4504                         None,
4505                     )]),
4506                 ),
4507                 // OpRegion and Fields map MMIO range into individual field values
4508                 &aml::OpRegion::new(
4509                     "PCST".into(),
4510                     aml::OpRegionSpace::SystemMemory,
4511                     &(self.acpi_address.0 as usize),
4512                     &DEVICE_MANAGER_ACPI_SIZE,
4513                 ),
4514                 &aml::Field::new(
4515                     "PCST".into(),
4516                     aml::FieldAccessType::DWord,
4517                     aml::FieldLockRule::NoLock,
4518                     aml::FieldUpdateRule::WriteAsZeroes,
4519                     vec![
4520                         aml::FieldEntry::Named(*b"PCIU", 32),
4521                         aml::FieldEntry::Named(*b"PCID", 32),
4522                         aml::FieldEntry::Named(*b"B0EJ", 32),
4523                         aml::FieldEntry::Named(*b"PSEG", 32),
4524                     ],
4525                 ),
4526                 &aml::Method::new(
4527                     "PCEJ".into(),
4528                     2,
4529                     true,
4530                     vec![
4531                         // Take lock defined above
4532                         &aml::Acquire::new("BLCK".into(), 0xffff),
4533                         // Choose the current segment
4534                         &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
4535                         // Write PCI bus number (in first argument) to I/O port via field
4536                         &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
4537                         // Release lock
4538                         &aml::Release::new("BLCK".into()),
4539                         // Return 0
4540                         &aml::Return::new(&aml::ZERO),
4541                     ],
4542                 ),
4543                 &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
4544             ],
4545         )
4546         .to_aml_bytes(sink);
4547 
4548         for segment in &self.pci_segments {
4549             segment.to_aml_bytes(sink);
4550         }
4551 
4552         let mut mbrd_memory = Vec::new();
4553 
4554         for segment in &self.pci_segments {
4555             mbrd_memory.push(aml::Memory32Fixed::new(
4556                 true,
4557                 segment.mmio_config_address as u32,
4558                 layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
4559             ))
4560         }
4561 
4562         let mut mbrd_memory_refs = Vec::new();
4563         for mbrd_memory_ref in &mbrd_memory {
4564             mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
4565         }
4566 
4567         aml::Device::new(
4568             "_SB_.MBRD".into(),
4569             vec![
4570                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
4571                 &aml::Name::new("_UID".into(), &aml::ZERO),
4572                 &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
4573             ],
4574         )
4575         .to_aml_bytes(sink);
4576 
4577         // Serial device
4578         #[cfg(target_arch = "x86_64")]
4579         let serial_irq = 4;
4580         #[cfg(target_arch = "aarch64")]
4581         let serial_irq =
4582             if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
4583                 self.get_device_info()
4584                     .clone()
4585                     .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
4586                     .unwrap()
4587                     .irq()
4588             } else {
4589                 // If serial is turned off, add a fake device with invalid irq.
4590                 31
4591             };
4592         if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
4593             aml::Device::new(
4594                 "_SB_.COM1".into(),
4595                 vec![
4596                     &aml::Name::new(
4597                         "_HID".into(),
4598                         #[cfg(target_arch = "x86_64")]
4599                         &aml::EISAName::new("PNP0501"),
4600                         #[cfg(target_arch = "aarch64")]
4601                         &"ARMH0011",
4602                     ),
4603                     &aml::Name::new("_UID".into(), &aml::ZERO),
4604                     &aml::Name::new("_DDN".into(), &"COM1"),
4605                     &aml::Name::new(
4606                         "_CRS".into(),
4607                         &aml::ResourceTemplate::new(vec![
4608                             &aml::Interrupt::new(true, true, false, false, serial_irq),
4609                             #[cfg(target_arch = "x86_64")]
4610                             &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
4611                             #[cfg(target_arch = "aarch64")]
4612                             &aml::Memory32Fixed::new(
4613                                 true,
4614                                 arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
4615                                 MMIO_LEN as u32,
4616                             ),
4617                         ]),
4618                     ),
4619                 ],
4620             )
4621             .to_aml_bytes(sink);
4622         }
4623 
4624         aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);
4625 
4626         aml::Device::new(
4627             "_SB_.PWRB".into(),
4628             vec![
4629                 &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
4630                 &aml::Name::new("_UID".into(), &aml::ZERO),
4631             ],
4632         )
4633         .to_aml_bytes(sink);
4634 
4635         if self.config.lock().unwrap().tpm.is_some() {
4636             // Add tpm device
4637             TpmDevice {}.to_aml_bytes(sink);
4638         }
4639 
4640         self.ged_notification_device
4641             .as_ref()
4642             .unwrap()
4643             .lock()
4644             .unwrap()
4645             .to_aml_bytes(sink)
4646     }
4647 }
4648 
4649 impl Pausable for DeviceManager {
4650     fn pause(&mut self) -> result::Result<(), MigratableError> {
4651         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4652             if let Some(migratable) = &device_node.migratable {
4653                 migratable.lock().unwrap().pause()?;
4654             }
4655         }
4656         // On AArch64, the pause of device manager needs to trigger
4657         // a "pause" of GIC, which will flush the GIC pending tables
4658         // and ITS tables to guest RAM.
4659         #[cfg(target_arch = "aarch64")]
4660         {
4661             self.get_interrupt_controller()
4662                 .unwrap()
4663                 .lock()
4664                 .unwrap()
4665                 .pause()?;
4666         };
4667 
4668         Ok(())
4669     }
4670 
4671     fn resume(&mut self) -> result::Result<(), MigratableError> {
4672         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4673             if let Some(migratable) = &device_node.migratable {
4674                 migratable.lock().unwrap().resume()?;
4675             }
4676         }
4677 
4678         Ok(())
4679     }
4680 }
4681 
4682 impl Snapshottable for DeviceManager {
4683     fn id(&self) -> String {
4684         DEVICE_MANAGER_SNAPSHOT_ID.to_string()
4685     }
4686 
4687     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
4688         let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);
4689 
4690         // We aggregate all devices snapshots.
4691         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4692             if let Some(migratable) = &device_node.migratable {
4693                 let mut migratable = migratable.lock().unwrap();
4694                 snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
4695             }
4696         }
4697 
4698         Ok(snapshot)
4699     }
4700 }
4701 
4702 impl Transportable for DeviceManager {}
4703 
4704 impl Migratable for DeviceManager {
4705     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4706         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4707             if let Some(migratable) = &device_node.migratable {
4708                 migratable.lock().unwrap().start_dirty_log()?;
4709             }
4710         }
4711         Ok(())
4712     }
4713 
4714     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
4715         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4716             if let Some(migratable) = &device_node.migratable {
4717                 migratable.lock().unwrap().stop_dirty_log()?;
4718             }
4719         }
4720         Ok(())
4721     }
4722 
4723     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
4724         let mut tables = Vec::new();
4725         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4726             if let Some(migratable) = &device_node.migratable {
4727                 tables.push(migratable.lock().unwrap().dirty_log()?);
4728             }
4729         }
4730         Ok(MemoryRangeTable::new_from_tables(tables))
4731     }
4732 
4733     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
4734         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4735             if let Some(migratable) = &device_node.migratable {
4736                 migratable.lock().unwrap().start_migration()?;
4737             }
4738         }
4739         Ok(())
4740     }
4741 
4742     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
4743         for (_, device_node) in self.device_tree.lock().unwrap().iter() {
4744             if let Some(migratable) = &device_node.migratable {
4745                 migratable.lock().unwrap().complete_migration()?;
4746             }
4747         }
4748         Ok(())
4749     }
4750 }
4751 
4752 const PCIU_FIELD_OFFSET: u64 = 0;
4753 const PCID_FIELD_OFFSET: u64 = 4;
4754 const B0EJ_FIELD_OFFSET: u64 = 8;
4755 const PSEG_FIELD_OFFSET: u64 = 12;
4756 const PCIU_FIELD_SIZE: usize = 4;
4757 const PCID_FIELD_SIZE: usize = 4;
4758 const B0EJ_FIELD_SIZE: usize = 4;
4759 const PSEG_FIELD_SIZE: usize = 4;
4760 
4761 impl BusDevice for DeviceManager {
4762     fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
4763         match offset {
4764             PCIU_FIELD_OFFSET => {
4765                 assert!(data.len() == PCIU_FIELD_SIZE);
4766                 data.copy_from_slice(
4767                     &self.pci_segments[self.selected_segment]
4768                         .pci_devices_up
4769                         .to_le_bytes(),
4770                 );
4771                 // Clear the PCIU bitmap
4772                 self.pci_segments[self.selected_segment].pci_devices_up = 0;
4773             }
4774             PCID_FIELD_OFFSET => {
4775                 assert!(data.len() == PCID_FIELD_SIZE);
4776                 data.copy_from_slice(
4777                     &self.pci_segments[self.selected_segment]
4778                         .pci_devices_down
4779                         .to_le_bytes(),
4780                 );
4781                 // Clear the PCID bitmap
4782                 self.pci_segments[self.selected_segment].pci_devices_down = 0;
4783             }
4784             B0EJ_FIELD_OFFSET => {
4785                 assert!(data.len() == B0EJ_FIELD_SIZE);
4786                 // Always return an empty bitmap since the eject is always
4787                 // taken care of right away during a write access.
4788                 data.fill(0);
4789             }
4790             PSEG_FIELD_OFFSET => {
4791                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4792                 data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
4793             }
4794             _ => error!(
4795                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4796                 base, offset
4797             ),
4798         }
4799 
4800         debug!(
4801             "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
4802             base, offset, data
4803         )
4804     }
4805 
4806     fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
4807         match offset {
4808             B0EJ_FIELD_OFFSET => {
4809                 assert!(data.len() == B0EJ_FIELD_SIZE);
4810                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4811                 data_array.copy_from_slice(data);
4812                 let mut slot_bitmap = u32::from_le_bytes(data_array);
4813 
4814                 while slot_bitmap > 0 {
4815                     let slot_id = slot_bitmap.trailing_zeros();
4816                     if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
4817                         error!("Failed ejecting device {}: {:?}", slot_id, e);
4818                     }
4819                     slot_bitmap &= !(1 << slot_id);
4820                 }
4821             }
4822             PSEG_FIELD_OFFSET => {
4823                 assert_eq!(data.len(), PSEG_FIELD_SIZE);
4824                 let mut data_array: [u8; 4] = [0, 0, 0, 0];
4825                 data_array.copy_from_slice(data);
4826                 let selected_segment = u32::from_le_bytes(data_array) as usize;
4827                 if selected_segment >= self.pci_segments.len() {
4828                     error!(
4829                         "Segment selection out of range: {} >= {}",
4830                         selected_segment,
4831                         self.pci_segments.len()
4832                     );
4833                     return None;
4834                 }
4835                 self.selected_segment = selected_segment;
4836             }
4837             _ => error!(
4838                 "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
4839                 base, offset
4840             ),
4841         }
4842 
4843         debug!(
4844             "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
4845             base, offset, data
4846         );
4847 
4848         None
4849     }
4850 }
4851 
4852 impl Drop for DeviceManager {
4853     fn drop(&mut self) {
4854         for handle in self.virtio_devices.drain(..) {
4855             handle.virtio_device.lock().unwrap().shutdown();
4856         }
4857 
4858         if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
4859             // SAFETY: FFI call
4860             let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
4861         }
4862     }
4863 }
4864