1 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 //
3 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE-BSD-3-Clause file.
6 //
7 // Copyright © 2019 Intel Corporation
8 //
9 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
10 //
11
12 use std::collections::{BTreeMap, BTreeSet, HashMap};
13 use std::fs::{File, OpenOptions};
14 use std::io::{self, stdout, IsTerminal, Seek, SeekFrom};
15 use std::num::Wrapping;
16 use std::os::unix::fs::OpenOptionsExt;
17 use std::os::unix::io::{AsRawFd, FromRawFd};
18 use std::path::PathBuf;
19 use std::result;
20 use std::sync::{Arc, Mutex};
21 #[cfg(not(target_arch = "riscv64"))]
22 use std::time::Instant;
23
24 use acpi_tables::sdt::GenericAddress;
25 #[cfg(not(target_arch = "riscv64"))]
26 use acpi_tables::{aml, Aml};
27 #[cfg(not(target_arch = "riscv64"))]
28 use anyhow::anyhow;
29 #[cfg(target_arch = "x86_64")]
30 use arch::layout::{APIC_START, IOAPIC_SIZE, IOAPIC_START};
31 use arch::{layout, NumaNodes};
32 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
33 use arch::{DeviceType, MmioDeviceInfo};
34 use block::async_io::DiskFile;
35 use block::fixed_vhd_sync::FixedVhdDiskSync;
36 use block::qcow_sync::QcowDiskSync;
37 use block::raw_async_aio::RawFileDiskAio;
38 use block::raw_sync::RawFileDiskSync;
39 use block::vhdx_sync::VhdxDiskSync;
40 use block::{
41 block_aio_is_supported, block_io_uring_is_supported, detect_image_type, qcow, vhdx, ImageType,
42 };
43 #[cfg(feature = "io_uring")]
44 use block::{fixed_vhd_async::FixedVhdDiskAsync, raw_async::RawFileDisk};
45 #[cfg(target_arch = "riscv64")]
46 use devices::aia;
47 #[cfg(target_arch = "x86_64")]
48 use devices::debug_console;
49 #[cfg(target_arch = "x86_64")]
50 use devices::debug_console::DebugConsole;
51 #[cfg(target_arch = "aarch64")]
52 use devices::gic;
53 use devices::interrupt_controller::InterruptController;
54 #[cfg(target_arch = "x86_64")]
55 use devices::ioapic;
56 #[cfg(target_arch = "aarch64")]
57 use devices::legacy::Pl011;
58 #[cfg(any(target_arch = "x86_64", target_arch = "riscv64"))]
59 use devices::legacy::Serial;
60 #[cfg(feature = "pvmemcontrol")]
61 use devices::pvmemcontrol::{PvmemcontrolBusDevice, PvmemcontrolPciDevice};
62 use devices::{interrupt_controller, AcpiNotificationFlags};
63 #[cfg(target_arch = "aarch64")]
64 use hypervisor::arch::aarch64::regs::AARCH64_PMU_IRQ;
65 use hypervisor::IoEventAddress;
66 use libc::{
67 tcsetattr, termios, MAP_NORESERVE, MAP_PRIVATE, MAP_SHARED, O_TMPFILE, PROT_READ, PROT_WRITE,
68 TCSANOW,
69 };
70 use pci::{
71 DeviceRelocation, MmioRegion, PciBarRegionType, PciBdf, PciDevice, VfioDmaMapping,
72 VfioPciDevice, VfioUserDmaMapping, VfioUserPciDevice, VfioUserPciDeviceError,
73 };
74 use rate_limiter::group::RateLimiterGroup;
75 use seccompiler::SeccompAction;
76 use serde::{Deserialize, Serialize};
77 use thiserror::Error;
78 use tracer::trace_scoped;
79 use vfio_ioctls::{VfioContainer, VfioDevice, VfioDeviceFd};
80 use virtio_devices::transport::{VirtioPciDevice, VirtioPciDeviceActivator, VirtioTransport};
81 use virtio_devices::vhost_user::VhostUserConfig;
82 use virtio_devices::{
83 AccessPlatformMapping, ActivateError, Block, Endpoint, IommuMapping, VdpaDmaMapping,
84 VirtioMemMappingSource,
85 };
86 use vm_allocator::{AddressAllocator, SystemAllocator};
87 use vm_device::dma_mapping::ExternalDmaMapping;
88 use vm_device::interrupt::{
89 InterruptIndex, InterruptManager, LegacyIrqGroupConfig, MsiIrqGroupConfig,
90 };
91 use vm_device::{Bus, BusDevice, BusDeviceSync, Resource};
92 use vm_memory::guest_memory::FileOffset;
93 use vm_memory::{Address, GuestAddress, GuestMemoryRegion, GuestUsize, MmapRegion};
94 #[cfg(target_arch = "x86_64")]
95 use vm_memory::{GuestAddressSpace, GuestMemory};
96 use vm_migration::protocol::MemoryRangeTable;
97 use vm_migration::{
98 snapshot_from_id, state_from_id, Migratable, MigratableError, Pausable, Snapshot, SnapshotData,
99 Snapshottable, Transportable,
100 };
101 use vm_virtio::{AccessPlatform, VirtioDeviceType};
102 use vmm_sys_util::eventfd::EventFd;
103
104 use crate::console_devices::{ConsoleDeviceError, ConsoleInfo, ConsoleOutput};
105 use crate::cpu::{CpuManager, CPU_MANAGER_ACPI_SIZE};
106 use crate::device_tree::{DeviceNode, DeviceTree};
107 use crate::interrupt::{LegacyUserspaceInterruptManager, MsiInterruptManager};
108 use crate::memory_manager::{Error as MemoryManagerError, MemoryManager, MEMORY_MANAGER_ACPI_SIZE};
109 use crate::pci_segment::PciSegment;
110 use crate::serial_manager::{Error as SerialManagerError, SerialManager};
111 use crate::vm_config::{
112 ConsoleOutputMode, DeviceConfig, DiskConfig, FsConfig, NetConfig, PmemConfig, UserDeviceConfig,
113 VdpaConfig, VhostMode, VmConfig, VsockConfig, DEFAULT_IOMMU_ADDRESS_WIDTH_BITS,
114 DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT,
115 };
116 use crate::{device_node, GuestRegionMmap, PciDeviceInfo, DEVICE_MANAGER_SNAPSHOT_ID};
117
// Size of the MMIO window reserved per MMIO device on aarch64/riscv64.
#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
const MMIO_LEN: u64 = 0x1000;

// Singleton devices / devices the user cannot name.
// The double underscore prefix keeps these identifiers out of the
// user-assignable namespace.
#[cfg(target_arch = "x86_64")]
const IOAPIC_DEVICE_NAME: &str = "__ioapic";
const SERIAL_DEVICE_NAME: &str = "__serial";
#[cfg(target_arch = "x86_64")]
const DEBUGCON_DEVICE_NAME: &str = "__debug_console";
#[cfg(target_arch = "aarch64")]
const GPIO_DEVICE_NAME: &str = "__gpio";
const RNG_DEVICE_NAME: &str = "__rng";
const IOMMU_DEVICE_NAME: &str = "__iommu";
#[cfg(feature = "pvmemcontrol")]
const PVMEMCONTROL_DEVICE_NAME: &str = "__pvmemcontrol";
const BALLOON_DEVICE_NAME: &str = "__balloon";
const CONSOLE_DEVICE_NAME: &str = "__console";
const PVPANIC_DEVICE_NAME: &str = "__pvpanic";

// Devices that the user may name and for which we generate
// identifiers if the user doesn't give one.
// A numeric suffix is appended to the prefix when auto-generating.
const DISK_DEVICE_NAME_PREFIX: &str = "_disk";
const FS_DEVICE_NAME_PREFIX: &str = "_fs";
const NET_DEVICE_NAME_PREFIX: &str = "_net";
const PMEM_DEVICE_NAME_PREFIX: &str = "_pmem";
const VDPA_DEVICE_NAME_PREFIX: &str = "_vdpa";
const VSOCK_DEVICE_NAME_PREFIX: &str = "_vsock";
const WATCHDOG_DEVICE_NAME: &str = "__watchdog";
const VFIO_DEVICE_NAME_PREFIX: &str = "_vfio";
const VFIO_USER_DEVICE_NAME_PREFIX: &str = "_vfio_user";
const VIRTIO_PCI_DEVICE_NAME_PREFIX: &str = "_virtio-pci";
149
150 /// Errors associated with device manager
151 #[derive(Error, Debug)]
152 pub enum DeviceManagerError {
153 /// Cannot create EventFd.
154 #[error("Cannot create EventFd")]
155 EventFd(#[source] io::Error),
156
157 /// Cannot open disk path
158 #[error("Cannot open disk path")]
159 Disk(#[source] io::Error),
160
161 /// Cannot create vhost-user-net device
162 #[error("Cannot create vhost-user-net device")]
163 CreateVhostUserNet(#[source] virtio_devices::vhost_user::Error),
164
165 /// Cannot create virtio-blk device
166 #[error("Cannot create virtio-blk device")]
167 CreateVirtioBlock(#[source] io::Error),
168
169 /// Cannot create virtio-net device
170 #[error("Cannot create virtio-net device")]
171 CreateVirtioNet(#[source] virtio_devices::net::Error),
172
173 /// Cannot create virtio-console device
174 #[error("Cannot create virtio-console device")]
175 CreateVirtioConsole(#[source] io::Error),
176
177 /// Cannot create virtio-rng device
178 #[error("Cannot create virtio-rng device")]
179 CreateVirtioRng(#[source] io::Error),
180
181 /// Cannot create virtio-fs device
182 #[error("Cannot create virtio-fs device")]
183 CreateVirtioFs(#[source] virtio_devices::vhost_user::Error),
184
185 /// Virtio-fs device was created without a socket.
186 #[error("Virtio-fs device was created without a socket")]
187 NoVirtioFsSock,
188
189 /// Cannot create vhost-user-blk device
190 #[error("Cannot create vhost-user-blk device")]
191 CreateVhostUserBlk(#[source] virtio_devices::vhost_user::Error),
192
193 /// Cannot create virtio-pmem device
194 #[error("Cannot create virtio-pmem device")]
195 CreateVirtioPmem(#[source] io::Error),
196
197 /// Cannot create vDPA device
198 #[error("Cannot create vdpa device")]
199 CreateVdpa(#[source] virtio_devices::vdpa::Error),
200
201 /// Cannot create virtio-vsock device
202 #[error("Cannot create virtio-vsock device")]
203 CreateVirtioVsock(#[source] io::Error),
204
205 /// Cannot create tpm device
206 #[error("Cannot create tmp device")]
207 CreateTpmDevice(#[source] anyhow::Error),
208
209 /// Failed to convert Path to &str for the vDPA device.
210 #[error("Failed to convert Path to &str for the vDPA device")]
211 CreateVdpaConvertPath,
212
213 /// Failed to convert Path to &str for the virtio-vsock device.
214 #[error("Failed to convert Path to &str for the virtio-vsock device")]
215 CreateVsockConvertPath,
216
217 /// Cannot create virtio-vsock backend
218 #[error("Cannot create virtio-vsock backend")]
219 CreateVsockBackend(#[source] virtio_devices::vsock::VsockUnixError),
220
221 /// Cannot create virtio-iommu device
222 #[error("Cannot create virtio-iommu device")]
223 CreateVirtioIommu(#[source] io::Error),
224
225 /// Cannot create virtio-balloon device
226 #[error("Cannot create virtio-balloon device")]
227 CreateVirtioBalloon(#[source] io::Error),
228
229 /// Cannot create pvmemcontrol device
230 #[cfg(feature = "pvmemcontrol")]
231 #[error("Cannot create pvmemcontrol device")]
232 CreatePvmemcontrol(#[source] io::Error),
233
234 /// Cannot create virtio-watchdog device
235 #[error("Cannot create virtio-watchdog device")]
236 CreateVirtioWatchdog(#[source] io::Error),
237
238 /// Failed to parse disk image format
239 #[error("Failed to parse disk image format")]
240 DetectImageType(#[source] io::Error),
241
242 /// Cannot open qcow disk path
243 #[error("Cannot open qcow disk path")]
244 QcowDeviceCreate(#[source] qcow::Error),
245
246 /// Cannot create serial manager
247 #[error("Cannot create serial manager")]
248 CreateSerialManager(#[source] SerialManagerError),
249
250 /// Cannot spawn the serial manager thread
251 #[error("Cannot spawn serial manager thread")]
252 SpawnSerialManager(#[source] SerialManagerError),
253
254 /// Cannot open tap interface
255 #[error("Cannot open tap interface")]
256 OpenTap(#[source] net_util::TapError),
257
258 /// Cannot allocate IRQ.
259 #[error("Cannot allocate IRQ")]
260 AllocateIrq,
261
262 /// Cannot configure the IRQ.
263 #[error("Cannot configure the IRQ")]
264 Irq(#[source] vmm_sys_util::errno::Error),
265
266 /// Cannot allocate PCI BARs
267 #[error("Cannot allocate PCI BARs")]
268 AllocateBars(#[source] pci::PciDeviceError),
269
270 /// Could not free the BARs associated with a PCI device.
271 #[error("Could not free the BARs associated with a PCI device")]
272 FreePciBars(#[source] pci::PciDeviceError),
273
274 /// Cannot register ioevent.
275 #[error("Cannot register ioevent")]
276 RegisterIoevent(#[source] anyhow::Error),
277
278 /// Cannot unregister ioevent.
279 #[error("Cannot unregister ioevent")]
280 UnRegisterIoevent(#[source] anyhow::Error),
281
282 /// Cannot create virtio device
283 #[error("Cannot create virtio device")]
284 VirtioDevice(#[source] virtio_devices::transport::VirtioPciDeviceError),
285
286 /// Cannot add PCI device
287 #[error("Cannot add PCI device")]
288 AddPciDevice(#[source] pci::PciRootError),
289
290 /// Cannot open persistent memory file
291 #[error("Cannot open persistent memory file")]
292 PmemFileOpen(#[source] io::Error),
293
294 /// Cannot set persistent memory file size
295 #[error("Cannot set persistent memory file size")]
296 PmemFileSetLen(#[source] io::Error),
297
298 /// Cannot find a memory range for persistent memory
299 #[error("Cannot find a memory range for persistent memory")]
300 PmemRangeAllocation,
301
302 /// Cannot find a memory range for virtio-fs
303 #[error("Cannot find a memory range for virtio-fs")]
304 FsRangeAllocation,
305
306 /// Error creating serial output file
307 #[error("Error creating serial output file")]
308 SerialOutputFileOpen(#[source] io::Error),
309
310 /// Error creating debug-console output file
311 #[cfg(target_arch = "x86_64")]
312 #[error("Error creating debug-console output file")]
313 DebugconOutputFileOpen(#[source] io::Error),
314
315 /// Error creating console output file
316 #[error("Error creating console output file")]
317 ConsoleOutputFileOpen(#[source] io::Error),
318
319 /// Error creating serial pty
320 #[error("Error creating serial pty")]
321 SerialPtyOpen(#[source] io::Error),
322
323 /// Error creating console pty
324 #[error("Error creating console pty")]
325 ConsolePtyOpen(#[source] io::Error),
326
327 /// Error creating debugcon pty
328 #[error("Error creating console pty")]
329 DebugconPtyOpen(#[source] io::Error),
330
331 /// Error setting pty raw mode
332 #[error("Error setting pty raw mode")]
333 SetPtyRaw(#[source] ConsoleDeviceError),
334
335 /// Error getting pty peer
336 #[error("Error getting pty peer")]
337 GetPtyPeer(#[source] vmm_sys_util::errno::Error),
338
339 /// Cannot create a VFIO device
340 #[error("Cannot create a VFIO device")]
341 VfioCreate(#[source] vfio_ioctls::VfioError),
342
343 /// Cannot create a VFIO PCI device
344 #[error("Cannot create a VFIO PCI device")]
345 VfioPciCreate(#[source] pci::VfioPciError),
346
347 /// Failed to map VFIO MMIO region.
348 #[error("Failed to map VFIO MMIO region")]
349 VfioMapRegion(#[source] pci::VfioPciError),
350
351 /// Failed to DMA map VFIO device.
352 #[error("Failed to DMA map VFIO device")]
353 VfioDmaMap(#[source] vfio_ioctls::VfioError),
354
355 /// Failed to DMA unmap VFIO device.
356 #[error("Failed to DMA unmap VFIO device")]
357 VfioDmaUnmap(#[source] pci::VfioPciError),
358
359 /// Failed to create the passthrough device.
360 #[error("Failed to create the passthrough device")]
361 CreatePassthroughDevice(#[source] anyhow::Error),
362
363 /// Failed to memory map.
364 #[error("Failed to memory map")]
365 Mmap(#[source] io::Error),
366
367 /// Cannot add legacy device to Bus.
368 #[error("Cannot add legacy device to Bus")]
369 BusError(#[source] vm_device::BusError),
370
371 /// Failed to allocate IO port
372 #[error("Failed to allocate IO port")]
373 AllocateIoPort,
374
375 /// Failed to allocate MMIO address
376 #[error("Failed to allocate MMIO address")]
377 AllocateMmioAddress,
378
379 /// Failed to make hotplug notification
380 #[error("Failed to make hotplug notification")]
381 HotPlugNotification(#[source] io::Error),
382
383 /// Error from a memory manager operation
384 #[error("Error from a memory manager operation")]
385 MemoryManager(#[source] MemoryManagerError),
386
387 /// Failed to create new interrupt source group.
388 #[error("Failed to create new interrupt source group")]
389 CreateInterruptGroup(#[source] io::Error),
390
391 /// Failed to update interrupt source group.
392 #[error("Failed to update interrupt source group")]
393 UpdateInterruptGroup(#[source] io::Error),
394
395 /// Failed to create interrupt controller.
396 #[error("Failed to create interrupt controller")]
397 CreateInterruptController(#[source] interrupt_controller::Error),
398
399 /// Failed to create a new MmapRegion instance.
400 #[error("Failed to create a new MmapRegion instance")]
401 NewMmapRegion(#[source] vm_memory::mmap::MmapRegionError),
402
403 /// Failed to clone a File.
404 #[error("Failed to clone a File")]
405 CloneFile(#[source] io::Error),
406
407 /// Failed to create socket file
408 #[error("Failed to create socket file")]
409 CreateSocketFile(#[source] io::Error),
410
411 /// Failed to spawn the network backend
412 #[error("Failed to spawn the network backend")]
413 SpawnNetBackend(#[source] io::Error),
414
415 /// Failed to spawn the block backend
416 #[error("Failed to spawn the block backend")]
417 SpawnBlockBackend(#[source] io::Error),
418
419 /// Missing PCI bus.
420 #[error("Missing PCI bus")]
421 NoPciBus,
422
423 /// Could not find an available device name.
424 #[error("Could not find an available device name")]
425 NoAvailableDeviceName,
426
427 /// Missing PCI device.
428 #[error("Missing PCI device")]
429 MissingPciDevice,
430
431 /// Failed to remove a PCI device from the PCI bus.
432 #[error("Failed to remove a PCI device from the PCI bus")]
433 RemoveDeviceFromPciBus(#[source] pci::PciRootError),
434
435 /// Failed to remove a bus device from the IO bus.
436 #[error("Failed to remove a bus device from the IO bus")]
437 RemoveDeviceFromIoBus(#[source] vm_device::BusError),
438
439 /// Failed to remove a bus device from the MMIO bus.
440 #[error("Failed to remove a bus device from the MMIO bus")]
441 RemoveDeviceFromMmioBus(#[source] vm_device::BusError),
442
443 /// Failed to find the device corresponding to a specific PCI b/d/f.
444 #[error("Failed to find the device corresponding to a specific PCI b/d/f")]
445 UnknownPciBdf(u32),
446
447 /// Not allowed to remove this type of device from the VM.
448 #[error("Not allowed to remove this type of device from the VM: {0}")]
449 RemovalNotAllowed(vm_virtio::VirtioDeviceType),
450
451 /// Failed to find device corresponding to the given identifier.
452 #[error("Failed to find device corresponding to the given identifier")]
453 UnknownDeviceId(String),
454
455 /// Failed to find an available PCI device ID.
456 #[error("Failed to find an available PCI device ID")]
457 NextPciDeviceId(#[source] pci::PciRootError),
458
459 /// Could not reserve the PCI device ID.
460 #[error("Could not reserve the PCI device ID")]
461 GetPciDeviceId(#[source] pci::PciRootError),
462
463 /// Could not give the PCI device ID back.
464 #[error("Could not give the PCI device ID back")]
465 PutPciDeviceId(#[source] pci::PciRootError),
466
467 /// No disk path was specified when one was expected
468 #[error("No disk path was specified when one was expected")]
469 NoDiskPath,
470
471 /// Failed to update guest memory for virtio device.
472 #[error("Failed to update guest memory for virtio device")]
473 UpdateMemoryForVirtioDevice(#[source] virtio_devices::Error),
474
475 /// Cannot create virtio-mem device
476 #[error("Cannot create virtio-mem device")]
477 CreateVirtioMem(#[source] io::Error),
478
479 /// Cannot find a memory range for virtio-mem memory
480 #[error("Cannot find a memory range for virtio-mem memory")]
481 VirtioMemRangeAllocation,
482
483 /// Failed to update guest memory for VFIO PCI device.
484 #[error("Failed to update guest memory for VFIO PCI device")]
485 UpdateMemoryForVfioPciDevice(#[source] vfio_ioctls::VfioError),
486
487 /// Trying to use a directory for pmem but no size specified
488 #[error("Trying to use a directory for pmem but no size specified")]
489 PmemWithDirectorySizeMissing,
490
491 /// Trying to use a size that is not multiple of 2MiB
492 #[error("Trying to use a size that is not multiple of 2MiB")]
493 PmemSizeNotAligned,
494
495 /// Could not find the node in the device tree.
496 #[error("Could not find the node in the device tree")]
497 MissingNode,
498
499 /// Resource was already found.
500 #[error("Resource was already found")]
501 ResourceAlreadyExists,
502
503 /// Expected resources for virtio-pmem could not be found.
504 #[error("Expected resources for virtio-pmem could not be found")]
505 MissingVirtioPmemResources,
506
507 /// Missing PCI b/d/f from the DeviceNode.
508 #[error("Missing PCI b/d/f from the DeviceNode")]
509 MissingDeviceNodePciBdf,
510
511 /// No support for device passthrough
512 #[error("No support for device passthrough")]
513 NoDevicePassthroughSupport,
514
515 /// No socket option support for console device
516 #[error("No socket option support for console device")]
517 NoSocketOptionSupportForConsoleDevice,
518
519 /// Failed to resize virtio-balloon
520 #[error("Failed to resize virtio-balloon")]
521 VirtioBalloonResize(#[source] virtio_devices::balloon::Error),
522
523 /// Missing virtio-balloon, can't proceed as expected.
524 #[error("Missing virtio-balloon, can't proceed as expected")]
525 MissingVirtioBalloon,
526
527 /// Missing virtual IOMMU device
528 #[error("Missing virtual IOMMU device")]
529 MissingVirtualIommu,
530
531 /// Failed to do power button notification
532 #[error("Failed to do power button notification")]
533 PowerButtonNotification(#[source] io::Error),
534
535 /// Failed to do AArch64 GPIO power button notification
536 #[cfg(target_arch = "aarch64")]
537 #[error("Failed to do AArch64 GPIO power button notification")]
538 AArch64PowerButtonNotification(#[source] devices::legacy::GpioDeviceError),
539
540 /// Failed to set O_DIRECT flag to file descriptor
541 #[error("Failed to set O_DIRECT flag to file descriptor")]
542 SetDirectIo,
543
544 /// Failed to create FixedVhdDiskAsync
545 #[error("Failed to create FixedVhdDiskAsync")]
546 CreateFixedVhdDiskAsync(#[source] io::Error),
547
548 /// Failed to create FixedVhdDiskSync
549 #[error("Failed to create FixedVhdDiskSync")]
550 CreateFixedVhdDiskSync(#[source] io::Error),
551
552 /// Failed to create QcowDiskSync
553 #[error("Failed to create QcowDiskSync")]
554 CreateQcowDiskSync(#[source] qcow::Error),
555
556 /// Failed to create FixedVhdxDiskSync
557 #[error("Failed to create FixedVhdxDiskSync")]
558 CreateFixedVhdxDiskSync(#[source] vhdx::VhdxError),
559
560 /// Failed to add DMA mapping handler to virtio-mem device.
561 #[error("Failed to add DMA mapping handler to virtio-mem device")]
562 AddDmaMappingHandlerVirtioMem(#[source] virtio_devices::mem::Error),
563
564 /// Failed to remove DMA mapping handler from virtio-mem device.
565 #[error("Failed to remove DMA mapping handler from virtio-mem device")]
566 RemoveDmaMappingHandlerVirtioMem(#[source] virtio_devices::mem::Error),
567
568 /// Failed to create vfio-user client
569 #[error("Failed to create vfio-user client")]
570 VfioUserCreateClient(#[source] vfio_user::Error),
571
572 /// Failed to create VFIO user device
573 #[error("Failed to create VFIO user device")]
574 VfioUserCreate(#[source] VfioUserPciDeviceError),
575
576 /// Failed to map region from VFIO user device into guest
577 #[error("Failed to map region from VFIO user device into guest")]
578 VfioUserMapRegion(#[source] VfioUserPciDeviceError),
579
580 /// Failed to DMA map VFIO user device.
581 #[error("Failed to DMA map VFIO user device")]
582 VfioUserDmaMap(#[source] VfioUserPciDeviceError),
583
584 /// Failed to DMA unmap VFIO user device.
585 #[error("Failed to DMA unmap VFIO user device")]
586 VfioUserDmaUnmap(#[source] VfioUserPciDeviceError),
587
588 /// Failed to update memory mappings for VFIO user device
589 #[error("Failed to update memory mappings for VFIO user device")]
590 UpdateMemoryForVfioUserPciDevice(#[source] VfioUserPciDeviceError),
591
592 /// Cannot duplicate file descriptor
593 #[error("Cannot duplicate file descriptor")]
594 DupFd(#[source] vmm_sys_util::errno::Error),
595
596 /// Failed to DMA map virtio device.
597 #[error("Failed to DMA map virtio device")]
598 VirtioDmaMap(#[source] std::io::Error),
599
600 /// Failed to DMA unmap virtio device.
601 #[error("Failed to DMA unmap virtio device")]
602 VirtioDmaUnmap(#[source] std::io::Error),
603
604 /// Cannot hotplug device behind vIOMMU
605 #[error("Cannot hotplug device behind vIOMMU")]
606 InvalidIommuHotplug,
607
608 /// Invalid identifier as it is not unique.
609 #[error("Invalid identifier as it is not unique: {0}")]
610 IdentifierNotUnique(String),
611
612 /// Invalid identifier
613 #[error("Invalid identifier: {0}")]
614 InvalidIdentifier(String),
615
616 /// Error activating virtio device
617 #[error("Error activating virtio device")]
618 VirtioActivate(#[source] ActivateError),
619
620 /// Failed retrieving device state from snapshot
621 #[error("Failed retrieving device state from snapshot")]
622 RestoreGetState(#[source] MigratableError),
623
624 /// Cannot create a PvPanic device
625 #[error("Cannot create a PvPanic device")]
626 PvPanicCreate(#[source] devices::pvpanic::PvPanicError),
627
628 /// Cannot create a RateLimiterGroup
629 #[error("Cannot create a RateLimiterGroup")]
630 RateLimiterGroupCreate(#[source] rate_limiter::group::Error),
631
632 /// Cannot start sigwinch listener
633 #[error("Cannot start sigwinch listener")]
634 StartSigwinchListener(#[source] std::io::Error),
635
636 // Invalid console info
637 #[error("Invalid console info")]
638 InvalidConsoleInfo,
639
640 // Invalid console fd
641 #[error("Invalid console fd")]
642 InvalidConsoleFd,
643
644 /// Cannot lock images of all block devices.
645 #[error("Cannot lock images of all block devices")]
646 DiskLockError(#[source] virtio_devices::block::Error),
647 }
648
/// Convenience alias for results produced by device manager operations.
pub type DeviceManagerResult<T> = result::Result<T, DeviceManagerError>;

// Size (in bytes) of the I/O region reserved for the DeviceManager's own
// ACPI device.
const DEVICE_MANAGER_ACPI_SIZE: usize = 0x10;
652
/// Console abstraction wrapping the optional virtio-console resizer,
/// used to propagate host terminal size changes to the guest.
#[derive(Default)]
pub struct Console {
    // Present only when a virtio-console device with resize support
    // was created; `None` otherwise.
    console_resizer: Option<Arc<virtio_devices::ConsoleResizer>>,
}
657
658 impl Console {
need_resize(&self) -> bool659 pub fn need_resize(&self) -> bool {
660 if let Some(_resizer) = self.console_resizer.as_ref() {
661 return true;
662 }
663
664 false
665 }
666
update_console_size(&self)667 pub fn update_console_size(&self) {
668 if let Some(resizer) = self.console_resizer.as_ref() {
669 resizer.update_console_size()
670 }
671 }
672 }
673
// Owns the address-space bookkeeping shared by all devices: the system
// allocator, the PIO/MMIO buses, and the per-segment PCI MMIO allocators.
pub(crate) struct AddressManager {
    // System-wide allocator (IO ports, platform MMIO, GSIs, ...).
    pub(crate) allocator: Arc<Mutex<SystemAllocator>>,
    // Port-IO bus devices are registered on.
    pub(crate) io_bus: Arc<Bus>,
    // MMIO bus devices are registered on.
    pub(crate) mmio_bus: Arc<Bus>,
    // Hypervisor VM handle, used for ioeventfd and user-memory-region
    // updates when BARs are relocated (see `move_bar`).
    pub(crate) vm: Arc<dyn hypervisor::Vm>,
    // Device tree whose `PciBar` resources are patched on BAR moves.
    device_tree: Arc<Mutex<DeviceTree>>,
    // One allocator per PCI segment for 32-bit MMIO BAR ranges.
    pci_mmio32_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
    // One allocator per PCI segment for 64-bit MMIO BAR ranges.
    pci_mmio64_allocators: Vec<Arc<Mutex<AddressAllocator>>>,
}
683
impl DeviceRelocation for AddressManager {
    // Relocates a PCI BAR from `old_base` to `new_base`, keeping every
    // piece of bookkeeping consistent: the address allocators, the
    // PIO/MMIO bus ranges, the device-tree resources, any virtio
    // ioeventfd registrations, and any user memory region backing a
    // virtio shared-memory BAR. Finally delegates to the device itself.
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
        region_type: PciBarRegionType,
    ) -> std::result::Result<(), std::io::Error> {
        match region_type {
            PciBarRegionType::IoRegion => {
                // Update system allocator: release the old range, then
                // claim the new one at the exact requested address.
                self.allocator
                    .lock()
                    .unwrap()
                    .free_io_addresses(GuestAddress(old_base), len as GuestUsize);

                self.allocator
                    .lock()
                    .unwrap()
                    .allocate_io_addresses(Some(GuestAddress(new_base)), len as GuestUsize, None)
                    .ok_or_else(|| io::Error::other("failed allocating new IO range"))?;

                // Update PIO bus
                self.io_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(io::Error::other)?;
            }
            PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => {
                let allocators = if region_type == PciBarRegionType::Memory32BitRegion {
                    &self.pci_mmio32_allocators
                } else {
                    &self.pci_mmio64_allocators
                };

                // Find the specific allocator that this BAR was allocated from and use it for new one
                for allocator in allocators {
                    let allocator_base = allocator.lock().unwrap().base();
                    let allocator_end = allocator.lock().unwrap().end();

                    if old_base >= allocator_base.0 && old_base <= allocator_end.0 {
                        allocator
                            .lock()
                            .unwrap()
                            .free(GuestAddress(old_base), len as GuestUsize);

                        // NOTE(review): the new range is assumed to live in
                        // the same segment allocator as the old one.
                        allocator
                            .lock()
                            .unwrap()
                            .allocate(Some(GuestAddress(new_base)), len as GuestUsize, Some(len))
                            .ok_or_else(|| io::Error::other("failed allocating new MMIO range"))?;

                        break;
                    }
                }

                // Update MMIO bus
                self.mmio_bus
                    .update_range(old_base, len, new_base, len)
                    .map_err(io::Error::other)?;
            }
        }

        // Update the device_tree resources associated with the device
        if let Some(id) = pci_dev.id() {
            if let Some(node) = self.device_tree.lock().unwrap().get_mut(&id) {
                let mut resource_updated = false;
                for resource in node.resources.iter_mut() {
                    if let Resource::PciBar { base, type_, .. } = resource {
                        // Match on both the BAR type and the old base so the
                        // right resource of a multi-BAR device is patched.
                        if PciBarRegionType::from(*type_) == region_type && *base == old_base {
                            *base = new_base;
                            resource_updated = true;
                            break;
                        }
                    }
                }

                // A device with an id must have a matching resource; failing
                // to find one indicates inconsistent state.
                if !resource_updated {
                    return Err(io::Error::other(format!(
                        "Couldn't find a resource with base 0x{old_base:x} for device {id}"
                    )));
                }
            } else {
                return Err(io::Error::other(format!(
                    "Couldn't find device {id} from device tree"
                )));
            }
        }

        let any_dev = pci_dev.as_any_mut();
        if let Some(virtio_pci_dev) = any_dev.downcast_ref::<VirtioPciDevice>() {
            let bar_addr = virtio_pci_dev.config_bar_addr();
            if bar_addr == new_base {
                // The moved BAR is the virtio config BAR: move the queue
                // notification ioeventfds from the old to the new address.
                for (event, addr) in virtio_pci_dev.ioeventfds(old_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm.unregister_ioevent(event, &io_addr).map_err(|e| {
                        io::Error::other(format!("failed to unregister ioevent: {e:?}"))
                    })?;
                }
                for (event, addr) in virtio_pci_dev.ioeventfds(new_base) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.vm
                        .register_ioevent(event, &io_addr, None)
                        .map_err(|e| {
                            io::Error::other(format!("failed to register ioevent: {e:?}"))
                        })?;
                }
            } else {
                // Otherwise the BAR may back the device's shared memory
                // regions: remap the user memory region at the new address.
                let virtio_dev = virtio_pci_dev.virtio_device();
                let mut virtio_dev = virtio_dev.lock().unwrap();
                if let Some(mut shm_regions) = virtio_dev.get_shm_regions() {
                    if shm_regions.addr.raw_value() == old_base {
                        // Remove the mapping at the old guest address.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            old_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.remove_user_memory_region(mem_region).map_err(|e| {
                            io::Error::other(format!("failed to remove user memory region: {e:?}"))
                        })?;

                        // Create new mapping by inserting new region to KVM.
                        let mem_region = self.vm.make_user_memory_region(
                            shm_regions.mem_slot,
                            new_base,
                            shm_regions.len,
                            shm_regions.host_addr,
                            false,
                            false,
                        );

                        self.vm.create_user_memory_region(mem_region).map_err(|e| {
                            io::Error::other(format!("failed to create user memory regions: {e:?}"))
                        })?;

                        // Update shared memory regions to reflect the new mapping.
                        shm_regions.addr = GuestAddress(new_base);
                        virtio_dev.set_shm_regions(shm_regions).map_err(|e| {
                            io::Error::other(format!(
                                "failed to update shared memory regions: {e:?}"
                            ))
                        })?;
                    }
                }
            }
        }

        // Finally let the device update its own internal BAR state.
        pci_dev.move_bar(old_base, new_base)
    }
}
838
// Serializable portion of the DeviceManager, persisted across
// snapshot/restore.
#[derive(Serialize, Deserialize)]
struct DeviceManagerState {
    device_tree: DeviceTree,
    // Monotonically increasing counter used to auto-generate device ids;
    // `Wrapping` makes overflow well-defined.
    device_id_cnt: Wrapping<usize>,
}
844
/// A pseudo-terminal pair: the open main side plus the filesystem path
/// of its peer end.
#[derive(Debug)]
pub struct PtyPair {
    pub main: File,
    pub path: PathBuf,
}
850
851 impl Clone for PtyPair {
clone(&self) -> Self852 fn clone(&self) -> Self {
853 PtyPair {
854 main: self.main.try_clone().unwrap(),
855 path: self.path.clone(),
856 }
857 }
858 }
859
/// Handle to a PCI device managed by the device manager, one variant
/// per supported device flavor (VFIO, virtio transport, vfio-user).
#[derive(Clone)]
pub enum PciDeviceHandle {
    Vfio(Arc<Mutex<VfioPciDevice>>),
    Virtio(Arc<Mutex<VirtioPciDevice>>),
    VfioUser(Arc<Mutex<VfioUserPciDevice>>),
}
866
// Bundles a virtio device with the placement/IOMMU metadata needed when
// attaching it to a PCI segment.
#[derive(Clone)]
struct MetaVirtioDevice {
    virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
    // Whether the device sits behind the virtual IOMMU.
    iommu: bool,
    // Unique device identifier.
    id: String,
    // PCI segment the device is attached to.
    pci_segment: u16,
    // Optional handler performing external DMA mappings for the device.
    dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
}
875
/// Register addresses advertised through the ACPI platform tables.
/// Each entry is `None` when the corresponding register is not exposed.
#[derive(Default)]
pub struct AcpiPlatformAddresses {
    pub pm_timer_address: Option<GenericAddress>,
    pub reset_reg_address: Option<GenericAddress>,
    pub sleep_control_reg_address: Option<GenericAddress>,
    pub sleep_status_reg_address: Option<GenericAddress>,
}
883
// Proxy asking the hypervisor for SEV-SNP page access rights before a
// guest address is used (mshv + sev_snp builds only).
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
struct SevSnpPageAccessProxy {
    vm: Arc<dyn hypervisor::Vm>,
}
888
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl std::fmt::Debug for SevSnpPageAccessProxy {
    // Hand-written Debug: the wrapped VM handle is not Debug, so emit a
    // fixed description instead of deriving.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("SNP Page access proxy")
    }
}
895
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl SevSnpPageAccessProxy {
    /// Wraps the given VM handle in a page-access proxy.
    fn new(vm: Arc<dyn hypervisor::Vm>) -> Self {
        Self { vm }
    }
}
902
#[cfg(all(feature = "mshv", feature = "sev_snp"))]
impl AccessPlatform for SevSnpPageAccessProxy {
    /// Guest-physical addresses need no translation here; the input is
    /// returned unchanged.
    fn translate_gpa(&self, base: u64, _size: u64) -> std::result::Result<u64, std::io::Error> {
        Ok(base)
    }

    /// Asks the hypervisor to grant access to the `[base, base + size)`
    /// page range, then returns the (untranslated) address.
    fn translate_gva(&self, base: u64, size: u64) -> std::result::Result<u64, std::io::Error> {
        match self.vm.gain_page_access(base, size as u32) {
            Ok(_) => Ok(base),
            Err(e) => Err(io::Error::other(e)),
        }
    }
}
916
/// Central owner of every guest-visible device: buses, interrupt
/// controllers, console, virtio/PCI devices and ACPI platform devices.
pub struct DeviceManager {
    // Manage address space related to devices
    address_manager: Arc<AddressManager>,

    // Console abstraction
    console: Arc<Console>,

    // Serial Manager
    serial_manager: Option<Arc<SerialManager>>,

    // File carrying console/pty notifications — from the accessor name
    // this looks like terminal resize events; confirm against the console
    // setup code.
    console_resize_pipe: Option<Arc<File>>,

    // Original terminal settings. To restore on exit.
    original_termios_opt: Arc<Mutex<Option<termios>>>,

    // Interrupt controller (exactly one arch-specific variant compiles in)
    #[cfg(target_arch = "x86_64")]
    interrupt_controller: Option<Arc<Mutex<ioapic::Ioapic>>>,
    #[cfg(target_arch = "aarch64")]
    interrupt_controller: Option<Arc<Mutex<gic::Gic>>>,
    #[cfg(target_arch = "riscv64")]
    interrupt_controller: Option<Arc<Mutex<aia::Aia>>>,

    // Things to be added to the commandline (e.g. aarch64 or riscv64 early console)
    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
    cmdline_additions: Vec<String>,

    // ACPI GED notification device
    ged_notification_device: Option<Arc<Mutex<devices::AcpiGedDevice>>>,

    // VM configuration
    config: Arc<Mutex<VmConfig>>,

    // Memory Manager
    memory_manager: Arc<Mutex<MemoryManager>>,

    // CPU Manager
    cpu_manager: Arc<Mutex<CpuManager>>,

    // The virtio devices on the system
    virtio_devices: Vec<MetaVirtioDevice>,

    /// All disks. Needed for locking and unlocking the images.
    block_devices: Vec<Arc<Mutex<Block>>>,

    // List of bus devices
    // Let the DeviceManager keep strong references to the BusDevice devices.
    // This allows the IO and MMIO buses to be provided with Weak references,
    // which prevents cyclic dependencies.
    bus_devices: Vec<Arc<dyn BusDeviceSync>>,

    // Counter to keep track of the consumed device IDs.
    device_id_cnt: Wrapping<usize>,

    // One entry per PCI segment; index 0 is the default segment.
    pci_segments: Vec<PciSegment>,

    #[cfg_attr(target_arch = "aarch64", allow(dead_code))]
    // MSI Interrupt Manager
    msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>>,

    #[cfg_attr(feature = "mshv", allow(dead_code))]
    // Legacy Interrupt Manager
    legacy_interrupt_manager: Option<Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>>,

    // Passthrough device handle
    passthrough_device: Option<VfioDeviceFd>,

    // VFIO container
    // Only one container can be created, therefore it is stored as part of the
    // DeviceManager to be reused.
    vfio_container: Option<Arc<VfioContainer>>,

    // Paravirtualized IOMMU
    iommu_device: Option<Arc<Mutex<virtio_devices::Iommu>>>,
    iommu_mapping: Option<Arc<IommuMapping>>,

    // PCI information about devices attached to the paravirtualized IOMMU
    // It contains the virtual IOMMU PCI BDF along with the list of PCI BDF
    // representing the devices attached to the virtual IOMMU. This is useful
    // information for filling the ACPI VIOT table.
    iommu_attached_devices: Option<(PciBdf, Vec<PciBdf>)>,

    // Tree of devices, representing the dependencies between devices.
    // Useful for introspection, snapshot and restore.
    device_tree: Arc<Mutex<DeviceTree>>,

    // Exit event
    exit_evt: EventFd,
    // Reset event
    reset_evt: EventFd,

    // (DeviceType, id) -> MMIO info for devices registered so far.
    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
    id_to_dev_info: HashMap<(DeviceType, String), MmioDeviceInfo>,

    // seccomp action
    seccomp_action: SeccompAction,

    // List of guest NUMA nodes.
    numa_nodes: NumaNodes,

    // Possible handle to the virtio-balloon device
    balloon: Option<Arc<Mutex<virtio_devices::Balloon>>>,

    // Virtio Device activation EventFd to allow the VMM thread to trigger device
    // activation and thus start the threads from the VMM thread
    activate_evt: EventFd,

    // MMIO address at which this DeviceManager itself is reachable via ACPI.
    #[cfg(not(target_arch = "riscv64"))]
    acpi_address: GuestAddress,

    // PCI segment currently selected (index into `pci_segments`).
    selected_segment: usize,

    // Possible handle to the virtio-mem device
    virtio_mem_devices: Vec<Arc<Mutex<virtio_devices::Mem>>>,

    #[cfg(target_arch = "aarch64")]
    // GPIO device for AArch64
    gpio_device: Option<Arc<Mutex<devices::legacy::Gpio>>>,

    // pvmemcontrol bus and PCI device pair, when the feature is enabled.
    #[cfg(feature = "pvmemcontrol")]
    pvmemcontrol_devices: Option<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )>,

    // pvpanic device
    pvpanic_device: Option<Arc<Mutex<devices::PvPanicDevice>>>,

    // Flag to force setting the iommu on virtio devices
    force_iommu: bool,

    // io_uring availability if detected
    io_uring_supported: Option<bool>,

    // aio availability if detected
    aio_supported: Option<bool>,

    // List of unique identifiers provided at boot through the configuration.
    boot_id_list: BTreeSet<String>,

    #[cfg(not(target_arch = "riscv64"))]
    // Start time of the VM
    timestamp: Instant,

    // Pending activations
    pending_activations: Arc<Mutex<Vec<VirtioPciDeviceActivator>>>,

    #[cfg(not(target_arch = "riscv64"))]
    // Addresses for ACPI platform devices e.g. ACPI PM timer, sleep/reset registers
    acpi_platform_addresses: AcpiPlatformAddresses,

    // Snapshot this manager was restored from, if any.
    snapshot: Option<Snapshot>,

    // Named rate-limiter groups, shared by the devices that reference them.
    rate_limit_groups: HashMap<String, Arc<RateLimiterGroup>>,

    // Tracked MMIO regions.
    mmio_regions: Arc<Mutex<Vec<MmioRegion>>>,
}
1074
create_mmio_allocators( start: u64, end: u64, num_pci_segments: u16, weights: Vec<u32>, alignment: u64, ) -> Vec<Arc<Mutex<AddressAllocator>>>1075 fn create_mmio_allocators(
1076 start: u64,
1077 end: u64,
1078 num_pci_segments: u16,
1079 weights: Vec<u32>,
1080 alignment: u64,
1081 ) -> Vec<Arc<Mutex<AddressAllocator>>> {
1082 let total_weight: u32 = weights.iter().sum();
1083
1084 // Start each PCI segment mmio range on an aligned boundary
1085 let pci_segment_mmio_size = (end - start + 1) / (alignment * total_weight as u64) * alignment;
1086
1087 let mut mmio_allocators = vec![];
1088 let mut i = 0;
1089 for segment_id in 0..num_pci_segments as u64 {
1090 let weight = weights[segment_id as usize] as u64;
1091 let mmio_start = start + i * pci_segment_mmio_size;
1092 let mmio_size = pci_segment_mmio_size * weight;
1093 let allocator = Arc::new(Mutex::new(
1094 AddressAllocator::new(GuestAddress(mmio_start), mmio_size).unwrap(),
1095 ));
1096 mmio_allocators.push(allocator);
1097 i += weight;
1098 }
1099
1100 mmio_allocators
1101 }
1102
1103 impl DeviceManager {
1104 #[allow(clippy::too_many_arguments)]
new( io_bus: Arc<Bus>, mmio_bus: Arc<Bus>, vm: Arc<dyn hypervisor::Vm>, config: Arc<Mutex<VmConfig>>, memory_manager: Arc<Mutex<MemoryManager>>, cpu_manager: Arc<Mutex<CpuManager>>, exit_evt: EventFd, reset_evt: EventFd, seccomp_action: SeccompAction, numa_nodes: NumaNodes, activate_evt: &EventFd, force_iommu: bool, boot_id_list: BTreeSet<String>, #[cfg(not(target_arch = "riscv64"))] timestamp: Instant, snapshot: Option<Snapshot>, dynamic: bool, ) -> DeviceManagerResult<Arc<Mutex<Self>>>1105 pub fn new(
1106 io_bus: Arc<Bus>,
1107 mmio_bus: Arc<Bus>,
1108 vm: Arc<dyn hypervisor::Vm>,
1109 config: Arc<Mutex<VmConfig>>,
1110 memory_manager: Arc<Mutex<MemoryManager>>,
1111 cpu_manager: Arc<Mutex<CpuManager>>,
1112 exit_evt: EventFd,
1113 reset_evt: EventFd,
1114 seccomp_action: SeccompAction,
1115 numa_nodes: NumaNodes,
1116 activate_evt: &EventFd,
1117 force_iommu: bool,
1118 boot_id_list: BTreeSet<String>,
1119 #[cfg(not(target_arch = "riscv64"))] timestamp: Instant,
1120 snapshot: Option<Snapshot>,
1121 dynamic: bool,
1122 ) -> DeviceManagerResult<Arc<Mutex<Self>>> {
1123 trace_scoped!("DeviceManager::new");
1124
1125 let (device_tree, device_id_cnt) = if let Some(snapshot) = snapshot.as_ref() {
1126 let state: DeviceManagerState = snapshot.to_state().unwrap();
1127 (
1128 Arc::new(Mutex::new(state.device_tree.clone())),
1129 state.device_id_cnt,
1130 )
1131 } else {
1132 (Arc::new(Mutex::new(DeviceTree::new())), Wrapping(0))
1133 };
1134
1135 let num_pci_segments =
1136 if let Some(platform_config) = config.lock().unwrap().platform.as_ref() {
1137 platform_config.num_pci_segments
1138 } else {
1139 1
1140 };
1141
1142 let mut mmio32_aperture_weights: Vec<u32> =
1143 std::iter::repeat_n(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT, num_pci_segments.into())
1144 .collect();
1145 if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
1146 for pci_segment in pci_segments.iter() {
1147 mmio32_aperture_weights[pci_segment.pci_segment as usize] =
1148 pci_segment.mmio32_aperture_weight
1149 }
1150 }
1151
1152 let start_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0;
1153 let end_of_mmio32_area = layout::MEM_32BIT_DEVICES_START.0 + layout::MEM_32BIT_DEVICES_SIZE;
1154 let pci_mmio32_allocators = create_mmio_allocators(
1155 start_of_mmio32_area,
1156 end_of_mmio32_area,
1157 num_pci_segments,
1158 mmio32_aperture_weights,
1159 4 << 10,
1160 );
1161
1162 let mut mmio64_aperture_weights: Vec<u32> =
1163 std::iter::repeat_n(DEFAULT_PCI_SEGMENT_APERTURE_WEIGHT, num_pci_segments.into())
1164 .collect();
1165 if let Some(pci_segments) = &config.lock().unwrap().pci_segments {
1166 for pci_segment in pci_segments.iter() {
1167 mmio64_aperture_weights[pci_segment.pci_segment as usize] =
1168 pci_segment.mmio64_aperture_weight
1169 }
1170 }
1171
1172 let start_of_mmio64_area = memory_manager.lock().unwrap().start_of_device_area().0;
1173 let end_of_mmio64_area = memory_manager.lock().unwrap().end_of_device_area().0;
1174 let pci_mmio64_allocators = create_mmio_allocators(
1175 start_of_mmio64_area,
1176 end_of_mmio64_area,
1177 num_pci_segments,
1178 mmio64_aperture_weights,
1179 4 << 30,
1180 );
1181
1182 let address_manager = Arc::new(AddressManager {
1183 allocator: memory_manager.lock().unwrap().allocator(),
1184 io_bus,
1185 mmio_bus,
1186 vm: vm.clone(),
1187 device_tree: Arc::clone(&device_tree),
1188 pci_mmio32_allocators,
1189 pci_mmio64_allocators,
1190 });
1191
1192 // First we create the MSI interrupt manager, the legacy one is created
1193 // later, after the IOAPIC device creation.
1194 // The reason we create the MSI one first is because the IOAPIC needs it,
1195 // and then the legacy interrupt manager needs an IOAPIC. So we're
1196 // handling a linear dependency chain:
1197 // msi_interrupt_manager <- IOAPIC <- legacy_interrupt_manager.
1198 let msi_interrupt_manager: Arc<dyn InterruptManager<GroupConfig = MsiIrqGroupConfig>> =
1199 Arc::new(MsiInterruptManager::new(
1200 Arc::clone(&address_manager.allocator),
1201 vm,
1202 ));
1203
1204 let acpi_address = address_manager
1205 .allocator
1206 .lock()
1207 .unwrap()
1208 .allocate_platform_mmio_addresses(None, DEVICE_MANAGER_ACPI_SIZE as u64, None)
1209 .ok_or(DeviceManagerError::AllocateIoPort)?;
1210
1211 let mut pci_irq_slots = [0; 32];
1212 PciSegment::reserve_legacy_interrupts_for_pci_devices(
1213 &address_manager,
1214 &mut pci_irq_slots,
1215 )?;
1216
1217 let mut pci_segments = vec![PciSegment::new_default_segment(
1218 &address_manager,
1219 Arc::clone(&address_manager.pci_mmio32_allocators[0]),
1220 Arc::clone(&address_manager.pci_mmio64_allocators[0]),
1221 &pci_irq_slots,
1222 )?];
1223
1224 for i in 1..num_pci_segments as usize {
1225 pci_segments.push(PciSegment::new(
1226 i as u16,
1227 numa_node_id_from_pci_segment_id(&numa_nodes, i as u16),
1228 &address_manager,
1229 Arc::clone(&address_manager.pci_mmio32_allocators[i]),
1230 Arc::clone(&address_manager.pci_mmio64_allocators[i]),
1231 &pci_irq_slots,
1232 )?);
1233 }
1234
1235 if dynamic {
1236 let acpi_address = address_manager
1237 .allocator
1238 .lock()
1239 .unwrap()
1240 .allocate_platform_mmio_addresses(None, CPU_MANAGER_ACPI_SIZE as u64, None)
1241 .ok_or(DeviceManagerError::AllocateMmioAddress)?;
1242
1243 address_manager
1244 .mmio_bus
1245 .insert(
1246 cpu_manager.clone(),
1247 acpi_address.0,
1248 CPU_MANAGER_ACPI_SIZE as u64,
1249 )
1250 .map_err(DeviceManagerError::BusError)?;
1251
1252 cpu_manager.lock().unwrap().set_acpi_address(acpi_address);
1253 }
1254
1255 let mut rate_limit_groups = HashMap::<String, Arc<RateLimiterGroup>>::new();
1256 if let Some(rate_limit_groups_cfg) = config.lock().unwrap().rate_limit_groups.as_ref() {
1257 for rate_limit_group_cfg in rate_limit_groups_cfg {
1258 let rate_limit_cfg = rate_limit_group_cfg.rate_limiter_config;
1259 let bw = rate_limit_cfg.bandwidth.unwrap_or_default();
1260 let ops = rate_limit_cfg.ops.unwrap_or_default();
1261 let mut rate_limit_group = RateLimiterGroup::new(
1262 &rate_limit_group_cfg.id,
1263 bw.size,
1264 bw.one_time_burst.unwrap_or(0),
1265 bw.refill_time,
1266 ops.size,
1267 ops.one_time_burst.unwrap_or(0),
1268 ops.refill_time,
1269 )
1270 .map_err(DeviceManagerError::RateLimiterGroupCreate)?;
1271
1272 let exit_evt = exit_evt.try_clone().map_err(DeviceManagerError::EventFd)?;
1273
1274 rate_limit_group.start_thread(exit_evt).unwrap();
1275 rate_limit_groups
1276 .insert(rate_limit_group_cfg.id.clone(), Arc::new(rate_limit_group));
1277 }
1278 }
1279
1280 let device_manager = DeviceManager {
1281 address_manager: Arc::clone(&address_manager),
1282 console: Arc::new(Console::default()),
1283 interrupt_controller: None,
1284 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
1285 cmdline_additions: Vec::new(),
1286 ged_notification_device: None,
1287 config,
1288 memory_manager,
1289 cpu_manager,
1290 virtio_devices: Vec::new(),
1291 block_devices: vec![],
1292 bus_devices: Vec::new(),
1293 device_id_cnt,
1294 msi_interrupt_manager,
1295 legacy_interrupt_manager: None,
1296 passthrough_device: None,
1297 vfio_container: None,
1298 iommu_device: None,
1299 iommu_mapping: None,
1300 iommu_attached_devices: None,
1301 pci_segments,
1302 device_tree,
1303 exit_evt,
1304 reset_evt,
1305 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
1306 id_to_dev_info: HashMap::new(),
1307 seccomp_action,
1308 numa_nodes,
1309 balloon: None,
1310 activate_evt: activate_evt
1311 .try_clone()
1312 .map_err(DeviceManagerError::EventFd)?,
1313 #[cfg(not(target_arch = "riscv64"))]
1314 acpi_address,
1315 selected_segment: 0,
1316 serial_manager: None,
1317 console_resize_pipe: None,
1318 original_termios_opt: Arc::new(Mutex::new(None)),
1319 virtio_mem_devices: Vec::new(),
1320 #[cfg(target_arch = "aarch64")]
1321 gpio_device: None,
1322 #[cfg(feature = "pvmemcontrol")]
1323 pvmemcontrol_devices: None,
1324 pvpanic_device: None,
1325 force_iommu,
1326 io_uring_supported: None,
1327 aio_supported: None,
1328 boot_id_list,
1329 #[cfg(not(target_arch = "riscv64"))]
1330 timestamp,
1331 pending_activations: Arc::new(Mutex::new(Vec::default())),
1332 #[cfg(not(target_arch = "riscv64"))]
1333 acpi_platform_addresses: AcpiPlatformAddresses::default(),
1334 snapshot,
1335 rate_limit_groups,
1336 mmio_regions: Arc::new(Mutex::new(Vec::new())),
1337 };
1338
1339 let device_manager = Arc::new(Mutex::new(device_manager));
1340
1341 address_manager
1342 .mmio_bus
1343 .insert(
1344 Arc::clone(&device_manager) as Arc<dyn BusDeviceSync>,
1345 acpi_address.0,
1346 DEVICE_MANAGER_ACPI_SIZE as u64,
1347 )
1348 .map_err(DeviceManagerError::BusError)?;
1349
1350 Ok(device_manager)
1351 }
1352
console_resize_pipe(&self) -> Option<Arc<File>>1353 pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1354 self.console_resize_pipe.clone()
1355 }
1356
/// Creates the architecture-specific interrupt controller (IOAPIC, GIC or
/// AIA) by delegating to the matching `add_interrupt_controller` impl.
pub fn create_interrupt_controller(
    &mut self,
) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
    self.add_interrupt_controller()
}
1362
/// Creates all devices described by the VM configuration and attaches
/// them to the buses.
///
/// Takes the interrupt controller produced by
/// `create_interrupt_controller()`, builds the legacy interrupt manager
/// on top of it, then adds (in order) legacy devices, ACPI devices, the
/// console, optional TPM, virtio devices, PCI devices, and the optional
/// pvmemcontrol/pvpanic devices.
pub fn create_devices(
    &mut self,
    console_info: Option<ConsoleInfo>,
    console_resize_pipe: Option<Arc<File>>,
    original_termios_opt: Arc<Mutex<Option<termios>>>,
    interrupt_controller: Arc<Mutex<dyn InterruptController>>,
) -> DeviceManagerResult<()> {
    trace_scoped!("create_devices");

    let mut virtio_devices: Vec<MetaVirtioDevice> = Vec::new();

    self.cpu_manager
        .lock()
        .unwrap()
        .set_interrupt_controller(interrupt_controller.clone());

    // Now we can create the legacy interrupt manager, which needs the freshly
    // formed IOAPIC device.
    let legacy_interrupt_manager: Arc<
        dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>,
    > = Arc::new(LegacyUserspaceInterruptManager::new(Arc::clone(
        &interrupt_controller,
    )));

    {
        // Expose the MemoryManager over MMIO when it has an ACPI address.
        if let Some(acpi_address) = self.memory_manager.lock().unwrap().acpi_address() {
            self.address_manager
                .mmio_bus
                .insert(
                    Arc::clone(&self.memory_manager) as Arc<dyn BusDeviceSync>,
                    acpi_address.0,
                    MEMORY_MANAGER_ACPI_SIZE as u64,
                )
                .map_err(DeviceManagerError::BusError)?;
        }
    }

    #[cfg(target_arch = "x86_64")]
    self.add_legacy_devices(
        self.reset_evt
            .try_clone()
            .map_err(DeviceManagerError::EventFd)?,
    )?;

    #[cfg(target_arch = "aarch64")]
    self.add_legacy_devices(&legacy_interrupt_manager)?;

    {
        self.ged_notification_device = self.add_acpi_devices(
            &legacy_interrupt_manager,
            self.reset_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
        )?;
    }

    self.original_termios_opt = original_termios_opt;

    self.console = self.add_console_devices(
        &legacy_interrupt_manager,
        &mut virtio_devices,
        console_info,
        console_resize_pipe,
    )?;

    #[cfg(not(target_arch = "riscv64"))]
    if let Some(tpm) = self.config.clone().lock().unwrap().tpm.as_ref() {
        let tpm_dev = self.add_tpm_device(tpm.socket.clone())?;
        self.bus_devices
            .push(Arc::clone(&tpm_dev) as Arc<dyn BusDeviceSync>)
    }
    self.legacy_interrupt_manager = Some(legacy_interrupt_manager);

    virtio_devices.append(&mut self.make_virtio_devices()?);

    self.add_pci_devices(virtio_devices.clone())?;

    self.virtio_devices = virtio_devices;

    // Add pvmemcontrol if required
    #[cfg(feature = "pvmemcontrol")]
    {
        if self.config.lock().unwrap().pvmemcontrol.is_some() {
            let (pvmemcontrol_bus_device, pvmemcontrol_pci_device) =
                self.make_pvmemcontrol_device()?;
            self.pvmemcontrol_devices =
                Some((pvmemcontrol_bus_device, pvmemcontrol_pci_device));
        }
    }

    if self.config.clone().lock().unwrap().pvpanic {
        self.pvpanic_device = self.add_pvpanic_device()?;
    }

    Ok(())
}
1462
/// Captures the serializable manager state (device tree + id counter)
/// used when snapshotting the VM.
fn state(&self) -> DeviceManagerState {
    DeviceManagerState {
        device_tree: self.device_tree.lock().unwrap().clone(),
        device_id_cnt: self.device_id_cnt,
    }
}
1469
/// Returns the inclusive (start, end) guest-address range used for MSI
/// doorbells on the current architecture.
fn get_msi_iova_space(&mut self) -> (u64, u64) {
    // aarch64: derived from the default vGIC configuration.
    #[cfg(target_arch = "aarch64")]
    {
        let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
        let vgic_config = gic::Gic::create_default_config(vcpus.into());
        (
            vgic_config.msi_addr,
            vgic_config.msi_addr + vgic_config.msi_size - 1,
        )
    }
    // riscv64: derived from the default vAIA configuration, one IMSIC
    // page per vCPU.
    #[cfg(target_arch = "riscv64")]
    {
        let vcpus = self.config.lock().unwrap().cpus.boot_vcpus;
        let vaia_config = aia::Aia::create_default_config(vcpus.into());
        (
            vaia_config.imsic_addr,
            vaia_config.imsic_addr + vaia_config.vcpu_count as u64 * arch::layout::IMSIC_SIZE
                - 1,
        )
    }
    // x86_64: the fixed APIC MSI address window.
    #[cfg(target_arch = "x86_64")]
    (0xfee0_0000, 0xfeef_ffff)
}
1493
#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
/// Gets the information of the devices registered up to some point in time.
///
/// Keyed by (device type, device id); used e.g. when generating the FDT.
pub fn get_device_info(&self) -> &HashMap<(DeviceType, String), MmioDeviceInfo> {
    &self.id_to_dev_info
}
1499
#[allow(unused_variables)]
/// Places every prepared virtio device (plus VFIO / vfio-user devices)
/// on the PCI buses, optionally behind a paravirtualized IOMMU.
///
/// Records which BDFs are IOMMU-attached so the ACPI VIOT table can be
/// generated later, and finally registers each segment's config-space
/// accessors as bus devices.
fn add_pci_devices(
    &mut self,
    virtio_devices: Vec<MetaVirtioDevice>,
) -> DeviceManagerResult<()> {
    let iommu_id = String::from(IOMMU_DEVICE_NAME);

    let iommu_address_width_bits =
        if let Some(ref platform) = self.config.lock().unwrap().platform {
            platform.iommu_address_width_bits
        } else {
            DEFAULT_IOMMU_ADDRESS_WIDTH_BITS
        };

    // Create the virtio-iommu device when requested by the configuration.
    let iommu_device = if self.config.lock().unwrap().iommu {
        let (device, mapping) = virtio_devices::Iommu::new(
            iommu_id.clone(),
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            self.get_msi_iova_space(),
            iommu_address_width_bits,
            state_from_id(self.snapshot.as_ref(), iommu_id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateVirtioIommu)?;
        let device = Arc::new(Mutex::new(device));
        self.iommu_device = Some(Arc::clone(&device));
        self.iommu_mapping = Some(mapping);

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(iommu_id.clone(), device_node!(iommu_id, device));

        Some(device)
    } else {
        None
    };

    let mut iommu_attached_devices = Vec::new();
    {
        for handle in virtio_devices {
            // Attach the device to the IOMMU mapping only when flagged.
            let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
                self.iommu_mapping.clone()
            } else {
                None
            };

            let dev_id = self.add_virtio_pci_device(
                handle.virtio_device,
                &mapping,
                handle.id,
                handle.pci_segment,
                handle.dma_handler,
            )?;

            if handle.iommu {
                iommu_attached_devices.push(dev_id);
            }
        }

        let mut vfio_iommu_device_ids = self.add_vfio_devices()?;
        iommu_attached_devices.append(&mut vfio_iommu_device_ids);

        let mut vfio_user_iommu_device_ids = self.add_user_devices()?;
        iommu_attached_devices.append(&mut vfio_user_iommu_device_ids);

        // Add all devices from forced iommu segments
        if let Some(platform_config) = self.config.lock().unwrap().platform.as_ref() {
            if let Some(iommu_segments) = platform_config.iommu_segments.as_ref() {
                for segment in iommu_segments {
                    for device in 0..32 {
                        let bdf = PciBdf::new(*segment, 0, device, 0);
                        if !iommu_attached_devices.contains(&bdf) {
                            iommu_attached_devices.push(bdf);
                        }
                    }
                }
            }
        }

        // The IOMMU device itself always lives on segment 0.
        if let Some(iommu_device) = iommu_device {
            let dev_id = self.add_virtio_pci_device(iommu_device, &None, iommu_id, 0, None)?;
            self.iommu_attached_devices = Some((dev_id, iommu_attached_devices));
        }
    }

    // Keep strong references to each segment's PCI config accessors.
    for segment in &self.pci_segments {
        #[cfg(target_arch = "x86_64")]
        if let Some(pci_config_io) = segment.pci_config_io.as_ref() {
            self.bus_devices
                .push(Arc::clone(pci_config_io) as Arc<dyn BusDeviceSync>);
        }

        self.bus_devices
            .push(Arc::clone(&segment.pci_config_mmio) as Arc<dyn BusDeviceSync>);
    }

    Ok(())
}
1605
#[cfg(target_arch = "aarch64")]
/// Creates the aarch64 vGIC interrupt controller, restoring its state
/// from the snapshot when one is present.
fn add_interrupt_controller(
    &mut self,
) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
    let interrupt_controller: Arc<Mutex<gic::Gic>> = Arc::new(Mutex::new(
        gic::Gic::new(
            self.config.lock().unwrap().cpus.boot_vcpus,
            Arc::clone(&self.msi_interrupt_manager),
            self.address_manager.vm.clone(),
        )
        .map_err(DeviceManagerError::CreateInterruptController)?,
    ));

    self.interrupt_controller = Some(interrupt_controller.clone());

    // Restore the vGic if this is in the process of restoration
    let id = String::from(gic::GIC_SNAPSHOT_ID);
    if let Some(vgic_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
        // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
        if self
            .cpu_manager
            .lock()
            .unwrap()
            .init_pmu(AARCH64_PMU_IRQ + 16)
            .is_err()
        {
            info!("Failed to initialize PMU");
        }

        let vgic_state = vgic_snapshot
            .to_state()
            .map_err(DeviceManagerError::RestoreGetState)?;
        let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
        interrupt_controller
            .lock()
            .unwrap()
            .restore_vgic(vgic_state, &saved_vcpu_states)
            .unwrap();
    }

    // Record the controller in the device tree for introspection and
    // snapshotting.
    self.device_tree
        .lock()
        .unwrap()
        .insert(id.clone(), device_node!(id, interrupt_controller));

    Ok(interrupt_controller)
}
1653
#[cfg(target_arch = "aarch64")]
/// Returns the vGIC interrupt controller, if it has been created.
pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<gic::Gic>>> {
    self.interrupt_controller.as_ref()
}
1658
#[cfg(target_arch = "riscv64")]
/// Creates the riscv64 vAIA interrupt controller. Snapshot restore is not
/// implemented yet for this controller.
fn add_interrupt_controller(
    &mut self,
) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
    let interrupt_controller: Arc<Mutex<aia::Aia>> = Arc::new(Mutex::new(
        aia::Aia::new(
            self.config.lock().unwrap().cpus.boot_vcpus,
            Arc::clone(&self.msi_interrupt_manager),
            self.address_manager.vm.clone(),
        )
        .map_err(DeviceManagerError::CreateInterruptController)?,
    ));

    self.interrupt_controller = Some(interrupt_controller.clone());

    // Restore the vAia if this is in the process of restoration
    let id = String::from(aia::_AIA_SNAPSHOT_ID);
    if let Some(_vaia_snapshot) = snapshot_from_id(self.snapshot.as_ref(), &id) {
        // TODO: vAia snapshotting and restoration is scheduled to next stage of riscv64 support.
        // TODO: PMU support is scheduled to next stage of riscv64 support.
        // PMU support is optional. Nothing should be impacted if the PMU initialization failed.
        unimplemented!()
    }

    // Record the controller in the device tree for introspection and
    // snapshotting.
    self.device_tree
        .lock()
        .unwrap()
        .insert(id.clone(), device_node!(id, interrupt_controller));

    Ok(interrupt_controller)
}
1690
#[cfg(target_arch = "riscv64")]
/// Returns the vAIA interrupt controller, if it has been created.
pub fn get_interrupt_controller(&mut self) -> Option<&Arc<Mutex<aia::Aia>>> {
    self.interrupt_controller.as_ref()
}
1695
#[cfg(target_arch = "x86_64")]
/// Creates the x86_64 IOAPIC, maps it on the MMIO bus at the fixed IOAPIC
/// address, and records it in the device tree.
fn add_interrupt_controller(
    &mut self,
) -> DeviceManagerResult<Arc<Mutex<dyn InterruptController>>> {
    let id = String::from(IOAPIC_DEVICE_NAME);

    // Create IOAPIC
    let interrupt_controller = Arc::new(Mutex::new(
        ioapic::Ioapic::new(
            id.clone(),
            APIC_START,
            Arc::clone(&self.msi_interrupt_manager),
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateInterruptController)?,
    ));

    self.interrupt_controller = Some(interrupt_controller.clone());

    self.address_manager
        .mmio_bus
        .insert(interrupt_controller.clone(), IOAPIC_START.0, IOAPIC_SIZE)
        .map_err(DeviceManagerError::BusError)?;

    self.bus_devices
        .push(Arc::clone(&interrupt_controller) as Arc<dyn BusDeviceSync>);

    // Fill the device tree with a new node. In case of restore, we
    // know there is nothing to do, so we can simply override the
    // existing entry.
    self.device_tree
        .lock()
        .unwrap()
        .insert(id.clone(), device_node!(id, interrupt_controller));

    Ok(interrupt_controller)
}
1734
/// Creates the ACPI platform devices: the shutdown device, the GED
/// notification device and the PM timer.
///
/// On x86_64 the shutdown and PM timer devices are additionally wired to
/// fixed I/O ports (0x600 and 0x608) and their register addresses are
/// recorded in `acpi_platform_addresses` for the ACPI tables. Returns the
/// GED device so hotplug notifications can be delivered later.
fn add_acpi_devices(
    &mut self,
    interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
    reset_evt: EventFd,
    exit_evt: EventFd,
) -> DeviceManagerResult<Option<Arc<Mutex<devices::AcpiGedDevice>>>> {
    let vcpus_kill_signalled = self
        .cpu_manager
        .lock()
        .unwrap()
        .vcpus_kill_signalled()
        .clone();
    let shutdown_device = Arc::new(Mutex::new(devices::AcpiShutdownDevice::new(
        exit_evt,
        reset_evt,
        vcpus_kill_signalled,
    )));

    self.bus_devices
        .push(Arc::clone(&shutdown_device) as Arc<dyn BusDeviceSync>);

    #[cfg(target_arch = "x86_64")]
    {
        let shutdown_pio_address: u16 = 0x600;

        // Reserve the port range, then map the device on the I/O bus.
        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(shutdown_pio_address.into())), 0x8, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(shutdown_device, shutdown_pio_address.into(), 0x4)
            .map_err(DeviceManagerError::BusError)?;

        // Sleep control/status and reset all share the same port.
        self.acpi_platform_addresses.sleep_control_reg_address =
            Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
        self.acpi_platform_addresses.sleep_status_reg_address =
            Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
        self.acpi_platform_addresses.reset_reg_address =
            Some(GenericAddress::io_port_address::<u8>(shutdown_pio_address));
    }

    // GED: allocate an IRQ and an MMIO window, then create the device.
    let ged_irq = self
        .address_manager
        .allocator
        .lock()
        .unwrap()
        .allocate_irq()
        .unwrap();
    let interrupt_group = interrupt_manager
        .create_group(LegacyIrqGroupConfig {
            irq: ged_irq as InterruptIndex,
        })
        .map_err(DeviceManagerError::CreateInterruptGroup)?;
    let ged_address = self
        .address_manager
        .allocator
        .lock()
        .unwrap()
        .allocate_platform_mmio_addresses(
            None,
            devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
            None,
        )
        .ok_or(DeviceManagerError::AllocateMmioAddress)?;
    let ged_device = Arc::new(Mutex::new(devices::AcpiGedDevice::new(
        interrupt_group,
        ged_irq,
        ged_address,
    )));
    self.address_manager
        .mmio_bus
        .insert(
            ged_device.clone(),
            ged_address.0,
            devices::acpi::GED_DEVICE_ACPI_SIZE as u64,
        )
        .map_err(DeviceManagerError::BusError)?;
    self.bus_devices
        .push(Arc::clone(&ged_device) as Arc<dyn BusDeviceSync>);

    let pm_timer_device = Arc::new(Mutex::new(devices::AcpiPmTimerDevice::new()));

    self.bus_devices
        .push(Arc::clone(&pm_timer_device) as Arc<dyn BusDeviceSync>);

    #[cfg(target_arch = "x86_64")]
    {
        let pm_timer_pio_address: u16 = 0x608;

        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(pm_timer_pio_address.into())), 0x4, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(pm_timer_device, pm_timer_pio_address.into(), 0x4)
            .map_err(DeviceManagerError::BusError)?;

        self.acpi_platform_addresses.pm_timer_address =
            Some(GenericAddress::io_port_address::<u32>(pm_timer_pio_address));
    }

    Ok(Some(ged_device))
}
1846
    /// Adds the legacy x86 platform devices (i8042 shutdown device, CMOS,
    /// firmware debug ports) to the PIO bus.
    ///
    /// `reset_evt` is passed to the i8042 and CMOS devices so they can
    /// trigger a VM reset.
    #[cfg(target_arch = "x86_64")]
    fn add_legacy_devices(&mut self, reset_evt: EventFd) -> DeviceManagerResult<()> {
        // Cloned so the legacy devices can signal the vCPU threads to stop
        // when the guest requests a reset/shutdown.
        let vcpus_kill_signalled = self
            .cpu_manager
            .lock()
            .unwrap()
            .vcpus_kill_signalled()
            .clone();
        // Add a shutdown device (i8042)
        let i8042 = Arc::new(Mutex::new(devices::legacy::I8042Device::new(
            reset_evt.try_clone().unwrap(),
            vcpus_kill_signalled.clone(),
        )));

        self.bus_devices
            .push(Arc::clone(&i8042) as Arc<dyn BusDeviceSync>);

        // i8042 registered at port 0x61 with a 4-byte window.
        self.address_manager
            .io_bus
            .insert(i8042, 0x61, 0x4)
            .map_err(DeviceManagerError::BusError)?;
        {
            // Add a CMOS emulated device
            // Guest RAM size is derived from the last mapped guest address;
            // the CMOS device is told the memory below and above 4GiB
            // separately.
            let mem_size = self
                .memory_manager
                .lock()
                .unwrap()
                .guest_memory()
                .memory()
                .last_addr()
                .0
                + 1;
            let mem_below_4g = std::cmp::min(arch::layout::MEM_32BIT_RESERVED_START.0, mem_size);
            let mem_above_4g = mem_size.saturating_sub(arch::layout::RAM_64BIT_START.0);

            let cmos = Arc::new(Mutex::new(devices::legacy::Cmos::new(
                mem_below_4g,
                mem_above_4g,
                reset_evt,
                Some(vcpus_kill_signalled),
            )));

            self.bus_devices
                .push(Arc::clone(&cmos) as Arc<dyn BusDeviceSync>);

            // Classic CMOS index/data port pair at 0x70/0x71.
            self.address_manager
                .io_bus
                .insert(cmos, 0x70, 0x2)
                .map_err(DeviceManagerError::BusError)?;

            let fwdebug = Arc::new(Mutex::new(devices::legacy::FwDebugDevice::new()));

            self.bus_devices
                .push(Arc::clone(&fwdebug) as Arc<dyn BusDeviceSync>);

            // Firmware debug device at port 0x402 (1 byte).
            self.address_manager
                .io_bus
                .insert(fwdebug, 0x402, 0x1)
                .map_err(DeviceManagerError::BusError)?;
        }

        // 0x80 debug port
        let debug_port = Arc::new(Mutex::new(devices::legacy::DebugPort::new(self.timestamp)));
        self.bus_devices
            .push(Arc::clone(&debug_port) as Arc<dyn BusDeviceSync>);
        self.address_manager
            .io_bus
            .insert(debug_port, 0x80, 0x1)
            .map_err(DeviceManagerError::BusError)?;

        Ok(())
    }
1919
    /// Adds the aarch64 legacy MMIO devices (RTC and GPIO) and records their
    /// address/IRQ information in `id_to_dev_info` for later lookup by
    /// device type and name.
    #[cfg(target_arch = "aarch64")]
    fn add_legacy_devices(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
    ) -> DeviceManagerResult<()> {
        // Add a RTC device
        let rtc_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: rtc_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        let rtc_device = Arc::new(Mutex::new(devices::legacy::Rtc::new(interrupt_group)));

        self.bus_devices
            .push(Arc::clone(&rtc_device) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_RTC_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(rtc_device, addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        // Record the mapping so the device can be described to the guest.
        self.id_to_dev_info.insert(
            (DeviceType::Rtc, "rtc".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: rtc_irq,
            },
        );

        // Add a GPIO device
        let id = String::from(GPIO_DEVICE_NAME);
        let gpio_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: gpio_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        // On restore, seed the GPIO device with its snapshotted state (if
        // any).
        let gpio_device = Arc::new(Mutex::new(devices::legacy::Gpio::new(
            id.clone(),
            interrupt_group,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&gpio_device) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_GPIO_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(gpio_device.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        // Keep a direct handle to the GPIO device for later use.
        self.gpio_device = Some(gpio_device.clone());

        self.id_to_dev_info.insert(
            (DeviceType::Gpio, "gpio".to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: gpio_irq,
            },
        );

        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, gpio_device));

        Ok(())
    }
2012
2013 #[cfg(target_arch = "x86_64")]
add_debug_console_device( &mut self, debug_console_writer: Box<dyn io::Write + Send>, ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>>2014 fn add_debug_console_device(
2015 &mut self,
2016 debug_console_writer: Box<dyn io::Write + Send>,
2017 ) -> DeviceManagerResult<Arc<Mutex<DebugConsole>>> {
2018 let id = String::from(DEBUGCON_DEVICE_NAME);
2019 let debug_console = Arc::new(Mutex::new(DebugConsole::new(
2020 id.clone(),
2021 debug_console_writer,
2022 )));
2023
2024 let port = self
2025 .config
2026 .lock()
2027 .unwrap()
2028 .debug_console
2029 .clone()
2030 .iobase
2031 .map(|port| port as u64)
2032 .unwrap_or(debug_console::DEFAULT_PORT);
2033
2034 self.bus_devices
2035 .push(Arc::clone(&debug_console) as Arc<dyn BusDeviceSync>);
2036
2037 self.address_manager
2038 .allocator
2039 .lock()
2040 .unwrap()
2041 .allocate_io_addresses(Some(GuestAddress(port)), 0x1, None)
2042 .ok_or(DeviceManagerError::AllocateIoPort)?;
2043
2044 self.address_manager
2045 .io_bus
2046 .insert(debug_console.clone(), port, 0x1)
2047 .map_err(DeviceManagerError::BusError)?;
2048
2049 // Fill the device tree with a new node. In case of restore, we
2050 // know there is nothing to do, so we can simply override the
2051 // existing entry.
2052 self.device_tree
2053 .lock()
2054 .unwrap()
2055 .insert(id.clone(), device_node!(id, debug_console));
2056
2057 Ok(debug_console)
2058 }
2059
    /// Adds the x86 serial device on the PIO bus using the standard COM1
    /// resources visible below (port 0x3f8, IRQ 4).
    ///
    /// `serial_writer` receives guest output; `None` means output is
    /// discarded.
    #[cfg(target_arch = "x86_64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
        // Serial is tied to IRQ #4
        let serial_irq = 4;

        let id = String::from(SERIAL_DEVICE_NAME);

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        // On restore, seed the device with its snapshotted state (if any).
        let serial = Arc::new(Mutex::new(Serial::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);

        // Reserve the 8-byte register window at 0x3f8 before inserting it
        // on the PIO bus.
        self.address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_io_addresses(Some(GuestAddress(0x3f8)), 0x8, None)
            .ok_or(DeviceManagerError::AllocateIoPort)?;

        self.address_manager
            .io_bus
            .insert(serial.clone(), 0x3f8, 0x8)
            .map_err(DeviceManagerError::BusError)?;

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }
2110
    /// Adds the aarch64 PL011 UART on the MMIO bus, records its
    /// address/IRQ info for guest description, and appends a matching
    /// `earlycon` kernel command line hint.
    #[cfg(target_arch = "aarch64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Pl011>>> {
        let id = String::from(SERIAL_DEVICE_NAME);

        // Unlike x86 (fixed IRQ 4), the IRQ is dynamically allocated here.
        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        // On restore, seed the device with its snapshotted state (if any).
        let serial = Arc::new(Mutex::new(devices::legacy::Pl011::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            self.timestamp,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        // Record the mapping so the device can be described to the guest.
        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: serial_irq,
            },
        );

        // Let the guest kernel use this UART for early boot messages.
        self.cmdline_additions
            .push(format!("earlycon=pl011,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }
2174
    /// Adds the riscv64 serial device on the MMIO bus, records its
    /// address/IRQ info for guest description, and appends a matching
    /// `earlycon` kernel command line hint.
    #[cfg(target_arch = "riscv64")]
    fn add_serial_device(
        &mut self,
        interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
        serial_writer: Option<Box<dyn io::Write + Send>>,
    ) -> DeviceManagerResult<Arc<Mutex<Serial>>> {
        let id = String::from(SERIAL_DEVICE_NAME);

        // Unlike x86 (fixed IRQ 4), the IRQ is dynamically allocated here.
        let serial_irq = self
            .address_manager
            .allocator
            .lock()
            .unwrap()
            .allocate_irq()
            .unwrap();

        let interrupt_group = interrupt_manager
            .create_group(LegacyIrqGroupConfig {
                irq: serial_irq as InterruptIndex,
            })
            .map_err(DeviceManagerError::CreateInterruptGroup)?;

        // On restore, seed the device with its snapshotted state (if any).
        let serial = Arc::new(Mutex::new(Serial::new(
            id.clone(),
            interrupt_group,
            serial_writer,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )));

        self.bus_devices
            .push(Arc::clone(&serial) as Arc<dyn BusDeviceSync>);

        let addr = arch::layout::LEGACY_SERIAL_MAPPED_IO_START;

        self.address_manager
            .mmio_bus
            .insert(serial.clone(), addr.0, MMIO_LEN)
            .map_err(DeviceManagerError::BusError)?;

        // Record the mapping so the device can be described to the guest.
        self.id_to_dev_info.insert(
            (DeviceType::Serial, DeviceType::Serial.to_string()),
            MmioDeviceInfo {
                addr: addr.0,
                len: MMIO_LEN,
                irq: serial_irq,
            },
        );

        // Let the guest kernel use this UART for early boot messages.
        self.cmdline_additions
            .push(format!("earlycon=uart,mmio,0x{:08x}", addr.0));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, serial));

        Ok(serial)
    }
2237
    /// Creates the virtio-console device (if enabled) and pushes it onto
    /// `virtio_devices`.
    ///
    /// Returns the console resizer only when the console is attached to a
    /// TTY (used for SIGWINCH handling); returns `Ok(None)` when the console
    /// is configured off.
    fn add_virtio_console_device(
        &mut self,
        virtio_devices: &mut Vec<MetaVirtioDevice>,
        console_fd: ConsoleOutput,
        resize_pipe: Option<Arc<File>>,
    ) -> DeviceManagerResult<Option<Arc<virtio_devices::ConsoleResizer>>> {
        let console_config = self.config.lock().unwrap().console.clone();
        // Map the configured output mode onto a console endpoint, keeping
        // the resize pipe only for interactive (PTY / real TTY) setups.
        let endpoint = match console_fd {
            ConsoleOutput::File(file) => Endpoint::File(file),
            ConsoleOutput::Pty(file) => {
                self.console_resize_pipe = resize_pipe;
                Endpoint::PtyPair(Arc::new(file.try_clone().unwrap()), file)
            }
            ConsoleOutput::Tty(stdout) => {
                if stdout.is_terminal() {
                    self.console_resize_pipe = resize_pipe;
                }

                // If an interactive TTY then we can accept input
                // SAFETY: FFI call. Trivially safe.
                if unsafe { libc::isatty(libc::STDIN_FILENO) == 1 } {
                    // Duplicate stdin so the console owns an independent fd.
                    // SAFETY: FFI call to dup. Trivially safe.
                    let stdin = unsafe { libc::dup(libc::STDIN_FILENO) };
                    if stdin == -1 {
                        return vmm_sys_util::errno::errno_result()
                            .map_err(DeviceManagerError::DupFd);
                    }
                    // SAFETY: stdin is valid and owned solely by us.
                    let stdin = unsafe { File::from_raw_fd(stdin) };
                    Endpoint::FilePair(stdout, Arc::new(stdin))
                } else {
                    Endpoint::File(stdout)
                }
            }
            ConsoleOutput::Socket(_) => {
                return Err(DeviceManagerError::NoSocketOptionSupportForConsoleDevice);
            }
            ConsoleOutput::Null => Endpoint::Null,
            ConsoleOutput::Off => return Ok(None),
        };
        let id = String::from(CONSOLE_DEVICE_NAME);

        let (virtio_console_device, console_resizer) = virtio_devices::Console::new(
            id.clone(),
            endpoint,
            self.console_resize_pipe
                .as_ref()
                .map(|p| p.try_clone().unwrap()),
            self.force_iommu | console_config.iommu,
            self.seccomp_action.clone(),
            self.exit_evt
                .try_clone()
                .map_err(DeviceManagerError::EventFd)?,
            state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?,
        )
        .map_err(DeviceManagerError::CreateVirtioConsole)?;
        let virtio_console_device = Arc::new(Mutex::new(virtio_console_device));
        virtio_devices.push(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_console_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: console_config.iommu,
            id: id.clone(),
            pci_segment: 0,
            dma_handler: None,
        });

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, virtio_console_device));

        // Only provide a resizer (for SIGWINCH handling) if the console is attached to the TTY
        Ok(if matches!(console_config.mode, ConsoleOutputMode::Tty) {
            Some(console_resizer)
        } else {
            None
        })
    }
2320
2321 /// Adds all devices that behave like a console with respect to the VM
2322 /// configuration. This includes:
2323 /// - debug-console
2324 /// - serial-console
2325 /// - virtio-console
add_console_devices( &mut self, interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>, virtio_devices: &mut Vec<MetaVirtioDevice>, console_info: Option<ConsoleInfo>, console_resize_pipe: Option<Arc<File>>, ) -> DeviceManagerResult<Arc<Console>>2326 fn add_console_devices(
2327 &mut self,
2328 interrupt_manager: &Arc<dyn InterruptManager<GroupConfig = LegacyIrqGroupConfig>>,
2329 virtio_devices: &mut Vec<MetaVirtioDevice>,
2330 console_info: Option<ConsoleInfo>,
2331 console_resize_pipe: Option<Arc<File>>,
2332 ) -> DeviceManagerResult<Arc<Console>> {
2333 let serial_config = self.config.lock().unwrap().serial.clone();
2334 if console_info.is_none() {
2335 return Err(DeviceManagerError::InvalidConsoleInfo);
2336 }
2337
2338 // SAFETY: console_info is Some, so it's safe to unwrap.
2339 let console_info = console_info.unwrap();
2340
2341 let serial_writer: Option<Box<dyn io::Write + Send>> = match console_info.serial_main_fd {
2342 ConsoleOutput::File(ref file) | ConsoleOutput::Tty(ref file) => {
2343 Some(Box::new(Arc::clone(file)))
2344 }
2345 ConsoleOutput::Off
2346 | ConsoleOutput::Null
2347 | ConsoleOutput::Pty(_)
2348 | ConsoleOutput::Socket(_) => None,
2349 };
2350
2351 if !matches!(console_info.serial_main_fd, ConsoleOutput::Off) {
2352 let serial = self.add_serial_device(interrupt_manager, serial_writer)?;
2353 self.serial_manager = match console_info.serial_main_fd {
2354 ConsoleOutput::Pty(_) | ConsoleOutput::Tty(_) | ConsoleOutput::Socket(_) => {
2355 let serial_manager = SerialManager::new(
2356 serial,
2357 console_info.serial_main_fd,
2358 serial_config.socket,
2359 )
2360 .map_err(DeviceManagerError::CreateSerialManager)?;
2361 if let Some(mut serial_manager) = serial_manager {
2362 serial_manager
2363 .start_thread(
2364 self.exit_evt
2365 .try_clone()
2366 .map_err(DeviceManagerError::EventFd)?,
2367 )
2368 .map_err(DeviceManagerError::SpawnSerialManager)?;
2369 Some(Arc::new(serial_manager))
2370 } else {
2371 None
2372 }
2373 }
2374 _ => None,
2375 };
2376 }
2377
2378 #[cfg(target_arch = "x86_64")]
2379 {
2380 let debug_console_writer: Option<Box<dyn io::Write + Send>> =
2381 match console_info.debug_main_fd {
2382 ConsoleOutput::File(file) | ConsoleOutput::Tty(file) => Some(Box::new(file)),
2383 ConsoleOutput::Off
2384 | ConsoleOutput::Null
2385 | ConsoleOutput::Pty(_)
2386 | ConsoleOutput::Socket(_) => None,
2387 };
2388 if let Some(writer) = debug_console_writer {
2389 let _ = self.add_debug_console_device(writer)?;
2390 }
2391 }
2392
2393 let console_resizer = self.add_virtio_console_device(
2394 virtio_devices,
2395 console_info.console_main_fd,
2396 console_resize_pipe,
2397 )?;
2398
2399 Ok(Arc::new(Console { console_resizer }))
2400 }
2401
2402 #[cfg(not(target_arch = "riscv64"))]
add_tpm_device( &mut self, tpm_path: PathBuf, ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>>2403 fn add_tpm_device(
2404 &mut self,
2405 tpm_path: PathBuf,
2406 ) -> DeviceManagerResult<Arc<Mutex<devices::tpm::Tpm>>> {
2407 // Create TPM Device
2408 let tpm = devices::tpm::Tpm::new(tpm_path.to_str().unwrap().to_string()).map_err(|e| {
2409 DeviceManagerError::CreateTpmDevice(anyhow!("Failed to create TPM Device : {:?}", e))
2410 })?;
2411 let tpm = Arc::new(Mutex::new(tpm));
2412
2413 // Add TPM Device to mmio
2414 self.address_manager
2415 .mmio_bus
2416 .insert(
2417 tpm.clone(),
2418 arch::layout::TPM_START.0,
2419 arch::layout::TPM_SIZE,
2420 )
2421 .map_err(DeviceManagerError::BusError)?;
2422
2423 Ok(tpm)
2424 }
2425
2426 /// Tries to acquire advisory locks for all disk images.
2427 ///
2428 /// This should only be called when a VM boots or VM state is restored.
2429 /// For live-migration, the locks must be released on the destination side
2430 /// before they are acquired again by the receiving side.
try_lock_disks(&self) -> DeviceManagerResult<()>2431 pub fn try_lock_disks(&self) -> DeviceManagerResult<()> {
2432 for dev in &self.block_devices {
2433 let mut dev = dev.lock().unwrap();
2434 dev.try_lock_image()
2435 .map_err(DeviceManagerError::DiskLockError)?;
2436 }
2437 Ok(())
2438 }
2439
2440 /// Release all advisory locks held for the disk images.
2441 ///
2442 /// This should only be called when the VM is stopped and the VMM supposed
2443 /// to shut down. A new VMM, either after a live migration or a
2444 /// state save/resume cycle, should then acquire all locks before the VM
2445 /// starts to run.
release_disk_locks(&self) -> DeviceManagerResult<()>2446 pub fn release_disk_locks(&self) -> DeviceManagerResult<()> {
2447 for dev in &self.block_devices {
2448 let mut dev = dev.lock().unwrap();
2449 dev.unlock_image()
2450 .map_err(DeviceManagerError::DiskLockError)?;
2451 }
2452 Ok(())
2453 }
2454
make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>>2455 fn make_virtio_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2456 let mut devices: Vec<MetaVirtioDevice> = Vec::new();
2457
2458 // Create "standard" virtio devices (net/block/rng)
2459 devices.append(&mut self.make_virtio_block_devices()?);
2460 devices.append(&mut self.make_virtio_net_devices()?);
2461 devices.append(&mut self.make_virtio_rng_devices()?);
2462
2463 // Add virtio-fs if required
2464 devices.append(&mut self.make_virtio_fs_devices()?);
2465
2466 // Add virtio-pmem if required
2467 devices.append(&mut self.make_virtio_pmem_devices()?);
2468
2469 // Add virtio-vsock if required
2470 devices.append(&mut self.make_virtio_vsock_devices()?);
2471
2472 devices.append(&mut self.make_virtio_mem_devices()?);
2473
2474 // Add virtio-balloon if required
2475 devices.append(&mut self.make_virtio_balloon_devices()?);
2476
2477 // Add virtio-watchdog device
2478 devices.append(&mut self.make_virtio_watchdog_devices()?);
2479
2480 // Add vDPA devices if required
2481 devices.append(&mut self.make_vdpa_devices()?);
2482
2483 Ok(devices)
2484 }
2485
2486 // Cache whether aio is supported to avoid checking for very block device
aio_is_supported(&mut self) -> bool2487 fn aio_is_supported(&mut self) -> bool {
2488 if let Some(supported) = self.aio_supported {
2489 return supported;
2490 }
2491
2492 let supported = block_aio_is_supported();
2493 self.aio_supported = Some(supported);
2494 supported
2495 }
2496
2497 // Cache whether io_uring is supported to avoid probing for very block device
io_uring_is_supported(&mut self) -> bool2498 fn io_uring_is_supported(&mut self) -> bool {
2499 if let Some(supported) = self.io_uring_supported {
2500 return supported;
2501 }
2502
2503 let supported = block_io_uring_is_supported();
2504 self.io_uring_supported = Some(supported);
2505 supported
2506 }
2507
    /// Creates a [`MetaVirtioDevice`] from the provided [`DiskConfig`].
    ///
    /// Depending on the config, this is a [`vhost_user::Blk`] device or a [`virtio_devices::Block`]
    /// device.
    ///
    /// # Arguments
    /// - `disk_cfg`: The [`DiskConfig`] used to create the block device.
    /// - `is_hotplug`: Whether the device is being hotplugged and the lock for the disk image
    ///   should be acquired right away. Locking will only happen for normal block devices, and not
    ///   vhost-user devices.
    fn make_virtio_block_device(
        &mut self,
        disk_cfg: &mut DiskConfig,
        is_hotplug: bool,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate one and write it back into the
        // config so it stays stable afterwards.
        let id = if let Some(id) = &disk_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(DISK_DEVICE_NAME_PREFIX)?;
            disk_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-block device: {:?}", disk_cfg);

        let (virtio_device, migratable_device) = if disk_cfg.vhost_user {
            // vhost-user backend: the image is handled by an external
            // process, so no image locking happens here.
            if is_hotplug {
                log::debug!("Acquiring image lock for vhost-user block device not supported");
            }
            let socket = disk_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: disk_cfg.num_queues,
                queue_size: disk_cfg.queue_size,
            };
            let vhost_user_block = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Blk::new(
                    id.clone(),
                    vu_cfg,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                ) {
                    Ok(vub_device) => vub_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserBlk(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_block as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let mut options = OpenOptions::new();
            options.read(true);
            options.write(!disk_cfg.readonly);
            if disk_cfg.direct {
                options.custom_flags(libc::O_DIRECT);
            }
            // Open block device path
            let mut file: File = options
                .open(
                    disk_cfg
                        .path
                        .as_ref()
                        .ok_or(DeviceManagerError::NoDiskPath)?
                        .clone(),
                )
                .map_err(DeviceManagerError::Disk)?;
            // Sniff the image format to pick the matching disk backend.
            let image_type =
                detect_image_type(&mut file).map_err(DeviceManagerError::DetectImageType)?;

            let image = match image_type {
                ImageType::FixedVhd => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous fixed VHD disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(
                                FixedVhdDiskAsync::new(file)
                                    .map_err(DeviceManagerError::CreateFixedVhdDiskAsync)?,
                            ) as Box<dyn DiskFile>
                        }
                    } else {
                        info!("Using synchronous fixed VHD disk file");
                        Box::new(
                            FixedVhdDiskSync::new(file)
                                .map_err(DeviceManagerError::CreateFixedVhdDiskSync)?,
                        ) as Box<dyn DiskFile>
                    }
                }
                ImageType::Raw => {
                    // Use asynchronous backend relying on io_uring if the
                    // syscalls are supported.
                    if cfg!(feature = "io_uring")
                        && !disk_cfg.disable_io_uring
                        && self.io_uring_is_supported()
                    {
                        info!("Using asynchronous RAW disk file (io_uring)");

                        #[cfg(not(feature = "io_uring"))]
                        unreachable!("Checked in if statement above");
                        #[cfg(feature = "io_uring")]
                        {
                            Box::new(RawFileDisk::new(file)) as Box<dyn DiskFile>
                        }
                    } else if !disk_cfg.disable_aio && self.aio_is_supported() {
                        // Fall back to Linux AIO before going fully
                        // synchronous.
                        info!("Using asynchronous RAW disk file (aio)");
                        Box::new(RawFileDiskAio::new(file)) as Box<dyn DiskFile>
                    } else {
                        info!("Using synchronous RAW disk file");
                        Box::new(RawFileDiskSync::new(file)) as Box<dyn DiskFile>
                    }
                }
                ImageType::Qcow2 => {
                    info!("Using synchronous QCOW2 disk file");
                    Box::new(
                        QcowDiskSync::new(file, disk_cfg.direct)
                            .map_err(DeviceManagerError::CreateQcowDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
                ImageType::Vhdx => {
                    info!("Using synchronous VHDX disk file");
                    Box::new(
                        VhdxDiskSync::new(file)
                            .map_err(DeviceManagerError::CreateFixedVhdxDiskSync)?,
                    ) as Box<dyn DiskFile>
                }
            };

            // Per-disk rate limiter config takes precedence over membership
            // in a named, shared rate-limit group.
            let rate_limit_group =
                if let Some(rate_limiter_cfg) = disk_cfg.rate_limiter_config.as_ref() {
                    // Create an anonymous RateLimiterGroup that is dropped when the Disk
                    // is dropped.
                    let bw = rate_limiter_cfg.bandwidth.unwrap_or_default();
                    let ops = rate_limiter_cfg.ops.unwrap_or_default();
                    let mut rate_limit_group = RateLimiterGroup::new(
                        disk_cfg.id.as_ref().unwrap(),
                        bw.size,
                        bw.one_time_burst.unwrap_or(0),
                        bw.refill_time,
                        ops.size,
                        ops.one_time_burst.unwrap_or(0),
                        ops.refill_time,
                    )
                    .map_err(DeviceManagerError::RateLimiterGroupCreate)?;

                    rate_limit_group
                        .start_thread(
                            self.exit_evt
                                .try_clone()
                                .map_err(DeviceManagerError::EventFd)?,
                        )
                        .unwrap();

                    Some(Arc::new(rate_limit_group))
                } else if let Some(rate_limit_group) = disk_cfg.rate_limit_group.as_ref() {
                    self.rate_limit_groups.get(rate_limit_group).cloned()
                } else {
                    None
                };

            // Optional per-queue host CPU affinity, keyed by queue index.
            let queue_affinity = if let Some(queue_affinity) = disk_cfg.queue_affinity.as_ref() {
                queue_affinity
                    .iter()
                    .map(|a| (a.queue_index, a.host_cpus.clone()))
                    .collect()
            } else {
                BTreeMap::new()
            };

            let mut virtio_block = virtio_devices::Block::new(
                id.clone(),
                image,
                disk_cfg
                    .path
                    .as_ref()
                    .ok_or(DeviceManagerError::NoDiskPath)?
                    .clone(),
                disk_cfg.readonly,
                self.force_iommu | disk_cfg.iommu,
                disk_cfg.num_queues,
                disk_cfg.queue_size,
                disk_cfg.serial.clone(),
                self.seccomp_action.clone(),
                rate_limit_group,
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
                queue_affinity,
            )
            .map_err(DeviceManagerError::CreateVirtioBlock)?;

            // We lock the file here only for hotplugging. In normal operation,
            // state save/resume, and live-migration, locking is part of the outer control flow
            // to ensure proper order of (un)locking.
            if is_hotplug {
                log::debug!("Acquiring lock for hotplugged image");
                virtio_block
                    .try_lock_image()
                    .map_err(DeviceManagerError::DiskLockError)?;
            }

            let virtio_block = Arc::new(Mutex::new(virtio_block));

            self.block_devices.push(virtio_block.clone());

            (
                Arc::clone(&virtio_block) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_block as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: disk_cfg.iommu,
            id,
            pci_segment: disk_cfg.pci_segment,
            dma_handler: None,
        })
    }
2753
make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>>2754 fn make_virtio_block_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2755 let mut devices = Vec::new();
2756
2757 let mut block_devices = self.config.lock().unwrap().disks.clone();
2758 if let Some(disk_list_cfg) = &mut block_devices {
2759 for disk_cfg in disk_list_cfg.iter_mut() {
2760 devices.push(self.make_virtio_block_device(disk_cfg, false)?);
2761 }
2762 }
2763 self.config.lock().unwrap().disks = block_devices;
2764
2765 Ok(devices)
2766 }
2767
    /// Create a single virtio-net device from `net_cfg`.
    ///
    /// Three backends are supported:
    /// - vhost-user-net over a unix socket when `net_cfg.vhost_user` is set,
    /// - an in-process virtio-net attached to an existing TAP interface
    ///   (by name) or to pre-opened TAP file descriptors,
    /// - an in-process virtio-net with a newly created TAP interface
    ///   otherwise.
    ///
    /// A device id is generated and written back into `net_cfg` when
    /// missing, and the device is registered in the device tree so it can
    /// be migrated.
    fn make_virtio_net_device(
        &mut self,
        net_cfg: &mut NetConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate one and store it back so the
        // persisted config matches the created device.
        let id = if let Some(id) = &net_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(NET_DEVICE_NAME_PREFIX)?;
            net_cfg.id = Some(id.clone());
            id
        };
        info!("Creating virtio-net device: {:?}", net_cfg);

        let (virtio_device, migratable_device) = if net_cfg.vhost_user {
            // vhost-user-net: the data path lives in an external backend
            // process reachable through `vhost_socket`.
            let socket = net_cfg.vhost_socket.as_ref().unwrap().clone();
            let vu_cfg = VhostUserConfig {
                socket,
                num_queues: net_cfg.num_queues,
                queue_size: net_cfg.queue_size,
            };
            let server = match net_cfg.vhost_mode {
                VhostMode::Client => false,
                VhostMode::Server => true,
            };
            let vhost_user_net = Arc::new(Mutex::new(
                match virtio_devices::vhost_user::Net::new(
                    id.clone(),
                    net_cfg.mac,
                    net_cfg.mtu,
                    vu_cfg,
                    server,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                ) {
                    Ok(vun_device) => vun_device,
                    Err(e) => {
                        return Err(DeviceManagerError::CreateVhostUserNet(e));
                    }
                },
            ));

            (
                Arc::clone(&vhost_user_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                vhost_user_net as Arc<Mutex<dyn Migratable>>,
            )
        } else {
            let state = state_from_id(self.snapshot.as_ref(), id.as_str())
                .map_err(DeviceManagerError::RestoreGetState)?;
            let virtio_net = if let Some(ref tap_if_name) = net_cfg.tap {
                // Attach to an already existing TAP interface by name.
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        Some(tap_if_name),
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            } else if let Some(fds) = &net_cfg.fds {
                // Build the device from TAP file descriptors that were
                // supplied in the configuration.
                let net = virtio_devices::Net::from_tap_fds(
                    id.clone(),
                    fds,
                    Some(net_cfg.mac),
                    net_cfg.mtu,
                    self.force_iommu | net_cfg.iommu,
                    net_cfg.queue_size,
                    self.seccomp_action.clone(),
                    net_cfg.rate_limiter_config,
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state,
                    net_cfg.offload_tso,
                    net_cfg.offload_ufo,
                    net_cfg.offload_csum,
                )
                .map_err(DeviceManagerError::CreateVirtioNet)?;

                // SAFETY: 'fds' are valid because TAP devices are created successfully
                unsafe {
                    self.config.lock().unwrap().add_preserved_fds(fds.clone());
                }

                Arc::new(Mutex::new(net))
            } else {
                // No TAP name or fds supplied: let the device create a
                // fresh TAP interface (tap argument is None).
                Arc::new(Mutex::new(
                    virtio_devices::Net::new(
                        id.clone(),
                        None,
                        Some(net_cfg.ip),
                        Some(net_cfg.mask),
                        Some(net_cfg.mac),
                        &mut net_cfg.host_mac,
                        net_cfg.mtu,
                        self.force_iommu | net_cfg.iommu,
                        net_cfg.num_queues,
                        net_cfg.queue_size,
                        self.seccomp_action.clone(),
                        net_cfg.rate_limiter_config,
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        state,
                        net_cfg.offload_tso,
                        net_cfg.offload_ufo,
                        net_cfg.offload_csum,
                    )
                    .map_err(DeviceManagerError::CreateVirtioNet)?,
                ))
            };

            (
                Arc::clone(&virtio_net) as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                virtio_net as Arc<Mutex<dyn Migratable>>,
            )
        };

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, migratable_device));

        Ok(MetaVirtioDevice {
            virtio_device,
            iommu: net_cfg.iommu,
            id,
            pci_segment: net_cfg.pci_segment,
            dma_handler: None,
        })
    }
2924
2925 /// Add virto-net and vhost-user-net devices
make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>>2926 fn make_virtio_net_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
2927 let mut devices = Vec::new();
2928 let mut net_devices = self.config.lock().unwrap().net.clone();
2929 if let Some(net_list_cfg) = &mut net_devices {
2930 for net_cfg in net_list_cfg.iter_mut() {
2931 devices.push(self.make_virtio_net_device(net_cfg)?);
2932 }
2933 }
2934 self.config.lock().unwrap().net = net_devices;
2935
2936 Ok(devices)
2937 }
2938
    /// Create the virtio-rng device when an entropy source is configured.
    ///
    /// At most one virtio-rng device exists per VM, so it uses the fixed
    /// name `RNG_DEVICE_NAME`. The device is registered in the device
    /// tree for migration.
    fn make_virtio_rng_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        // Add virtio-rng if required
        let rng_config = self.config.lock().unwrap().rng.clone();
        // Only proceed if the entropy source path is valid UTF-8.
        if let Some(rng_path) = rng_config.src.to_str() {
            info!("Creating virtio-rng device: {:?}", rng_config);
            let id = String::from(RNG_DEVICE_NAME);

            let virtio_rng_device = Arc::new(Mutex::new(
                virtio_devices::Rng::new(
                    id.clone(),
                    rng_path,
                    self.force_iommu | rng_config.iommu,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    // Restore previous device state when this is a restore.
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioRng)?,
            ));
            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_rng_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: rng_config.iommu,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Fill the device tree with a new node. In case of restore, we
            // know there is nothing to do, so we can simply override the
            // existing entry.
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_rng_device));
        }

        Ok(devices)
    }
2982
    /// Create a single vhost-user-fs (virtio-fs) device from `fs_cfg`.
    ///
    /// Requires `fs_cfg.socket` to be a valid UTF-8 path to the
    /// vhost-user backend socket; otherwise `NoVirtioFsSock` is
    /// returned. A device id is generated and written back when missing.
    fn make_virtio_fs_device(
        &mut self,
        fs_cfg: &mut FsConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate one and store it back.
        let id = if let Some(id) = &fs_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(FS_DEVICE_NAME_PREFIX)?;
            fs_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-fs device: {:?}", fs_cfg);

        let mut node = device_node!(id);

        if let Some(fs_socket) = fs_cfg.socket.to_str() {
            let virtio_fs_device = Arc::new(Mutex::new(
                virtio_devices::vhost_user::Fs::new(
                    id.clone(),
                    fs_socket,
                    &fs_cfg.tag,
                    fs_cfg.num_queues,
                    fs_cfg.queue_size,
                    None,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    self.force_iommu,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioFs)?,
            ));

            // Update the device tree with the migratable device.
            node.migratable = Some(Arc::clone(&virtio_fs_device) as Arc<Mutex<dyn Migratable>>);
            self.device_tree.lock().unwrap().insert(id.clone(), node);

            Ok(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_fs_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id,
                pci_segment: fs_cfg.pci_segment,
                dma_handler: None,
            })
        } else {
            Err(DeviceManagerError::NoVirtioFsSock)
        }
    }
3035
make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>>3036 fn make_virtio_fs_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3037 let mut devices = Vec::new();
3038
3039 let mut fs_devices = self.config.lock().unwrap().fs.clone();
3040 if let Some(fs_list_cfg) = &mut fs_devices {
3041 for fs_cfg in fs_list_cfg.iter_mut() {
3042 devices.push(self.make_virtio_fs_device(fs_cfg)?);
3043 }
3044 }
3045 self.config.lock().unwrap().fs = fs_devices;
3046
3047 Ok(devices)
3048 }
3049
    /// Create a single virtio-pmem device from `pmem_cfg`.
    ///
    /// The backing file (or an unnamed temporary file when `pmem_cfg.file`
    /// is a directory) is mmap'ed into the VMM and mapped into the guest
    /// at a 2 MiB aligned address allocated from the PCI segment's 64-bit
    /// MMIO allocator. On restore, the previously used address range is
    /// recovered from the device tree and re-allocated at the same spot.
    fn make_virtio_pmem_device(
        &mut self,
        pmem_cfg: &mut PmemConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate one and store it back.
        let id = if let Some(id) = &pmem_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(PMEM_DEVICE_NAME_PREFIX)?;
            pmem_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-pmem device: {:?}", pmem_cfg);

        let mut node = device_node!(id);

        // Look for the id in the device tree. If it can be found, that means
        // the device is being restored, otherwise it's created from scratch.
        let region_range = if let Some(node) = self.device_tree.lock().unwrap().get(&id) {
            info!("Restoring virtio-pmem {} resources", id);

            // Exactly one MmioAddressRange resource is expected.
            let mut region_range: Option<(u64, u64)> = None;
            for resource in node.resources.iter() {
                match resource {
                    Resource::MmioAddressRange { base, size } => {
                        if region_range.is_some() {
                            return Err(DeviceManagerError::ResourceAlreadyExists);
                        }

                        region_range = Some((*base, *size));
                    }
                    _ => {
                        error!("Unexpected resource {:?} for {}", resource, id);
                    }
                }
            }

            if region_range.is_none() {
                return Err(DeviceManagerError::MissingVirtioPmemResources);
            }

            region_range
        } else {
            None
        };

        // Backing with a directory requires an explicit size and creates an
        // unnamed temporary file inside that directory (O_TMPFILE), which is
        // then sized with set_len() below.
        let (custom_flags, set_len) = if pmem_cfg.file.is_dir() {
            if pmem_cfg.size.is_none() {
                return Err(DeviceManagerError::PmemWithDirectorySizeMissing);
            }
            (O_TMPFILE, true)
        } else {
            (0, false)
        };

        // Open read-only when guest writes are to be discarded.
        let mut file = OpenOptions::new()
            .read(true)
            .write(!pmem_cfg.discard_writes)
            .custom_flags(custom_flags)
            .open(&pmem_cfg.file)
            .map_err(DeviceManagerError::PmemFileOpen)?;

        // Without an explicit size, use the current file length.
        let size = if let Some(size) = pmem_cfg.size {
            if set_len {
                file.set_len(size)
                    .map_err(DeviceManagerError::PmemFileSetLen)?;
            }
            size
        } else {
            file.seek(SeekFrom::End(0))
                .map_err(DeviceManagerError::PmemFileSetLen)?
        };

        // The region must be a multiple of 2 MiB.
        if size % 0x20_0000 != 0 {
            return Err(DeviceManagerError::PmemSizeNotAligned);
        }

        let (region_base, region_size) = if let Some((base, size)) = region_range {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(
                    Some(GuestAddress(base)),
                    size as GuestUsize,
                    Some(0x0020_0000),
                )
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base, size)
        } else {
            // The memory needs to be 2MiB aligned in order to support
            // hugepages.
            let base = self.pci_segments[pmem_cfg.pci_segment as usize]
                .mem64_allocator
                .lock()
                .unwrap()
                .allocate(None, size as GuestUsize, Some(0x0020_0000))
                .ok_or(DeviceManagerError::PmemRangeAllocation)?;

            (base.raw_value(), size)
        };

        let cloned_file = file.try_clone().map_err(DeviceManagerError::CloneFile)?;
        // MAP_PRIVATE keeps guest writes out of the backing file when
        // discard_writes is set; MAP_SHARED writes them through.
        let mmap_region = MmapRegion::build(
            Some(FileOffset::new(cloned_file, 0)),
            region_size as usize,
            PROT_READ | PROT_WRITE,
            MAP_NORESERVE
                | if pmem_cfg.discard_writes {
                    MAP_PRIVATE
                } else {
                    MAP_SHARED
                },
        )
        .map_err(DeviceManagerError::NewMmapRegion)?;
        let host_addr: u64 = mmap_region.as_ptr() as u64;

        // Expose the mapping to the guest at the allocated address.
        let mem_slot = self
            .memory_manager
            .lock()
            .unwrap()
            .create_userspace_mapping(region_base, region_size, host_addr, false, false, false)
            .map_err(DeviceManagerError::MemoryManager)?;

        let mapping = virtio_devices::UserspaceMapping {
            host_addr,
            mem_slot,
            addr: GuestAddress(region_base),
            len: region_size,
            mergeable: false,
        };

        let virtio_pmem_device = Arc::new(Mutex::new(
            virtio_devices::Pmem::new(
                id.clone(),
                file,
                GuestAddress(region_base),
                mapping,
                mmap_region,
                self.force_iommu | pmem_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioPmem)?,
        ));

        // Update the device tree with correct resource information and with
        // the migratable device.
        node.resources.push(Resource::MmioAddressRange {
            base: region_base,
            size: region_size,
        });
        node.migratable = Some(Arc::clone(&virtio_pmem_device) as Arc<Mutex<dyn Migratable>>);
        self.device_tree.lock().unwrap().insert(id.clone(), node);

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&virtio_pmem_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: pmem_cfg.iommu,
            id,
            pci_segment: pmem_cfg.pci_segment,
            dma_handler: None,
        })
    }
3221
make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>>3222 fn make_virtio_pmem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3223 let mut devices = Vec::new();
3224 // Add virtio-pmem if required
3225 let mut pmem_devices = self.config.lock().unwrap().pmem.clone();
3226 if let Some(pmem_list_cfg) = &mut pmem_devices {
3227 for pmem_cfg in pmem_list_cfg.iter_mut() {
3228 devices.push(self.make_virtio_pmem_device(pmem_cfg)?);
3229 }
3230 }
3231 self.config.lock().unwrap().pmem = pmem_devices;
3232
3233 Ok(devices)
3234 }
3235
    /// Create a single virtio-vsock device from `vsock_cfg`.
    ///
    /// The host side is a unix-socket backend created from
    /// `vsock_cfg.socket`; the guest side is addressed by
    /// `vsock_cfg.cid`. A device id is generated and written back when
    /// missing, and the device is registered in the device tree.
    fn make_virtio_vsock_device(
        &mut self,
        vsock_cfg: &mut VsockConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate one and store it back.
        let id = if let Some(id) = &vsock_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VSOCK_DEVICE_NAME_PREFIX)?;
            vsock_cfg.id = Some(id.clone());
            id
        };

        info!("Creating virtio-vsock device: {:?}", vsock_cfg);

        // The socket path must be valid UTF-8 to build the backend.
        let socket_path = vsock_cfg
            .socket
            .to_str()
            .ok_or(DeviceManagerError::CreateVsockConvertPath)?;
        let backend =
            virtio_devices::vsock::VsockUnixBackend::new(vsock_cfg.cid, socket_path.to_string())
                .map_err(DeviceManagerError::CreateVsockBackend)?;

        let vsock_device = Arc::new(Mutex::new(
            virtio_devices::Vsock::new(
                id.clone(),
                vsock_cfg.cid,
                vsock_cfg.socket.clone(),
                backend,
                self.force_iommu | vsock_cfg.iommu,
                self.seccomp_action.clone(),
                self.exit_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVirtioVsock)?,
        ));

        // Fill the device tree with a new node. In case of restore, we
        // know there is nothing to do, so we can simply override the
        // existing entry.
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vsock_device));

        Ok(MetaVirtioDevice {
            virtio_device: Arc::clone(&vsock_device)
                as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vsock_cfg.iommu,
            id,
            pci_segment: vsock_cfg.pci_segment,
            dma_handler: None,
        })
    }
3292
make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>>3293 fn make_virtio_vsock_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3294 let mut devices = Vec::new();
3295
3296 let mut vsock = self.config.lock().unwrap().vsock.clone();
3297 if let Some(ref mut vsock_cfg) = &mut vsock {
3298 devices.push(self.make_virtio_vsock_device(vsock_cfg)?);
3299 }
3300 self.config.lock().unwrap().vsock = vsock;
3301
3302 Ok(devices)
3303 }
3304
    /// Create one virtio-mem device per memory zone that has a virtio-mem
    /// region configured.
    ///
    /// Each device is named after its memory zone id, wired to the zone's
    /// NUMA node (if any), and handed back to the zone so later resize
    /// requests can reach it.
    fn make_virtio_mem_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        let mm = self.memory_manager.clone();
        let mut mm = mm.lock().unwrap();
        for (memory_zone_id, memory_zone) in mm.memory_zones_mut().iter_mut() {
            // Only zones with a virtio-mem region get a device.
            if let Some(virtio_mem_zone) = memory_zone.virtio_mem_zone_mut() {
                info!("Creating virtio-mem device: id = {}", memory_zone_id);

                // Map the zone to its NUMA node id, when one is assigned.
                let node_id = numa_node_id_from_memory_zone_id(&self.numa_nodes, memory_zone_id)
                    .map(|i| i as u16);

                let virtio_mem_device = Arc::new(Mutex::new(
                    virtio_devices::Mem::new(
                        memory_zone_id.clone(),
                        virtio_mem_zone.region(),
                        self.seccomp_action.clone(),
                        node_id,
                        virtio_mem_zone.hotplugged_size(),
                        virtio_mem_zone.hugepages(),
                        self.exit_evt
                            .try_clone()
                            .map_err(DeviceManagerError::EventFd)?,
                        virtio_mem_zone.blocks_state().clone(),
                        state_from_id(self.snapshot.as_ref(), memory_zone_id.as_str())
                            .map_err(DeviceManagerError::RestoreGetState)?,
                    )
                    .map_err(DeviceManagerError::CreateVirtioMem)?,
                ));

                // Update the virtio-mem zone so that it has a handle onto the
                // virtio-mem device, which will be used for triggering a resize
                // if needed.
                virtio_mem_zone.set_virtio_device(Arc::clone(&virtio_mem_device));

                self.virtio_mem_devices.push(Arc::clone(&virtio_mem_device));

                devices.push(MetaVirtioDevice {
                    virtio_device: Arc::clone(&virtio_mem_device)
                        as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                    iommu: false,
                    id: memory_zone_id.clone(),
                    pci_segment: 0,
                    dma_handler: None,
                });

                // Fill the device tree with a new node. In case of restore, we
                // know there is nothing to do, so we can simply override the
                // existing entry.
                self.device_tree.lock().unwrap().insert(
                    memory_zone_id.clone(),
                    device_node!(memory_zone_id, virtio_mem_device),
                );
            }
        }

        Ok(devices)
    }
3363
    /// Create the pvmemcontrol device (PCI + bus halves).
    ///
    /// Returns both halves: the bus device that handles guest accesses
    /// and the PCI device that exposes it on segment 0. The device is
    /// registered on the PCI bus and in the device tree.
    #[cfg(feature = "pvmemcontrol")]
    fn make_pvmemcontrol_device(
        &mut self,
    ) -> DeviceManagerResult<(
        Arc<PvmemcontrolBusDevice>,
        Arc<Mutex<PvmemcontrolPciDevice>>,
    )> {
        let id = String::from(PVMEMCONTROL_DEVICE_NAME);
        // Always placed on PCI segment 0.
        let pci_segment_id = 0x0_u16;

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        info!("Creating pvmemcontrol device: id = {}", id);
        let (pvmemcontrol_pci_device, pvmemcontrol_bus_device) =
            devices::pvmemcontrol::PvmemcontrolDevice::make_device(
                id.clone(),
                self.memory_manager.lock().unwrap().guest_memory(),
            );

        let pvmemcontrol_pci_device = Arc::new(Mutex::new(pvmemcontrol_pci_device));
        let pvmemcontrol_bus_device = Arc::new(pvmemcontrol_bus_device);

        // Allocate BARs and register the device on the PCI bus.
        let new_resources = self.add_pci_device(
            pvmemcontrol_bus_device.clone(),
            pvmemcontrol_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        let mut node = device_node!(id, pvmemcontrol_pci_device);

        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = None;

        self.device_tree.lock().unwrap().insert(id, node);

        Ok((pvmemcontrol_bus_device, pvmemcontrol_pci_device))
    }
3405
    /// Create the virtio-balloon device if one is configured.
    ///
    /// A handle is kept in `self.balloon` so the device can be reached
    /// later (e.g. when the balloon size needs changing).
    fn make_virtio_balloon_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
        let mut devices = Vec::new();

        if let Some(balloon_config) = &self.config.lock().unwrap().balloon {
            // Single balloon device per VM, fixed name.
            let id = String::from(BALLOON_DEVICE_NAME);
            info!("Creating virtio-balloon device: id = {}", id);

            let virtio_balloon_device = Arc::new(Mutex::new(
                virtio_devices::Balloon::new(
                    id.clone(),
                    balloon_config.size,
                    balloon_config.deflate_on_oom,
                    balloon_config.free_page_reporting,
                    self.seccomp_action.clone(),
                    self.exit_evt
                        .try_clone()
                        .map_err(DeviceManagerError::EventFd)?,
                    state_from_id(self.snapshot.as_ref(), id.as_str())
                        .map_err(DeviceManagerError::RestoreGetState)?,
                )
                .map_err(DeviceManagerError::CreateVirtioBalloon)?,
            ));

            // Keep a direct handle on the manager for later balloon
            // operations.
            self.balloon = Some(virtio_balloon_device.clone());

            devices.push(MetaVirtioDevice {
                virtio_device: Arc::clone(&virtio_balloon_device)
                    as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
                iommu: false,
                id: id.clone(),
                pci_segment: 0,
                dma_handler: None,
            });

            // Register the device in the device tree (overriding any
            // existing entry on restore).
            self.device_tree
                .lock()
                .unwrap()
                .insert(id.clone(), device_node!(id, virtio_balloon_device));
        }

        Ok(devices)
    }
3448
make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>>3449 fn make_virtio_watchdog_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3450 let mut devices = Vec::new();
3451
3452 if !self.config.lock().unwrap().watchdog {
3453 return Ok(devices);
3454 }
3455
3456 let id = String::from(WATCHDOG_DEVICE_NAME);
3457 info!("Creating virtio-watchdog device: id = {}", id);
3458
3459 let virtio_watchdog_device = Arc::new(Mutex::new(
3460 virtio_devices::Watchdog::new(
3461 id.clone(),
3462 self.reset_evt.try_clone().unwrap(),
3463 self.seccomp_action.clone(),
3464 self.exit_evt
3465 .try_clone()
3466 .map_err(DeviceManagerError::EventFd)?,
3467 state_from_id(self.snapshot.as_ref(), id.as_str())
3468 .map_err(DeviceManagerError::RestoreGetState)?,
3469 )
3470 .map_err(DeviceManagerError::CreateVirtioWatchdog)?,
3471 ));
3472 devices.push(MetaVirtioDevice {
3473 virtio_device: Arc::clone(&virtio_watchdog_device)
3474 as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
3475 iommu: false,
3476 id: id.clone(),
3477 pci_segment: 0,
3478 dma_handler: None,
3479 });
3480
3481 self.device_tree
3482 .lock()
3483 .unwrap()
3484 .insert(id.clone(), device_node!(id, virtio_watchdog_device));
3485
3486 Ok(devices)
3487 }
3488
    /// Create a single vDPA device from `vdpa_cfg`.
    ///
    /// Unlike purely virtio devices, a vDPA device needs a DMA handler
    /// (`VdpaDmaMapping`) so guest memory can be mapped for the hardware
    /// data path; it is returned in `MetaVirtioDevice::dma_handler`.
    fn make_vdpa_device(
        &mut self,
        vdpa_cfg: &mut VdpaConfig,
    ) -> DeviceManagerResult<MetaVirtioDevice> {
        // Use the configured id, or generate one and store it back.
        let id = if let Some(id) = &vdpa_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VDPA_DEVICE_NAME_PREFIX)?;
            vdpa_cfg.id = Some(id.clone());
            id
        };

        info!("Creating vDPA device: {:?}", vdpa_cfg);

        // The character-device path must be valid UTF-8.
        let device_path = vdpa_cfg
            .path
            .to_str()
            .ok_or(DeviceManagerError::CreateVdpaConvertPath)?;

        let vdpa_device = Arc::new(Mutex::new(
            virtio_devices::Vdpa::new(
                id.clone(),
                device_path,
                self.memory_manager.lock().unwrap().guest_memory(),
                vdpa_cfg.num_queues as u16,
                state_from_id(self.snapshot.as_ref(), id.as_str())
                    .map_err(DeviceManagerError::RestoreGetState)?,
            )
            .map_err(DeviceManagerError::CreateVdpa)?,
        ));

        // Create the DMA handler that is required by the vDPA device
        let vdpa_mapping = Arc::new(VdpaDmaMapping::new(
            Arc::clone(&vdpa_device),
            Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
        ));

        // Register the device in the device tree (overriding any existing
        // entry on restore).
        self.device_tree
            .lock()
            .unwrap()
            .insert(id.clone(), device_node!(id, vdpa_device));

        Ok(MetaVirtioDevice {
            virtio_device: vdpa_device as Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
            iommu: vdpa_cfg.iommu,
            id,
            pci_segment: vdpa_cfg.pci_segment,
            dma_handler: Some(vdpa_mapping),
        })
    }
3539
make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>>3540 fn make_vdpa_devices(&mut self) -> DeviceManagerResult<Vec<MetaVirtioDevice>> {
3541 let mut devices = Vec::new();
3542 // Add vdpa if required
3543 let mut vdpa_devices = self.config.lock().unwrap().vdpa.clone();
3544 if let Some(vdpa_list_cfg) = &mut vdpa_devices {
3545 for vdpa_cfg in vdpa_list_cfg.iter_mut() {
3546 devices.push(self.make_vdpa_device(vdpa_cfg)?);
3547 }
3548 }
3549 self.config.lock().unwrap().vdpa = vdpa_devices;
3550
3551 Ok(devices)
3552 }
3553
next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String>3554 fn next_device_name(&mut self, prefix: &str) -> DeviceManagerResult<String> {
3555 let start_id = self.device_id_cnt;
3556 loop {
3557 // Generate the temporary name.
3558 let name = format!("{}{}", prefix, self.device_id_cnt);
3559 // Increment the counter.
3560 self.device_id_cnt += Wrapping(1);
3561 // Check if the name is already in use.
3562 if !self.boot_id_list.contains(&name)
3563 && !self.device_tree.lock().unwrap().contains_key(&name)
3564 {
3565 return Ok(name);
3566 }
3567
3568 if self.device_id_cnt == start_id {
3569 // We went through a full loop and there's nothing else we can
3570 // do.
3571 break;
3572 }
3573 }
3574 Err(DeviceManagerError::NoAvailableDeviceName)
3575 }
3576
add_passthrough_device( &mut self, device_cfg: &mut DeviceConfig, ) -> DeviceManagerResult<(PciBdf, String)>3577 fn add_passthrough_device(
3578 &mut self,
3579 device_cfg: &mut DeviceConfig,
3580 ) -> DeviceManagerResult<(PciBdf, String)> {
3581 // If the passthrough device has not been created yet, it is created
3582 // here and stored in the DeviceManager structure for future needs.
3583 if self.passthrough_device.is_none() {
3584 self.passthrough_device = Some(
3585 self.address_manager
3586 .vm
3587 .create_passthrough_device()
3588 .map_err(|e| DeviceManagerError::CreatePassthroughDevice(e.into()))?,
3589 );
3590 }
3591
3592 self.add_vfio_device(device_cfg)
3593 }
3594
create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>>3595 fn create_vfio_container(&self) -> DeviceManagerResult<Arc<VfioContainer>> {
3596 let passthrough_device = self
3597 .passthrough_device
3598 .as_ref()
3599 .ok_or(DeviceManagerError::NoDevicePassthroughSupport)?;
3600
3601 let dup = passthrough_device
3602 .try_clone()
3603 .map_err(DeviceManagerError::VfioCreate)?;
3604
3605 Ok(Arc::new(
3606 VfioContainer::new(Some(Arc::new(dup))).map_err(DeviceManagerError::VfioCreate)?,
3607 ))
3608 }
3609
    /// Attach one VFIO passthrough device described by `device_cfg`.
    ///
    /// Picks (or creates) the right VFIO container, performs the guest
    /// memory DMA mappings when a fresh shared container is created,
    /// builds the `VfioPciDevice`, places it on the PCI bus, maps its
    /// MMIO regions, and records it in the device tree. Returns the
    /// device's BDF and name.
    fn add_vfio_device(
        &mut self,
        device_cfg: &mut DeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // Use the configured id, or generate one and store it back.
        let vfio_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_name, device_cfg.pci_segment)?;

        let mut needs_dma_mapping = false;

        // Here we create a new VFIO container for two reasons. Either this is
        // the first VFIO device, meaning we need a new VFIO container, which
        // will be shared with other VFIO devices. Or the new VFIO device is
        // attached to a vIOMMU, meaning we must create a dedicated VFIO
        // container. In the vIOMMU use case, we can't let all devices under
        // the same VFIO container since we couldn't map/unmap memory for each
        // device. That's simply because the map/unmap operations happen at the
        // VFIO container level.
        let vfio_container = if device_cfg.iommu {
            let vfio_container = self.create_vfio_container()?;

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            // A vIOMMU-attached device gets its mappings driven by the
            // virtual IOMMU, which must therefore exist.
            if let Some(iommu) = &self.iommu_device {
                iommu
                    .lock()
                    .unwrap()
                    .add_external_mapping(pci_device_bdf.into(), vfio_mapping);
            } else {
                return Err(DeviceManagerError::MissingVirtualIommu);
            }

            vfio_container
        } else if let Some(vfio_container) = &self.vfio_container {
            // Reuse the shared container created by a previous device.
            Arc::clone(vfio_container)
        } else {
            // First non-vIOMMU VFIO device: create the shared container
            // and remember that guest memory still needs to be mapped.
            let vfio_container = self.create_vfio_container()?;
            needs_dma_mapping = true;
            self.vfio_container = Some(Arc::clone(&vfio_container));

            vfio_container
        };

        let vfio_device = VfioDevice::new(&device_cfg.path, Arc::clone(&vfio_container))
            .map_err(DeviceManagerError::VfioCreate)?;

        if needs_dma_mapping {
            // Register DMA mapping in IOMMU.
            // Do not register virtio-mem regions, as they are handled directly by
            // virtio-mem device itself.
            for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                for region in zone.regions() {
                    vfio_container
                        .vfio_dma_map(
                            region.start_addr().raw_value(),
                            region.len(),
                            region.as_ptr() as u64,
                        )
                        .map_err(DeviceManagerError::VfioDmaMap)?;
                }
            }

            let vfio_mapping = Arc::new(VfioDmaMapping::new(
                Arc::clone(&vfio_container),
                Arc::new(self.memory_manager.lock().unwrap().guest_memory()),
                Arc::clone(&self.mmio_regions),
            ));

            // Let every virtio-mem device update this container's
            // mappings when memory is plugged/unplugged.
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .add_dma_mapping_handler(
                        VirtioMemMappingSource::Container,
                        vfio_mapping.clone(),
                    )
                    .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
            }
        }

        // Wire up INTx through the per-slot legacy IRQ, when a legacy
        // interrupt manager is available.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        let memory_manager = self.memory_manager.clone();

        let vfio_pci_device = VfioPciDevice::new(
            vfio_name.clone(),
            &self.address_manager.vm,
            vfio_device,
            vfio_container,
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            device_cfg.iommu,
            pci_device_bdf,
            memory_manager.lock().unwrap().memory_slot_allocator(),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_name.as_str()),
            device_cfg.x_nv_gpudirect_clique,
            device_cfg.path.clone(),
        )
        .map_err(DeviceManagerError::VfioPciCreate)?;

        let vfio_pci_device = Arc::new(Mutex::new(vfio_pci_device));

        // Allocate BARs and register the device on the PCI bus.
        let new_resources = self.add_pci_device(
            vfio_pci_device.clone(),
            vfio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        vfio_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioMapRegion)?;

        // Track the device's MMIO regions at the manager level.
        for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
            self.mmio_regions.lock().unwrap().push(mmio_region);
        }

        let mut node = device_node!(vfio_name, vfio_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Vfio(vfio_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_name.clone(), node);

        Ok((pci_device_bdf, vfio_name))
    }
3768
    /// Place a PCI device on the bus of `segment_id` at `bdf`.
    ///
    /// Allocates the device's BARs (reusing `resources` when restoring),
    /// adds the device to the PCI bus, registers its bus mappings on the
    /// I/O and MMIO buses, and returns the allocated BARs as
    /// `Resource::PciBar` entries for the device tree.
    fn add_pci_device(
        &mut self,
        bus_device: Arc<dyn BusDeviceSync>,
        pci_device: Arc<Mutex<dyn PciDevice>>,
        segment_id: u16,
        bdf: PciBdf,
        resources: Option<Vec<Resource>>,
    ) -> DeviceManagerResult<Vec<Resource>> {
        // BARs come from the segment's 32-bit and 64-bit MMIO allocators.
        let bars = pci_device
            .lock()
            .unwrap()
            .allocate_bars(
                &self.address_manager.allocator,
                &mut self.pci_segments[segment_id as usize]
                    .mem32_allocator
                    .lock()
                    .unwrap(),
                &mut self.pci_segments[segment_id as usize]
                    .mem64_allocator
                    .lock()
                    .unwrap(),
                resources,
            )
            .map_err(DeviceManagerError::AllocateBars)?;

        let mut pci_bus = self.pci_segments[segment_id as usize]
            .pci_bus
            .lock()
            .unwrap();

        pci_bus
            .add_device(bdf.device() as u32, pci_device)
            .map_err(DeviceManagerError::AddPciDevice)?;

        self.bus_devices.push(Arc::clone(&bus_device));

        // Route guest accesses to the BAR ranges to the bus device.
        pci_bus
            .register_mapping(
                bus_device,
                self.address_manager.io_bus.as_ref(),
                self.address_manager.mmio_bus.as_ref(),
                bars.clone(),
            )
            .map_err(DeviceManagerError::AddPciDevice)?;

        // Report the allocated BARs back as device-tree resources.
        let mut new_resources = Vec::new();
        for bar in bars {
            new_resources.push(Resource::PciBar {
                index: bar.idx(),
                base: bar.addr(),
                size: bar.size(),
                type_: bar.region_type().into(),
                prefetchable: bar.prefetchable().into(),
            });
        }

        Ok(new_resources)
    }
3827
add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>>3828 fn add_vfio_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3829 let mut iommu_attached_device_ids = Vec::new();
3830 let mut devices = self.config.lock().unwrap().devices.clone();
3831
3832 if let Some(device_list_cfg) = &mut devices {
3833 for device_cfg in device_list_cfg.iter_mut() {
3834 let (device_id, _) = self.add_passthrough_device(device_cfg)?;
3835 if device_cfg.iommu && self.iommu_device.is_some() {
3836 iommu_attached_device_ids.push(device_id);
3837 }
3838 }
3839 }
3840
3841 // Update the list of devices
3842 self.config.lock().unwrap().devices = devices;
3843
3844 Ok(iommu_attached_device_ids)
3845 }
3846
    /// Creates a vfio-user PCI device (a device emulated by an external
    /// process, reached over a UNIX socket) from `device_cfg` and attaches it
    /// to the VM's PCI topology.
    ///
    /// Returns the (BDF, device id) pair of the new device.
    fn add_vfio_user_device(
        &mut self,
        device_cfg: &mut UserDeviceConfig,
    ) -> DeviceManagerResult<(PciBdf, String)> {
        // Reuse the configured id, or generate one and store it back so the
        // config reflects the actual device name.
        let vfio_user_name = if let Some(id) = &device_cfg.id {
            id.clone()
        } else {
            let id = self.next_device_name(VFIO_USER_DEVICE_NAME_PREFIX)?;
            device_cfg.id = Some(id.clone());
            id
        };

        // Resolve segment/BDF/resources, reusing recorded values on restore.
        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&vfio_user_name, device_cfg.pci_segment)?;

        // Wire up a legacy (INTx) interrupt group when a legacy interrupt
        // manager exists; the IRQ slot is keyed by device number.
        let legacy_interrupt_group =
            if let Some(legacy_interrupt_manager) = &self.legacy_interrupt_manager {
                Some(
                    legacy_interrupt_manager
                        .create_group(LegacyIrqGroupConfig {
                            irq: self.pci_segments[pci_segment_id as usize].pci_irq_slots
                                [pci_device_bdf.device() as usize]
                                as InterruptIndex,
                        })
                        .map_err(DeviceManagerError::CreateInterruptGroup)?,
                )
            } else {
                None
            };

        // Connect to the external device emulator over its UNIX socket.
        let client = Arc::new(Mutex::new(
            vfio_user::Client::new(&device_cfg.socket)
                .map_err(DeviceManagerError::VfioUserCreateClient)?,
        ));

        let memory_manager = self.memory_manager.clone();

        let mut vfio_user_pci_device = VfioUserPciDevice::new(
            vfio_user_name.clone(),
            &self.address_manager.vm,
            client.clone(),
            self.msi_interrupt_manager.clone(),
            legacy_interrupt_group,
            pci_device_bdf,
            memory_manager.lock().unwrap().memory_slot_allocator(),
            vm_migration::snapshot_from_id(self.snapshot.as_ref(), vfio_user_name.as_str()),
        )
        .map_err(DeviceManagerError::VfioUserCreate)?;

        // Register a DMA mapping handler with every virtio-mem device so
        // memory hot(un)plugged later is also (un)mapped for this device.
        let memory = self.memory_manager.lock().unwrap().guest_memory();
        let vfio_user_mapping = Arc::new(VfioUserDmaMapping::new(client, Arc::new(memory)));
        for virtio_mem_device in self.virtio_mem_devices.iter() {
            virtio_mem_device
                .lock()
                .unwrap()
                .add_dma_mapping_handler(
                    VirtioMemMappingSource::Device(pci_device_bdf.into()),
                    vfio_user_mapping.clone(),
                )
                .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
        }

        // DMA-map all currently present guest memory regions for the device.
        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
            for region in zone.regions() {
                vfio_user_pci_device
                    .dma_map(region)
                    .map_err(DeviceManagerError::VfioUserDmaMap)?;
            }
        }

        let vfio_user_pci_device = Arc::new(Mutex::new(vfio_user_pci_device));

        let new_resources = self.add_pci_device(
            vfio_user_pci_device.clone(),
            vfio_user_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Note it is required to call 'add_pci_device()' in advance to have the list of
        // mmio regions provisioned correctly
        vfio_user_pci_device
            .lock()
            .unwrap()
            .map_mmio_regions()
            .map_err(DeviceManagerError::VfioUserMapRegion)?;

        let mut node = device_node!(vfio_user_name, vfio_user_pci_device);

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::VfioUser(vfio_user_pci_device));

        self.device_tree
            .lock()
            .unwrap()
            .insert(vfio_user_name.clone(), node);

        Ok((pci_device_bdf, vfio_user_name))
    }
3949
add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>>3950 fn add_user_devices(&mut self) -> DeviceManagerResult<Vec<PciBdf>> {
3951 let mut user_devices = self.config.lock().unwrap().user_devices.clone();
3952
3953 if let Some(device_list_cfg) = &mut user_devices {
3954 for device_cfg in device_list_cfg.iter_mut() {
3955 let (_device_id, _id) = self.add_vfio_user_device(device_cfg)?;
3956 }
3957 }
3958
3959 // Update the list of devices
3960 self.config.lock().unwrap().user_devices = user_devices;
3961
3962 Ok(vec![])
3963 }
3964
    /// Wraps `virtio_device` in a virtio-pci transport and attaches it to the
    /// PCI topology on `pci_segment_id`.
    ///
    /// * `iommu_mapping` — set when the device sits behind the virtual IOMMU;
    ///   it then provides guest address translation via `AccessPlatform`.
    /// * `dma_handler` — external DMA mapping hook (e.g. for devices whose
    ///   backend needs explicit mappings); guest memory is identity-mapped
    ///   through it unless the vIOMMU owns the mappings.
    ///
    /// Returns the BDF assigned to the new virtio-pci device.
    fn add_virtio_pci_device(
        &mut self,
        virtio_device: Arc<Mutex<dyn virtio_devices::VirtioDevice>>,
        iommu_mapping: &Option<Arc<IommuMapping>>,
        virtio_device_id: String,
        pci_segment_id: u16,
        dma_handler: Option<Arc<dyn ExternalDmaMapping>>,
    ) -> DeviceManagerResult<PciBdf> {
        let id = format!("{VIRTIO_PCI_DEVICE_NAME_PREFIX}-{virtio_device_id}");

        // Add the new virtio-pci node to the device tree.
        let mut node = device_node!(id);
        node.children = vec![virtio_device_id.clone()];

        // Resolve segment/BDF/resources, reusing recorded values on restore.
        let (pci_segment_id, pci_device_bdf, resources) =
            self.pci_resources(&id, pci_segment_id)?;

        // Update the existing virtio node by setting the parent.
        if let Some(node) = self.device_tree.lock().unwrap().get_mut(&virtio_device_id) {
            node.parent = Some(id.clone());
        } else {
            return Err(DeviceManagerError::MissingNode);
        }

        // Allows support for one MSI-X vector per queue. It also adds 1
        // as we need to take into account the dedicated vector to notify
        // about a virtio config change.
        let msix_num = (virtio_device.lock().unwrap().queue_max_sizes().len() + 1) as u16;

        // Create the AccessPlatform trait from the implementation IommuMapping.
        // This will provide address translation for any virtio device sitting
        // behind a vIOMMU.
        let mut access_platform: Option<Arc<dyn AccessPlatform>> = None;

        if let Some(mapping) = iommu_mapping {
            access_platform = Some(Arc::new(AccessPlatformMapping::new(
                pci_device_bdf.into(),
                mapping.clone(),
            )));
        }

        // If SEV-SNP is enabled create the AccessPlatform from SevSnpPageAccessProxy
        #[cfg(feature = "sev_snp")]
        if self.config.lock().unwrap().is_sev_snp_enabled() {
            access_platform = Some(Arc::new(SevSnpPageAccessProxy::new(
                self.address_manager.vm.clone(),
            )));
        }

        let memory = self.memory_manager.lock().unwrap().guest_memory();

        // Map DMA ranges if a DMA handler is available and if the device is
        // not attached to a virtual IOMMU.
        if let Some(dma_handler) = &dma_handler {
            if iommu_mapping.is_some() {
                // Behind the vIOMMU: register the handler with the IOMMU so it
                // drives map/unmap instead of identity-mapping here.
                if let Some(iommu) = &self.iommu_device {
                    iommu
                        .lock()
                        .unwrap()
                        .add_external_mapping(pci_device_bdf.into(), dma_handler.clone());
                } else {
                    return Err(DeviceManagerError::MissingVirtualIommu);
                }
            } else {
                // Let every virtio-mem device handle the DMA map/unmap through the
                // DMA handler provided.
                for virtio_mem_device in self.virtio_mem_devices.iter() {
                    virtio_mem_device
                        .lock()
                        .unwrap()
                        .add_dma_mapping_handler(
                            VirtioMemMappingSource::Device(pci_device_bdf.into()),
                            dma_handler.clone(),
                        )
                        .map_err(DeviceManagerError::AddDmaMappingHandlerVirtioMem)?;
                }

                // Do not register virtio-mem regions, as they are handled directly by
                // virtio-mem devices.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        // Identity mapping: IOVA == GPA for current regions.
                        let gpa = region.start_addr().0;
                        let size = region.len();
                        dma_handler
                            .map(gpa, gpa, size)
                            .map_err(DeviceManagerError::VirtioDmaMap)?;
                    }
                }
            }
        }

        let device_type = virtio_device.lock().unwrap().device_type();
        let virtio_pci_device = Arc::new(Mutex::new(
            VirtioPciDevice::new(
                id.clone(),
                memory,
                virtio_device,
                msix_num,
                access_platform,
                &self.msi_interrupt_manager,
                pci_device_bdf.into(),
                self.activate_evt
                    .try_clone()
                    .map_err(DeviceManagerError::EventFd)?,
                // All device types *except* virtio block devices should be allocated a 64-bit bar
                // The block devices should be given a 32-bit BAR so that they are easily accessible
                // to firmware without requiring excessive identity mapping.
                // The exception being if not on the default PCI segment.
                pci_segment_id > 0 || device_type != VirtioDeviceType::Block as u32,
                dma_handler,
                self.pending_activations.clone(),
                vm_migration::snapshot_from_id(self.snapshot.as_ref(), id.as_str()),
            )
            .map_err(DeviceManagerError::VirtioDevice)?,
        ));

        let new_resources = self.add_pci_device(
            virtio_pci_device.clone(),
            virtio_pci_device.clone(),
            pci_segment_id,
            pci_device_bdf,
            resources,
        )?;

        // Register each queue-notify address as an ioeventfd so guest writes
        // kick the device directly through the hypervisor.
        let bar_addr = virtio_pci_device.lock().unwrap().config_bar_addr();
        for (event, addr) in virtio_pci_device.lock().unwrap().ioeventfds(bar_addr) {
            let io_addr = IoEventAddress::Mmio(addr);
            self.address_manager
                .vm
                .register_ioevent(event, &io_addr, None)
                .map_err(|e| DeviceManagerError::RegisterIoevent(e.into()))?;
        }

        // Update the device tree with correct resource information.
        node.resources = new_resources;
        node.migratable = Some(Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn Migratable>>);
        node.pci_bdf = Some(pci_device_bdf);
        node.pci_device_handle = Some(PciDeviceHandle::Virtio(virtio_pci_device));
        self.device_tree.lock().unwrap().insert(id, node);

        Ok(pci_device_bdf)
    }
4107
add_pvpanic_device( &mut self, ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>>4108 fn add_pvpanic_device(
4109 &mut self,
4110 ) -> DeviceManagerResult<Option<Arc<Mutex<devices::PvPanicDevice>>>> {
4111 let id = String::from(PVPANIC_DEVICE_NAME);
4112 let pci_segment_id = 0x0_u16;
4113
4114 info!("Creating pvpanic device {}", id);
4115
4116 let (pci_segment_id, pci_device_bdf, resources) =
4117 self.pci_resources(&id, pci_segment_id)?;
4118
4119 let snapshot = snapshot_from_id(self.snapshot.as_ref(), id.as_str());
4120
4121 let pvpanic_device = devices::PvPanicDevice::new(id.clone(), snapshot)
4122 .map_err(DeviceManagerError::PvPanicCreate)?;
4123
4124 let pvpanic_device = Arc::new(Mutex::new(pvpanic_device));
4125
4126 let new_resources = self.add_pci_device(
4127 pvpanic_device.clone(),
4128 pvpanic_device.clone(),
4129 pci_segment_id,
4130 pci_device_bdf,
4131 resources,
4132 )?;
4133
4134 let mut node = device_node!(id, pvpanic_device);
4135
4136 node.resources = new_resources;
4137 node.pci_bdf = Some(pci_device_bdf);
4138 node.pci_device_handle = None;
4139
4140 self.device_tree.lock().unwrap().insert(id, node);
4141
4142 Ok(Some(pvpanic_device))
4143 }
4144
pci_resources( &self, id: &str, pci_segment_id: u16, ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)>4145 fn pci_resources(
4146 &self,
4147 id: &str,
4148 pci_segment_id: u16,
4149 ) -> DeviceManagerResult<(u16, PciBdf, Option<Vec<Resource>>)> {
4150 // Look for the id in the device tree. If it can be found, that means
4151 // the device is being restored, otherwise it's created from scratch.
4152 let (pci_device_bdf, resources) =
4153 if let Some(node) = self.device_tree.lock().unwrap().get(id) {
4154 info!("Restoring virtio-pci {} resources", id);
4155 let pci_device_bdf: PciBdf = node
4156 .pci_bdf
4157 .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
4158 (Some(pci_device_bdf), Some(node.resources.clone()))
4159 } else {
4160 (None, None)
4161 };
4162
4163 Ok(if let Some(pci_device_bdf) = pci_device_bdf {
4164 let pci_segment_id = pci_device_bdf.segment();
4165
4166 self.pci_segments[pci_segment_id as usize]
4167 .pci_bus
4168 .lock()
4169 .unwrap()
4170 .get_device_id(pci_device_bdf.device() as usize)
4171 .map_err(DeviceManagerError::GetPciDeviceId)?;
4172
4173 (pci_segment_id, pci_device_bdf, resources)
4174 } else {
4175 let pci_device_bdf = self.pci_segments[pci_segment_id as usize].next_device_bdf()?;
4176
4177 (pci_segment_id, pci_device_bdf, None)
4178 })
4179 }
4180
    /// Shared handle to the port I/O (PIO) bus. x86_64 only.
    #[cfg(target_arch = "x86_64")]
    pub fn io_bus(&self) -> &Arc<Bus> {
        &self.address_manager.io_bus
    }
4185
    /// Shared handle to the MMIO bus.
    pub fn mmio_bus(&self) -> &Arc<Bus> {
        &self.address_manager.mmio_bus
    }
4189
    /// Shared handle to the system resource allocator.
    pub fn allocator(&self) -> &Arc<Mutex<SystemAllocator>> {
        &self.address_manager.allocator
    }
4193
interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>>4194 pub fn interrupt_controller(&self) -> Option<Arc<Mutex<dyn InterruptController>>> {
4195 self.interrupt_controller
4196 .as_ref()
4197 .map(|ic| ic.clone() as Arc<Mutex<dyn InterruptController>>)
4198 }
4199
    /// All PCI segments managed by this device manager.
    pub(crate) fn pci_segments(&self) -> &Vec<PciSegment> {
        &self.pci_segments
    }
4203
4204 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
cmdline_additions(&self) -> &[String]4205 pub fn cmdline_additions(&self) -> &[String] {
4206 self.cmdline_additions.as_slice()
4207 }
4208
    /// Propagates a newly added guest memory region to every consumer that
    /// must know about it: virtio devices, their external DMA handlers, the
    /// VFIO container, and vfio-user devices.
    pub fn update_memory(&self, new_region: &Arc<GuestRegionMmap>) -> DeviceManagerResult<()> {
        for handle in self.virtio_devices.iter() {
            handle
                .virtio_device
                .lock()
                .unwrap()
                .add_memory_region(new_region)
                .map_err(DeviceManagerError::UpdateMemoryForVirtioDevice)?;

            // Identity-map (IOVA == GPA) the new region through the device's
            // DMA handler, unless the device sits behind the vIOMMU (the
            // IOMMU then owns the mappings).
            if let Some(dma_handler) = &handle.dma_handler {
                if !handle.iommu {
                    let gpa = new_region.start_addr().0;
                    let size = new_region.len();
                    dma_handler
                        .map(gpa, gpa, size)
                        .map_err(DeviceManagerError::VirtioDmaMap)?;
                }
            }
        }

        // Take care of updating the memory for VFIO PCI devices.
        if let Some(vfio_container) = &self.vfio_container {
            vfio_container
                .vfio_dma_map(
                    new_region.start_addr().raw_value(),
                    new_region.len(),
                    new_region.as_ptr() as u64,
                )
                .map_err(DeviceManagerError::UpdateMemoryForVfioPciDevice)?;
        }

        // Take care of updating the memory for vfio-user devices.
        {
            let device_tree = self.device_tree.lock().unwrap();
            for pci_device_node in device_tree.pci_devices() {
                if let PciDeviceHandle::VfioUser(vfio_user_pci_device) = pci_device_node
                    .pci_device_handle
                    .as_ref()
                    .ok_or(DeviceManagerError::MissingPciDevice)?
                {
                    vfio_user_pci_device
                        .lock()
                        .unwrap()
                        .dma_map(new_region)
                        .map_err(DeviceManagerError::UpdateMemoryForVfioUserPciDevice)?;
                }
            }
        }

        Ok(())
    }
4260
activate_virtio_devices(&self) -> DeviceManagerResult<()>4261 pub fn activate_virtio_devices(&self) -> DeviceManagerResult<()> {
4262 for mut activator in self.pending_activations.lock().unwrap().drain(..) {
4263 activator
4264 .activate()
4265 .map_err(DeviceManagerError::VirtioActivate)?;
4266 }
4267 Ok(())
4268 }
4269
notify_hotplug( &self, _notification_type: AcpiNotificationFlags, ) -> DeviceManagerResult<()>4270 pub fn notify_hotplug(
4271 &self,
4272 _notification_type: AcpiNotificationFlags,
4273 ) -> DeviceManagerResult<()> {
4274 return self
4275 .ged_notification_device
4276 .as_ref()
4277 .unwrap()
4278 .lock()
4279 .unwrap()
4280 .notify(_notification_type)
4281 .map_err(DeviceManagerError::HotPlugNotification);
4282 }
4283
add_device( &mut self, device_cfg: &mut DeviceConfig, ) -> DeviceManagerResult<PciDeviceInfo>4284 pub fn add_device(
4285 &mut self,
4286 device_cfg: &mut DeviceConfig,
4287 ) -> DeviceManagerResult<PciDeviceInfo> {
4288 self.validate_identifier(&device_cfg.id)?;
4289
4290 if device_cfg.iommu && !self.is_iommu_segment(device_cfg.pci_segment) {
4291 return Err(DeviceManagerError::InvalidIommuHotplug);
4292 }
4293
4294 let (bdf, device_name) = self.add_passthrough_device(device_cfg)?;
4295
4296 // Update the PCIU bitmap
4297 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4298
4299 Ok(PciDeviceInfo {
4300 id: device_name,
4301 bdf,
4302 })
4303 }
4304
add_user_device( &mut self, device_cfg: &mut UserDeviceConfig, ) -> DeviceManagerResult<PciDeviceInfo>4305 pub fn add_user_device(
4306 &mut self,
4307 device_cfg: &mut UserDeviceConfig,
4308 ) -> DeviceManagerResult<PciDeviceInfo> {
4309 self.validate_identifier(&device_cfg.id)?;
4310
4311 let (bdf, device_name) = self.add_vfio_user_device(device_cfg)?;
4312
4313 // Update the PCIU bitmap
4314 self.pci_segments[device_cfg.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4315
4316 Ok(PciDeviceInfo {
4317 id: device_name,
4318 bdf,
4319 })
4320 }
4321
    /// Begins removal of the device identified by `id` by raising its bit in
    /// the PCID (devices-down) bitmap, which signals the guest to eject it.
    /// The actual teardown happens in `eject_device()` once the guest has
    /// released the device.
    pub fn remove_device(&mut self, id: String) -> DeviceManagerResult<()> {
        // The node can be directly a PCI node in case the 'id' refers to a
        // VFIO device or a virtio-pci one.
        // In case the 'id' refers to a virtio device, we must find the PCI
        // node by looking at the parent.
        let device_tree = self.device_tree.lock().unwrap();
        let node = device_tree
            .get(&id)
            .ok_or(DeviceManagerError::UnknownDeviceId(id.clone()))?;

        // Release advisory locks by dropping all references.
        // Linux automatically releases all locks of that file if the last open FD is closed.
        {
            let maybe_block_device_index = self
                .block_devices
                .iter()
                .enumerate()
                .find(|(_, dev)| {
                    let dev = dev.lock().unwrap();
                    dev.id() == id
                })
                .map(|(i, _)| i);
            if let Some(index) = maybe_block_device_index {
                // swap_remove is O(1); ordering of block_devices is irrelevant.
                let _ = self.block_devices.swap_remove(index);
            }
        }

        let pci_device_node = if node.pci_bdf.is_some() && node.pci_device_handle.is_some() {
            node
        } else {
            // Follow the parent link to reach the enclosing virtio-pci node.
            let parent = node
                .parent
                .as_ref()
                .ok_or(DeviceManagerError::MissingNode)?;
            device_tree
                .get(parent)
                .ok_or(DeviceManagerError::MissingNode)?
        };

        let pci_device_bdf: PciBdf = pci_device_node
            .pci_bdf
            .ok_or(DeviceManagerError::MissingDeviceNodePciBdf)?;
        let pci_segment_id = pci_device_bdf.segment();

        let pci_device_handle = pci_device_node
            .pci_device_handle
            .as_ref()
            .ok_or(DeviceManagerError::MissingPciDevice)?;
        // Only a subset of virtio device types supports hot-removal.
        #[allow(irrefutable_let_patterns)]
        if let PciDeviceHandle::Virtio(virtio_pci_device) = pci_device_handle {
            let device_type = VirtioDeviceType::from(
                virtio_pci_device
                    .lock()
                    .unwrap()
                    .virtio_device()
                    .lock()
                    .unwrap()
                    .device_type(),
            );
            match device_type {
                VirtioDeviceType::Net
                | VirtioDeviceType::Block
                | VirtioDeviceType::Pmem
                | VirtioDeviceType::Fs
                | VirtioDeviceType::Vsock => {}
                _ => return Err(DeviceManagerError::RemovalNotAllowed(device_type)),
            }
        }

        // Update the PCID bitmap
        self.pci_segments[pci_segment_id as usize].pci_devices_down |= 1 << pci_device_bdf.device();

        Ok(())
    }
4396
    /// Completes removal of the device in slot `device_id` on segment
    /// `pci_segment_id` after the guest has ejected it: removes the device
    /// tree nodes, unregisters ioeventfds and DMA mappings, frees BARs, and
    /// detaches the device from every bus and bookkeeping list.
    pub fn eject_device(&mut self, pci_segment_id: u16, device_id: u8) -> DeviceManagerResult<()> {
        info!(
            "Ejecting device_id = {} on segment_id={}",
            device_id, pci_segment_id
        );

        // Convert the device ID into the corresponding b/d/f.
        let pci_device_bdf = PciBdf::new(pci_segment_id, 0, device_id, 0);

        // Give the PCI device ID back to the PCI bus.
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .put_device_id(device_id as usize)
            .map_err(DeviceManagerError::PutPciDeviceId)?;

        let (pci_device_handle, id) = {
            // Remove the device from the device tree along with its children.
            let mut device_tree = self.device_tree.lock().unwrap();
            let pci_device_node = device_tree
                .remove_node_by_pci_bdf(pci_device_bdf)
                .ok_or(DeviceManagerError::MissingPciDevice)?;

            // For VFIO and vfio-user the PCI device id is the id.
            // For virtio we overwrite it later as we want the id of the
            // underlying device.
            let mut id = pci_device_node.id;
            let pci_device_handle = pci_device_node
                .pci_device_handle
                .ok_or(DeviceManagerError::MissingPciDevice)?;
            if matches!(pci_device_handle, PciDeviceHandle::Virtio(_)) {
                // The virtio-pci device has a single child
                if !pci_device_node.children.is_empty() {
                    assert_eq!(pci_device_node.children.len(), 1);
                    let child_id = &pci_device_node.children[0];
                    id.clone_from(child_id);
                }
            }
            for child in pci_device_node.children.iter() {
                device_tree.remove(child);
            }

            (pci_device_handle, id)
        };

        // Determine whether this device was placed behind the virtual IOMMU.
        let mut iommu_attached = false;
        if let Some((_, iommu_attached_devices)) = &self.iommu_attached_devices {
            if iommu_attached_devices.contains(&pci_device_bdf) {
                iommu_attached = true;
            }
        }

        // Per-handle teardown, yielding the trait-object views needed for the
        // common teardown steps below, plus whether the device registered a
        // DMA handler with the virtio-mem devices.
        let (pci_device, bus_device, virtio_device, remove_dma_handler) = match pci_device_handle {
            // No need to remove any virtio-mem mapping here as the container outlives all devices
            PciDeviceHandle::Vfio(vfio_pci_device) => {
                // Forget the MMIO regions recorded for this device.
                for mmio_region in vfio_pci_device.lock().unwrap().mmio_regions() {
                    self.mmio_regions
                        .lock()
                        .unwrap()
                        .retain(|x| x.start != mmio_region.start)
                }

                (
                    Arc::clone(&vfio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_pci_device) as Arc<dyn BusDeviceSync>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    false,
                )
            }
            PciDeviceHandle::Virtio(virtio_pci_device) => {
                let dev = virtio_pci_device.lock().unwrap();
                // Unregister the ioeventfds installed when the device was added.
                let bar_addr = dev.config_bar_addr();
                for (event, addr) in dev.ioeventfds(bar_addr) {
                    let io_addr = IoEventAddress::Mmio(addr);
                    self.address_manager
                        .vm
                        .unregister_ioevent(event, &io_addr)
                        .map_err(|e| DeviceManagerError::UnRegisterIoevent(e.into()))?;
                }

                // Undo the identity DMA mappings, unless the vIOMMU owned them.
                if let Some(dma_handler) = dev.dma_handler() {
                    if !iommu_attached {
                        for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                            for region in zone.regions() {
                                let iova = region.start_addr().0;
                                let size = region.len();
                                dma_handler
                                    .unmap(iova, size)
                                    .map_err(DeviceManagerError::VirtioDmaUnmap)?;
                            }
                        }
                    }
                }

                (
                    Arc::clone(&virtio_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&virtio_pci_device) as Arc<dyn BusDeviceSync>,
                    Some(dev.virtio_device()),
                    dev.dma_handler().is_some() && !iommu_attached,
                )
            }
            PciDeviceHandle::VfioUser(vfio_user_pci_device) => {
                let mut dev = vfio_user_pci_device.lock().unwrap();
                // Unmap all guest memory from the external device process.
                for (_, zone) in self.memory_manager.lock().unwrap().memory_zones().iter() {
                    for region in zone.regions() {
                        dev.dma_unmap(region)
                            .map_err(DeviceManagerError::VfioUserDmaUnmap)?;
                    }
                }

                (
                    Arc::clone(&vfio_user_pci_device) as Arc<Mutex<dyn PciDevice>>,
                    Arc::clone(&vfio_user_pci_device) as Arc<dyn BusDeviceSync>,
                    None as Option<Arc<Mutex<dyn virtio_devices::VirtioDevice>>>,
                    true,
                )
            }
        };

        // Detach this device's DMA handler from every virtio-mem device.
        if remove_dma_handler {
            for virtio_mem_device in self.virtio_mem_devices.iter() {
                virtio_mem_device
                    .lock()
                    .unwrap()
                    .remove_dma_mapping_handler(VirtioMemMappingSource::Device(
                        pci_device_bdf.into(),
                    ))
                    .map_err(DeviceManagerError::RemoveDmaMappingHandlerVirtioMem)?;
            }
        }

        // Free the allocated BARs
        pci_device
            .lock()
            .unwrap()
            .free_bars(
                &mut self.address_manager.allocator.lock().unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .mem32_allocator
                    .lock()
                    .unwrap(),
                &mut self.pci_segments[pci_segment_id as usize]
                    .mem64_allocator
                    .lock()
                    .unwrap(),
            )
            .map_err(DeviceManagerError::FreePciBars)?;

        // Remove the device from the PCI bus
        self.pci_segments[pci_segment_id as usize]
            .pci_bus
            .lock()
            .unwrap()
            .remove_by_device(&pci_device)
            .map_err(DeviceManagerError::RemoveDeviceFromPciBus)?;

        #[cfg(target_arch = "x86_64")]
        // Remove the device from the IO bus
        self.io_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromIoBus)?;

        // Remove the device from the MMIO bus
        self.mmio_bus()
            .remove_by_device(&bus_device)
            .map_err(DeviceManagerError::RemoveDeviceFromMmioBus)?;

        // Remove the device from the list of BusDevice held by the
        // DeviceManager.
        self.bus_devices
            .retain(|dev| !Arc::ptr_eq(dev, &bus_device));

        // Shutdown and remove the underlying virtio-device if present
        if let Some(virtio_device) = virtio_device {
            // Release any userspace mappings the device registered with the
            // memory manager.
            for mapping in virtio_device.lock().unwrap().userspace_mappings() {
                self.memory_manager
                    .lock()
                    .unwrap()
                    .remove_userspace_mapping(
                        mapping.addr.raw_value(),
                        mapping.len,
                        mapping.host_addr,
                        mapping.mergeable,
                        mapping.mem_slot,
                    )
                    .map_err(DeviceManagerError::MemoryManager)?;
            }

            virtio_device.lock().unwrap().shutdown();

            self.virtio_devices
                .retain(|handler| !Arc::ptr_eq(&handler.virtio_device, &virtio_device));
        }

        event!(
            "vm",
            "device-removed",
            "id",
            &id,
            "bdf",
            pci_device_bdf.to_string()
        );

        // At this point, the device has been removed from all the list and
        // buses where it was stored. At the end of this function, after
        // any_device, bus_device and pci_device are released, the actual
        // device will be dropped.
        Ok(())
    }
4607
hotplug_virtio_pci_device( &mut self, handle: MetaVirtioDevice, ) -> DeviceManagerResult<PciDeviceInfo>4608 fn hotplug_virtio_pci_device(
4609 &mut self,
4610 handle: MetaVirtioDevice,
4611 ) -> DeviceManagerResult<PciDeviceInfo> {
4612 // Add the virtio device to the device manager list. This is important
4613 // as the list is used to notify virtio devices about memory updates
4614 // for instance.
4615 self.virtio_devices.push(handle.clone());
4616
4617 let mapping: Option<Arc<IommuMapping>> = if handle.iommu {
4618 self.iommu_mapping.clone()
4619 } else {
4620 None
4621 };
4622
4623 let bdf = self.add_virtio_pci_device(
4624 handle.virtio_device,
4625 &mapping,
4626 handle.id.clone(),
4627 handle.pci_segment,
4628 handle.dma_handler,
4629 )?;
4630
4631 // Update the PCIU bitmap
4632 self.pci_segments[handle.pci_segment as usize].pci_devices_up |= 1 << bdf.device();
4633
4634 Ok(PciDeviceInfo { id: handle.id, bdf })
4635 }
4636
is_iommu_segment(&self, pci_segment_id: u16) -> bool4637 fn is_iommu_segment(&self, pci_segment_id: u16) -> bool {
4638 self.config
4639 .lock()
4640 .as_ref()
4641 .unwrap()
4642 .platform
4643 .as_ref()
4644 .map(|pc| {
4645 pc.iommu_segments
4646 .as_ref()
4647 .map(|v| v.contains(&pci_segment_id))
4648 .unwrap_or_default()
4649 })
4650 .unwrap_or_default()
4651 }
4652
add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo>4653 pub fn add_disk(&mut self, disk_cfg: &mut DiskConfig) -> DeviceManagerResult<PciDeviceInfo> {
4654 self.validate_identifier(&disk_cfg.id)?;
4655
4656 if disk_cfg.iommu && !self.is_iommu_segment(disk_cfg.pci_segment) {
4657 return Err(DeviceManagerError::InvalidIommuHotplug);
4658 }
4659
4660 let device = self.make_virtio_block_device(disk_cfg, true)?;
4661 self.hotplug_virtio_pci_device(device)
4662 }
4663
add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo>4664 pub fn add_fs(&mut self, fs_cfg: &mut FsConfig) -> DeviceManagerResult<PciDeviceInfo> {
4665 self.validate_identifier(&fs_cfg.id)?;
4666
4667 let device = self.make_virtio_fs_device(fs_cfg)?;
4668 self.hotplug_virtio_pci_device(device)
4669 }
4670
add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo>4671 pub fn add_pmem(&mut self, pmem_cfg: &mut PmemConfig) -> DeviceManagerResult<PciDeviceInfo> {
4672 self.validate_identifier(&pmem_cfg.id)?;
4673
4674 if pmem_cfg.iommu && !self.is_iommu_segment(pmem_cfg.pci_segment) {
4675 return Err(DeviceManagerError::InvalidIommuHotplug);
4676 }
4677
4678 let device = self.make_virtio_pmem_device(pmem_cfg)?;
4679 self.hotplug_virtio_pci_device(device)
4680 }
4681
add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo>4682 pub fn add_net(&mut self, net_cfg: &mut NetConfig) -> DeviceManagerResult<PciDeviceInfo> {
4683 self.validate_identifier(&net_cfg.id)?;
4684
4685 if net_cfg.iommu && !self.is_iommu_segment(net_cfg.pci_segment) {
4686 return Err(DeviceManagerError::InvalidIommuHotplug);
4687 }
4688
4689 let device = self.make_virtio_net_device(net_cfg)?;
4690 self.hotplug_virtio_pci_device(device)
4691 }
4692
add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo>4693 pub fn add_vdpa(&mut self, vdpa_cfg: &mut VdpaConfig) -> DeviceManagerResult<PciDeviceInfo> {
4694 self.validate_identifier(&vdpa_cfg.id)?;
4695
4696 if vdpa_cfg.iommu && !self.is_iommu_segment(vdpa_cfg.pci_segment) {
4697 return Err(DeviceManagerError::InvalidIommuHotplug);
4698 }
4699
4700 let device = self.make_vdpa_device(vdpa_cfg)?;
4701 self.hotplug_virtio_pci_device(device)
4702 }
4703
add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo>4704 pub fn add_vsock(&mut self, vsock_cfg: &mut VsockConfig) -> DeviceManagerResult<PciDeviceInfo> {
4705 self.validate_identifier(&vsock_cfg.id)?;
4706
4707 if vsock_cfg.iommu && !self.is_iommu_segment(vsock_cfg.pci_segment) {
4708 return Err(DeviceManagerError::InvalidIommuHotplug);
4709 }
4710
4711 let device = self.make_virtio_vsock_device(vsock_cfg)?;
4712 self.hotplug_virtio_pci_device(device)
4713 }
4714
counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>>4715 pub fn counters(&self) -> HashMap<String, HashMap<&'static str, Wrapping<u64>>> {
4716 let mut counters = HashMap::new();
4717
4718 for handle in &self.virtio_devices {
4719 let virtio_device = handle.virtio_device.lock().unwrap();
4720 if let Some(device_counters) = virtio_device.counters() {
4721 counters.insert(handle.id.clone(), device_counters.clone());
4722 }
4723 }
4724
4725 counters
4726 }
4727
resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()>4728 pub fn resize_balloon(&mut self, size: u64) -> DeviceManagerResult<()> {
4729 if let Some(balloon) = &self.balloon {
4730 return balloon
4731 .lock()
4732 .unwrap()
4733 .resize(size)
4734 .map_err(DeviceManagerError::VirtioBalloonResize);
4735 }
4736
4737 warn!("No balloon setup: Can't resize the balloon");
4738 Err(DeviceManagerError::MissingVirtioBalloon)
4739 }
4740
balloon_size(&self) -> u644741 pub fn balloon_size(&self) -> u64 {
4742 if let Some(balloon) = &self.balloon {
4743 return balloon.lock().unwrap().get_actual();
4744 }
4745
4746 0
4747 }
4748
device_tree(&self) -> Arc<Mutex<DeviceTree>>4749 pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
4750 self.device_tree.clone()
4751 }
4752
    #[cfg(target_arch = "x86_64")]
    /// Signal an ACPI power-button press to the guest through the GED
    /// notification device.
    ///
    /// Panics if no GED device was created (it always is on x86_64 when ACPI
    /// is in use, hence the `unwrap`).
    pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
            .map_err(DeviceManagerError::PowerButtonNotification)
    }
4763
4764 #[cfg(target_arch = "aarch64")]
notify_power_button(&self) -> DeviceManagerResult<()>4765 pub fn notify_power_button(&self) -> DeviceManagerResult<()> {
4766 // There are two use cases:
4767 // 1. Users will use direct kernel boot with device tree.
4768 // 2. Users will use ACPI+UEFI boot.
4769
4770 // Trigger a GPIO pin 3 event to satisfy use case 1.
4771 self.gpio_device
4772 .as_ref()
4773 .unwrap()
4774 .lock()
4775 .unwrap()
4776 .trigger_key(3)
4777 .map_err(DeviceManagerError::AArch64PowerButtonNotification)?;
4778 // Trigger a GED power button event to satisfy use case 2.
4779 return self
4780 .ged_notification_device
4781 .as_ref()
4782 .unwrap()
4783 .lock()
4784 .unwrap()
4785 .notify(AcpiNotificationFlags::POWER_BUTTON_CHANGED)
4786 .map_err(DeviceManagerError::PowerButtonNotification);
4787 }
4788
iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)>4789 pub fn iommu_attached_devices(&self) -> &Option<(PciBdf, Vec<PciBdf>)> {
4790 &self.iommu_attached_devices
4791 }
4792
validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()>4793 fn validate_identifier(&self, id: &Option<String>) -> DeviceManagerResult<()> {
4794 if let Some(id) = id {
4795 if id.starts_with("__") {
4796 return Err(DeviceManagerError::InvalidIdentifier(id.clone()));
4797 }
4798
4799 if self.device_tree.lock().unwrap().contains_key(id) {
4800 return Err(DeviceManagerError::IdentifierNotUnique(id.clone()));
4801 }
4802 }
4803
4804 Ok(())
4805 }
4806
    #[cfg(not(target_arch = "riscv64"))]
    /// Return the guest addresses of the ACPI platform structures, used when
    /// building the ACPI tables.
    pub(crate) fn acpi_platform_addresses(&self) -> &AcpiPlatformAddresses {
        &self.acpi_platform_addresses
    }
4811 }
4812
numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32>4813 fn numa_node_id_from_memory_zone_id(numa_nodes: &NumaNodes, memory_zone_id: &str) -> Option<u32> {
4814 for (numa_node_id, numa_node) in numa_nodes.iter() {
4815 if numa_node.memory_zones.contains(&memory_zone_id.to_owned()) {
4816 return Some(*numa_node_id);
4817 }
4818 }
4819
4820 None
4821 }
4822
numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u324823 fn numa_node_id_from_pci_segment_id(numa_nodes: &NumaNodes, pci_segment_id: u16) -> u32 {
4824 for (numa_node_id, numa_node) in numa_nodes.iter() {
4825 if numa_node.pci_segments.contains(&pci_segment_id) {
4826 return *numa_node_id;
4827 }
4828 }
4829
4830 0
4831 }
4832
#[cfg(not(target_arch = "riscv64"))]
// Marker type used only to emit the ACPI "TPM2" device object; it carries no
// runtime state.
struct TpmDevice {}
4835
#[cfg(not(target_arch = "riscv64"))]
impl Aml for TpmDevice {
    // Emit the AML description of the TPM 2.0 device (_HID "MSFT0101"),
    // exposing its fixed MMIO window (TPM_START, TPM_SIZE) to the guest
    // through the _CRS resource template. _STA = 0xF marks the device as
    // present, enabled, visible and functioning.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        aml::Device::new(
            "TPM2".into(),
            vec![
                &aml::Name::new("_HID".into(), &"MSFT0101"),
                &aml::Name::new("_STA".into(), &(0xF_usize)),
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::Memory32Fixed::new(
                        true,
                        layout::TPM_START.0 as u32,
                        layout::TPM_SIZE as u32,
                    )]),
                ),
            ],
        )
        .to_aml_bytes(sink)
    }
}
4857
#[cfg(not(target_arch = "riscv64"))]
impl Aml for DeviceManager {
    // Generate the AML description of everything the DeviceManager owns:
    // the PCI hotplug controller, each PCI segment, the motherboard
    // resources, the serial port, the S5 sleep state, the power button,
    // the optional TPM and the GED notification device.
    fn to_aml_bytes(&self, sink: &mut dyn acpi_tables::AmlSink) {
        #[cfg(target_arch = "aarch64")]
        use arch::aarch64::DeviceInfoForFdt;

        // Build one "PCNT" (PCI re-enumeration) method call per segment;
        // they are invoked together from the PSCN scan method below.
        let mut pci_scan_methods = Vec::new();
        for i in 0..self.pci_segments.len() {
            pci_scan_methods.push(aml::MethodCall::new(
                format!("\\_SB_.PC{i:02X}.PCNT").as_str().into(),
                vec![],
            ));
        }
        let mut pci_scan_inner: Vec<&dyn Aml> = Vec::new();
        for method in &pci_scan_methods {
            pci_scan_inner.push(method)
        }

        // PCI hotplug controller
        aml::Device::new(
            "_SB_.PHPR".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0A06")),
                &aml::Name::new("_STA".into(), &0x0bu8),
                &aml::Name::new("_UID".into(), &"PCI Hotplug Controller"),
                &aml::Mutex::new("BLCK".into(), 0),
                // MMIO window backing the hotplug registers, handled by the
                // BusDevice implementation below.
                &aml::Name::new(
                    "_CRS".into(),
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::NotCacheable,
                        true,
                        self.acpi_address.0,
                        self.acpi_address.0 + DEVICE_MANAGER_ACPI_SIZE as u64 - 1,
                        None,
                    )]),
                ),
                // OpRegion and Fields map MMIO range into individual field values
                &aml::OpRegion::new(
                    "PCST".into(),
                    aml::OpRegionSpace::SystemMemory,
                    &(self.acpi_address.0 as usize),
                    &DEVICE_MANAGER_ACPI_SIZE,
                ),
                &aml::Field::new(
                    "PCST".into(),
                    aml::FieldAccessType::DWord,
                    aml::FieldLockRule::NoLock,
                    aml::FieldUpdateRule::WriteAsZeroes,
                    vec![
                        aml::FieldEntry::Named(*b"PCIU", 32),
                        aml::FieldEntry::Named(*b"PCID", 32),
                        aml::FieldEntry::Named(*b"B0EJ", 32),
                        aml::FieldEntry::Named(*b"PSEG", 32),
                    ],
                ),
                // PCEJ(slot, segment): eject a device by writing a one-hot
                // slot bitmap to B0EJ, serialized under the BLCK mutex.
                &aml::Method::new(
                    "PCEJ".into(),
                    2,
                    true,
                    vec![
                        // Take lock defined above
                        &aml::Acquire::new("BLCK".into(), 0xffff),
                        // Choose the current segment
                        &aml::Store::new(&aml::Path::new("PSEG"), &aml::Arg(1)),
                        // Write PCI bus number (in first argument) to I/O port via field
                        &aml::ShiftLeft::new(&aml::Path::new("B0EJ"), &aml::ONE, &aml::Arg(0)),
                        // Release lock
                        &aml::Release::new("BLCK".into()),
                        // Return 0
                        &aml::Return::new(&aml::ZERO),
                    ],
                ),
                &aml::Method::new("PSCN".into(), 0, true, pci_scan_inner),
            ],
        )
        .to_aml_bytes(sink);

        for segment in &self.pci_segments {
            segment.to_aml_bytes(sink);
        }

        // Motherboard resources: reserve each segment's MMIO config window.
        let mut mbrd_memory = Vec::new();

        for segment in &self.pci_segments {
            mbrd_memory.push(aml::Memory32Fixed::new(
                true,
                segment.mmio_config_address as u32,
                layout::PCI_MMIO_CONFIG_SIZE_PER_SEGMENT as u32,
            ))
        }

        let mut mbrd_memory_refs = Vec::new();
        for mbrd_memory_ref in &mbrd_memory {
            mbrd_memory_refs.push(mbrd_memory_ref as &dyn Aml);
        }

        aml::Device::new(
            "_SB_.MBRD".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C02")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
                &aml::Name::new("_CRS".into(), &aml::ResourceTemplate::new(mbrd_memory_refs)),
            ],
        )
        .to_aml_bytes(sink);

        // Serial device
        #[cfg(target_arch = "x86_64")]
        let serial_irq = 4;
        #[cfg(target_arch = "aarch64")]
        // NOTE(review): the `.clone()` on `serial` looks redundant since only
        // `mode` is read — confirm before removing.
        let serial_irq =
            if self.config.lock().unwrap().serial.clone().mode != ConsoleOutputMode::Off {
                self.get_device_info()
                    .clone()
                    .get(&(DeviceType::Serial, DeviceType::Serial.to_string()))
                    .unwrap()
                    .irq()
            } else {
                // If serial is turned off, add a fake device with invalid irq.
                31
            };
        if self.config.lock().unwrap().serial.mode != ConsoleOutputMode::Off {
            aml::Device::new(
                "_SB_.COM1".into(),
                vec![
                    &aml::Name::new(
                        "_HID".into(),
                        #[cfg(target_arch = "x86_64")]
                        &aml::EISAName::new("PNP0501"),
                        #[cfg(target_arch = "aarch64")]
                        &"ARMH0011",
                    ),
                    &aml::Name::new("_UID".into(), &aml::ZERO),
                    &aml::Name::new("_DDN".into(), &"COM1"),
                    &aml::Name::new(
                        "_CRS".into(),
                        &aml::ResourceTemplate::new(vec![
                            &aml::Interrupt::new(true, true, false, false, serial_irq),
                            #[cfg(target_arch = "x86_64")]
                            &aml::IO::new(0x3f8, 0x3f8, 0, 0x8),
                            #[cfg(target_arch = "aarch64")]
                            &aml::Memory32Fixed::new(
                                true,
                                arch::layout::LEGACY_SERIAL_MAPPED_IO_START.raw_value() as u32,
                                MMIO_LEN as u32,
                            ),
                        ]),
                    ),
                ],
            )
            .to_aml_bytes(sink);
        }

        // S5 (soft-off) sleep state package.
        aml::Name::new("_S5_".into(), &aml::Package::new(vec![&5u8])).to_aml_bytes(sink);

        // ACPI power button device.
        aml::Device::new(
            "_SB_.PWRB".into(),
            vec![
                &aml::Name::new("_HID".into(), &aml::EISAName::new("PNP0C0C")),
                &aml::Name::new("_UID".into(), &aml::ZERO),
            ],
        )
        .to_aml_bytes(sink);

        if self.config.lock().unwrap().tpm.is_some() {
            // Add tpm device
            TpmDevice {}.to_aml_bytes(sink);
        }

        self.ged_notification_device
            .as_ref()
            .unwrap()
            .lock()
            .unwrap()
            .to_aml_bytes(sink)
    }
}
5035
impl Pausable for DeviceManager {
    // Pause every migratable device registered in the device tree, then
    // (on AArch64) the interrupt controller.
    fn pause(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().pause()?;
            }
        }
        // On AArch64, the pause of device manager needs to trigger
        // a "pause" of GIC, which will flush the GIC pending tables
        // and ITS tables to guest RAM.
        #[cfg(target_arch = "aarch64")]
        {
            self.get_interrupt_controller()
                .unwrap()
                .lock()
                .unwrap()
                .pause()?;
        };

        Ok(())
    }

    // Resume every migratable device registered in the device tree.
    fn resume(&mut self) -> result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().resume()?;
            }
        }
        Ok(())
    }
}
5067
impl Snapshottable for DeviceManager {
    // Stable identifier of the DeviceManager section in a VM snapshot.
    fn id(&self) -> String {
        DEVICE_MANAGER_SNAPSHOT_ID.to_string()
    }

    // Snapshot the DeviceManager state itself, then aggregate the snapshot
    // of every migratable device found in the device tree.
    fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
        let mut snapshot = Snapshot::from_data(SnapshotData::new_from_state(&self.state())?);

        // We aggregate all devices snapshots.
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                let mut migratable = migratable.lock().unwrap();
                snapshot.add_snapshot(migratable.id(), migratable.snapshot()?);
            }
        }

        Ok(snapshot)
    }
}
5087
5088 impl Transportable for DeviceManager {}
5089
// Each Migratable hook simply fans out to every migratable device node
// registered in the device tree, short-circuiting on the first error.
impl Migratable for DeviceManager {
    fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_dirty_log()?;
            }
        }
        Ok(())
    }

    fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().stop_dirty_log()?;
            }
        }
        Ok(())
    }

    // Merge the dirty-memory ranges reported by all devices into a single
    // table.
    fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
        let mut tables = Vec::new();
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                tables.push(migratable.lock().unwrap().dirty_log()?);
            }
        }
        Ok(MemoryRangeTable::new_from_tables(tables))
    }

    fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().start_migration()?;
            }
        }
        Ok(())
    }

    fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
        for (_, device_node) in self.device_tree.lock().unwrap().iter() {
            if let Some(migratable) = &device_node.migratable {
                migratable.lock().unwrap().complete_migration()?;
            }
        }
        Ok(())
    }
}
5137
// MMIO layout of the PCI hotplug register window, matching the "PCST"
// OpRegion fields (PCIU/PCID/B0EJ/PSEG) declared in the ACPI tables.
// PCIU: bitmap of slots with a freshly plugged device (cleared on read).
const PCIU_FIELD_OFFSET: u64 = 0;
// PCID: bitmap of slots with a device pending unplug (cleared on read).
const PCID_FIELD_OFFSET: u64 = 4;
// B0EJ: write-side eject register; each set bit ejects that slot.
const B0EJ_FIELD_OFFSET: u64 = 8;
// PSEG: selects which PCI segment the registers above refer to.
const PSEG_FIELD_OFFSET: u64 = 12;
// All four registers are 32 bits wide.
const PCIU_FIELD_SIZE: usize = 4;
const PCID_FIELD_SIZE: usize = 4;
const B0EJ_FIELD_SIZE: usize = 4;
const PSEG_FIELD_SIZE: usize = 4;
5146
impl BusDevice for DeviceManager {
    // Guest read of a PCI hotplug register. All accesses must be exactly the
    // register width; offsets outside the known registers are logged and
    // ignored.
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            PCIU_FIELD_OFFSET => {
                assert!(data.len() == PCIU_FIELD_SIZE);
                // Report slots with newly plugged devices on the currently
                // selected segment (little-endian), then clear-on-read.
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_up
                        .to_le_bytes(),
                );
                // Clear the PCIU bitmap
                self.pci_segments[self.selected_segment].pci_devices_up = 0;
            }
            PCID_FIELD_OFFSET => {
                assert!(data.len() == PCID_FIELD_SIZE);
                // Report slots with devices pending unplug, then
                // clear-on-read.
                data.copy_from_slice(
                    &self.pci_segments[self.selected_segment]
                        .pci_devices_down
                        .to_le_bytes(),
                );
                // Clear the PCID bitmap
                self.pci_segments[self.selected_segment].pci_devices_down = 0;
            }
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                // Always return an empty bitmap since the eject is always
                // taken care of right away during a write access.
                data.fill(0);
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                data.copy_from_slice(&(self.selected_segment as u32).to_le_bytes());
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_R: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        )
    }

    // Guest write of a PCI hotplug register. B0EJ ejects every slot whose
    // bit is set; PSEG switches the selected segment (out-of-range values
    // are rejected). Returns None: no barrier needed.
    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<std::sync::Barrier>> {
        match offset {
            B0EJ_FIELD_OFFSET => {
                assert!(data.len() == B0EJ_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let mut slot_bitmap = u32::from_le_bytes(data_array);

                // Eject each requested slot; a failed eject is logged but
                // does not stop the remaining ejects.
                while slot_bitmap > 0 {
                    let slot_id = slot_bitmap.trailing_zeros();
                    if let Err(e) = self.eject_device(self.selected_segment as u16, slot_id as u8) {
                        error!("Failed ejecting device {}: {:?}", slot_id, e);
                    }
                    slot_bitmap &= !(1 << slot_id);
                }
            }
            PSEG_FIELD_OFFSET => {
                assert_eq!(data.len(), PSEG_FIELD_SIZE);
                let mut data_array: [u8; 4] = [0, 0, 0, 0];
                data_array.copy_from_slice(data);
                let selected_segment = u32::from_le_bytes(data_array) as usize;
                if selected_segment >= self.pci_segments.len() {
                    error!(
                        "Segment selection out of range: {} >= {}",
                        selected_segment,
                        self.pci_segments.len()
                    );
                    return None;
                }
                self.selected_segment = selected_segment;
            }
            _ => error!(
                "Accessing unknown location at base 0x{:x}, offset 0x{:x}",
                base, offset
            ),
        }

        debug!(
            "PCI_HP_REG_W: base 0x{:x}, offset 0x{:x}, data {:?}",
            base, offset, data
        );

        None
    }
}
5237
impl Drop for DeviceManager {
    // Tear down in a safe order: resume first so paused worker threads can
    // make progress, then shut down every virtio device, and finally restore
    // the terminal settings saved at startup.
    fn drop(&mut self) {
        // Wake up the DeviceManager threads (mainly virtio device workers),
        // to avoid deadlock on waiting for paused/parked worker threads.
        if let Err(e) = self.resume() {
            error!("Error resuming DeviceManager: {:?}", e);
        }

        for handle in self.virtio_devices.drain(..) {
            handle.virtio_device.lock().unwrap().shutdown();
        }

        // Restore the original termios settings of stdout if a serial/console
        // device changed them (best effort: the result is ignored).
        if let Some(termios) = *self.original_termios_opt.lock().unwrap() {
            // SAFETY: FFI call
            let _ = unsafe { tcsetattr(stdout().lock().as_raw_fd(), TCSANOW, &termios) };
        }
    }
}
5256
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_create_mmio_allocators() {
        // Helper asserting that `create_mmio_allocators` splits the window
        // [0x100000, 0x400000) into the expected (base, end) ranges for a
        // given segment count and weight distribution.
        let check = |num, weights, expected: &[(u64, u64)]| {
            let res = create_mmio_allocators(0x100000, 0x400000, num, weights, 4 << 10);
            assert_eq!(res.len(), expected.len());
            for (allocator, (base, end)) in res.iter().zip(expected) {
                assert_eq!(
                    allocator.lock().unwrap().base(),
                    vm_memory::GuestAddress(*base)
                );
                assert_eq!(
                    allocator.lock().unwrap().end(),
                    vm_memory::GuestAddress(*end)
                );
            }
        };

        // A single segment spans the whole window.
        check(1, vec![1], &[(0x100000, 0x3fffff)]);
        // Two equally weighted segments split the window in half.
        check(
            2,
            vec![1, 1],
            &[(0x100000, 0x27ffff), (0x280000, 0x3fffff)],
        );
        // A 2:1 weighting gives the first segment two thirds of the window.
        check(
            2,
            vec![2, 1],
            &[(0x100000, 0x2fffff), (0x300000, 0x3fffff)],
        );
    }
}
5313