xref: /cloud-hypervisor/vmm/src/vm.rs (revision 87c0791d535fd9a1a248dd1b146b65ccac106dd2)
1 // Copyright © 2020, Oracle and/or its affiliates.
2 //
3 // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 //
5 // Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the LICENSE-BSD-3-Clause file.
8 //
9 // Copyright © 2019 Intel Corporation
10 //
11 // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause
12 //
13 
14 use crate::config::NumaConfig;
15 use crate::config::{
16     add_to_config, DeviceConfig, DiskConfig, FsConfig, HotplugMethod, NetConfig, PmemConfig,
17     UserDeviceConfig, ValidationError, VdpaConfig, VmConfig, VsockConfig,
18 };
19 #[cfg(feature = "guest_debug")]
20 use crate::coredump::{
21     CpuElf64Writable, DumpState, Elf64Writable, GuestDebuggable, GuestDebuggableError, NoteDescType,
22 };
23 use crate::cpu;
24 use crate::device_manager::{Console, DeviceManager, DeviceManagerError, PtyPair};
25 use crate::device_tree::DeviceTree;
26 #[cfg(feature = "gdb")]
27 use crate::gdb::{Debuggable, DebuggableError, GdbRequestPayload, GdbResponsePayload};
28 use crate::memory_manager::{
29     Error as MemoryManagerError, MemoryManager, MemoryManagerSnapshotData,
30 };
31 #[cfg(feature = "guest_debug")]
32 use crate::migration::url_to_file;
33 use crate::migration::{url_to_path, SNAPSHOT_CONFIG_FILE, SNAPSHOT_STATE_FILE};
34 use crate::seccomp_filters::{get_seccomp_filter, Thread};
35 use crate::GuestMemoryMmap;
36 use crate::{
37     PciDeviceInfo, CPU_MANAGER_SNAPSHOT_ID, DEVICE_MANAGER_SNAPSHOT_ID, MEMORY_MANAGER_SNAPSHOT_ID,
38 };
39 use anyhow::anyhow;
40 use arch::get_host_cpu_phys_bits;
41 #[cfg(target_arch = "x86_64")]
42 use arch::layout::{KVM_IDENTITY_MAP_START, KVM_TSS_START};
43 #[cfg(feature = "tdx")]
44 use arch::x86_64::tdx::TdvfSection;
45 use arch::EntryPoint;
46 #[cfg(target_arch = "aarch64")]
47 use arch::PciSpaceInfo;
48 use arch::{NumaNode, NumaNodes};
49 #[cfg(target_arch = "aarch64")]
50 use devices::gic::GIC_V3_ITS_SNAPSHOT_ID;
51 #[cfg(target_arch = "aarch64")]
52 use devices::interrupt_controller::{self, InterruptController};
53 use devices::AcpiNotificationFlags;
54 #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
55 use gdbstub_arch::x86::reg::X86_64CoreRegs;
56 use hypervisor::{HypervisorVmError, VmOps};
57 use linux_loader::cmdline::Cmdline;
58 #[cfg(feature = "guest_debug")]
59 use linux_loader::elf;
60 #[cfg(target_arch = "x86_64")]
61 use linux_loader::loader::elf::PvhBootCapability::PvhEntryPresent;
62 #[cfg(target_arch = "aarch64")]
63 use linux_loader::loader::pe::Error::InvalidImageMagicNumber;
64 use linux_loader::loader::KernelLoader;
65 use seccompiler::{apply_filter, SeccompAction};
66 use serde::{Deserialize, Serialize};
67 use signal_hook::{consts::SIGWINCH, iterator::backend::Handle, iterator::Signals};
68 use std::cmp;
69 use std::collections::BTreeMap;
70 use std::collections::HashMap;
71 use std::convert::TryInto;
72 use std::fs::{File, OpenOptions};
73 use std::io::{self, Read, Write};
74 use std::io::{Seek, SeekFrom};
75 #[cfg(feature = "tdx")]
76 use std::mem;
77 #[cfg(feature = "guest_debug")]
78 use std::mem::size_of;
79 use std::num::Wrapping;
80 use std::ops::Deref;
81 use std::os::unix::net::UnixStream;
82 use std::panic::AssertUnwindSafe;
83 use std::sync::{Arc, Mutex, RwLock};
84 use std::time::Instant;
85 use std::{result, str, thread};
86 use thiserror::Error;
87 use vm_device::Bus;
88 #[cfg(target_arch = "x86_64")]
89 use vm_device::BusDevice;
90 #[cfg(target_arch = "x86_64")]
91 use vm_memory::Address;
92 #[cfg(feature = "tdx")]
93 use vm_memory::{ByteValued, GuestMemory, GuestMemoryRegion};
94 use vm_memory::{Bytes, GuestAddress, GuestAddressSpace, GuestMemoryAtomic};
95 use vm_migration::protocol::{Request, Response, Status};
96 use vm_migration::{
97     protocol::MemoryRangeTable, Migratable, MigratableError, Pausable, Snapshot,
98     SnapshotDataSection, Snapshottable, Transportable,
99 };
100 use vmm_sys_util::eventfd::EventFd;
101 use vmm_sys_util::signal::unblock_signal;
102 use vmm_sys_util::sock_ctrl_msg::ScmSocket;
103 use vmm_sys_util::terminal::Terminal;
104 
105 /// Errors associated with VM management
106 #[derive(Debug, Error)]
107 pub enum Error {
108     #[error("Cannot open kernel file: {0}")]
109     KernelFile(#[source] io::Error),
110 
111     #[error("Cannot open initramfs file: {0}")]
112     InitramfsFile(#[source] io::Error),
113 
114     #[error("Cannot load the kernel into memory: {0}")]
115     KernelLoad(#[source] linux_loader::loader::Error),
116 
117     #[cfg(target_arch = "aarch64")]
118     #[error("Cannot load the UEFI binary in memory: {0:?}")]
119     UefiLoad(arch::aarch64::uefi::Error),
120 
121     #[error("Cannot load the initramfs into memory")]
122     InitramfsLoad,
123 
124     #[error("Cannot load the kernel command line in memory: {0}")]
125     LoadCmdLine(#[source] linux_loader::loader::Error),
126 
127     #[error("Cannot modify the kernel command line: {0}")]
128     CmdLineInsertStr(#[source] linux_loader::cmdline::Error),
129 
130     #[error("Cannot configure system: {0}")]
131     ConfigureSystem(#[source] arch::Error),
132 
133     #[cfg(target_arch = "aarch64")]
134     #[error("Cannot enable interrupt controller: {0:?}")]
135     EnableInterruptController(interrupt_controller::Error),
136 
137     #[error("VM state is poisoned")]
138     PoisonedState,
139 
140     #[error("Error from device manager: {0:?}")]
141     DeviceManager(DeviceManagerError),
142 
143     #[error("Cannot setup terminal in raw mode: {0}")]
144     SetTerminalRaw(#[source] vmm_sys_util::errno::Error),
145 
146     #[error("Cannot setup terminal in canonical mode.: {0}")]
147     SetTerminalCanon(#[source] vmm_sys_util::errno::Error),
148 
149     #[error("Cannot spawn a signal handler thread: {0}")]
150     SignalHandlerSpawn(#[source] io::Error),
151 
152     #[error("Failed to join on threads: {0:?}")]
153     ThreadCleanup(std::boxed::Box<dyn std::any::Any + std::marker::Send>),
154 
155     #[error("VM config is missing")]
156     VmMissingConfig,
157 
158     #[error("VM is not created")]
159     VmNotCreated,
160 
161     #[error("VM is already created")]
162     VmAlreadyCreated,
163 
164     #[error("VM is not running")]
165     VmNotRunning,
166 
167     #[error("Cannot clone EventFd: {0}")]
168     EventFdClone(#[source] io::Error),
169 
170     #[error("invalid VM state transition: {0:?} to {1:?}")]
171     InvalidStateTransition(VmState, VmState),
172 
173     #[error("Error from CPU manager: {0}")]
174     CpuManager(#[source] cpu::Error),
175 
176     #[error("Cannot pause devices: {0}")]
177     PauseDevices(#[source] MigratableError),
178 
179     #[error("Cannot resume devices: {0}")]
180     ResumeDevices(#[source] MigratableError),
181 
182     #[error("Cannot pause CPUs: {0}")]
183     PauseCpus(#[source] MigratableError),
184 
185     #[error("Cannot resume cpus: {0}")]
186     ResumeCpus(#[source] MigratableError),
187 
188     #[error("Cannot pause VM: {0}")]
189     Pause(#[source] MigratableError),
190 
191     #[error("Cannot resume VM: {0}")]
192     Resume(#[source] MigratableError),
193 
194     #[error("Memory manager error: {0:?}")]
195     MemoryManager(MemoryManagerError),
196 
197     #[error("Eventfd write error: {0}")]
198     EventfdError(#[source] std::io::Error),
199 
200     #[error("Cannot snapshot VM: {0}")]
201     Snapshot(#[source] MigratableError),
202 
203     #[error("Cannot restore VM: {0}")]
204     Restore(#[source] MigratableError),
205 
206     #[error("Cannot send VM snapshot: {0}")]
207     SnapshotSend(#[source] MigratableError),
208 
209     #[error("Invalid restore source URL")]
210     InvalidRestoreSourceUrl,
211 
212     #[error("Failed to validate config: {0}")]
213     ConfigValidation(#[source] ValidationError),
214 
215     #[error("Too many virtio-vsock devices")]
216     TooManyVsockDevices,
217 
218     #[error("Failed serializing into JSON: {0}")]
219     SerializeJson(#[source] serde_json::Error),
220 
221     #[error("Invalid NUMA configuration")]
222     InvalidNumaConfig,
223 
224     #[error("Cannot create seccomp filter: {0}")]
225     CreateSeccompFilter(#[source] seccompiler::Error),
226 
227     #[error("Cannot apply seccomp filter: {0}")]
228     ApplySeccompFilter(#[source] seccompiler::Error),
229 
230     #[error("Failed resizing a memory zone")]
231     ResizeZone,
232 
233     #[error("Cannot activate virtio devices: {0:?}")]
234     ActivateVirtioDevices(DeviceManagerError),
235 
236     #[error("Error triggering power button: {0:?}")]
237     PowerButton(DeviceManagerError),
238 
239     #[error("Kernel lacks PVH header")]
240     KernelMissingPvhHeader,
241 
242     #[error("Failed to allocate firmware RAM: {0:?}")]
243     AllocateFirmwareMemory(MemoryManagerError),
244 
245     #[error("Error manipulating firmware file: {0}")]
246     FirmwareFile(#[source] std::io::Error),
247 
248     #[error("Firmware too big")]
249     FirmwareTooLarge,
250 
251     #[error("Failed to copy firmware to memory: {0}")]
252     FirmwareLoad(#[source] vm_memory::GuestMemoryError),
253 
254     #[cfg(feature = "tdx")]
255     #[error("Error performing I/O on TDX firmware file: {0}")]
256     LoadTdvf(#[source] std::io::Error),
257 
258     #[cfg(feature = "tdx")]
259     #[error("Error performing I/O on the TDX payload file: {0}")]
260     LoadPayload(#[source] std::io::Error),
261 
262     #[cfg(feature = "tdx")]
263     #[error("Error parsing TDVF: {0}")]
264     ParseTdvf(#[source] arch::x86_64::tdx::TdvfError),
265 
266     #[cfg(feature = "tdx")]
267     #[error("Error populating TDX HOB: {0}")]
268     PopulateHob(#[source] arch::x86_64::tdx::TdvfError),
269 
270     #[cfg(feature = "tdx")]
271     #[error("Error allocating TDVF memory: {0:?}")]
272     AllocatingTdvfMemory(crate::memory_manager::Error),
273 
274     #[cfg(feature = "tdx")]
275     #[error("Error enabling TDX VM: {0}")]
276     InitializeTdxVm(#[source] hypervisor::HypervisorVmError),
277 
278     #[cfg(feature = "tdx")]
279     #[error("Error enabling TDX memory region: {0}")]
280     InitializeTdxMemoryRegion(#[source] hypervisor::HypervisorVmError),
281 
282     #[cfg(feature = "tdx")]
283     #[error("Error finalizing TDX VM: {0}")]
284     FinalizeTdx(#[source] hypervisor::HypervisorVmError),
285 
286     #[cfg(feature = "tdx")]
287     #[error("Invalid TDX payload type")]
288     InvalidPayloadType,
289 
290     #[cfg(feature = "gdb")]
291     #[error("Error debugging VM: {0:?}")]
292     Debug(DebuggableError),
293 
294     #[cfg(target_arch = "x86_64")]
295     #[error("Error spawning kernel loading thread")]
296     KernelLoadThreadSpawn(std::io::Error),
297 
298     #[cfg(target_arch = "x86_64")]
299     #[error("Error joining kernel loading thread")]
300     KernelLoadThreadJoin(std::boxed::Box<dyn std::any::Any + std::marker::Send>),
301 
302     #[cfg(feature = "guest_debug")]
303     #[error("Error coredumping VM: {0:?}")]
304     Coredump(GuestDebuggableError),
305 }
306 pub type Result<T> = result::Result<T, Error>;
307 
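/// Coarse lifecycle state of a VM. `valid_transition` below defines which
/// moves between these states are allowed.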
308 #[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq, Eq)]
309 pub enum VmState {
310     Created,
311     Running,
312     Shutdown,
313     Paused,
314     BreakPoint,
315 }
316 
317 impl VmState {
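    // Checks whether moving from the current state to `new_state` is
    // allowed, returning `Error::InvalidStateTransition` if it is not.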
318     fn valid_transition(self, new_state: VmState) -> Result<()> {
319         match self {
320             VmState::Created => match new_state {
321                 VmState::Created | VmState::Shutdown => {
322                     Err(Error::InvalidStateTransition(self, new_state))
323                 }
324                 VmState::Running | VmState::Paused | VmState::BreakPoint => Ok(()),
325             },
326 
327             VmState::Running => match new_state {
328                 VmState::Created | VmState::Running => {
329                     Err(Error::InvalidStateTransition(self, new_state))
330                 }
331                 VmState::Paused | VmState::Shutdown | VmState::BreakPoint => Ok(()),
332             },
333 
334             VmState::Shutdown => match new_state {
335                 VmState::Paused | VmState::Created | VmState::Shutdown | VmState::BreakPoint => {
336                     Err(Error::InvalidStateTransition(self, new_state))
337                 }
338                 VmState::Running => Ok(()),
339             },
340 
341             VmState::Paused => match new_state {
342                 VmState::Created | VmState::Paused | VmState::BreakPoint => {
343                     Err(Error::InvalidStateTransition(self, new_state))
344                 }
345                 VmState::Running | VmState::Shutdown => Ok(()),
346             },
347             VmState::BreakPoint => match new_state {
348                 VmState::Created | VmState::Running => Ok(()),
349                 _ => Err(Error::InvalidStateTransition(self, new_state)),
350             },
351         }
352     }
353 }
354 
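// Implementation of the `VmOps` callbacks used by the vCPU threads to access
// guest memory and to dispatch MMIO/PIO accesses to the device buses.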
355 struct VmOpsHandler {
356     memory: GuestMemoryAtomic<GuestMemoryMmap>,
357     #[cfg(target_arch = "x86_64")]
358     io_bus: Arc<Bus>,
359     mmio_bus: Arc<Bus>,
360     #[cfg(target_arch = "x86_64")]
361     pci_config_io: Arc<Mutex<dyn BusDevice>>,
362 }
363 
364 impl VmOps for VmOpsHandler {
365     fn guest_mem_write(&self, gpa: u64, buf: &[u8]) -> result::Result<usize, HypervisorVmError> {
366         self.memory
367             .memory()
368             .write(buf, GuestAddress(gpa))
369             .map_err(|e| HypervisorVmError::GuestMemWrite(e.into()))
370     }
371 
372     fn guest_mem_read(&self, gpa: u64, buf: &mut [u8]) -> result::Result<usize, HypervisorVmError> {
373         self.memory
374             .memory()
375             .read(buf, GuestAddress(gpa))
376             .map_err(|e| HypervisorVmError::GuestMemRead(e.into()))
377     }
378 
379     fn mmio_read(&self, gpa: u64, data: &mut [u8]) -> result::Result<(), HypervisorVmError> {
380         if let Err(vm_device::BusError::MissingAddressRange) = self.mmio_bus.read(gpa, data) {
381             warn!("Guest MMIO read to unregistered address 0x{:x}", gpa);
382         }
383         Ok(())
384     }
385 
386     fn mmio_write(&self, gpa: u64, data: &[u8]) -> result::Result<(), HypervisorVmError> {
387         match self.mmio_bus.write(gpa, data) {
388             Err(vm_device::BusError::MissingAddressRange) => {
389                 warn!("Guest MMIO write to unregistered address 0x{:x}", gpa);
390             }
391             Ok(Some(barrier)) => {
392                 info!("Waiting for barrier");
393                 barrier.wait();
394                 info!("Barrier released");
395             }
396             _ => {}
397         };
398         Ok(())
399     }
400 
401     #[cfg(target_arch = "x86_64")]
402     fn pio_read(&self, port: u64, data: &mut [u8]) -> result::Result<(), HypervisorVmError> {
403         use pci::{PCI_CONFIG_IO_PORT, PCI_CONFIG_IO_PORT_SIZE};
404 
405         if (PCI_CONFIG_IO_PORT..(PCI_CONFIG_IO_PORT + PCI_CONFIG_IO_PORT_SIZE)).contains(&port) {
406             self.pci_config_io.lock().unwrap().read(
407                 PCI_CONFIG_IO_PORT,
408                 port - PCI_CONFIG_IO_PORT,
409                 data,
410             );
411             return Ok(());
412         }
413 
414         if let Err(vm_device::BusError::MissingAddressRange) = self.io_bus.read(port, data) {
415             warn!("Guest PIO read to unregistered address 0x{:x}", port);
416         }
417         Ok(())
418     }
419 
420     #[cfg(target_arch = "x86_64")]
421     fn pio_write(&self, port: u64, data: &[u8]) -> result::Result<(), HypervisorVmError> {
422         use pci::{PCI_CONFIG_IO_PORT, PCI_CONFIG_IO_PORT_SIZE};
423 
424         if (PCI_CONFIG_IO_PORT..(PCI_CONFIG_IO_PORT + PCI_CONFIG_IO_PORT_SIZE)).contains(&port) {
425             self.pci_config_io.lock().unwrap().write(
426                 PCI_CONFIG_IO_PORT,
427                 port - PCI_CONFIG_IO_PORT,
428                 data,
429             );
430             return Ok(());
431         }
432 
433         match self.io_bus.write(port, data) {
434             Err(vm_device::BusError::MissingAddressRange) => {
435                 warn!("Guest PIO write to unregistered address 0x{:x}", port);
436             }
437             Ok(Some(barrier)) => {
438                 info!("Waiting for barrier");
439                 barrier.wait();
440                 info!("Barrier released");
441             }
442             _ => {}
443         };
444         Ok(())
445     }
446 }
447 
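/// Returns the number of guest physical address bits to use: the requested
/// maximum, capped at what the host CPU supports.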
448 pub fn physical_bits(max_phys_bits: u8) -> u8 {
449     let host_phys_bits = get_host_cpu_phys_bits();
450 
451     cmp::min(host_phys_bits, max_phys_bits)
452 }
453 
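/// A single virtual machine instance, tying together the hypervisor VM, the
/// CPU, memory and device managers, and the VM lifecycle state.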
454 pub struct Vm {
455     #[cfg(any(target_arch = "aarch64", feature = "tdx"))]
456     kernel: Option<File>,
457     initramfs: Option<File>,
458     threads: Vec<thread::JoinHandle<()>>,
459     device_manager: Arc<Mutex<DeviceManager>>,
460     config: Arc<Mutex<VmConfig>>,
461     on_tty: bool,
462     signals: Option<Handle>,
463     state: RwLock<VmState>,
464     cpu_manager: Arc<Mutex<cpu::CpuManager>>,
465     memory_manager: Arc<Mutex<MemoryManager>>,
466     #[cfg_attr(not(feature = "kvm"), allow(dead_code))]
467     // The hypervisor abstracted virtual machine.
468     vm: Arc<dyn hypervisor::Vm>,
469     #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
470     saved_clock: Option<hypervisor::ClockData>,
471     numa_nodes: NumaNodes,
472     seccomp_action: SeccompAction,
473     exit_evt: EventFd,
474     hypervisor: Arc<dyn hypervisor::Hypervisor>,
475     stop_on_boot: bool,
476     #[cfg(target_arch = "x86_64")]
477     load_kernel_handle: Option<thread::JoinHandle<Result<EntryPoint>>>,
478 }
479 
480 impl Vm {
481     pub const HANDLED_SIGNALS: [i32; 1] = [SIGWINCH];
482 
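    // Common construction path shared by `new`, `new_from_snapshot` and
    // `new_from_migration`: builds the device manager, CPU manager and
    // `VmOps` handler on top of an already created memory manager.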
483     #[allow(clippy::too_many_arguments)]
484     fn new_from_memory_manager(
485         config: Arc<Mutex<VmConfig>>,
486         memory_manager: Arc<Mutex<MemoryManager>>,
487         vm: Arc<dyn hypervisor::Vm>,
488         exit_evt: EventFd,
489         reset_evt: EventFd,
490         #[cfg(feature = "gdb")] vm_debug_evt: EventFd,
491         seccomp_action: &SeccompAction,
492         hypervisor: Arc<dyn hypervisor::Hypervisor>,
493         activate_evt: EventFd,
494         restoring: bool,
495         timestamp: Instant,
496     ) -> Result<Self> {
497         let kernel = config
498             .lock()
499             .unwrap()
500             .kernel
501             .as_ref()
502             .map(|k| File::open(&k.path))
503             .transpose()
504             .map_err(Error::KernelFile)?;
505 
506         #[cfg(target_arch = "x86_64")]
507         let load_kernel_handle = if !restoring {
508             Self::load_kernel_async(&kernel, &memory_manager, &config)?
509         } else {
510             None
511         };
512 
513         let boot_id_list = config
514             .lock()
515             .unwrap()
516             .validate()
517             .map_err(Error::ConfigValidation)?;
518 
519         info!("Booting VM from config: {:?}", &config);
520 
521         // Create NUMA nodes based on NumaConfig.
522         let numa_nodes =
523             Self::create_numa_nodes(config.lock().unwrap().numa.clone(), &memory_manager)?;
524 
525         #[cfg(feature = "tdx")]
526         let force_iommu = config.lock().unwrap().tdx.is_some();
527         #[cfg(not(feature = "tdx"))]
528         let force_iommu = false;
529 
530         #[cfg(feature = "gdb")]
531         let stop_on_boot = config.lock().unwrap().gdb;
532         #[cfg(not(feature = "gdb"))]
533         let stop_on_boot = false;
534 
535         let device_manager = DeviceManager::new(
536             hypervisor.hypervisor_type(),
537             vm.clone(),
538             config.clone(),
539             memory_manager.clone(),
540             &exit_evt,
541             &reset_evt,
542             seccomp_action.clone(),
543             numa_nodes.clone(),
544             &activate_evt,
545             force_iommu,
546             restoring,
547             boot_id_list,
548             timestamp,
549         )
550         .map_err(Error::DeviceManager)?;
551 
552         let memory = memory_manager.lock().unwrap().guest_memory();
553         #[cfg(target_arch = "x86_64")]
554         let io_bus = Arc::clone(device_manager.lock().unwrap().io_bus());
555         let mmio_bus = Arc::clone(device_manager.lock().unwrap().mmio_bus());
556 
557         #[cfg(target_arch = "x86_64")]
558         let pci_config_io =
559             device_manager.lock().unwrap().pci_config_io() as Arc<Mutex<dyn BusDevice>>;
560         let vm_ops: Arc<dyn VmOps> = Arc::new(VmOpsHandler {
561             memory,
562             #[cfg(target_arch = "x86_64")]
563             io_bus,
564             mmio_bus,
565             #[cfg(target_arch = "x86_64")]
566             pci_config_io,
567         });
568 
569         let exit_evt_clone = exit_evt.try_clone().map_err(Error::EventFdClone)?;
570         #[cfg(feature = "tdx")]
571         let tdx_enabled = config.lock().unwrap().tdx.is_some();
572         let cpus_config = { &config.lock().unwrap().cpus.clone() };
573         let cpu_manager = cpu::CpuManager::new(
574             cpus_config,
575             &device_manager,
576             &memory_manager,
577             vm.clone(),
578             exit_evt_clone,
579             reset_evt,
580             #[cfg(feature = "gdb")]
581             vm_debug_evt,
582             hypervisor.clone(),
583             seccomp_action.clone(),
584             vm_ops,
585             #[cfg(feature = "tdx")]
586             tdx_enabled,
587             &numa_nodes,
588         )
589         .map_err(Error::CpuManager)?;
590 
591         let on_tty = unsafe { libc::isatty(libc::STDIN_FILENO as i32) } != 0;
592 
593         let initramfs = config
594             .lock()
595             .unwrap()
596             .initramfs
597             .as_ref()
598             .map(|i| File::open(&i.path))
599             .transpose()
600             .map_err(Error::InitramfsFile)?;
601 
602         Ok(Vm {
603             #[cfg(any(target_arch = "aarch64", feature = "tdx"))]
604             kernel,
605             initramfs,
606             device_manager,
607             config,
608             on_tty,
609             threads: Vec::with_capacity(1),
610             signals: None,
611             state: RwLock::new(VmState::Created),
612             cpu_manager,
613             memory_manager,
614             vm,
615             #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
616             saved_clock: None,
617             numa_nodes,
618             seccomp_action: seccomp_action.clone(),
619             exit_evt,
620             hypervisor,
621             stop_on_boot,
622             #[cfg(target_arch = "x86_64")]
623             load_kernel_handle,
624         })
625     }
626 
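    // Builds the guest NUMA topology from the optional `NumaConfig` list,
    // resolving memory zones, CPUs, distances and (on x86_64) SGX EPC
    // sections against what the memory manager actually created.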
627     fn create_numa_nodes(
628         configs: Option<Vec<NumaConfig>>,
629         memory_manager: &Arc<Mutex<MemoryManager>>,
630     ) -> Result<NumaNodes> {
631         let mm = memory_manager.lock().unwrap();
632         let mm_zones = mm.memory_zones();
633         let mut numa_nodes = BTreeMap::new();
634 
635         if let Some(configs) = &configs {
636             for config in configs.iter() {
637                 if numa_nodes.contains_key(&config.guest_numa_id) {
638                     error!("Can't define twice the same NUMA node");
639                     return Err(Error::InvalidNumaConfig);
640                 }
641 
642                 let mut node = NumaNode::default();
643 
644                 if let Some(memory_zones) = &config.memory_zones {
645                     for memory_zone in memory_zones.iter() {
646                         if let Some(mm_zone) = mm_zones.get(memory_zone) {
647                             node.memory_regions.extend(mm_zone.regions().clone());
648                             if let Some(virtiomem_zone) = mm_zone.virtio_mem_zone() {
649                                 node.hotplug_regions.push(virtiomem_zone.region().clone());
650                             }
651                             node.memory_zones.push(memory_zone.clone());
652                         } else {
653                             error!("Unknown memory zone '{}'", memory_zone);
654                             return Err(Error::InvalidNumaConfig);
655                         }
656                     }
657                 }
658 
659                 if let Some(cpus) = &config.cpus {
660                     node.cpus.extend(cpus);
661                 }
662 
663                 if let Some(distances) = &config.distances {
664                     for distance in distances.iter() {
665                         let dest = distance.destination;
666                         let dist = distance.distance;
667 
668                         if !configs.iter().any(|cfg| cfg.guest_numa_id == dest) {
669                             error!("Unknown destination NUMA node {}", dest);
670                             return Err(Error::InvalidNumaConfig);
671                         }
672 
673                         if node.distances.contains_key(&dest) {
674                             error!("Destination NUMA node {} has been already set", dest);
675                             return Err(Error::InvalidNumaConfig);
676                         }
677 
678                         node.distances.insert(dest, dist);
679                     }
680                 }
681 
682                 #[cfg(target_arch = "x86_64")]
683                 if let Some(sgx_epc_sections) = &config.sgx_epc_sections {
684                     if let Some(sgx_epc_region) = mm.sgx_epc_region() {
685                         let mm_sections = sgx_epc_region.epc_sections();
686                         for sgx_epc_section in sgx_epc_sections.iter() {
687                             if let Some(mm_section) = mm_sections.get(sgx_epc_section) {
688                                 node.sgx_epc_sections.push(mm_section.clone());
689                             } else {
690                                 error!("Unknown SGX EPC section '{}'", sgx_epc_section);
691                                 return Err(Error::InvalidNumaConfig);
692                             }
693                         }
694                     } else {
695                         error!("Missing SGX EPC region");
696                         return Err(Error::InvalidNumaConfig);
697                     }
698                 }
699 
700                 numa_nodes.insert(config.guest_numa_id, node);
701             }
702         }
703 
704         Ok(numa_nodes)
705     }
706 
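    /// Creates a brand new VM: the hypervisor VM object and the memory
    /// manager are built from scratch and the devices are created right away.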
707     #[allow(clippy::too_many_arguments)]
708     pub fn new(
709         config: Arc<Mutex<VmConfig>>,
710         exit_evt: EventFd,
711         reset_evt: EventFd,
712         #[cfg(feature = "gdb")] vm_debug_evt: EventFd,
713         seccomp_action: &SeccompAction,
714         hypervisor: Arc<dyn hypervisor::Hypervisor>,
715         activate_evt: EventFd,
716         serial_pty: Option<PtyPair>,
717         console_pty: Option<PtyPair>,
718         console_resize_pipe: Option<File>,
719     ) -> Result<Self> {
720         let timestamp = Instant::now();
721 
722         #[cfg(feature = "tdx")]
723         let tdx_enabled = config.lock().unwrap().tdx.is_some();
724         hypervisor.check_required_extensions().unwrap();
725         #[cfg(feature = "tdx")]
726         let vm = hypervisor
727             .create_vm_with_type(if tdx_enabled {
728                 2 // KVM_X86_TDX_VM
729             } else {
730                 0 // KVM_X86_LEGACY_VM
731             })
732             .unwrap();
733         #[cfg(not(feature = "tdx"))]
734         let vm = hypervisor.create_vm().unwrap();
735 
736         #[cfg(target_arch = "x86_64")]
737         {
738             vm.set_identity_map_address(KVM_IDENTITY_MAP_START.0)
739                 .unwrap();
740             vm.set_tss_address(KVM_TSS_START.0 as usize).unwrap();
741             vm.enable_split_irq().unwrap();
742         }
743 
744         let phys_bits = physical_bits(config.lock().unwrap().cpus.max_phys_bits);
745 
746         #[cfg(target_arch = "x86_64")]
747         let sgx_epc_config = config.lock().unwrap().sgx_epc.clone();
748 
749         let memory_manager = MemoryManager::new(
750             vm.clone(),
751             &config.lock().unwrap().memory.clone(),
752             None,
753             phys_bits,
754             #[cfg(feature = "tdx")]
755             tdx_enabled,
756             None,
757             None,
758             #[cfg(target_arch = "x86_64")]
759             sgx_epc_config,
760         )
761         .map_err(Error::MemoryManager)?;
762 
763         let new_vm = Vm::new_from_memory_manager(
764             config,
765             memory_manager,
766             vm,
767             exit_evt,
768             reset_evt,
769             #[cfg(feature = "gdb")]
770             vm_debug_evt,
771             seccomp_action,
772             hypervisor,
773             activate_evt,
774             false,
775             timestamp,
776         )?;
777 
778         // The device manager must create the devices from here as it is part
779         // of the regular code path creating everything from scratch.
780         new_vm
781             .device_manager
782             .lock()
783             .unwrap()
784             .create_devices(serial_pty, console_pty, console_resize_pipe)
785             .map_err(Error::DeviceManager)?;
786         Ok(new_vm)
787     }
788 
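    /// Restores a VM from a snapshot: the memory manager is rebuilt from the
    /// snapshot data before the rest of the VM is constructed.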
789     #[allow(clippy::too_many_arguments)]
790     pub fn new_from_snapshot(
791         snapshot: &Snapshot,
792         vm_config: Arc<Mutex<VmConfig>>,
793         exit_evt: EventFd,
794         reset_evt: EventFd,
795         #[cfg(feature = "gdb")] vm_debug_evt: EventFd,
796         source_url: Option<&str>,
797         prefault: bool,
798         seccomp_action: &SeccompAction,
799         hypervisor: Arc<dyn hypervisor::Hypervisor>,
800         activate_evt: EventFd,
801     ) -> Result<Self> {
802         let timestamp = Instant::now();
803 
804         hypervisor.check_required_extensions().unwrap();
805         let vm = hypervisor.create_vm().unwrap();
806 
807         #[cfg(target_arch = "x86_64")]
808         {
809             vm.set_identity_map_address(KVM_IDENTITY_MAP_START.0)
810                 .unwrap();
811             vm.set_tss_address(KVM_TSS_START.0 as usize).unwrap();
812             vm.enable_split_irq().unwrap();
813         }
814 
815         let memory_manager = if let Some(memory_manager_snapshot) =
816             snapshot.snapshots.get(MEMORY_MANAGER_SNAPSHOT_ID)
817         {
818             let phys_bits = physical_bits(vm_config.lock().unwrap().cpus.max_phys_bits);
819             MemoryManager::new_from_snapshot(
820                 memory_manager_snapshot,
821                 vm.clone(),
822                 &vm_config.lock().unwrap().memory.clone(),
823                 source_url,
824                 prefault,
825                 phys_bits,
826             )
827             .map_err(Error::MemoryManager)?
828         } else {
829             return Err(Error::Restore(MigratableError::Restore(anyhow!(
830                 "Missing memory manager snapshot"
831             ))));
832         };
833 
834         Vm::new_from_memory_manager(
835             vm_config,
836             memory_manager,
837             vm,
838             exit_evt,
839             reset_evt,
840             #[cfg(feature = "gdb")]
841             vm_debug_evt,
842             seccomp_action,
843             hypervisor,
844             activate_evt,
845             true,
846             timestamp,
847         )
848     }
849 
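    /// Builds the receiving side of a live migration: guest memory is created
    /// from the incoming memory manager data (and any existing memory files).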
850     #[allow(clippy::too_many_arguments)]
851     pub fn new_from_migration(
852         config: Arc<Mutex<VmConfig>>,
853         exit_evt: EventFd,
854         reset_evt: EventFd,
855         #[cfg(feature = "gdb")] vm_debug_evt: EventFd,
856         seccomp_action: &SeccompAction,
857         hypervisor: Arc<dyn hypervisor::Hypervisor>,
858         activate_evt: EventFd,
859         memory_manager_data: &MemoryManagerSnapshotData,
860         existing_memory_files: Option<HashMap<u32, File>>,
861     ) -> Result<Self> {
862         let timestamp = Instant::now();
863 
864         hypervisor.check_required_extensions().unwrap();
865         let vm = hypervisor.create_vm().unwrap();
866 
867         #[cfg(target_arch = "x86_64")]
868         {
869             vm.set_identity_map_address(KVM_IDENTITY_MAP_START.0)
870                 .unwrap();
871             vm.set_tss_address(KVM_TSS_START.0 as usize).unwrap();
872             vm.enable_split_irq().unwrap();
873         }
874 
875         let phys_bits = physical_bits(config.lock().unwrap().cpus.max_phys_bits);
876 
877         let memory_manager = MemoryManager::new(
878             vm.clone(),
879             &config.lock().unwrap().memory.clone(),
880             None,
881             phys_bits,
882             #[cfg(feature = "tdx")]
883             false,
884             Some(memory_manager_data),
885             existing_memory_files,
886             #[cfg(target_arch = "x86_64")]
887             None,
888         )
889         .map_err(Error::MemoryManager)?;
890 
891         Vm::new_from_memory_manager(
892             config,
893             memory_manager,
894             vm,
895             exit_evt,
896             reset_evt,
897             #[cfg(feature = "gdb")]
898             vm_debug_evt,
899             seccomp_action,
900             hypervisor,
901             activate_evt,
902             true,
903             timestamp,
904         )
905     }
906 
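    // Copies the initramfs file into guest memory at the architecture's
    // preferred load address and returns where it was placed and its size.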
907     fn load_initramfs(&mut self, guest_mem: &GuestMemoryMmap) -> Result<arch::InitramfsConfig> {
908         let mut initramfs = self.initramfs.as_ref().unwrap();
909         let size: usize = initramfs
910             .seek(SeekFrom::End(0))
911             .map_err(|_| Error::InitramfsLoad)?
912             .try_into()
913             .unwrap();
914         initramfs
915             .seek(SeekFrom::Start(0))
916             .map_err(|_| Error::InitramfsLoad)?;
917 
918         let address =
919             arch::initramfs_load_addr(guest_mem, size).map_err(|_| Error::InitramfsLoad)?;
920         let address = GuestAddress(address);
921 
922         guest_mem
923             .read_from(address, &mut initramfs, size)
924             .map_err(|_| Error::InitramfsLoad)?;
925 
926         info!("Initramfs loaded: address = 0x{:x}", address.0);
927         Ok(arch::InitramfsConfig { address, size })
928     }
929 
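    // Builds the kernel command line from the configured arguments (plus, on
    // aarch64, any additions required by the device manager).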
930     fn generate_cmdline(
931         config: &Arc<Mutex<VmConfig>>,
932         #[cfg(target_arch = "aarch64")] device_manager: &Arc<Mutex<DeviceManager>>,
933     ) -> Result<Cmdline> {
934         let mut cmdline = Cmdline::new(arch::CMDLINE_MAX_SIZE);
935         cmdline
936             .insert_str(&config.lock().unwrap().cmdline.args)
937             .map_err(Error::CmdLineInsertStr)?;
938 
939         #[cfg(target_arch = "aarch64")]
940         for entry in device_manager.lock().unwrap().cmdline_additions() {
941             cmdline.insert_str(entry).map_err(Error::CmdLineInsertStr)?;
942         }
943         Ok(cmdline)
944     }
945 
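    // Loads the aarch64 kernel: first as a PE image, falling back to loading
    // it as a raw UEFI binary into the UEFI flash region.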
946     #[cfg(target_arch = "aarch64")]
947     fn load_kernel(&mut self) -> Result<EntryPoint> {
948         let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory();
949         let mem = guest_memory.memory();
950         let mut kernel = self.kernel.as_ref().unwrap();
951         let entry_addr = match linux_loader::loader::pe::PE::load(
952             mem.deref(),
953             Some(arch::layout::KERNEL_START),
954             &mut kernel,
955             None,
956         ) {
957             Ok(entry_addr) => entry_addr,
958             // Try to load the binary as a kernel PE file first.
959             // If that fails, retry loading it as a UEFI binary.
960             // As the UEFI binary has no magic number to check, it must be the last option to try.
961             Err(linux_loader::loader::Error::Pe(InvalidImageMagicNumber)) => {
962                 let uefi_flash = self.device_manager.lock().as_ref().unwrap().uefi_flash();
963                 let mem = uefi_flash.memory();
964                 arch::aarch64::uefi::load_uefi(mem.deref(), arch::layout::UEFI_START, &mut kernel)
965                     .map_err(Error::UefiLoad)?;
966 
967                 // The entry point offset in the UEFI image is always 0.
968                 return Ok(EntryPoint {
969                     entry_addr: arch::layout::UEFI_START,
970                 });
971             }
972             Err(e) => {
973                 return Err(Error::KernelLoad(e));
974             }
975         };
976 
977         let entry_point_addr: GuestAddress = entry_addr.kernel_load;
978 
979         Ok(EntryPoint {
980             entry_addr: entry_point_addr,
981         })
982     }
983 
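    // Loads the x86_64 kernel as an ELF image booted through its PVH entry
    // point, or, if the file is not an ELF, copies it as raw firmware ending
    // at the 4GiB boundary.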
984     #[cfg(target_arch = "x86_64")]
985     fn load_kernel(
986         mut kernel: File,
987         cmdline: Cmdline,
988         memory_manager: Arc<Mutex<MemoryManager>>,
989     ) -> Result<EntryPoint> {
990         use linux_loader::loader::{elf::Error::InvalidElfMagicNumber, Error::Elf};
991         info!("Loading kernel");
992 
993         let mem = {
994             let guest_memory = memory_manager.lock().as_ref().unwrap().guest_memory();
995             guest_memory.memory()
996         };
997         let entry_addr = match linux_loader::loader::elf::Elf::load(
998             mem.deref(),
999             None,
1000             &mut kernel,
1001             Some(arch::layout::HIGH_RAM_START),
1002         ) {
1003             Ok(entry_addr) => entry_addr,
1004             Err(e) => match e {
1005                 Elf(InvalidElfMagicNumber) => {
1006                     // Not an ELF header - assume raw binary data / firmware
1007                     let size = kernel.seek(SeekFrom::End(0)).map_err(Error::FirmwareFile)?;
1008 
1009                     // The OVMF firmware is 4MiB, so limit the size to that
1010                     if size > 4 << 20 {
1011                         return Err(Error::FirmwareTooLarge);
1012                     }
1013 
1014                     // Load the firmware so that it ends at the 4GiB boundary
1015                     let load_address = GuestAddress(4 << 30)
1016                         .checked_sub(size)
1017                         .ok_or(Error::FirmwareTooLarge)?;
1018 
1019                     info!(
1020                         "Loading RAW firmware at 0x{:x} (size: {})",
1021                         load_address.raw_value(),
1022                         size
1023                     );
1024 
1025                     memory_manager
1026                         .lock()
1027                         .unwrap()
1028                         .add_ram_region(load_address, size as usize)
1029                         .map_err(Error::AllocateFirmwareMemory)?;
1030 
1031                     kernel
1032                         .seek(SeekFrom::Start(0))
1033                         .map_err(Error::FirmwareFile)?;
1034                     memory_manager
1035                         .lock()
1036                         .unwrap()
1037                         .guest_memory()
1038                         .memory()
1039                         .read_exact_from(load_address, &mut kernel, size as usize)
1040                         .map_err(Error::FirmwareLoad)?;
1041 
1042                     return Ok(EntryPoint { entry_addr: None });
1043                 }
1044                 _ => {
1045                     return Err(Error::KernelLoad(e));
1046                 }
1047             },
1048         };
1049 
1050         linux_loader::loader::load_cmdline(mem.deref(), arch::layout::CMDLINE_START, &cmdline)
1051             .map_err(Error::LoadCmdLine)?;
1052 
1053         if let PvhEntryPresent(entry_addr) = entry_addr.pvh_boot_cap {
1054             // Use the PVH kernel entry point to boot the guest
1055             info!("Kernel loaded: entry_addr = 0x{:x}", entry_addr.0);
1056             Ok(EntryPoint {
1057                 entry_addr: Some(entry_addr),
1058             })
1059         } else {
1060             Err(Error::KernelMissingPvhHeader)
1061         }
1062     }
1063 
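    // Spawns a "kernel_loader" thread so the kernel can be loaded while the
    // rest of the VM is being built; returns None when there is nothing to
    // load (no kernel configured, or TDX is in use).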
1064     #[cfg(target_arch = "x86_64")]
1065     fn load_kernel_async(
1066         kernel: &Option<File>,
1067         memory_manager: &Arc<Mutex<MemoryManager>>,
1068         config: &Arc<Mutex<VmConfig>>,
1069     ) -> Result<Option<thread::JoinHandle<Result<EntryPoint>>>> {
1070         // Kernel with TDX is loaded in a different manner
1071         #[cfg(feature = "tdx")]
1072         if config.lock().unwrap().tdx.is_some() {
1073             return Ok(None);
1074         }
1075 
1076         kernel
1077             .as_ref()
1078             .map(|kernel| {
1079                 let kernel = kernel.try_clone().unwrap();
1080                 let config = config.clone();
1081                 let memory_manager = memory_manager.clone();
1082 
1083                 std::thread::Builder::new()
1084                     .name("kernel_loader".into())
1085                     .spawn(move || {
1086                         let cmdline = Self::generate_cmdline(&config)?;
1087                         Self::load_kernel(kernel, cmdline, memory_manager)
1088                     })
1089                     .map_err(Error::KernelLoadThreadSpawn)
1090             })
1091             .transpose()
1092     }
1093 
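    // Final x86_64 boot configuration: loads the initramfs if one was given,
    // then hands the boot vCPU count, RSDP address, SGX EPC region and
    // platform serial number over to `arch::configure_system`.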
1094     #[cfg(target_arch = "x86_64")]
1095     fn configure_system(&mut self, rsdp_addr: GuestAddress) -> Result<()> {
1096         info!("Configuring system");
1097         let mem = self.memory_manager.lock().unwrap().boot_guest_memory();
1098 
1099         let initramfs_config = match self.initramfs {
1100             Some(_) => Some(self.load_initramfs(&mem)?),
1101             None => None,
1102         };
1103 
1104         let boot_vcpus = self.cpu_manager.lock().unwrap().boot_vcpus();
1105         let rsdp_addr = Some(rsdp_addr);
1106         let sgx_epc_region = self
1107             .memory_manager
1108             .lock()
1109             .unwrap()
1110             .sgx_epc_region()
1111             .as_ref()
1112             .cloned();
1113 
1114         let serial_number = self
1115             .config
1116             .lock()
1117             .unwrap()
1118             .platform
1119             .as_ref()
1120             .and_then(|p| p.serial_number.clone());
1121 
1122         arch::configure_system(
1123             &mem,
1124             arch::layout::CMDLINE_START,
1125             &initramfs_config,
1126             boot_vcpus,
1127             rsdp_addr,
1128             sgx_epc_region,
1129             serial_number.as_deref(),
1130         )
1131         .map_err(Error::ConfigureSystem)?;
1132         Ok(())
1133     }
1134 
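    // Final aarch64 boot configuration: creates the vGIC, initializes the
    // PMU and passes the command line, vCPU topology, device and PCI space
    // information to `arch::configure_system`, then enables the interrupt
    // controller.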
1135     #[cfg(target_arch = "aarch64")]
1136     fn configure_system(&mut self, _rsdp_addr: GuestAddress) -> Result<()> {
1137         let cmdline = Self::generate_cmdline(&self.config, &self.device_manager)?;
1138         let vcpu_mpidrs = self.cpu_manager.lock().unwrap().get_mpidrs();
1139         let vcpu_topology = self.cpu_manager.lock().unwrap().get_vcpu_topology();
1140         let mem = self.memory_manager.lock().unwrap().boot_guest_memory();
1141         let mut pci_space_info: Vec<PciSpaceInfo> = Vec::new();
1142         let initramfs_config = match self.initramfs {
1143             Some(_) => Some(self.load_initramfs(&mem)?),
1144             None => None,
1145         };
1146 
1147         let device_info = &self
1148             .device_manager
1149             .lock()
1150             .unwrap()
1151             .get_device_info()
1152             .clone();
1153 
1154         for pci_segment in self.device_manager.lock().unwrap().pci_segments().iter() {
1155             let pci_space = PciSpaceInfo {
1156                 pci_segment_id: pci_segment.id,
1157                 mmio_config_address: pci_segment.mmio_config_address,
1158                 pci_device_space_start: pci_segment.start_of_device_area,
1159                 pci_device_space_size: pci_segment.end_of_device_area
1160                     - pci_segment.start_of_device_area
1161                     + 1,
1162             };
1163             pci_space_info.push(pci_space);
1164         }
1165 
1166         let virtio_iommu_bdf = self
1167             .device_manager
1168             .lock()
1169             .unwrap()
1170             .iommu_attached_devices()
1171             .as_ref()
1172             .map(|(v, _)| *v);
1173 
1174         let vgic = self
1175             .device_manager
1176             .lock()
1177             .unwrap()
1178             .get_interrupt_controller()
1179             .unwrap()
1180             .lock()
1181             .unwrap()
1182             .create_vgic(
1183                 &self.memory_manager.lock().as_ref().unwrap().vm,
1184                 self.cpu_manager.lock().unwrap().boot_vcpus() as u64,
1185             )
1186             .map_err(|_| {
1187                 Error::ConfigureSystem(arch::Error::PlatformSpecific(
1188                     arch::aarch64::Error::SetupGic,
1189                 ))
1190             })?;
1191 
1192         // The PMU interrupt is a PPI, so 16 must be added to get the real IRQ number.
1193         let pmu_supported = self
1194             .cpu_manager
1195             .lock()
1196             .unwrap()
1197             .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
1198             .map_err(|_| {
1199                 Error::ConfigureSystem(arch::Error::PlatformSpecific(
1200                     arch::aarch64::Error::VcpuInitPmu,
1201                 ))
1202             })?;
1203 
1204         arch::configure_system(
1205             &mem,
1206             cmdline.as_str(),
1207             vcpu_mpidrs,
1208             vcpu_topology,
1209             device_info,
1210             &initramfs_config,
1211             &pci_space_info,
1212             virtio_iommu_bdf.map(|bdf| bdf.into()),
1213             &vgic,
1214             &self.numa_nodes,
1215             pmu_supported,
1216         )
1217         .map_err(Error::ConfigureSystem)?;
1218 
1219         // Activate the GIC device
1220         self.device_manager
1221             .lock()
1222             .unwrap()
1223             .get_interrupt_controller()
1224             .unwrap()
1225             .lock()
1226             .unwrap()
1227             .enable()
1228             .map_err(Error::EnableInterruptController)?;
1229 
1230         Ok(())
1231     }
1232 
1233     pub fn serial_pty(&self) -> Option<PtyPair> {
1234         self.device_manager.lock().unwrap().serial_pty()
1235     }
1236 
1237     pub fn console_pty(&self) -> Option<PtyPair> {
1238         self.device_manager.lock().unwrap().console_pty()
1239     }
1240 
1241     pub fn console_resize_pipe(&self) -> Option<Arc<File>> {
1242         self.device_manager.lock().unwrap().console_resize_pipe()
1243     }
1244 
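    /// Shuts the VM down: restores the terminal (when running on a TTY),
    /// stops the signal handler thread, resumes the device manager threads so
    /// they can exit cleanly, shuts down the vCPUs and joins all threads.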
1245     pub fn shutdown(&mut self) -> Result<()> {
1246         let mut state = self.state.try_write().map_err(|_| Error::PoisonedState)?;
1247         let new_state = VmState::Shutdown;
1248 
1249         state.valid_transition(new_state)?;
1250 
1251         if self.on_tty {
1252             // Don't forget to set the terminal back to canonical mode
1253             // before exiting.
1254             io::stdin()
1255                 .lock()
1256                 .set_canon_mode()
1257                 .map_err(Error::SetTerminalCanon)?;
1258         }
1259 
1260         // Trigger the termination of the signal_handler thread
1261         if let Some(signals) = self.signals.take() {
1262             signals.close();
1263         }
1264 
1265         // Wake up the DeviceManager threads so they will get terminated cleanly
1266         self.device_manager
1267             .lock()
1268             .unwrap()
1269             .resume()
1270             .map_err(Error::Resume)?;
1271 
1272         self.cpu_manager
1273             .lock()
1274             .unwrap()
1275             .shutdown()
1276             .map_err(Error::CpuManager)?;
1277 
1278         // Wait for all the threads to finish
1279         for thread in self.threads.drain(..) {
1280             thread.join().map_err(Error::ThreadCleanup)?
1281         }
1282         *state = new_state;
1283 
1284         event!("vm", "shutdown");
1285 
1286         Ok(())
1287     }
1288 
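    /// Hotplug resize of the VM: adjusts the number of vCPUs, the amount of
    /// memory (via ACPI or virtio-mem) and/or the balloon size, updating the
    /// stored config so a reboot keeps the new values.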
1289     pub fn resize(
1290         &mut self,
1291         desired_vcpus: Option<u8>,
1292         desired_memory: Option<u64>,
1293         desired_balloon: Option<u64>,
1294     ) -> Result<()> {
1295         event!("vm", "resizing");
1296 
1297         if let Some(desired_vcpus) = desired_vcpus {
1298             if self
1299                 .cpu_manager
1300                 .lock()
1301                 .unwrap()
1302                 .resize(desired_vcpus)
1303                 .map_err(Error::CpuManager)?
1304             {
1305                 self.device_manager
1306                     .lock()
1307                     .unwrap()
1308                     .notify_hotplug(AcpiNotificationFlags::CPU_DEVICES_CHANGED)
1309                     .map_err(Error::DeviceManager)?;
1310             }
1311             self.config.lock().unwrap().cpus.boot_vcpus = desired_vcpus;
1312         }
1313 
1314         if let Some(desired_memory) = desired_memory {
1315             let new_region = self
1316                 .memory_manager
1317                 .lock()
1318                 .unwrap()
1319                 .resize(desired_memory)
1320                 .map_err(Error::MemoryManager)?;
1321 
1322             let mut memory_config = &mut self.config.lock().unwrap().memory;
1323 
1324             if let Some(new_region) = &new_region {
1325                 self.device_manager
1326                     .lock()
1327                     .unwrap()
1328                     .update_memory(new_region)
1329                     .map_err(Error::DeviceManager)?;
1330 
1331                 match memory_config.hotplug_method {
1332                     HotplugMethod::Acpi => {
1333                         self.device_manager
1334                             .lock()
1335                             .unwrap()
1336                             .notify_hotplug(AcpiNotificationFlags::MEMORY_DEVICES_CHANGED)
1337                             .map_err(Error::DeviceManager)?;
1338                     }
1339                     HotplugMethod::VirtioMem => {}
1340                 }
1341             }
1342 
1343             // We update the VM config regardless of the actual guest resize
1344             // operation result (happened or not), so that if the VM reboots
1345             // it will be running with the last configured memory size.
1346             match memory_config.hotplug_method {
1347                 HotplugMethod::Acpi => memory_config.size = desired_memory,
1348                 HotplugMethod::VirtioMem => {
1349                     if desired_memory > memory_config.size {
1350                         memory_config.hotplugged_size = Some(desired_memory - memory_config.size);
1351                     } else {
1352                         memory_config.hotplugged_size = None;
1353                     }
1354                 }
1355             }
1356         }
1357 
1358         if let Some(desired_balloon) = desired_balloon {
1359             self.device_manager
1360                 .lock()
1361                 .unwrap()
1362                 .resize_balloon(desired_balloon)
1363                 .map_err(Error::DeviceManager)?;
1364 
1365             // Update the configuration value for the balloon size to ensure
1366             // a reboot would use the right value.
1367             if let Some(balloon_config) = &mut self.config.lock().unwrap().balloon {
1368                 balloon_config.size = desired_balloon;
1369             }
1370         }
1371 
1372         event!("vm", "resized");
1373 
1374         Ok(())
1375     }
1376 
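    /// Resizes a single memory zone identified by `id`; only growing a zone
    /// beyond its boot size is supported.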
1377     pub fn resize_zone(&mut self, id: String, desired_memory: u64) -> Result<()> {
1378         let memory_config = &mut self.config.lock().unwrap().memory;
1379 
1380         if let Some(zones) = &mut memory_config.zones {
1381             for zone in zones.iter_mut() {
1382                 if zone.id == id {
1383                     if desired_memory >= zone.size {
1384                         let hotplugged_size = desired_memory - zone.size;
1385                         self.memory_manager
1386                             .lock()
1387                             .unwrap()
1388                             .resize_zone(&id, desired_memory - zone.size)
1389                             .map_err(Error::MemoryManager)?;
1390                         // We update the memory zone config regardless of the
1391                         // actual 'resize-zone' operation result (happened or
1392                         // not), so that if the VM reboots it will be running
1393                         // with the last configured memory zone size.
1394                         zone.hotplugged_size = Some(hotplugged_size);
1395 
1396                         return Ok(());
1397                     } else {
1398                         error!(
1399                             "Invalid to ask less ({}) than boot RAM ({}) for \
1400                             this memory zone",
1401                             desired_memory, zone.size,
1402                         );
1403                         return Err(Error::ResizeZone);
1404                     }
1405                 }
1406             }
1407         }
1408 
1409         error!("Could not find the memory zone {} for the resize", id);
1410         Err(Error::ResizeZone)
1411     }
1412 
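    // Device hotplug: the pattern below (add through the device manager,
    // record the change in the config for reboot persistence, notify the
    // guest via ACPI) is shared by the add_*/remove_device methods that follow.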
1413     pub fn add_device(&mut self, mut device_cfg: DeviceConfig) -> Result<PciDeviceInfo> {
1414         let pci_device_info = self
1415             .device_manager
1416             .lock()
1417             .unwrap()
1418             .add_device(&mut device_cfg)
1419             .map_err(Error::DeviceManager)?;
1420 
1421         // Update VmConfig by adding the new device. This is important to
1422         // ensure the device will be created in case of a reboot.
1423         {
1424             let mut config = self.config.lock().unwrap();
1425             add_to_config(&mut config.devices, device_cfg);
1426         }
1427 
1428         self.device_manager
1429             .lock()
1430             .unwrap()
1431             .notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
1432             .map_err(Error::DeviceManager)?;
1433 
1434         Ok(pci_device_info)
1435     }
1436 
1437     pub fn add_user_device(&mut self, mut device_cfg: UserDeviceConfig) -> Result<PciDeviceInfo> {
1438         let pci_device_info = self
1439             .device_manager
1440             .lock()
1441             .unwrap()
1442             .add_user_device(&mut device_cfg)
1443             .map_err(Error::DeviceManager)?;
1444 
1445         // Update VmConfig by adding the new device. This is important to
1446         // ensure the device will be created in case of a reboot.
1447         {
1448             let mut config = self.config.lock().unwrap();
1449             add_to_config(&mut config.user_devices, device_cfg);
1450         }
1451 
1452         self.device_manager
1453             .lock()
1454             .unwrap()
1455             .notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
1456             .map_err(Error::DeviceManager)?;
1457 
1458         Ok(pci_device_info)
1459     }
1460 
1461     pub fn remove_device(&mut self, id: String) -> Result<()> {
1462         self.device_manager
1463             .lock()
1464             .unwrap()
1465             .remove_device(id.clone())
1466             .map_err(Error::DeviceManager)?;
1467 
1468         // Update VmConfig by removing the device. This is important to
1469         // ensure the device will not be created in case of a reboot.
1470         let mut config = self.config.lock().unwrap();
1471 
1472         // Remove if VFIO device
1473         if let Some(devices) = config.devices.as_mut() {
1474             devices.retain(|dev| dev.id.as_ref() != Some(&id));
1475         }
1476 
1477         // Remove if VFIO user device
1478         if let Some(user_devices) = config.user_devices.as_mut() {
1479             user_devices.retain(|dev| dev.id.as_ref() != Some(&id));
1480         }
1481 
1482         // Remove if disk device
1483         if let Some(disks) = config.disks.as_mut() {
1484             disks.retain(|dev| dev.id.as_ref() != Some(&id));
1485         }
1486 
1487         // Remove if fs device
1488         if let Some(fs) = config.fs.as_mut() {
1489             fs.retain(|dev| dev.id.as_ref() != Some(&id));
1490         }
1491 
1492         // Remove if net device
1493         if let Some(net) = config.net.as_mut() {
1494             net.retain(|dev| dev.id.as_ref() != Some(&id));
1495         }
1496 
1497         // Remove if pmem device
1498         if let Some(pmem) = config.pmem.as_mut() {
1499             pmem.retain(|dev| dev.id.as_ref() != Some(&id));
1500         }
1501 
1502         // Remove if vDPA device
1503         if let Some(vdpa) = config.vdpa.as_mut() {
1504             vdpa.retain(|dev| dev.id.as_ref() != Some(&id));
1505         }
1506 
1507         // Remove if vsock device
1508         if let Some(vsock) = config.vsock.as_ref() {
1509             if vsock.id.as_ref() == Some(&id) {
1510                 config.vsock = None;
1511             }
1512         }
1513 
1514         self.device_manager
1515             .lock()
1516             .unwrap()
1517             .notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
1518             .map_err(Error::DeviceManager)?;
1519         Ok(())
1520     }
1521 
1522     pub fn add_disk(&mut self, mut disk_cfg: DiskConfig) -> Result<PciDeviceInfo> {
1523         let pci_device_info = self
1524             .device_manager
1525             .lock()
1526             .unwrap()
1527             .add_disk(&mut disk_cfg)
1528             .map_err(Error::DeviceManager)?;
1529 
1530         // Update VmConfig by adding the new device. This is important to
1531         // ensure the device is re-created after a reboot.
1532         {
1533             let mut config = self.config.lock().unwrap();
1534             add_to_config(&mut config.disks, disk_cfg);
1535         }
1536 
1537         self.device_manager
1538             .lock()
1539             .unwrap()
1540             .notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
1541             .map_err(Error::DeviceManager)?;
1542 
1543         Ok(pci_device_info)
1544     }
1545 
1546     pub fn add_fs(&mut self, mut fs_cfg: FsConfig) -> Result<PciDeviceInfo> {
1547         let pci_device_info = self
1548             .device_manager
1549             .lock()
1550             .unwrap()
1551             .add_fs(&mut fs_cfg)
1552             .map_err(Error::DeviceManager)?;
1553 
1554         // Update VmConfig by adding the new device. This is important to
1555         // ensure the device is re-created after a reboot.
1556         {
1557             let mut config = self.config.lock().unwrap();
1558             add_to_config(&mut config.fs, fs_cfg);
1559         }
1560 
1561         self.device_manager
1562             .lock()
1563             .unwrap()
1564             .notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
1565             .map_err(Error::DeviceManager)?;
1566 
1567         Ok(pci_device_info)
1568     }
1569 
1570     pub fn add_pmem(&mut self, mut pmem_cfg: PmemConfig) -> Result<PciDeviceInfo> {
1571         let pci_device_info = self
1572             .device_manager
1573             .lock()
1574             .unwrap()
1575             .add_pmem(&mut pmem_cfg)
1576             .map_err(Error::DeviceManager)?;
1577 
1578         // Update VmConfig by adding the new device. This is important to
1579         // ensure the device is re-created after a reboot.
1580         {
1581             let mut config = self.config.lock().unwrap();
1582             add_to_config(&mut config.pmem, pmem_cfg);
1583         }
1584 
1585         self.device_manager
1586             .lock()
1587             .unwrap()
1588             .notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
1589             .map_err(Error::DeviceManager)?;
1590 
1591         Ok(pci_device_info)
1592     }
1593 
1594     pub fn add_net(&mut self, mut net_cfg: NetConfig) -> Result<PciDeviceInfo> {
1595         let pci_device_info = self
1596             .device_manager
1597             .lock()
1598             .unwrap()
1599             .add_net(&mut net_cfg)
1600             .map_err(Error::DeviceManager)?;
1601 
1602         // Update VmConfig by adding the new device. This is important to
1603         // ensure the device is re-created after a reboot.
1604         {
1605             let mut config = self.config.lock().unwrap();
1606             add_to_config(&mut config.net, net_cfg);
1607         }
1608 
1609         self.device_manager
1610             .lock()
1611             .unwrap()
1612             .notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
1613             .map_err(Error::DeviceManager)?;
1614 
1615         Ok(pci_device_info)
1616     }
1617 
1618     pub fn add_vdpa(&mut self, mut vdpa_cfg: VdpaConfig) -> Result<PciDeviceInfo> {
1619         let pci_device_info = self
1620             .device_manager
1621             .lock()
1622             .unwrap()
1623             .add_vdpa(&mut vdpa_cfg)
1624             .map_err(Error::DeviceManager)?;
1625 
1626         // Update VmConfig by adding the new device. This is important to
1627         // ensure the device is re-created after a reboot.
1628         {
1629             let mut config = self.config.lock().unwrap();
1630             add_to_config(&mut config.vdpa, vdpa_cfg);
1631         }
1632 
1633         self.device_manager
1634             .lock()
1635             .unwrap()
1636             .notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
1637             .map_err(Error::DeviceManager)?;
1638 
1639         Ok(pci_device_info)
1640     }
1641 
1642     pub fn add_vsock(&mut self, mut vsock_cfg: VsockConfig) -> Result<PciDeviceInfo> {
1643         let pci_device_info = self
1644             .device_manager
1645             .lock()
1646             .unwrap()
1647             .add_vsock(&mut vsock_cfg)
1648             .map_err(Error::DeviceManager)?;
1649 
1650         // Update VmConfig by adding the new device. This is important to
1651         // ensure the device is re-created after a reboot.
1652         {
1653             let mut config = self.config.lock().unwrap();
1654             config.vsock = Some(vsock_cfg);
1655         }
1656 
1657         self.device_manager
1658             .lock()
1659             .unwrap()
1660             .notify_hotplug(AcpiNotificationFlags::PCI_DEVICES_CHANGED)
1661             .map_err(Error::DeviceManager)?;
1662 
1663         Ok(pci_device_info)
1664     }
1665 
1666     pub fn counters(&self) -> Result<HashMap<String, HashMap<&'static str, Wrapping<u64>>>> {
1667         Ok(self.device_manager.lock().unwrap().counters())
1668     }
1669 
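    // Runs on a dedicated thread: unblocks the signals handled by the VM and,
    // for every SIGWINCH received, asks the console to refresh its size so the
    // console can track host terminal resizes.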
1670     fn signal_handler(mut signals: Signals, console_input_clone: Arc<Console>) {
1671         for sig in &Vm::HANDLED_SIGNALS {
1672             unblock_signal(*sig).unwrap();
1673         }
1674 
1675         for signal in signals.forever() {
1676             if signal == SIGWINCH {
1677                 console_input_clone.update_console_size();
1678             }
1679         }
1680     }
1681 
1682     #[cfg(feature = "tdx")]
1683     fn init_tdx(&mut self) -> Result<()> {
1684         let cpuid = self.cpu_manager.lock().unwrap().common_cpuid();
1685         let max_vcpus = self.cpu_manager.lock().unwrap().max_vcpus() as u32;
1686         self.vm
1687             .tdx_init(&cpuid, max_vcpus)
1688             .map_err(Error::InitializeTdxVm)?;
1689         Ok(())
1690     }
1691 
1692     #[cfg(feature = "tdx")]
1693     fn extract_tdvf_sections(&mut self) -> Result<Vec<TdvfSection>> {
1694         use arch::x86_64::tdx::*;
1695         // The TDVF file contains a table of sections as well as code
1696         let mut firmware_file =
1697             File::open(&self.config.lock().unwrap().tdx.as_ref().unwrap().firmware)
1698                 .map_err(Error::LoadTdvf)?;
1699 
1700         // Parse the TDVF section table; RAM backing the sections is allocated later
1701         parse_tdvf_sections(&mut firmware_file).map_err(Error::ParseTdvf)
1702     }
1703 
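    // Builds the list of memory resources for the HOB by walking the guest RAM
    // regions and splitting them around the TDVF sections passed in
    // `sorted_sections` (sorted by address and reversed, so pop() yields the
    // lowest address first). Each entry is (start, size, is_ram).
    //
    // Illustrative example (hypothetical addresses): a RAM region covering
    // 0x0-0xfffff with one section at 0x80000 of size 0x10000 produces:
    //   (0x0, 0x80000, true), (0x80000, 0x10000, false), (0x90000, 0x70000, true)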
1704     #[cfg(feature = "tdx")]
1705     fn hob_memory_resources(
1706         mut sorted_sections: Vec<TdvfSection>,
1707         guest_memory: &GuestMemoryMmap,
1708     ) -> Vec<(u64, u64, bool)> {
1709         let mut list = Vec::new();
1710 
1711         let mut current_section = sorted_sections.pop();
1712 
1713         // RAM regions interleaved with TDVF sections
1714         let mut next_start_addr = 0;
1715         for region in guest_memory.iter() {
1716             let region_start = region.start_addr().0;
1717             let region_end = region.last_addr().0;
1718             if region_start > next_start_addr {
1719                 next_start_addr = region_start;
1720             }
1721 
1722             loop {
1723                 let (start, size, ram) = if let Some(section) = &current_section {
1724                     if section.address <= next_start_addr {
1725                         (section.address, section.size, false)
1726                     } else {
1727                         let last_addr = std::cmp::min(section.address - 1, region_end);
1728                         (next_start_addr, last_addr - next_start_addr + 1, true)
1729                     }
1730                 } else {
1731                     (next_start_addr, region_end - next_start_addr + 1, true)
1732                 };
1733 
1734                 list.push((start, size, ram));
1735 
1736                 if !ram {
1737                     current_section = sorted_sections.pop();
1738                 }
1739 
1740                 next_start_addr = start + size;
1741 
1742                 if region_start > next_start_addr {
1743                     next_start_addr = region_start;
1744                 }
1745 
1746                 if next_start_addr > region_end {
1747                     break;
1748                 }
1749             }
1750         }
1751 
1752         // Once all the interleaved sections have been processed, let's simply
1753         // pull the remaining ones.
1754         if let Some(section) = current_section {
1755             list.push((section.address, section.size, false));
1756         }
1757         while let Some(section) = sorted_sections.pop() {
1758             list.push((section.address, section.size, false));
1759         }
1760 
1761         list
1762     }
1763 
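    // Allocates RAM for any TDVF section that is not already covered by guest
    // memory, copies the BFV/CFV sections (and the optional payload and its
    // command line) into the guest, then builds the TD HOB describing memory,
    // MMIO and ACPI resources. Returns the guest address of the HOB, if any.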
1764     #[cfg(feature = "tdx")]
1765     fn populate_tdx_sections(&mut self, sections: &[TdvfSection]) -> Result<Option<u64>> {
1766         use arch::x86_64::tdx::*;
1767         // Get the memory end *before* we start adding TDVF ram regions
1768         let boot_guest_memory = self
1769             .memory_manager
1770             .lock()
1771             .as_ref()
1772             .unwrap()
1773             .boot_guest_memory();
1774         for section in sections {
1775             // No need to allocate if the section falls within guest RAM ranges
1776             if boot_guest_memory.address_in_range(GuestAddress(section.address)) {
1777                 info!(
1778                     "Not allocating TDVF Section: {:x?} since it is already part of guest RAM",
1779                     section
1780                 );
1781                 continue;
1782             }
1783 
1784             info!("Allocating TDVF Section: {:x?}", section);
1785             self.memory_manager
1786                 .lock()
1787                 .unwrap()
1788                 .add_ram_region(GuestAddress(section.address), section.size as usize)
1789                 .map_err(Error::AllocatingTdvfMemory)?;
1790         }
1791 
1792         // The TDVF file contains a table of sections as well as code
1793         let mut firmware_file =
1794             File::open(&self.config.lock().unwrap().tdx.as_ref().unwrap().firmware)
1795                 .map_err(Error::LoadTdvf)?;
1796 
1797         // The guest memory now has all the required regions, so it is safe to
1798         // copy from the TDVF file into it.
1799         let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory();
1800         let mem = guest_memory.memory();
1801         let mut payload_info = None;
1802         let mut hob_offset = None;
1803         for section in sections {
1804             info!("Populating TDVF Section: {:x?}", section);
1805             match section.r#type {
1806                 TdvfSectionType::Bfv | TdvfSectionType::Cfv => {
1807                     info!("Copying section to guest memory");
1808                     firmware_file
1809                         .seek(SeekFrom::Start(section.data_offset as u64))
1810                         .map_err(Error::LoadTdvf)?;
1811                     mem.read_from(
1812                         GuestAddress(section.address),
1813                         &mut firmware_file,
1814                         section.data_size as usize,
1815                     )
1816                     .unwrap();
1817                 }
1818                 TdvfSectionType::TdHob => {
1819                     hob_offset = Some(section.address);
1820                 }
1821                 TdvfSectionType::Payload => {
1822                     info!("Copying payload to guest memory");
1823                     if let Some(payload_file) = self.kernel.as_mut() {
1824                         let payload_size = payload_file
1825                             .seek(SeekFrom::End(0))
1826                             .map_err(Error::LoadPayload)?;
1827 
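                        // The Linux boot protocol `setup_header` starts at
                        // offset 0x1f1 in a bzImage.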
1828                         payload_file
1829                             .seek(SeekFrom::Start(0x1f1))
1830                             .map_err(Error::LoadPayload)?;
1831 
1832                         let mut payload_header = linux_loader::bootparam::setup_header::default();
1833                         payload_header
1834                             .as_bytes()
1835                             .read_from(
1836                                 0,
1837                                 payload_file,
1838                                 mem::size_of::<linux_loader::bootparam::setup_header>(),
1839                             )
1840                             .unwrap();
1841 
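                        // 0x5372_6448 is the "HdrS" magic of the Linux boot
                        // protocol header; the checks below also require
                        // protocol version >= 2.00 and the LOADED_HIGH flag,
                        // i.e. a bzImage.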
1842                         if payload_header.header != 0x5372_6448 {
1843                             return Err(Error::InvalidPayloadType);
1844                         }
1845 
1846                         if (payload_header.version < 0x0200)
1847                             || ((payload_header.loadflags & 0x1) == 0x0)
1848                         {
1849                             return Err(Error::InvalidPayloadType);
1850                         }
1851 
1852                         payload_file
1853                             .seek(SeekFrom::Start(0))
1854                             .map_err(Error::LoadPayload)?;
1855                         mem.read_from(
1856                             GuestAddress(section.address),
1857                             payload_file,
1858                             payload_size as usize,
1859                         )
1860                         .unwrap();
1861 
1862                         // Create the payload info that will be inserted into
1863                         // the HOB.
1864                         payload_info = Some(PayloadInfo {
1865                             image_type: PayloadImageType::BzImage,
1866                             entry_point: section.address,
1867                         });
1868                     }
1869                 }
1870                 TdvfSectionType::PayloadParam => {
1871                     info!("Copying payload parameters to guest memory");
1872                     let cmdline = Self::generate_cmdline(&self.config)?;
1873                     mem.write_slice(cmdline.as_str().as_bytes(), GuestAddress(section.address))
1874                         .unwrap();
1875                 }
1876                 _ => {}
1877             }
1878         }
1879 
1880         // Generate HOB
1881         let mut hob = TdHob::start(hob_offset.unwrap());
1882 
1883         let mut sorted_sections = sections.to_vec();
1884         sorted_sections.retain(|section| matches!(section.r#type, TdvfSectionType::TempMem));
1885 
1886         sorted_sections.sort_by_key(|section| section.address);
1887         sorted_sections.reverse();
1888 
1889         for (start, size, ram) in Vm::hob_memory_resources(sorted_sections, &boot_guest_memory) {
1890             hob.add_memory_resource(&mem, start, size, ram)
1891                 .map_err(Error::PopulateHob)?;
1892         }
1893 
1894         // MMIO regions
1895         hob.add_mmio_resource(
1896             &mem,
1897             arch::layout::MEM_32BIT_DEVICES_START.raw_value(),
1898             arch::layout::APIC_START.raw_value()
1899                 - arch::layout::MEM_32BIT_DEVICES_START.raw_value(),
1900         )
1901         .map_err(Error::PopulateHob)?;
1902         let start_of_device_area = self
1903             .memory_manager
1904             .lock()
1905             .unwrap()
1906             .start_of_device_area()
1907             .raw_value();
1908         let end_of_device_area = self
1909             .memory_manager
1910             .lock()
1911             .unwrap()
1912             .end_of_device_area()
1913             .raw_value();
1914         hob.add_mmio_resource(
1915             &mem,
1916             start_of_device_area,
1917             end_of_device_area - start_of_device_area,
1918         )
1919         .map_err(Error::PopulateHob)?;
1920 
1921         // Loop over the ACPI tables and copy them to the HOB.
1922 
1923         for acpi_table in crate::acpi::create_acpi_tables_tdx(
1924             &self.device_manager,
1925             &self.cpu_manager,
1926             &self.memory_manager,
1927             &self.numa_nodes,
1928         ) {
1929             hob.add_acpi_table(&mem, acpi_table.as_slice())
1930                 .map_err(Error::PopulateHob)?;
1931         }
1932 
1933         // If a payload info has been created, let's insert it into the HOB.
1934         if let Some(payload_info) = payload_info {
1935             hob.add_payload(&mem, payload_info)
1936                 .map_err(Error::PopulateHob)?;
1937         }
1938 
1939         hob.finish(&mem).map_err(Error::PopulateHob)?;
1940 
1941         Ok(hob_offset)
1942     }
1943 
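    // Registers each TDVF section with the hypervisor so the corresponding
    // host-virtual range backs the given guest-physical range; sections with
    // the TDVF_SECTION_ATTRIBUTES_EXTENDMR attribute are extended into the TD
    // measurement.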
1944     #[cfg(feature = "tdx")]
1945     fn init_tdx_memory(&mut self, sections: &[TdvfSection]) -> Result<()> {
1946         let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory();
1947         let mem = guest_memory.memory();
1948 
1949         for section in sections {
1950             self.vm
1951                 .tdx_init_memory_region(
1952                     mem.get_host_address(GuestAddress(section.address)).unwrap() as u64,
1953                     section.address,
1954                     section.size,
1955                     /* TDVF_SECTION_ATTRIBUTES_EXTENDMR */
1956                     section.attributes == 1,
1957                 )
1958                 .map_err(Error::InitializeTdxMemoryRegion)?;
1959         }
1960 
1961         Ok(())
1962     }
1963 
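    // Spawns the "vm_signal_handler" thread with its own seccomp filter
    // applied; the thread runs Vm::signal_handler() and signals the exit event
    // if the filter cannot be applied or the handler panics.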
1964     fn setup_signal_handler(&mut self) -> Result<()> {
1965         let console = self.device_manager.lock().unwrap().console().clone();
1966         let signals = Signals::new(&Vm::HANDLED_SIGNALS);
1967         match signals {
1968             Ok(signals) => {
1969                 self.signals = Some(signals.handle());
1970                 let exit_evt = self.exit_evt.try_clone().map_err(Error::EventFdClone)?;
1971                 let signal_handler_seccomp_filter = get_seccomp_filter(
1972                     &self.seccomp_action,
1973                     Thread::SignalHandler,
1974                     self.hypervisor.hypervisor_type(),
1975                 )
1976                 .map_err(Error::CreateSeccompFilter)?;
1977                 self.threads.push(
1978                     thread::Builder::new()
1979                         .name("vm_signal_handler".to_string())
1980                         .spawn(move || {
1981                             if !signal_handler_seccomp_filter.is_empty() {
1982                                 if let Err(e) = apply_filter(&signal_handler_seccomp_filter)
1983                                     .map_err(Error::ApplySeccompFilter)
1984                                 {
1985                                     error!("Error applying seccomp filter: {:?}", e);
1986                                     exit_evt.write(1).ok();
1987                                     return;
1988                                 }
1989                             }
1990                             std::panic::catch_unwind(AssertUnwindSafe(|| {
1991                                 Vm::signal_handler(signals, console);
1992                             }))
1993                             .map_err(|_| {
1994                                 error!("signal_handler thread panicked");
1995                                 exit_evt.write(1).ok()
1996                             })
1997                             .ok();
1998                         })
1999                         .map_err(Error::SignalHandlerSpawn)?,
2000                 );
2001             }
2002             Err(e) => error!("Error creating signal iterator: {}", e),
2003         }
2004         Ok(())
2005     }
2006 
2007     fn setup_tty(&self) -> Result<()> {
2008         if self.on_tty {
2009             io::stdin()
2010                 .lock()
2011                 .set_raw_mode()
2012                 .map_err(Error::SetTerminalRaw)?;
2013         }
2014 
2015         Ok(())
2016     }
2017 
2018     // Creates the ACPI tables.
2019     // When TDX is enabled this is a no-op since the tables will be
2020     // created and passed when populating the HOB.
2021 
2022     fn create_acpi_tables(&self) -> Option<GuestAddress> {
2023         #[cfg(feature = "tdx")]
2024         if self.config.lock().unwrap().tdx.is_some() {
2025             return None;
2026         }
2027 
2028         let mem = self.memory_manager.lock().unwrap().guest_memory().memory();
2029 
2030         let rsdp_addr = crate::acpi::create_acpi_tables(
2031             &mem,
2032             &self.device_manager,
2033             &self.cpu_manager,
2034             &self.memory_manager,
2035             &self.numa_nodes,
2036         );
2037         info!("Created ACPI tables: rsdp_addr = 0x{:x}", rsdp_addr.0);
2038 
2039         Some(rsdp_addr)
2040     }
2041 
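    // The kernel is loaded on a separate thread on x86_64: joining the handle
    // here waits for that load to complete and yields the entry point, or None
    // when no kernel was configured.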
2042     #[cfg(target_arch = "x86_64")]
2043     fn entry_point(&mut self) -> Result<Option<EntryPoint>> {
2044         self.load_kernel_handle
2045             .take()
2046             .map(|handle| handle.join().map_err(Error::KernelLoadThreadJoin)?)
2047             .transpose()
2048     }
2049 
2050     #[cfg(target_arch = "aarch64")]
2051     fn entry_point(&mut self) -> Result<Option<EntryPoint>> {
2052         Ok(if self.kernel.is_some() {
2053             Some(self.load_kernel()?)
2054         } else {
2055             None
2056         })
2057     }
2058 
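    // Boot sequence: resume instead if the VM was only paused; otherwise
    // create the ACPI tables, install the signal handler, put the tty in raw
    // mode, wait for the kernel entry point, perform the TDX-specific
    // initialization when enabled, create and configure the boot vCPUs,
    // configure the system and finally start the vCPUs (stopping at a
    // breakpoint when requested).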
2059     pub fn boot(&mut self) -> Result<()> {
2060         info!("Booting VM");
2061         event!("vm", "booting");
2062         let current_state = self.get_state()?;
2063         if current_state == VmState::Paused {
2064             return self.resume().map_err(Error::Resume);
2065         }
2066 
2067         let new_state = if self.stop_on_boot {
2068             VmState::BreakPoint
2069         } else {
2070             VmState::Running
2071         };
2072         current_state.valid_transition(new_state)?;
2073 
2074         // Do this early so it can run in parallel with loading the kernel
2075         #[cfg(target_arch = "x86_64")]
2076         let rsdp_addr = self.create_acpi_tables();
2077 
2078         self.setup_signal_handler()?;
2079         self.setup_tty()?;
2080 
2081         // Load the kernel synchronously, or wait for the asynchronous load to
2082         // finish.
2083         let entry_point = self.entry_point()?;
2084 
2085         // The initial TDX configuration must be done before the vCPUs are
2086         // created
2087         #[cfg(feature = "tdx")]
2088         if self.config.lock().unwrap().tdx.is_some() {
2089             self.init_tdx()?;
2090         }
2091 
2092         // Create and configure vcpus
2093         self.cpu_manager
2094             .lock()
2095             .unwrap()
2096             .create_boot_vcpus(entry_point)
2097             .map_err(Error::CpuManager)?;
2098 
2099         #[cfg(feature = "tdx")]
2100         let sections = if self.config.lock().unwrap().tdx.is_some() {
2101             self.extract_tdvf_sections()?
2102         } else {
2103             Vec::new()
2104         };
2105 
2106         // Configuring the TDX regions requires that the vCPUs are created.
2107         #[cfg(feature = "tdx")]
2108         let hob_address = if self.config.lock().unwrap().tdx.is_some() {
2109             // TDX sections are written to memory.
2110             self.populate_tdx_sections(&sections)?
2111         } else {
2112             None
2113         };
2114 
2115         // On aarch64 the ACPI tables depend on the vCPU MPIDR, which is only
2116         // available after the vCPUs are configured
2117         #[cfg(target_arch = "aarch64")]
2118         let rsdp_addr = self.create_acpi_tables();
2119 
2120         // Configure shared state based on loaded kernel
2121         entry_point
2122             .map(|_| {
2123                 // Safe to unwrap rsdp_addr as we know it can't be None when
2124                 // the entry_point is Some.
2125                 self.configure_system(rsdp_addr.unwrap())
2126             })
2127             .transpose()?;
2128 
2129         #[cfg(feature = "tdx")]
2130         if let Some(hob_address) = hob_address {
2131             // With the HOB address extracted the vCPUs can have
2132             // their TDX state configured.
2133             self.cpu_manager
2134                 .lock()
2135                 .unwrap()
2136                 .initialize_tdx(hob_address)
2137                 .map_err(Error::CpuManager)?;
2138             // Let the hypervisor know which memory ranges are shared with the
2139             // guest. This prevents the guest from ignoring/discarding memory
2140             // regions provided by the host.
2141             self.init_tdx_memory(&sections)?;
2142             // With TDX memory and CPU state configured TDX setup is complete
2143             self.vm.tdx_finalize().map_err(Error::FinalizeTdx)?;
2144         }
2145 
2146         self.cpu_manager
2147             .lock()
2148             .unwrap()
2149             .start_boot_vcpus(new_state == VmState::BreakPoint)
2150             .map_err(Error::CpuManager)?;
2151 
2152         let mut state = self.state.try_write().map_err(|_| Error::PoisonedState)?;
2153         *state = new_state;
2154         event!("vm", "booted");
2155         Ok(())
2156     }
2157 
2158     /// Gets a thread-safe reference counted pointer to the VM configuration.
2159     pub fn get_config(&self) -> Arc<Mutex<VmConfig>> {
2160         Arc::clone(&self.config)
2161     }
2162 
2163     /// Get the VM state. Returns an error if the state is poisoned.
2164     pub fn get_state(&self) -> Result<VmState> {
2165         self.state
2166             .try_read()
2167             .map_err(|_| Error::PoisonedState)
2168             .map(|state| *state)
2169     }
2170 
2171     /// Load saved clock from snapshot
2172     #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2173     pub fn load_clock_from_snapshot(
2174         &mut self,
2175         snapshot: &Snapshot,
2176     ) -> Result<Option<hypervisor::ClockData>> {
2177         use crate::migration::get_vm_snapshot;
2178         let vm_snapshot = get_vm_snapshot(snapshot).map_err(Error::Restore)?;
2179         self.saved_clock = vm_snapshot.clock;
2180         Ok(self.saved_clock)
2181     }
2182 
2183     #[cfg(target_arch = "aarch64")]
2184     /// Add the vGIC section to the VM snapshot.
2185     fn add_vgic_snapshot_section(
2186         &self,
2187         vm_snapshot: &mut Snapshot,
2188     ) -> std::result::Result<(), MigratableError> {
2189         let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
2190         self.device_manager
2191             .lock()
2192             .unwrap()
2193             .get_interrupt_controller()
2194             .unwrap()
2195             .lock()
2196             .unwrap()
2197             .set_gicr_typers(&saved_vcpu_states);
2198 
2199         vm_snapshot.add_snapshot(
2200             self.device_manager
2201                 .lock()
2202                 .unwrap()
2203                 .get_interrupt_controller()
2204                 .unwrap()
2205                 .lock()
2206                 .unwrap()
2207                 .snapshot()?,
2208         );
2209 
2210         Ok(())
2211     }
2212 
2213     #[cfg(target_arch = "aarch64")]
2214     /// Restore the vGIC from the VM snapshot and enable the interrupt controller routing.
2215     fn restore_vgic_and_enable_interrupt(
2216         &self,
2217         vm_snapshot: &Snapshot,
2218     ) -> std::result::Result<(), MigratableError> {
2219         let saved_vcpu_states = self.cpu_manager.lock().unwrap().get_saved_states();
2220         // The number of vCPUs is the same as the number of saved vCPU states.
2221         let vcpu_numbers = saved_vcpu_states.len();
2222 
2223         // Creating a GIC device here, as the GIC will not be created when
2224         // restoring the device manager. Note that currently only the bare GICv3
2225         // without ITS is supported.
2226         self.device_manager
2227             .lock()
2228             .unwrap()
2229             .get_interrupt_controller()
2230             .unwrap()
2231             .lock()
2232             .unwrap()
2233             .create_vgic(&self.vm, vcpu_numbers.try_into().unwrap())
2234             .map_err(|e| MigratableError::Restore(anyhow!("Could not create GIC: {:#?}", e)))?;
2235 
2236         // The PMU interrupt is a PPI, so 16 must be added to get the real IRQ number.
2237         self.cpu_manager
2238             .lock()
2239             .unwrap()
2240             .init_pmu(arch::aarch64::fdt::AARCH64_PMU_IRQ + 16)
2241             .map_err(|e| MigratableError::Restore(anyhow!("Error init PMU: {:?}", e)))?;
2242 
2243         // Here we prepare the GICR_TYPER registers from the restored vCPU states.
2244         self.device_manager
2245             .lock()
2246             .unwrap()
2247             .get_interrupt_controller()
2248             .unwrap()
2249             .lock()
2250             .unwrap()
2251             .set_gicr_typers(&saved_vcpu_states);
2252 
2253         // Restore GIC states.
2254         if let Some(gicv3_its_snapshot) = vm_snapshot.snapshots.get(GIC_V3_ITS_SNAPSHOT_ID) {
2255             self.device_manager
2256                 .lock()
2257                 .unwrap()
2258                 .get_interrupt_controller()
2259                 .unwrap()
2260                 .lock()
2261                 .unwrap()
2262                 .restore(*gicv3_its_snapshot.clone())?;
2263         } else {
2264             return Err(MigratableError::Restore(anyhow!(
2265                 "Missing GicV3Its snapshot"
2266             )));
2267         }
2268 
2269         // Activate gic device
2270         self.device_manager
2271             .lock()
2272             .unwrap()
2273             .get_interrupt_controller()
2274             .unwrap()
2275             .lock()
2276             .unwrap()
2277             .enable()
2278             .map_err(|e| {
2279                 MigratableError::Restore(anyhow!(
2280                     "Could not enable interrupt controller routing: {:#?}",
2281                     e
2282                 ))
2283             })?;
2284 
2285         Ok(())
2286     }
2287 
2288     /// Gets the actual size of the balloon.
2289     pub fn balloon_size(&self) -> u64 {
2290         self.device_manager.lock().unwrap().balloon_size()
2291     }
2292 
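    // Reads each memory range advertised in `ranges` from `fd` straight into
    // guest memory, looping until the full length of every range has been
    // received.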
2293     pub fn receive_memory_regions<F>(
2294         &mut self,
2295         ranges: &MemoryRangeTable,
2296         fd: &mut F,
2297     ) -> std::result::Result<(), MigratableError>
2298     where
2299         F: Read,
2300     {
2301         let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory();
2302         let mem = guest_memory.memory();
2303 
2304         for range in ranges.regions() {
2305             let mut offset: u64 = 0;
2306             // Here we are manually handling the retry in case we can't read the
2307             // whole region at once, because we can't use the read_exact_from()
2308             // implementation from vm-memory::GuestMemory as it does not follow
2309             // the correct behavior. For more info about this issue
2310             // see: https://github.com/rust-vmm/vm-memory/issues/174
2311             loop {
2312                 let bytes_read = mem
2313                     .read_from(
2314                         GuestAddress(range.gpa + offset),
2315                         fd,
2316                         (range.length - offset) as usize,
2317                     )
2318                     .map_err(|e| {
2319                         MigratableError::MigrateReceive(anyhow!(
2320                             "Error receiving memory from socket: {}",
2321                             e
2322                         ))
2323                     })?;
2324                 offset += bytes_read as u64;
2325 
2326                 if offset == range.length {
2327                     break;
2328                 }
2329             }
2330         }
2331 
2332         Ok(())
2333     }
2334 
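    // For every guest memory slot, sends a memory fd request followed by the
    // slot number and its backing file descriptor over the UNIX socket, and
    // abandons the migration if the destination does not acknowledge with Ok.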
2335     pub fn send_memory_fds(
2336         &mut self,
2337         socket: &mut UnixStream,
2338     ) -> std::result::Result<(), MigratableError> {
2339         for (slot, fd) in self
2340             .memory_manager
2341             .lock()
2342             .unwrap()
2343             .memory_slot_fds()
2344             .drain()
2345         {
2346             Request::memory_fd(std::mem::size_of_val(&slot) as u64)
2347                 .write_to(socket)
2348                 .map_err(|e| {
2349                     MigratableError::MigrateSend(anyhow!("Error sending memory fd request: {}", e))
2350                 })?;
2351             socket
2352                 .send_with_fd(&slot.to_le_bytes()[..], fd)
2353                 .map_err(|e| {
2354                     MigratableError::MigrateSend(anyhow!("Error sending memory fd: {}", e))
2355                 })?;
2356 
2357             let res = Response::read_from(socket)?;
2358             if res.status() != Status::Ok {
2359                 warn!("Error during memory fd migration");
2360                 Request::abandon().write_to(socket)?;
2361                 Response::read_from(socket).ok();
2362                 return Err(MigratableError::MigrateSend(anyhow!(
2363                     "Error during memory fd migration"
2364                 )));
2365             }
2366         }
2367 
2368         Ok(())
2369     }
2370 
2371     pub fn send_memory_regions<F>(
2372         &mut self,
2373         ranges: &MemoryRangeTable,
2374         fd: &mut F,
2375     ) -> std::result::Result<(), MigratableError>
2376     where
2377         F: Write,
2378     {
2379         let guest_memory = self.memory_manager.lock().as_ref().unwrap().guest_memory();
2380         let mem = guest_memory.memory();
2381 
2382         for range in ranges.regions() {
2383             let mut offset: u64 = 0;
2384             // Here we are manually handling the retry in case we can't write the
2385             // whole region at once, because we can't use the write_all_to()
2386             // implementation from vm-memory::GuestMemory as it does not follow
2387             // the correct behavior. For more info about this issue
2388             // see: https://github.com/rust-vmm/vm-memory/issues/174
2389             loop {
2390                 let bytes_written = mem
2391                     .write_to(
2392                         GuestAddress(range.gpa + offset),
2393                         fd,
2394                         (range.length - offset) as usize,
2395                     )
2396                     .map_err(|e| {
2397                         MigratableError::MigrateSend(anyhow!(
2398                             "Error transferring memory to socket: {}",
2399                             e
2400                         ))
2401                     })?;
2402                 offset += bytes_written as u64;
2403 
2404                 if offset == range.length {
2405                     break;
2406                 }
2407             }
2408         }
2409 
2410         Ok(())
2411     }
2412 
2413     pub fn memory_range_table(&self) -> std::result::Result<MemoryRangeTable, MigratableError> {
2414         self.memory_manager
2415             .lock()
2416             .unwrap()
2417             .memory_range_table(false)
2418     }
2419 
2420     pub fn device_tree(&self) -> Arc<Mutex<DeviceTree>> {
2421         self.device_manager.lock().unwrap().device_tree()
2422     }
2423 
2424     pub fn activate_virtio_devices(&self) -> Result<()> {
2425         self.device_manager
2426             .lock()
2427             .unwrap()
2428             .activate_virtio_devices()
2429             .map_err(Error::ActivateVirtioDevices)
2430     }
2431 
2432     #[cfg(target_arch = "x86_64")]
2433     pub fn power_button(&self) -> Result<()> {
2434         self.device_manager
2435             .lock()
2436             .unwrap()
2437             .notify_power_button()
2438             .map_err(Error::PowerButton)
2440     }
2441 
2442     #[cfg(target_arch = "aarch64")]
2443     pub fn power_button(&self) -> Result<()> {
2444         self.device_manager
2445             .lock()
2446             .unwrap()
2447             .notify_power_button()
2448             .map_err(Error::PowerButton)
2449     }
2450 
2451     pub fn memory_manager_data(&self) -> MemoryManagerSnapshotData {
2452         self.memory_manager.lock().unwrap().snapshot_data()
2453     }
2454 
2455     #[cfg(all(target_arch = "x86_64", feature = "gdb"))]
2456     pub fn debug_request(
2457         &mut self,
2458         gdb_request: &GdbRequestPayload,
2459         cpu_id: usize,
2460     ) -> Result<GdbResponsePayload> {
2461         use GdbRequestPayload::*;
2462         match gdb_request {
2463             SetSingleStep(single_step) => {
2464                 self.set_guest_debug(cpu_id, &[], *single_step)
2465                     .map_err(Error::Debug)?;
2466             }
2467             SetHwBreakPoint(addrs) => {
2468                 self.set_guest_debug(cpu_id, addrs, false)
2469                     .map_err(Error::Debug)?;
2470             }
2471             Pause => {
2472                 self.debug_pause().map_err(Error::Debug)?;
2473             }
2474             Resume => {
2475                 self.debug_resume().map_err(Error::Debug)?;
2476             }
2477             ReadRegs => {
2478                 let regs = self.read_regs(cpu_id).map_err(Error::Debug)?;
2479                 return Ok(GdbResponsePayload::RegValues(Box::new(regs)));
2480             }
2481             WriteRegs(regs) => {
2482                 self.write_regs(cpu_id, regs).map_err(Error::Debug)?;
2483             }
2484             ReadMem(vaddr, len) => {
2485                 let mem = self.read_mem(cpu_id, *vaddr, *len).map_err(Error::Debug)?;
2486                 return Ok(GdbResponsePayload::MemoryRegion(mem));
2487             }
2488             WriteMem(vaddr, data) => {
2489                 self.write_mem(cpu_id, vaddr, data).map_err(Error::Debug)?;
2490             }
2491             ActiveVcpus => {
2492                 let active_vcpus = self.active_vcpus();
2493                 return Ok(GdbResponsePayload::ActiveVcpus(active_vcpus));
2494             }
2495         }
2496         Ok(GdbResponsePayload::CommandComplete)
2497     }
2498 
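    // Prepares the coredump layout: one ELF program header per guest RAM
    // mapping plus one extra (presumably the note segment), the destination
    // file created from the URL, and the offset at which memory contents will
    // be written.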
2499     #[cfg(feature = "guest_debug")]
2500     fn get_dump_state(
2501         &mut self,
2502         destination_url: &str,
2503     ) -> std::result::Result<DumpState, GuestDebuggableError> {
2504         let nr_cpus = self.config.lock().unwrap().cpus.boot_vcpus as u32;
2505         let elf_note_size = self.get_note_size(NoteDescType::ElfAndVmm, nr_cpus) as isize;
2506         let mut elf_phdr_num = 1;
2507         let elf_sh_info = 0;
2508         let coredump_file_path = url_to_file(destination_url)?;
2509         let mapping_num = self.memory_manager.lock().unwrap().num_guest_ram_mappings();
2510 
2511         if mapping_num < UINT16_MAX - 2 {
2512             elf_phdr_num += mapping_num as u16;
2513         } else {
2514             panic!("mapping count exceeding the u16 program header limit is not supported");
2515         }
2516         let coredump_file = OpenOptions::new()
2517             .read(true)
2518             .write(true)
2519             .create_new(true)
2520             .open(coredump_file_path)
2521             .map_err(|e| GuestDebuggableError::Coredump(e.into()))?;
2522 
2523         let mem_offset = self.coredump_get_mem_offset(elf_phdr_num, elf_note_size);
2524         let mem_data = self
2525             .memory_manager
2526             .lock()
2527             .unwrap()
2528             .coredump_memory_regions(mem_offset);
2529 
2530         Ok(DumpState {
2531             elf_note_size,
2532             elf_phdr_num,
2533             elf_sh_info,
2534             mem_offset,
2535             mem_info: Some(mem_data),
2536             file: Some(coredump_file),
2537         })
2538     }
2539 
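    // Memory contents are written after the ELF header, the note data and the
    // program header table, hence:
    //   offset = sizeof(Elf64_Ehdr) + note_size + sizeof(Elf64_Phdr) * phdr_num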
2540     #[cfg(feature = "guest_debug")]
2541     fn coredump_get_mem_offset(&self, phdr_num: u16, note_size: isize) -> u64 {
2542         size_of::<elf::Elf64_Ehdr>() as u64
2543             + note_size as u64
2544             + size_of::<elf::Elf64_Phdr>() as u64 * phdr_num as u64
2545     }
2546 }
2547 
2548 impl Pausable for Vm {
2549     fn pause(&mut self) -> std::result::Result<(), MigratableError> {
2550         event!("vm", "pausing");
2551         let mut state = self
2552             .state
2553             .try_write()
2554             .map_err(|e| MigratableError::Pause(anyhow!("Could not get VM state: {}", e)))?;
2555         let new_state = VmState::Paused;
2556 
2557         state
2558             .valid_transition(new_state)
2559             .map_err(|e| MigratableError::Pause(anyhow!("Invalid transition: {:?}", e)))?;
2560 
2561         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2562         {
2563             let mut clock = self
2564                 .vm
2565                 .get_clock()
2566                 .map_err(|e| MigratableError::Pause(anyhow!("Could not get VM clock: {}", e)))?;
2567             clock.reset_flags();
2568             self.saved_clock = Some(clock);
2569         }
2570 
2571         // Before pausing the vCPUs, activate any virtio devices with a pending
2572         // activation request, e.g. one raised since the pause (or the migration
2573         // it is part of) was started.
2573         self.activate_virtio_devices().map_err(|e| {
2574             MigratableError::Pause(anyhow!("Error activating pending virtio devices: {:?}", e))
2575         })?;
2576 
2577         self.cpu_manager.lock().unwrap().pause()?;
2578         self.device_manager.lock().unwrap().pause()?;
2579 
2580         *state = new_state;
2581 
2582         event!("vm", "paused");
2583         Ok(())
2584     }
2585 
2586     fn resume(&mut self) -> std::result::Result<(), MigratableError> {
2587         event!("vm", "resuming");
2588         let mut state = self
2589             .state
2590             .try_write()
2591             .map_err(|e| MigratableError::Resume(anyhow!("Could not get VM state: {}", e)))?;
2592         let new_state = VmState::Running;
2593 
2594         state
2595             .valid_transition(new_state)
2596             .map_err(|e| MigratableError::Resume(anyhow!("Invalid transition: {:?}", e)))?;
2597 
2598         self.cpu_manager.lock().unwrap().resume()?;
2599         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2600         {
2601             if let Some(clock) = &self.saved_clock {
2602                 self.vm.set_clock(clock).map_err(|e| {
2603                     MigratableError::Resume(anyhow!("Could not set VM clock: {}", e))
2604                 })?;
2605             }
2606         }
2607         self.device_manager.lock().unwrap().resume()?;
2608 
2609         // And we're back to the Running state.
2610         *state = new_state;
2611         event!("vm", "resumed");
2612         Ok(())
2613     }
2614 }
2615 
2616 #[derive(Serialize, Deserialize)]
2617 pub struct VmSnapshot {
2618     #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2619     pub clock: Option<hypervisor::ClockData>,
2620     #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2621     pub common_cpuid: Vec<hypervisor::arch::x86::CpuIdEntry>,
2622 }
2623 
2624 pub const VM_SNAPSHOT_ID: &str = "vm";
2625 impl Snapshottable for Vm {
2626     fn id(&self) -> String {
2627         VM_SNAPSHOT_ID.to_string()
2628     }
2629 
2630     fn snapshot(&mut self) -> std::result::Result<Snapshot, MigratableError> {
2631         event!("vm", "snapshotting");
2632 
2633         #[cfg(feature = "tdx")]
2634         {
2635             if self.config.lock().unwrap().tdx.is_some() {
2636                 return Err(MigratableError::Snapshot(anyhow!(
2637                     "Snapshot not possible with TDX VM"
2638                 )));
2639             }
2640         }
2641 
2642         let current_state = self.get_state().unwrap();
2643         if current_state != VmState::Paused {
2644             return Err(MigratableError::Snapshot(anyhow!(
2645                 "Trying to snapshot while VM is running"
2646             )));
2647         }
2648 
2649         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2650         let common_cpuid = {
2651             #[cfg(feature = "tdx")]
2652             let tdx_enabled = self.config.lock().unwrap().tdx.is_some();
2653             let phys_bits = physical_bits(self.config.lock().unwrap().cpus.max_phys_bits);
2654             arch::generate_common_cpuid(
2655                 self.hypervisor.clone(),
2656                 None,
2657                 None,
2658                 phys_bits,
2659                 self.config.lock().unwrap().cpus.kvm_hyperv,
2660                 #[cfg(feature = "tdx")]
2661                 tdx_enabled,
2662             )
2663             .map_err(|e| {
2664                 MigratableError::MigrateReceive(anyhow!("Error generating common cpuid: {:?}", e))
2665             })?
2666         };
2667 
2668         let mut vm_snapshot = Snapshot::new(VM_SNAPSHOT_ID);
2669         let vm_snapshot_data = serde_json::to_vec(&VmSnapshot {
2670             #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2671             clock: self.saved_clock,
2672             #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2673             common_cpuid,
2674         })
2675         .map_err(|e| MigratableError::Snapshot(e.into()))?;
2676 
2677         vm_snapshot.add_snapshot(self.cpu_manager.lock().unwrap().snapshot()?);
2678         vm_snapshot.add_snapshot(self.memory_manager.lock().unwrap().snapshot()?);
2679 
2680         #[cfg(target_arch = "aarch64")]
2681         self.add_vgic_snapshot_section(&mut vm_snapshot)
2682             .map_err(|e| MigratableError::Snapshot(e.into()))?;
2683 
2684         vm_snapshot.add_snapshot(self.device_manager.lock().unwrap().snapshot()?);
2685         vm_snapshot.add_data_section(SnapshotDataSection {
2686             id: format!("{}-section", VM_SNAPSHOT_ID),
2687             snapshot: vm_snapshot_data,
2688         });
2689 
2690         event!("vm", "snapshotted");
2691         Ok(vm_snapshot)
2692     }
2693 
2694     fn restore(&mut self, snapshot: Snapshot) -> std::result::Result<(), MigratableError> {
2695         event!("vm", "restoring");
2696 
2697         let current_state = self
2698             .get_state()
2699             .map_err(|e| MigratableError::Restore(anyhow!("Could not get VM state: {:#?}", e)))?;
2700         let new_state = VmState::Paused;
2701         current_state.valid_transition(new_state).map_err(|e| {
2702             MigratableError::Restore(anyhow!("Could not restore VM state: {:#?}", e))
2703         })?;
2704 
2705         #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
2706         self.load_clock_from_snapshot(&snapshot)
2707             .map_err(|e| MigratableError::Restore(anyhow!("Error restoring clock: {:?}", e)))?;
2708 
2709         if let Some(memory_manager_snapshot) = snapshot.snapshots.get(MEMORY_MANAGER_SNAPSHOT_ID) {
2710             self.memory_manager
2711                 .lock()
2712                 .unwrap()
2713                 .restore(*memory_manager_snapshot.clone())?;
2714         } else {
2715             return Err(MigratableError::Restore(anyhow!(
2716                 "Missing memory manager snapshot"
2717             )));
2718         }
2719 
2720         if let Some(device_manager_snapshot) = snapshot.snapshots.get(DEVICE_MANAGER_SNAPSHOT_ID) {
2721             self.device_manager
2722                 .lock()
2723                 .unwrap()
2724                 .restore(*device_manager_snapshot.clone())?;
2725         } else {
2726             return Err(MigratableError::Restore(anyhow!(
2727                 "Missing device manager snapshot"
2728             )));
2729         }
2730 
2731         if let Some(cpu_manager_snapshot) = snapshot.snapshots.get(CPU_MANAGER_SNAPSHOT_ID) {
2732             self.cpu_manager
2733                 .lock()
2734                 .unwrap()
2735                 .restore(*cpu_manager_snapshot.clone())?;
2736         } else {
2737             return Err(MigratableError::Restore(anyhow!(
2738                 "Missing CPU manager snapshot"
2739             )));
2740         }
2741 
2742         #[cfg(target_arch = "aarch64")]
2743         self.restore_vgic_and_enable_interrupt(&snapshot)?;
2744 
2745         if let Some(device_manager_snapshot) = snapshot.snapshots.get(DEVICE_MANAGER_SNAPSHOT_ID) {
2746             self.device_manager
2747                 .lock()
2748                 .unwrap()
2749                 .restore_devices(*device_manager_snapshot.clone())?;
2750         } else {
2751             return Err(MigratableError::Restore(anyhow!(
2752                 "Missing device manager snapshot"
2753             )));
2754         }
2755 
2756         // Now we can start all vCPUs from here.
2757         self.cpu_manager
2758             .lock()
2759             .unwrap()
2760             .start_restored_vcpus()
2761             .map_err(|e| {
2762                 MigratableError::Restore(anyhow!("Cannot start restored vCPUs: {:#?}", e))
2763             })?;
2764 
2765         self.setup_signal_handler().map_err(|e| {
2766             MigratableError::Restore(anyhow!("Could not setup signal handler: {:#?}", e))
2767         })?;
2768         self.setup_tty()
2769             .map_err(|e| MigratableError::Restore(anyhow!("Could not setup tty: {:#?}", e)))?;
2770 
2771         let mut state = self
2772             .state
2773             .try_write()
2774             .map_err(|e| MigratableError::Restore(anyhow!("Could not set VM state: {:#?}", e)))?;
2775         *state = new_state;
2776 
2777         event!("vm", "restored");
2778         Ok(())
2779     }
2780 }
2781 
2782 impl Transportable for Vm {
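    // Writes the VM configuration and the serialized snapshot state as files
    // under the destination URL, then lets the memory manager send its own
    // guest memory snapshot to the same destination.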
2783     fn send(
2784         &self,
2785         snapshot: &Snapshot,
2786         destination_url: &str,
2787     ) -> std::result::Result<(), MigratableError> {
2788         let mut snapshot_config_path = url_to_path(destination_url)?;
2789         snapshot_config_path.push(SNAPSHOT_CONFIG_FILE);
2790 
2791         // Create the snapshot config file
2792         let mut snapshot_config_file = OpenOptions::new()
2793             .read(true)
2794             .write(true)
2795             .create_new(true)
2796             .open(snapshot_config_path)
2797             .map_err(|e| MigratableError::MigrateSend(e.into()))?;
2798 
2799         // Serialize and write the snapshot config
2800         let vm_config = serde_json::to_string(self.config.lock().unwrap().deref())
2801             .map_err(|e| MigratableError::MigrateSend(e.into()))?;
2802 
2803         snapshot_config_file
2804             .write(vm_config.as_bytes())
2805             .map_err(|e| MigratableError::MigrateSend(e.into()))?;
2806 
2807         let mut snapshot_state_path = url_to_path(destination_url)?;
2808         snapshot_state_path.push(SNAPSHOT_STATE_FILE);
2809 
2810         // Create the snapshot state file
2811         let mut snapshot_state_file = OpenOptions::new()
2812             .read(true)
2813             .write(true)
2814             .create_new(true)
2815             .open(snapshot_state_path)
2816             .map_err(|e| MigratableError::MigrateSend(e.into()))?;
2817 
2818         // Serialize and write the snapshot state
2819         let vm_state =
2820             serde_json::to_vec(snapshot).map_err(|e| MigratableError::MigrateSend(e.into()))?;
2821 
2822         snapshot_state_file
2823             .write(&vm_state)
2824             .map_err(|e| MigratableError::MigrateSend(e.into()))?;
2825 
2826         // Tell the memory manager to also send/write its own snapshot.
2827         if let Some(memory_manager_snapshot) = snapshot.snapshots.get(MEMORY_MANAGER_SNAPSHOT_ID) {
2828             self.memory_manager
2829                 .lock()
2830                 .unwrap()
2831                 .send(&*memory_manager_snapshot.clone(), destination_url)?;
2832         } else {
2833             return Err(MigratableError::MigrateSend(anyhow!(
2834                 "Missing memory manager snapshot"
2835             )));
2836         }
2837 
2838         Ok(())
2839     }
2840 }
2841 
2842 impl Migratable for Vm {
2843     fn start_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
2844         self.memory_manager.lock().unwrap().start_dirty_log()?;
2845         self.device_manager.lock().unwrap().start_dirty_log()
2846     }
2847 
2848     fn stop_dirty_log(&mut self) -> std::result::Result<(), MigratableError> {
2849         self.memory_manager.lock().unwrap().stop_dirty_log()?;
2850         self.device_manager.lock().unwrap().stop_dirty_log()
2851     }
2852 
2853     fn dirty_log(&mut self) -> std::result::Result<MemoryRangeTable, MigratableError> {
2854         Ok(MemoryRangeTable::new_from_tables(vec![
2855             self.memory_manager.lock().unwrap().dirty_log()?,
2856             self.device_manager.lock().unwrap().dirty_log()?,
2857         ]))
2858     }
2859 
2860     fn start_migration(&mut self) -> std::result::Result<(), MigratableError> {
2861         self.memory_manager.lock().unwrap().start_migration()?;
2862         self.device_manager.lock().unwrap().start_migration()
2863     }
2864 
2865     fn complete_migration(&mut self) -> std::result::Result<(), MigratableError> {
2866         self.memory_manager.lock().unwrap().complete_migration()?;
2867         self.device_manager.lock().unwrap().complete_migration()
2868     }
2869 }
2870 
2871 #[cfg(feature = "gdb")]
2872 impl Debuggable for Vm {
2873     fn set_guest_debug(
2874         &self,
2875         cpu_id: usize,
2876         addrs: &[GuestAddress],
2877         singlestep: bool,
2878     ) -> std::result::Result<(), DebuggableError> {
2879         self.cpu_manager
2880             .lock()
2881             .unwrap()
2882             .set_guest_debug(cpu_id, addrs, singlestep)
2883     }
2884 
2885     fn debug_pause(&mut self) -> std::result::Result<(), DebuggableError> {
2886         if *self.state.read().unwrap() == VmState::Running {
2887             self.pause().map_err(DebuggableError::Pause)?;
2888         }
2889 
2890         let mut state = self
2891             .state
2892             .try_write()
2893             .map_err(|_| DebuggableError::PoisonedState)?;
2894         *state = VmState::BreakPoint;
2895         Ok(())
2896     }
2897 
2898     fn debug_resume(&mut self) -> std::result::Result<(), DebuggableError> {
2899         if *self.state.read().unwrap() == VmState::BreakPoint {
2900             self.resume().map_err(DebuggableError::Pause)?;
2901         }
2902 
2903         Ok(())
2904     }
2905 
2906     fn read_regs(&self, cpu_id: usize) -> std::result::Result<X86_64CoreRegs, DebuggableError> {
2907         self.cpu_manager.lock().unwrap().read_regs(cpu_id)
2908     }
2909 
2910     fn write_regs(
2911         &self,
2912         cpu_id: usize,
2913         regs: &X86_64CoreRegs,
2914     ) -> std::result::Result<(), DebuggableError> {
2915         self.cpu_manager.lock().unwrap().write_regs(cpu_id, regs)
2916     }
2917 
2918     fn read_mem(
2919         &self,
2920         cpu_id: usize,
2921         vaddr: GuestAddress,
2922         len: usize,
2923     ) -> std::result::Result<Vec<u8>, DebuggableError> {
2924         self.cpu_manager
2925             .lock()
2926             .unwrap()
2927             .read_mem(cpu_id, vaddr, len)
2928     }
2929 
2930     fn write_mem(
2931         &self,
2932         cpu_id: usize,
2933         vaddr: &GuestAddress,
2934         data: &[u8],
2935     ) -> std::result::Result<(), DebuggableError> {
2936         self.cpu_manager
2937             .lock()
2938             .unwrap()
2939             .write_mem(cpu_id, vaddr, data)
2940     }
2941 
2942     fn active_vcpus(&self) -> usize {
2943         let active_vcpus = self.cpu_manager.lock().unwrap().active_vcpus();
2944         if active_vcpus > 0 {
2945             active_vcpus
2946         } else {
2947             // The VM is not booted yet. Report boot_vcpus() instead.
2948             self.cpu_manager.lock().unwrap().boot_vcpus() as usize
2949         }
2950     }
2951 }
2952 
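// Guest coredump support ("guest_debug" feature). coredump() writes an ELF64
// core file to destination_url: the ELF header, the note section and the
// program headers for the memory loads, followed by the per-vCPU ELF/VMM
// notes and finally the guest memory contents. The VM must already be paused,
// and TDX guests are rejected since their private memory is not accessible
// from the VMM.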
2953 #[cfg(feature = "guest_debug")]
2954 pub const UINT16_MAX: u32 = 65535;
2955 
2956 #[cfg(feature = "guest_debug")]
2957 impl Elf64Writable for Vm {}
2958 
2959 #[cfg(feature = "guest_debug")]
2960 impl GuestDebuggable for Vm {
2961     fn coredump(&mut self, destination_url: &str) -> std::result::Result<(), GuestDebuggableError> {
2962         event!("vm", "coredumping");
2963 
2964         #[cfg(feature = "tdx")]
2965         {
2966             if self.config.lock().unwrap().tdx.is_some() {
2967                 return Err(GuestDebuggableError::Coredump(anyhow!(
2968                     "Coredump not possible with TDX VM"
2969                 )));
2970             }
2971         }
2972 
2973         let current_state = self.get_state().unwrap();
2974         if current_state != VmState::Paused {
2975             return Err(GuestDebuggableError::Coredump(anyhow!(
2976                 "Coredump requires the VM to be paused"
2977             )));
2978         }
2979 
2980         let coredump_state = self.get_dump_state(destination_url)?;
2981 
2982         self.write_header(&coredump_state)?;
2983         self.write_note(&coredump_state)?;
2984         self.write_loads(&coredump_state)?;
2985 
2986         self.cpu_manager
2987             .lock()
2988             .unwrap()
2989             .cpu_write_elf64_note(&coredump_state)?;
2990         self.cpu_manager
2991             .lock()
2992             .unwrap()
2993             .cpu_write_vmm_note(&coredump_state)?;
2994 
2995         self.memory_manager
2996             .lock()
2997             .unwrap()
2998             .coredump_iterate_save_mem(&coredump_state)
2999     }
3000 }
3001 
3002 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
3003 #[cfg(test)]
3004 mod tests {
3005     use super::*;
3006 
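    // Exercises every arm of VmState::valid_transition() for a given source
    // state. The assertions encode the VM lifecycle state machine, e.g. a
    // Shutdown VM can only go back to Running, and BreakPoint is only
    // reachable from Created or Running.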
3007     fn test_vm_state_transitions(state: VmState) {
3008         match state {
3009             VmState::Created => {
3010                 // Check the transitions from Created
3011                 assert!(state.valid_transition(VmState::Created).is_err());
3012                 assert!(state.valid_transition(VmState::Running).is_ok());
3013                 assert!(state.valid_transition(VmState::Shutdown).is_err());
3014                 assert!(state.valid_transition(VmState::Paused).is_ok());
3015                 assert!(state.valid_transition(VmState::BreakPoint).is_ok());
3016             }
3017             VmState::Running => {
3018                 // Check the transitions from Running
3019                 assert!(state.valid_transition(VmState::Created).is_err());
3020                 assert!(state.valid_transition(VmState::Running).is_err());
3021                 assert!(state.valid_transition(VmState::Shutdown).is_ok());
3022                 assert!(state.valid_transition(VmState::Paused).is_ok());
3023                 assert!(state.valid_transition(VmState::BreakPoint).is_ok());
3024             }
3025             VmState::Shutdown => {
3026                 // Check the transitions from Shutdown
3027                 assert!(state.valid_transition(VmState::Created).is_err());
3028                 assert!(state.valid_transition(VmState::Running).is_ok());
3029                 assert!(state.valid_transition(VmState::Shutdown).is_err());
3030                 assert!(state.valid_transition(VmState::Paused).is_err());
3031                 assert!(state.valid_transition(VmState::BreakPoint).is_err());
3032             }
3033             VmState::Paused => {
3034                 // Check the transitions from Paused
3035                 assert!(state.valid_transition(VmState::Created).is_err());
3036                 assert!(state.valid_transition(VmState::Running).is_ok());
3037                 assert!(state.valid_transition(VmState::Shutdown).is_ok());
3038                 assert!(state.valid_transition(VmState::Paused).is_err());
3039                 assert!(state.valid_transition(VmState::BreakPoint).is_err());
3040             }
3041             VmState::BreakPoint => {
3042                 // Check the transitions from BreakPoint
3043                 assert!(state.valid_transition(VmState::Created).is_ok());
3044                 assert!(state.valid_transition(VmState::Running).is_ok());
3045                 assert!(state.valid_transition(VmState::Shutdown).is_err());
3046                 assert!(state.valid_transition(VmState::Paused).is_err());
3047                 assert!(state.valid_transition(VmState::BreakPoint).is_err());
3048             }
3049         }
3050     }
3051 
3052     #[test]
3053     fn test_vm_created_transitions() {
3054         test_vm_state_transitions(VmState::Created);
3055     }
3056 
3057     #[test]
3058     fn test_vm_running_transitions() {
3059         test_vm_state_transitions(VmState::Running);
3060     }
3061 
3062     #[test]
3063     fn test_vm_shutdown_transitions() {
3064         test_vm_state_transitions(VmState::Shutdown);
3065     }
3066 
3067     #[test]
3068     fn test_vm_paused_transitions() {
3069         test_vm_state_transitions(VmState::Paused);
3070     }
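
    // BreakPoint is covered by the same helper; exercised here for
    // completeness alongside the other states.
    #[test]
    fn test_vm_breakpoint_transitions() {
        test_vm_state_transitions(VmState::BreakPoint);
    }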
3071 
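    // hob_memory_resources() flattens the TDVF sections and the guest RAM
    // ranges into one ordered list of (start, size, is_ram) tuples, with TDVF
    // sections punching holes into any RAM range they overlap. The cases
    // below cover sections inside, outside and partially overlapping one or
    // two RAM regions.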
3072     #[cfg(feature = "tdx")]
3073     #[test]
3074     fn test_hob_memory_resources() {
3075         // Case 1: Two TDVF sections in the middle of the RAM
3076         let sections = vec![
3077             TdvfSection {
3078                 address: 0xc000,
3079                 size: 0x1000,
3080                 ..Default::default()
3081             },
3082             TdvfSection {
3083                 address: 0x1000,
3084                 size: 0x4000,
3085                 ..Default::default()
3086             },
3087         ];
3088         let guest_ranges: Vec<(GuestAddress, usize)> = vec![(GuestAddress(0), 0x1000_0000)];
3089         let expected = vec![
3090             (0, 0x1000, true),
3091             (0x1000, 0x4000, false),
3092             (0x5000, 0x7000, true),
3093             (0xc000, 0x1000, false),
3094             (0xd000, 0x0fff_3000, true),
3095         ];
3096         assert_eq!(
3097             expected,
3098             Vm::hob_memory_resources(
3099                 sections,
3100                 &GuestMemoryMmap::from_ranges(&guest_ranges).unwrap()
3101             )
3102         );
3103 
3104         // Case 2: Two TDVF sections with no conflict with the RAM
3105         let sections = vec![
3106             TdvfSection {
3107                 address: 0x1000_1000,
3108                 size: 0x1000,
3109                 ..Default::default()
3110             },
3111             TdvfSection {
3112                 address: 0,
3113                 size: 0x1000,
3114                 ..Default::default()
3115             },
3116         ];
3117         let guest_ranges: Vec<(GuestAddress, usize)> = vec![(GuestAddress(0x1000), 0x1000_0000)];
3118         let expected = vec![
3119             (0, 0x1000, false),
3120             (0x1000, 0x1000_0000, true),
3121             (0x1000_1000, 0x1000, false),
3122         ];
3123         assert_eq!(
3124             expected,
3125             Vm::hob_memory_resources(
3126                 sections,
3127                 &GuestMemoryMmap::from_ranges(&guest_ranges).unwrap()
3128             )
3129         );
3130 
3131         // Case 3: Two TDVF sections with partial conflicts with the RAM
3132         let sections = vec![
3133             TdvfSection {
3134                 address: 0x1000_0000,
3135                 size: 0x2000,
3136                 ..Default::default()
3137             },
3138             TdvfSection {
3139                 address: 0,
3140                 size: 0x2000,
3141                 ..Default::default()
3142             },
3143         ];
3144         let guest_ranges: Vec<(GuestAddress, usize)> = vec![(GuestAddress(0x1000), 0x1000_0000)];
3145         let expected = vec![
3146             (0, 0x2000, false),
3147             (0x2000, 0x0fff_e000, true),
3148             (0x1000_0000, 0x2000, false),
3149         ];
3150         assert_eq!(
3151             expected,
3152             Vm::hob_memory_resources(
3153                 sections,
3154                 &GuestMemoryMmap::from_ranges(&guest_ranges).unwrap()
3155             )
3156         );
3157 
3158         // Case 4: Two TDVF sections with no conflict before the RAM and two
3159         // more additional sections with no conflict after the RAM.
3160         let sections = vec![
3161             TdvfSection {
3162                 address: 0x2000_1000,
3163                 size: 0x1000,
3164                 ..Default::default()
3165             },
3166             TdvfSection {
3167                 address: 0x2000_0000,
3168                 size: 0x1000,
3169                 ..Default::default()
3170             },
3171             TdvfSection {
3172                 address: 0x1000,
3173                 size: 0x1000,
3174                 ..Default::default()
3175             },
3176             TdvfSection {
3177                 address: 0,
3178                 size: 0x1000,
3179                 ..Default::default()
3180             },
3181         ];
3182         let guest_ranges: Vec<(GuestAddress, usize)> = vec![(GuestAddress(0x4000), 0x1000_0000)];
3183         let expected = vec![
3184             (0, 0x1000, false),
3185             (0x1000, 0x1000, false),
3186             (0x4000, 0x1000_0000, true),
3187             (0x2000_0000, 0x1000, false),
3188             (0x2000_1000, 0x1000, false),
3189         ];
3190         assert_eq!(
3191             expected,
3192             Vm::hob_memory_resources(
3193                 sections,
3194                 &GuestMemoryMmap::from_ranges(&guest_ranges).unwrap()
3195             )
3196         );
3197 
3198         // Case 5: One TDVF section overriding the entire RAM
3199         let sections = vec![TdvfSection {
3200             address: 0,
3201             size: 0x2000_0000,
3202             ..Default::default()
3203         }];
3204         let guest_ranges: Vec<(GuestAddress, usize)> = vec![(GuestAddress(0x1000), 0x1000_0000)];
3205         let expected = vec![(0, 0x2000_0000, false)];
3206         assert_eq!(
3207             expected,
3208             Vm::hob_memory_resources(
3209                 sections,
3210                 &GuestMemoryMmap::from_ranges(&guest_ranges).unwrap()
3211             )
3212         );
3213 
3214         // Case 6: Two TDVF sections with no conflict with 2 RAM regions
3215         let sections = vec![
3216             TdvfSection {
3217                 address: 0x1000_2000,
3218                 size: 0x2000,
3219                 ..Default::default()
3220             },
3221             TdvfSection {
3222                 address: 0,
3223                 size: 0x2000,
3224                 ..Default::default()
3225             },
3226         ];
3227         let guest_ranges: Vec<(GuestAddress, usize)> = vec![
3228             (GuestAddress(0x2000), 0x1000_0000),
3229             (GuestAddress(0x1000_4000), 0x1000_0000),
3230         ];
3231         let expected = vec![
3232             (0, 0x2000, false),
3233             (0x2000, 0x1000_0000, true),
3234             (0x1000_2000, 0x2000, false),
3235             (0x1000_4000, 0x1000_0000, true),
3236         ];
3237         assert_eq!(
3238             expected,
3239             Vm::hob_memory_resources(
3240                 sections,
3241                 &GuestMemoryMmap::from_ranges(&guest_ranges).unwrap()
3242             )
3243         );
3244 
3245         // Case 7: Two TDVF sections with partial conflicts with 2 RAM regions
3246         let sections = vec![
3247             TdvfSection {
3248                 address: 0x1000_0000,
3249                 size: 0x4000,
3250                 ..Default::default()
3251             },
3252             TdvfSection {
3253                 address: 0,
3254                 size: 0x4000,
3255                 ..Default::default()
3256             },
3257         ];
3258         let guest_ranges: Vec<(GuestAddress, usize)> = vec![
3259             (GuestAddress(0x1000), 0x1000_0000),
3260             (GuestAddress(0x1000_3000), 0x1000_0000),
3261         ];
3262         let expected = vec![
3263             (0, 0x4000, false),
3264             (0x4000, 0x0fff_c000, true),
3265             (0x1000_0000, 0x4000, false),
3266             (0x1000_4000, 0x0fff_f000, true),
3267         ];
3268         assert_eq!(
3269             expected,
3270             Vm::hob_memory_resources(
3271                 sections,
3272                 &GuestMemoryMmap::from_ranges(&guest_ranges).unwrap()
3273             )
3274         );
3275     }
3276 }
3277 
3278 #[cfg(target_arch = "aarch64")]
3279 #[cfg(test)]
3280 mod tests {
3281     use super::*;
3282     use crate::GuestMemoryMmap;
3283     use arch::aarch64::fdt::create_fdt;
3284     use arch::aarch64::layout;
3285     use arch::{DeviceType, MmioDeviceInfo};
3286 
3287     const LEN: u64 = 4096;
3288 
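    // Builds a minimal MMIO device layout (serial, one virtio device and an
    // RTC), creates a vGIC through the hypervisor and checks that FDT
    // generation succeeds against a small guest memory region.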
3289     #[test]
3290     fn test_create_fdt_with_devices() {
3291         let regions = vec![(layout::RAM_START, (layout::FDT_MAX_SIZE + 0x1000) as usize)];
3292         let mem = GuestMemoryMmap::from_ranges(&regions).expect("Cannot initialize memory");
3293 
3294         let dev_info: HashMap<(DeviceType, std::string::String), MmioDeviceInfo> = [
3295             (
3296                 (DeviceType::Serial, DeviceType::Serial.to_string()),
3297                 MmioDeviceInfo {
3298                     addr: 0x00,
3299                     len: LEN,
3300                     irq: 33,
3301                 },
3302             ),
3303             (
3304                 (DeviceType::Virtio(1), "virtio".to_string()),
3305                 MmioDeviceInfo {
3306                     addr: LEN,
3307                     len: LEN,
3308                     irq: 34,
3309                 },
3310             ),
3311             (
3312                 (DeviceType::Rtc, "rtc".to_string()),
3313                 MmioDeviceInfo {
3314                     addr: 2 * LEN,
3315                     len: LEN,
3316                     irq: 35,
3317                 },
3318             ),
3319         ]
3320         .iter()
3321         .cloned()
3322         .collect();
3323 
3324         let hv = hypervisor::new().unwrap();
3325         let vm = hv.create_vm().unwrap();
3326         let gic = vm
3327             .create_vgic(
3328                 1,
3329                 0x0900_0000 - 0x01_0000,
3330                 0x01_0000,
3331                 0x02_0000,
3332                 0x02_0000,
3333                 256,
3334             )
3335             .expect("Cannot create gic");
3336         assert!(create_fdt(
3337             &mem,
3338             "console=tty0",
3339             vec![0],
3340             Some((0, 0, 0)),
3341             &dev_info,
3342             &gic,
3343             &None,
3344             &Vec::new(),
3345             &BTreeMap::new(),
3346             None,
3347             true,
3348         )
3349         .is_ok())
3350     }
3351 }
3352 
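// End-to-end smoke test of the hypervisor abstraction: creates a VM and a
// single vCPU directly through the hypervisor crate (no Vm struct involved)
// and runs a handful of real-mode instructions to completion.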
3353 #[cfg(all(feature = "kvm", target_arch = "x86_64"))]
3354 #[test]
3355 pub fn test_vm() {
3356     use hypervisor::VmExit;
3357     use vm_memory::{Address, GuestMemory, GuestMemoryRegion};
3358     // This example based on https://lwn.net/Articles/658511/
3359     let code = [
3360         0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */
3361         0x00, 0xd8, /* add %bl, %al */
3362         0x04, b'0', /* add $'0', %al */
3363         0xee, /* out %al, (%dx) */
3364         0xb0, b'\n', /* mov $'\n', %al */
3365         0xee, /* out %al, (%dx) */
3366         0xf4, /* hlt */
3367     ];
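
    // With rax = 2 and rbx = 3 set below, the guest computes 2 + 3, adds '0'
    // to get ASCII '5', writes it and a newline to port 0x3f8 (observed as
    // VmExit::IoOut in the loop), then executes hlt, which surfaces as
    // VmExit::Reset and ends the test.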
3368 
3369     let mem_size = 0x1000;
3370     let load_addr = GuestAddress(0x1000);
3371     let mem = GuestMemoryMmap::from_ranges(&[(load_addr, mem_size)]).unwrap();
3372 
3373     let hv = hypervisor::new().unwrap();
3374     let vm = hv.create_vm().expect("new VM creation failed");
3375 
3376     for (index, region) in mem.iter().enumerate() {
3377         let mem_region = vm.make_user_memory_region(
3378             index as u32,
3379             region.start_addr().raw_value(),
3380             region.len() as u64,
3381             region.as_ptr() as u64,
3382             false,
3383             false,
3384         );
3385 
3386         vm.create_user_memory_region(mem_region)
3387             .expect("Cannot configure guest memory");
3388     }
3389     mem.write_slice(&code, load_addr)
3390         .expect("Writing code to memory failed");
3391 
3392     let vcpu = vm.create_vcpu(0, None).expect("new Vcpu failed");
3393 
3394     let mut vcpu_sregs = vcpu.get_sregs().expect("get sregs failed");
3395     vcpu_sregs.cs.base = 0;
3396     vcpu_sregs.cs.selector = 0;
3397     vcpu.set_sregs(&vcpu_sregs).expect("set sregs failed");
3398 
3399     let mut vcpu_regs = vcpu.get_regs().expect("get regs failed");
3400     vcpu_regs.rip = 0x1000;
3401     vcpu_regs.rax = 2;
3402     vcpu_regs.rbx = 3;
3403     vcpu_regs.rflags = 2;
3404     vcpu.set_regs(&vcpu_regs).expect("set regs failed");
3405 
3406     loop {
3407         match vcpu.run().expect("run failed") {
3408             VmExit::IoOut(addr, data) => {
3409                 println!(
3410                     "IO out -- addr: {:#x} data [{:?}]",
3411                     addr,
3412                     str::from_utf8(data).unwrap()
3413                 );
3414             }
3415             VmExit::Reset => {
3416                 println!("HLT");
3417                 break;
3418             }
3419             r => panic!("unexpected exit reason: {:?}", r),
3420         }
3421     }
3422 }
3423