xref: /cloud-hypervisor/hypervisor/src/kvm/mod.rs (revision 3ce0fef7fd546467398c914dbc74d8542e45cf6f)
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//
// Copyright 2018-2019 CrowdStrike, Inc.
//
//

#[cfg(target_arch = "aarch64")]
use crate::aarch64::gic::KvmGicV3Its;
#[cfg(target_arch = "aarch64")]
pub use crate::aarch64::{
    check_required_kvm_extensions, gic::Gicv3ItsState as GicState, is_system_register, VcpuInit,
    VcpuKvmState,
};
#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::gic::{Vgic, VgicConfig};
use crate::cpu;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, InterruptSourceConfig, VmOps};
use crate::HypervisorType;
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset_of};
use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
use std::any::Any;
use std::collections::HashMap;
#[cfg(target_arch = "aarch64")]
use std::convert::TryInto;
#[cfg(target_arch = "x86_64")]
use std::fs::File;
#[cfg(target_arch = "x86_64")]
use std::os::unix::io::AsRawFd;
#[cfg(feature = "tdx")]
use std::os::unix::io::RawFd;
use std::result;
#[cfg(target_arch = "x86_64")]
use std::sync::atomic::{AtomicBool, Ordering};
#[cfg(target_arch = "aarch64")]
use std::sync::Mutex;
use std::sync::{Arc, RwLock};
use vmm_sys_util::eventfd::EventFd;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{
    CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, StandardRegisters, XsaveState,
    NUM_IOAPIC_PINS,
};
#[cfg(target_arch = "x86_64")]
use crate::ClockData;
use crate::{
    CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
    USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
};
#[cfg(target_arch = "aarch64")]
use aarch64::{RegList, Register, StandardRegisters};
#[cfg(target_arch = "x86_64")]
use kvm_bindings::{
    kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
    KVM_GUESTDBG_USE_HW_BP,
};
#[cfg(target_arch = "x86_64")]
use x86_64::check_required_kvm_extensions;
#[cfg(target_arch = "x86_64")]
pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState};
// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
pub use kvm_bindings;
pub use kvm_bindings::{
    kvm_clock_data, kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug,
    kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_userspace_memory_region,
    KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
    KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
};
#[cfg(target_arch = "aarch64")]
use kvm_bindings::{
    kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64,
    KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK,
    KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK,
    KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
};
#[cfg(feature = "tdx")]
use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO};
pub use kvm_ioctls;
pub use kvm_ioctls::{Cap, Kvm};
#[cfg(target_arch = "aarch64")]
use std::mem;
use thiserror::Error;
use vfio_ioctls::VfioDeviceFd;
#[cfg(feature = "tdx")]
use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr};
///
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
///
pub use {
    kvm_bindings::kvm_create_device as CreateDevice, kvm_bindings::kvm_device_attr as DeviceAttr,
    kvm_bindings::kvm_run, kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::VcpuExit,
};

#[cfg(target_arch = "x86_64")]
const KVM_CAP_SGX_ATTRIBUTE: u32 = 196;

#[cfg(feature = "tdx")]
const KVM_EXIT_TDX: u32 = 50;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SUCCESS: u64 = 0;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000;

#[cfg(feature = "tdx")]
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);

#[cfg(feature = "tdx")]
#[repr(u32)]
enum TdxCommand {
    Capabilities = 0,
    InitVm,
    InitVcpu,
    InitMemRegion,
    Finalize,
}

#[cfg(feature = "tdx")]
pub enum TdxExitDetails {
    GetQuote,
    SetupEventNotifyInterrupt,
}

#[cfg(feature = "tdx")]
pub enum TdxExitStatus {
    Success,
    InvalidOperand,
}

#[cfg(feature = "tdx")]
const TDX_MAX_NR_CPUID_CONFIGS: usize = 6;

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCpuidConfig {
    pub leaf: u32,
    pub sub_leaf: u32,
    pub eax: u32,
    pub ebx: u32,
    pub ecx: u32,
    pub edx: u32,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCapabilities {
    pub attrs_fixed0: u64,
    pub attrs_fixed1: u64,
    pub xfam_fixed0: u64,
    pub xfam_fixed1: u64,
    pub nr_cpuid_configs: u32,
    pub padding: u32,
    pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS],
}

#[cfg(feature = "tdx")]
#[derive(Copy, Clone)]
pub struct KvmTdxExit {
    pub type_: u32,
    pub pad: u32,
    pub u: KvmTdxExitU,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Copy, Clone)]
pub union KvmTdxExitU {
    pub vmcall: KvmTdxExitVmcall,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct KvmTdxExitVmcall {
    pub type_: u64,
    pub subfunction: u64,
    pub reg_mask: u64,
    pub in_r12: u64,
    pub in_r13: u64,
    pub in_r14: u64,
    pub in_r15: u64,
    pub in_rbx: u64,
    pub in_rdi: u64,
    pub in_rsi: u64,
    pub in_r8: u64,
    pub in_r9: u64,
    pub in_rdx: u64,
    pub status_code: u64,
    pub out_r11: u64,
    pub out_r12: u64,
    pub out_r13: u64,
    pub out_r14: u64,
    pub out_r15: u64,
    pub out_rbx: u64,
    pub out_rdi: u64,
    pub out_rsi: u64,
    pub out_r8: u64,
    pub out_r9: u64,
    pub out_rdx: u64,
}

impl From<kvm_userspace_memory_region> for UserMemoryRegion {
    fn from(region: kvm_userspace_memory_region) -> Self {
        let mut flags = USER_MEMORY_REGION_READ;
        if region.flags & KVM_MEM_READONLY == 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 {
            flags |= USER_MEMORY_REGION_LOG_DIRTY;
        }

        UserMemoryRegion {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<UserMemoryRegion> for kvm_userspace_memory_region {
    fn from(region: UserMemoryRegion) -> Self {
        assert!(
            region.flags & USER_MEMORY_REGION_READ != 0,
            "KVM mapped memory is always readable"
        );

        let mut flags = 0;
        if region.flags & USER_MEMORY_REGION_WRITE == 0 {
            flags |= KVM_MEM_READONLY;
        }
        if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
            flags |= KVM_MEM_LOG_DIRTY_PAGES;
        }

        kvm_userspace_memory_region {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}
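
// A minimal sketch of the flag mapping implemented by the two conversions
// above (hypothetical addresses; the constants are the crate-level flag bits):
//
//     let region = UserMemoryRegion {
//         slot: 0,
//         guest_phys_addr: 0x10_0000,
//         memory_size: 0x1000,
//         userspace_addr: 0x7f00_0000_0000,
//         flags: USER_MEMORY_REGION_READ | USER_MEMORY_REGION_LOG_DIRTY,
//     };
//     // No WRITE bit -> KVM_MEM_READONLY; LOG_DIRTY -> KVM_MEM_LOG_DIRTY_PAGES.
//     let kvm_region: kvm_userspace_memory_region = region.into();
//     assert_eq!(kvm_region.flags, KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES);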

impl From<kvm_mp_state> for MpState {
    fn from(s: kvm_mp_state) -> Self {
        MpState::Kvm(s)
    }
}

impl From<MpState> for kvm_mp_state {
    fn from(ms: MpState) -> Self {
        match ms {
            MpState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("MpState is not valid"),
        }
    }
}

impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: kvm_ioctls::IoEventAddress) -> Self {
        match a {
            kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuKvmState> for CpuState {
    fn from(s: VcpuKvmState) -> Self {
        CpuState::Kvm(s)
    }
}

impl From<CpuState> for VcpuKvmState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<kvm_clock_data> for ClockData {
    fn from(d: kvm_clock_data) -> Self {
        ClockData::Kvm(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for kvm_clock_data {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("ClockData is not valid"),
        }
    }
}

impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
    fn from(s: kvm_irq_routing_entry) -> Self {
        IrqRoutingEntry::Kvm(s)
    }
}

impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

struct KvmDirtyLogSlot {
    slot: u32,
    guest_phys_addr: u64,
    memory_size: u64,
    userspace_addr: u64,
}

/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    fd: Arc<VmFd>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>,
}

impl KvmVm {
    ///
    /// Creates an emulated device in the kernel.
    ///
    /// See the documentation for `KVM_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> {
        let device_fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        Ok(VfioDeviceFd::new_from_kvm(device_fd))
    }
    /// Checks if a particular `Cap` is available.
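    ///
    /// # Examples
    ///
    /// A minimal sketch, downcasting from the `Vm` trait object to reach
    /// this inherent method:
    ///
    /// ```
    /// # use hypervisor::kvm::{Cap, KvmHypervisor, KvmVm};
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// let kvm_vm = vm.as_any().downcast_ref::<KvmVm>().unwrap();
    /// assert!(kvm_vm.check_extension(Cap::Irqfd));
    /// ```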
    pub fn check_extension(&self, c: Cap) -> bool {
        self.fd.check_extension(c)
    }
}

/// Implementation of Vm trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl vm::Vm for KvmVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    fn set_identity_map_address(&self, address: u64) -> vm::Result<()> {
        self.fd
            .set_identity_map_address(address)
            .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
        self.fd
            .set_tss_address(offset)
            .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
    }

    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        self.fd
            .create_irq_chip()
            .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
    }

    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
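    /// # Examples
    ///
    /// A minimal sketch (x86_64; the in-kernel irqchip must be created first):
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use vmm_sys_util::eventfd::EventFd;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// #[cfg(target_arch = "x86_64")]
    /// {
    ///     vm.create_irq_chip().unwrap();
    ///     let evtfd = EventFd::new(0).unwrap();
    ///     vm.register_irqfd(&evtfd, 4).unwrap();
    /// }
    /// ```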
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
    }

    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
    }

    ///
    /// Creates a new vCPU for the given `id` and returns it as a `Vcpu` trait object.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vc = self
            .fd
            .create_vcpu(id as u64)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = KvmVcpu {
            fd: vc,
            #[cfg(target_arch = "x86_64")]
            msrs: self.msrs.clone(),
            vm_ops,
            #[cfg(target_arch = "x86_64")]
            hyperv_synic: AtomicBool::new(false),
        };
        Ok(Arc::new(vcpu))
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Creates a virtual GIC device.
    ///
    fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
        let gic_device = KvmGicV3Its::new(self, config)
            .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?;
        Ok(Arc::new(Mutex::new(gic_device)))
    }

    ///
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
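    /// # Examples
    ///
    /// A minimal sketch (hypothetical MMIO address, no datamatch):
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use hypervisor::IoEventAddress;
    /// # use vmm_sys_util::eventfd::EventFd;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// let evtfd = EventFd::new(0).unwrap();
    /// vm.register_ioevent(&evtfd, &IoEventAddress::Mmio(0xd000_0000), None)
    ///     .unwrap();
    /// ```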
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<vm::DataMatch>,
    ) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(kvm_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(kvm_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }

    ///
    /// Unregisters an event from a certain address it has been previously registered to.
    ///
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match &config {
            InterruptSourceConfig::MsiIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_MSI,
                    ..Default::default()
                };

                kvm_route.u.msi.address_lo = cfg.low_addr;
                kvm_route.u.msi.address_hi = cfg.high_addr;
                kvm_route.u.msi.data = cfg.data;

                if self.check_extension(crate::kvm::Cap::MsiDevid) {
                    // On AArch64 there is a limitation on the range of the
                    // 'devid': it cannot exceed 65535 (the maximum of a u16).
                    //
                    // The BDF cannot be used directly, because the 'segment'
                    // sits in the high 16 bits. The layout of the u32 BDF is:
                    // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --|
                    // |      segment    |     bus    |   device   |  function  |
                    //
                    // Since we only support one bus per segment, we can build
                    // a 'devid' by replacing the 'bus' bits with the low 8
                    // bits of the 'segment'.
                    // This resolves the range problem and gives every device
                    // a distinct 'devid'. The limitation is that at most 256
                    // segments can be supported.
                    //
                    let modified_devid = (cfg.devid & 0x00ff_0000) >> 8 | cfg.devid & 0xff;
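                    // For example, a hypothetical BDF of 0x0002_0019
                    // (segment 2, bus 0, device 3, function 1) becomes
                    // devid 0x0219: the low 8 bits of the segment land in
                    // the bus field while the device/function bits are kept.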

                    kvm_route.flags = KVM_MSI_VALID_DEVID;
                    kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid;
                }
                kvm_route.into()
            }
            InterruptSourceConfig::LegacyIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_IRQCHIP,
                    ..Default::default()
                };
                kvm_route.u.irqchip.irqchip = cfg.irqchip;
                kvm_route.u.irqchip.pin = cfg.pin;

                kvm_route.into()
            }
        }
    }

    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
    ///
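    /// # Examples
    ///
    /// A minimal sketch (x86_64, assuming an in-kernel irqchip exists; an
    /// empty table is accepted and simply clears all routes):
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// #[cfg(target_arch = "x86_64")]
    /// {
    ///     vm.create_irq_chip().unwrap();
    ///     vm.set_gsi_routing(&[]).unwrap();
    /// }
    /// ```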
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
        irq_routing[0].nr = entries.len() as u32;
        irq_routing[0].flags = 0;
        let entries: Vec<kvm_irq_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Kvm(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: irq_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [kvm_irq_routing_entry] =
                irq_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_gsi_routing(&irq_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }

    ///
    /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
    ///
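    /// # Examples
    ///
    /// A minimal sketch (hypothetical guest and host addresses):
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// // Writable 4 KiB region with dirty-page logging enabled.
    /// let region = vm.make_user_memory_region(0, 0x10_0000, 0x1000, 0x7f00_0000_0000, false, true);
    /// ```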
    fn make_user_memory_region(
        &self,
        slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        kvm_userspace_memory_region {
            slot,
            guest_phys_addr,
            memory_size,
            userspace_addr,
            flags: if readonly { KVM_MEM_READONLY } else { 0 }
                | if log_dirty_pages {
                    KVM_MEM_LOG_DIRTY_PAGES
                } else {
                    0
                },
        }
        .into()
    }

    ///
    /// Creates a guest physical memory region.
    ///
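    /// # Examples
    ///
    /// A minimal sketch backing a 4 KiB guest region (hypothetical GPA) with
    /// anonymous host memory:
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// // SAFETY: anonymous, private mapping with no fixed address.
    /// let host = unsafe {
    ///     libc::mmap(
    ///         std::ptr::null_mut(),
    ///         0x1000,
    ///         libc::PROT_READ | libc::PROT_WRITE,
    ///         libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
    ///         -1,
    ///         0,
    ///     )
    /// };
    /// let region = vm.make_user_memory_region(0, 0x10_0000, 0x1000, host as u64, false, false);
    /// vm.create_user_memory_region(region).unwrap();
    /// ```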
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 {
            if (region.flags & KVM_MEM_READONLY) != 0 {
                return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!(
                    "Error creating regions with both 'dirty-pages-log' and 'read-only'."
                )));
            }

            // Keep track of the regions that need dirty pages log
            self.dirty_log_slots.write().unwrap().insert(
                region.slot,
                KvmDirtyLogSlot {
                    slot: region.slot,
                    guest_phys_addr: region.guest_phys_addr,
                    memory_size: region.memory_size,
                    userspace_addr: region.userspace_addr,
                },
            );

            // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`.
            // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`.
            region.flags = 0;
        }

        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
        }
    }

    ///
    /// Removes a guest physical memory region.
    ///
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots.write().unwrap().remove(&region.slot);

        // Setting the size to 0 means "remove"
        region.memory_size = 0;
        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))
        }
    }

    ///
    /// Returns the preferred CPU target type which can be emulated by KVM on the underlying host.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
        self.fd
            .get_preferred_target(kvi)
            .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        // Create a split irqchip: only the local APIC is emulated in the
        // kernel; the PICs and the IOAPIC are not.
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = NUM_IOAPIC_PINS as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SGX_ATTRIBUTE,
            ..Default::default()
        };
        cap.args[0] = file.as_raw_fd() as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?;
        Ok(())
    }

    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        Ok(self
            .fd
            .get_clock()
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?
            .into())
    }

    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        let data = (*data).into();
        self.fd
            .set_clock(&data)
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }

    /// Create a device that is used for passthrough
    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = kvm_create_device {
            type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: KVM_MEM_LOG_DIRTY_PAGES,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: 0,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

    ///
    /// Get dirty pages bitmap (one bit per page)
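    /// (e.g. a 2 MiB slot covers 512 4 KiB pages, so the returned `Vec<u64>`
    /// packs those 512 bits into 8 words)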
    ///
    fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(slot, memory_size as usize)
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }

    ///
    /// Initialize TDX for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> {
        const TDX_ATTR_SEPT_VE_DISABLE: usize = 28;

        let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default());

        #[repr(C)]
        struct TdxInitVm {
            attributes: u64,
            max_vcpus: u32,
            padding: u32,
            mrconfigid: [u64; 6],
            mrowner: [u64; 6],
            mrownerconfig: [u64; 6],
            cpuid_nent: u32,
            cpuid_padding: u32,
            cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256],
        }
        let data = TdxInitVm {
            attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE,
            max_vcpus,
            padding: 0,
            mrconfigid: [0; 6],
            mrowner: [0; 6],
            mrownerconfig: [0; 6],
            cpuid_nent: cpuid.len() as u32,
            cpuid_padding: 0,
            cpuid_entries: cpuid.as_slice().try_into().unwrap(),
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitVm,
            0,
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitializeTdx)
    }

    ///
    /// Finalize the TDX setup for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_finalize(&self) -> vm::Result<()> {
        tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
            .map_err(vm::HypervisorVmError::FinalizeTdx)
    }

    ///
    /// Initialize memory regions for the TDX VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init_memory_region(
        &self,
        host_address: u64,
        guest_address: u64,
        size: u64,
        measure: bool,
    ) -> vm::Result<()> {
        #[repr(C)]
        struct TdxInitMemRegion {
            host_address: u64,
            guest_address: u64,
            pages: u64,
        }
        let data = TdxInitMemRegion {
            host_address,
            guest_address,
            pages: size / 4096,
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitMemRegion,
            u32::from(measure),
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitMemRegionTdx)
    }

    /// Downcast to the underlying KvmVm type
    fn as_any(&self) -> &dyn Any {
        self
    }
}

#[cfg(feature = "tdx")]
fn tdx_command(
    fd: &RawFd,
    command: TdxCommand,
    flags: u32,
    data: u64,
) -> std::result::Result<(), std::io::Error> {
    #[repr(C)]
    struct TdxIoctlCmd {
        command: TdxCommand,
        flags: u32,
        data: u64,
        error: u64,
        unused: u64,
    }
    let cmd = TdxIoctlCmd {
        command,
        flags,
        data,
        error: 0,
        unused: 0,
    };
    // SAFETY: FFI call. All input parameters are valid.
    let ret = unsafe {
        ioctl_with_val(
            fd,
            KVM_MEMORY_ENCRYPT_OP(),
            &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong,
        )
    };

    if ret < 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}

/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    kvm: Kvm,
}

impl KvmHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by the hypervisor.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

/// Enum for KVM-related errors
#[derive(Debug, Error)]
pub enum KvmError {
    #[error("Capability missing: {0:?}")]
    CapabilityMissing(Cap),
}

pub type KvmResult<T> = result::Result<T, KvmError>;

impl KvmHypervisor {
    /// Create a hypervisor based on Kvm
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
        let api_version = kvm_obj.get_api_version();

        if api_version != kvm_bindings::KVM_API_VERSION as i32 {
            return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
        }

        Ok(Arc::new(KvmHypervisor { kvm: kvm_obj }))
    }

    /// Check if the hypervisor is available
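    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// if KvmHypervisor::is_available().unwrap() {
    ///     let _hypervisor = KvmHypervisor::new().unwrap();
    /// }
    /// ```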
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/kvm") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}

/// Implementation of Hypervisor trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl hypervisor::Hypervisor for KvmHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Kvm
    }

    /// Create a KVM vm object of a specific VM type and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type(0).unwrap();
    /// ```
    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm_with_type(vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, meaning the ioctl
                        // was interrupted, we have to retry, as this can't be
                        // considered a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs: Vec<MsrEntry> = vec![
                MsrEntry {
                    ..Default::default()
                };
                num_msrs
            ];
            let indices = msr_list.as_slice();
            for (pos, index) in indices.iter().enumerate() {
                msrs[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }

        #[cfg(target_arch = "aarch64")]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }

    /// Create a KVM vm object and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// ```
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        #[allow(unused_mut)]
        let mut vm_type: u64 = 0; // Create with default platform type

        // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA
        // size from the host and use that when creating the VM, which may
        // avoid unnecessary VM creation failures.
        #[cfg(target_arch = "aarch64")]
        if self.kvm.check_extension(Cap::ArmVmIPASize) {
            vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap();
        }

        self.create_vm_with_type(vm_type)
    }

    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        check_required_kvm_extensions(&self.kvm)
            .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve AArch64 host maximum IPA size supported by KVM.
    ///
    fn get_host_ipa_limit(&self) -> i32 {
        self.kvm.get_host_ipa_limit()
    }

    ///
    /// Retrieve TDX capabilities
    ///
    #[cfg(feature = "tdx")]
    fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> {
        let data = TdxCapabilities {
            nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32,
            ..Default::default()
        };

        tdx_command(
            &self.kvm.as_raw_fd(),
            TdxCommand::Capabilities,
            0,
            &data as *const _ as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?;

        Ok(data)
    }

    ///
    /// Get the number of supported hardware breakpoints
    ///
    fn get_guest_debug_hw_bps(&self) -> usize {
        #[cfg(target_arch = "x86_64")]
        {
            4
        }
        #[cfg(target_arch = "aarch64")]
        {
            self.kvm.get_guest_debug_hw_bps() as usize
        }
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32
    }
}

/// Vcpu struct for KVM
pub struct KvmVcpu {
    fd: VcpuFd,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    #[cfg(target_arch = "x86_64")]
    hyperv_synic: AtomicBool,
}

/// Implementation of Vcpu trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// ```
impl cpu::Vcpu for KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        Ok(self
            .fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }

    ///
    /// Returns the vCPU general purpose registers.
    /// The `KVM_GET_REGS` ioctl is not available on AArch64, so
    /// `KVM_GET_ONE_REG` is used to get the registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        let mut state: StandardRegisters = kvm_regs::default();
        let mut off = offset_of!(user_pt_regs, regs);
        // There are 31 user_pt_regs:
        // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
        // These are the general-purpose registers of the Armv8-A architecture
        // (x0-x30 when used as 64-bit registers, or w0-w30 as 32-bit registers).
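        // Each access below goes through `arm64_core_reg_id!`, which builds
        // the ONE_REG id as
        // `KVM_REG_ARM64 | size | KVM_REG_ARM_CORE | (offset / size_of::<u32>())`,
        // i.e. the register's byte offset into `kvm_regs` counted in 32-bit words.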
        for i in 0..31 {
            let mut bytes = [0_u8; 8];
            self.fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            state.regs.regs[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // We are now entering the "Other register" section of the Armv8-A architecture.
        // First one, stack pointer.
        let off = offset_of!(user_pt_regs, sp);
        let mut bytes = [0_u8; 8];
        self.fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.regs.sp = u64::from_le_bytes(bytes);

        // Second one, the program counter.
        let off = offset_of!(user_pt_regs, pc);
        let mut bytes = [0_u8; 8];
        self.fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.regs.pc = u64::from_le_bytes(bytes);

        // Next is the processor state.
        let off = offset_of!(user_pt_regs, pstate);
        let mut bytes = [0_u8; 8];
        self.fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.regs.pstate = u64::from_le_bytes(bytes);

        // The stack pointer associated with EL1
        let off = offset_of!(kvm_regs, sp_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.sp_el1 = u64::from_le_bytes(bytes);

        // Exception Link Register for EL1: when taking an exception to EL1,
        // this register holds the address to return to afterwards.
        let off = offset_of!(kvm_regs, elr_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.elr_el1 = u64::from_le_bytes(bytes);

        // Saved Program Status Registers; the kernel uses 5 of them.
        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            let mut bytes = [0_u8; 8];
            self.fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            state.spsr[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // Now moving on to the floating-point registers, which are stored in the
        // user_fpsimd_state in the kernel:
        // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            let mut bytes = [0_u8; 16];
            self.fd
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            state.fp_regs.vregs[i] = u128::from_le_bytes(bytes);
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        let mut bytes = [0_u8; 4];
        self.fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.fp_regs.fpsr = u32::from_le_bytes(bytes);

        // Floating-point Control Register
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        let mut bytes = [0_u8; 4];
        self.fd
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.fp_regs.fpcr = u32::from_le_bytes(bytes);
        Ok(state)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }

    ///
    /// Sets the vCPU general purpose registers.
    /// The `KVM_SET_REGS` ioctl is not available on AArch64, so
    /// `KVM_SET_ONE_REG` is used to set the registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // This function sets the registers in exactly the same order as
        // `get_regs()` reads them; look there for additional info on them.
        let mut off = offset_of!(user_pt_regs, regs);
        for i in 0..31 {
            self.fd
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &state.regs.regs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let off = offset_of!(user_pt_regs, sp);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.regs.sp.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pc);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.regs.pc.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pstate);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.regs.pstate.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, sp_el1);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.sp_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, elr_el1);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.elr_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            self.fd
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &state.spsr[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            self.fd
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
                    &state.fp_regs.vregs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &state.fp_regs.fpsr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        self.fd
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &state.fp_regs.fpcr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        Ok(self
            .fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into();
        self.fd
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to set up the CPUID registers.
    ///
    fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
        let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        let kvm_cpuid = <CpuId>::from_entries(&cpuid)
            .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;

        self.fd
            .set_cpuid2(&kvm_cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        // Update the information about Hyper-V SynIC being enabled and
        // emulated, as it will later influence which MSRs should be saved.
        self.hyperv_synic.store(true, Ordering::Release);

        let cap = kvm_enable_cap {
            cap: KVM_CAP_HYPERV_SYNIC,
            ..Default::default()
        };
        self.fd
            .enable_cap(&cap)
            .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into()))
    }

    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    #[cfg(target_arch = "x86_64")]
    fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .fd
            .get_cpuid2(num_entries)
            .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        Ok(self
            .fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
        let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into();
        self.fd
            .set_lapic(&klapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
        let succ = self
            .fd
            .get_msrs(&mut kvm_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &kvm_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }

    #[cfg(target_arch = "x86_64")]
    ///
1556     /// Set up the model-specific registers (MSR) for this vCPU.
1557     /// Returns the number of MSR entries actually written.
1558     ///
1559     fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
1560         let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
1561         let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
1562         self.fd
1563             .set_msrs(&kvm_msrs)
1564             .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
1565     }
1566 
1567     ///
1568     /// Returns the vcpu's current "multiprocessing state".
1569     ///
1570     fn get_mp_state(&self) -> cpu::Result<MpState> {
1571         Ok(self
1572             .fd
1573             .get_mp_state()
1574             .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))?
1575             .into())
1576     }
1577 
1578     ///
1579     /// Sets the vcpu's current "multiprocessing state".
1580     ///
1581     fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
1582         self.fd
1583             .set_mp_state(mp_state.into())
1584             .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
1585     }
1586 
1587     #[cfg(target_arch = "x86_64")]
1588     ///
1589     /// Translates guest virtual address to guest physical address using the `KVM_TRANSLATE` ioctl.
1590     ///
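         /// # Example
         ///
         /// A minimal sketch; `0x1000` is an arbitrary address. On a freshly
         /// created vCPU paging is disabled, so the translation should be the
         /// identity:
         ///
         /// ```rust
         /// # use hypervisor::kvm::KvmHypervisor;
         /// # use std::sync::Arc;
         /// let kvm = KvmHypervisor::new().unwrap();
         /// let hv = Arc::new(kvm);
         /// let vm = hv.create_vm().expect("new VM fd creation failed");
         /// vm.enable_split_irq().unwrap();
         /// let vcpu = vm.create_vcpu(0, None).unwrap();
         /// // With paging disabled, `gpa` should equal the input GVA.
         /// let (gpa, _) = vcpu.translate_gva(0x1000, 0).unwrap();
         /// ```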
1591     fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> {
1592         let tr = self
1593             .fd
1594             .translate_gva(gva)
1595             .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
1596         // tr.valid is set if the GVA is mapped to a valid GPA.
1597         match tr.valid {
1598             0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!(
1599                 "Invalid GVA: {:#x}",
1600                 gva
1601             ))),
1602             _ => Ok((tr.physical_address, 0)),
1603         }
1604     }
1605 
1606     ///
1607     /// Triggers the running of the current virtual CPU returning an exit reason.
1608     ///
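         /// # Example
         ///
         /// A minimal dispatch-loop sketch. A real VMM would set up guest
         /// memory, registers and devices before entering the loop; exits not
         /// consumed by `vm_ops` are returned to the caller:
         ///
         /// ```rust
         /// # use hypervisor::cpu::VmExit;
         /// # use hypervisor::kvm::KvmHypervisor;
         /// # use std::sync::Arc;
         /// let kvm = KvmHypervisor::new().unwrap();
         /// let hv = Arc::new(kvm);
         /// let vm = hv.create_vm().expect("new VM fd creation failed");
         /// let vcpu = vm.create_vcpu(0, None).unwrap();
         /// loop {
         ///     match vcpu.run() {
         ///         Ok(VmExit::Reset) | Ok(VmExit::Shutdown) => break,
         ///         Ok(VmExit::Ignore) => continue,
         ///         Ok(_) => { /* forward I/O and MMIO exits to device emulation */ }
         ///         Err(_) => break,
         ///     }
         /// }
         /// ```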
1609     fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
1610         match self.fd.run() {
1611             Ok(run) => match run {
1612                 #[cfg(target_arch = "x86_64")]
1613                 VcpuExit::IoIn(addr, data) => {
1614                     if let Some(vm_ops) = &self.vm_ops {
1615                         return vm_ops
1616                             .pio_read(addr.into(), data)
1617                             .map(|_| cpu::VmExit::Ignore)
1618                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1619                     }
1620 
1621                     Ok(cpu::VmExit::IoIn(addr, data))
1622                 }
1623                 #[cfg(target_arch = "x86_64")]
1624                 VcpuExit::IoOut(addr, data) => {
1625                     if let Some(vm_ops) = &self.vm_ops {
1626                         return vm_ops
1627                             .pio_write(addr.into(), data)
1628                             .map(|_| cpu::VmExit::Ignore)
1629                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1630                     }
1631 
1632                     Ok(cpu::VmExit::IoOut(addr, data))
1633                 }
1634                 #[cfg(target_arch = "x86_64")]
1635                 VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
1636                 #[cfg(target_arch = "x86_64")]
1637                 VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),
1638 
1639                 #[cfg(target_arch = "aarch64")]
1640                 VcpuExit::SystemEvent(event_type, flags) => {
1641                     use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
1642                     // On AArch64, when the VM is shut down, run() returns
1643                     // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN.
1644                     if event_type == KVM_SYSTEM_EVENT_RESET {
1645                         Ok(cpu::VmExit::Reset)
1646                     } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
1647                         Ok(cpu::VmExit::Shutdown)
1648                     } else {
1649                         Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1650                             "Unexpected system event with type 0x{:x}, flags 0x{:x?}",
1651                             event_type,
1652                             flags
1653                         )))
1654                     }
1655                 }
1656 
1657                 VcpuExit::MmioRead(addr, data) => {
1658                     if let Some(vm_ops) = &self.vm_ops {
1659                         return vm_ops
1660                             .mmio_read(addr, data)
1661                             .map(|_| cpu::VmExit::Ignore)
1662                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1663                     }
1664 
1665                     Ok(cpu::VmExit::MmioRead(addr, data))
1666                 }
1667                 VcpuExit::MmioWrite(addr, data) => {
1668                     if let Some(vm_ops) = &self.vm_ops {
1669                         return vm_ops
1670                             .mmio_write(addr, data)
1671                             .map(|_| cpu::VmExit::Ignore)
1672                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1673                     }
1674 
1675                     Ok(cpu::VmExit::MmioWrite(addr, data))
1676                 }
1677                 VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),
1678                 #[cfg(feature = "tdx")]
1679                 VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx),
1680                 VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug),
1681 
1682                 r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1683                     "Unexpected exit reason on vcpu run: {:?}",
1684                     r
1685                 ))),
1686             },
1687 
1688             Err(ref e) => match e.errno() {
1689                 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
1690                 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1691                     "VCPU error {:?}",
1692                     e
1693                 ))),
1694             },
1695         }
1696     }
1697 
1698     #[cfg(target_arch = "x86_64")]
1699     ///
1700     /// Let the guest know that it has been paused, which prevents
1701     /// potential soft lockups when it is resumed.
1702     ///
1703     fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
1704         if let Err(e) = self.fd.kvmclock_ctrl() {
1705         // The Linux kernel returns -EINVAL if the PV clock isn't yet initialised,
1706             // which could be because we're still in firmware or the guest doesn't
1707             // use KVM clock.
1708             if e.errno() != libc::EINVAL {
1709                 return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()));
1710             }
1711         }
1712 
1713         Ok(())
1714     }
1715 
1716     ///
1717     /// Sets debug registers to set hardware breakpoints and/or enable single step.
1718     ///
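         /// # Example
         ///
         /// A minimal sketch, assuming the host supports guest debugging;
         /// `0x10_0000` is a hypothetical guest breakpoint address:
         ///
         /// ```rust
         /// # use hypervisor::kvm::KvmHypervisor;
         /// # use std::sync::Arc;
         /// # use vm_memory::GuestAddress;
         /// let kvm = KvmHypervisor::new().unwrap();
         /// let hv = Arc::new(kvm);
         /// let vm = hv.create_vm().expect("new VM fd creation failed");
         /// let vcpu = vm.create_vcpu(0, None).unwrap();
         /// // One hardware breakpoint, single-stepping disabled.
         /// vcpu.set_guest_debug(&[GuestAddress(0x10_0000)], false).unwrap();
         /// ```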
1719     fn set_guest_debug(
1720         &self,
1721         addrs: &[vm_memory::GuestAddress],
1722         singlestep: bool,
1723     ) -> cpu::Result<()> {
1724         let mut dbg = kvm_guest_debug {
1725             #[cfg(target_arch = "x86_64")]
1726             control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
1727             #[cfg(target_arch = "aarch64")]
1728             control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW,
1729             ..Default::default()
1730         };
1731         if singlestep {
1732             dbg.control |= KVM_GUESTDBG_SINGLESTEP;
1733         }
1734 
1735         // Set the debug registers.
1736         // Here we assume that the number of addresses does not exceed what
1737         // `Hypervisor::get_guest_debug_hw_bps()` specifies.
1738         #[cfg(target_arch = "x86_64")]
1739         {
1740             // Set bits 9 and 10.
1741             // bit 9: GE (global exact breakpoint enable) flag.
1742             // bit 10: always 1.
1743             dbg.arch.debugreg[7] = 0x0600;
1744 
1745             for (i, addr) in addrs.iter().enumerate() {
1746                 dbg.arch.debugreg[i] = addr.0;
1747                 // Set global breakpoint enable flag
1748                 dbg.arch.debugreg[7] |= 2 << (i * 2);
1749             }
1750         }
1751         #[cfg(target_arch = "aarch64")]
1752         {
1753             for (i, addr) in addrs.iter().enumerate() {
1754                 // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2):
1755                 // bit 0: 1 (Enabled)
1756                 // bit 1~2: 0b11 (PMC = EL1/EL0)
1757                 // bit 5~8: 0b1111 (BAS = AArch64)
1758                 // others: 0
1759                 dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64;
1760                 // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3):
1761                 // bit 2~52: VA[2:52]
1762                 dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0;
1763             }
1764         }
1765         self.fd
1766             .set_guest_debug(&dbg)
1767             .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))
1768     }
1769 
1770     #[cfg(target_arch = "aarch64")]
1771     fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
1772         self.fd
1773             .vcpu_init(kvi)
1774             .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
1775     }
1776 
1777     ///
1778     /// Gets a list of the guest registers that are supported for the
1779     /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
1780     ///
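         /// # Example
         ///
         /// A minimal sketch; the vCPU must be initialized before its register
         /// list can be queried, and 500 entries is enough to hold all Armv8
         /// registers (see `state()`):
         ///
         /// ```rust
         /// # use hypervisor::kvm::kvm_bindings::RegList;
         /// # use hypervisor::kvm::{KvmHypervisor, VcpuInit};
         /// # use std::sync::Arc;
         /// let kvm = KvmHypervisor::new().unwrap();
         /// let hv = Arc::new(kvm);
         /// let vm = hv.create_vm().expect("new VM fd creation failed");
         /// let vcpu = vm.create_vcpu(0, None).unwrap();
         /// let mut kvi = VcpuInit::default();
         /// vm.get_preferred_target(&mut kvi).unwrap();
         /// vcpu.vcpu_init(&kvi).unwrap();
         /// let mut reg_list = RegList::new(500).unwrap();
         /// vcpu.get_reg_list(&mut reg_list).unwrap();
         /// ```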
1781     #[cfg(target_arch = "aarch64")]
1782     fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
1783         self.fd
1784             .get_reg_list(reg_list)
1785             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
1786     }
1787 
1788     ///
1789     /// Gets the value of a system register
1790     ///
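         /// # Example
         ///
         /// A minimal sketch; `0x0018_00A0` is the Arm standard encoding of
         /// MPIDR_EL1 (op0=3, op1=0, CRn=0, CRm=0, op2=5), given here purely
         /// for illustration:
         ///
         /// ```rust
         /// # use hypervisor::kvm::{KvmHypervisor, VcpuInit};
         /// # use std::sync::Arc;
         /// let kvm = KvmHypervisor::new().unwrap();
         /// let hv = Arc::new(kvm);
         /// let vm = hv.create_vm().expect("new VM fd creation failed");
         /// let vcpu = vm.create_vcpu(0, None).unwrap();
         /// let mut kvi = VcpuInit::default();
         /// vm.get_preferred_target(&mut kvi).unwrap();
         /// vcpu.vcpu_init(&kvi).unwrap();
         /// let mpidr = vcpu.get_sys_reg(0x0018_00A0).unwrap();
         /// ```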
1791     #[cfg(target_arch = "aarch64")]
1792     fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
1793         //
1794         // The Arm Architecture Reference Manual defines the encoding of
1795         // AArch64 system registers, see
1796         // https://developer.arm.com/documentation/ddi0487 (chapter D12).
1797         // KVM defines its own ID for each AArch64 system register, and that
1798         // ID is what `KVM_GET/SET_ONE_REG` takes to access a system register
1799         // of a guest.
1800         // A mapping exists between the Arm standard encoding and the KVM ID.
1801         // This function takes the standard u32 ID as input, converts it to
1802         // the corresponding KVM ID, and calls `KVM_GET_ONE_REG` to read the
1803         // value of the system register.
1804         //
1805         let id: u64 = KVM_REG_ARM64
1806             | KVM_REG_SIZE_U64
1807             | KVM_REG_ARM64_SYSREG as u64
1808             | ((((sys_reg) >> 5)
1809                 & (KVM_REG_ARM64_SYSREG_OP0_MASK
1810                     | KVM_REG_ARM64_SYSREG_OP1_MASK
1811                     | KVM_REG_ARM64_SYSREG_CRN_MASK
1812                     | KVM_REG_ARM64_SYSREG_CRM_MASK
1813                     | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
1814         let mut bytes = [0_u8; 8];
1815         self.fd
1816             .get_one_reg(id, &mut bytes)
1817             .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
1818         Ok(u64::from_le_bytes(bytes))
1819     }
1820 
1821     ///
1822     /// Configure core registers for a given CPU.
1823     ///
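         /// # Example
         ///
         /// A minimal sketch; `0x8008_0000` and `0xbfe0_0000` are hypothetical
         /// kernel entry and FDT addresses that a VMM would normally compute:
         ///
         /// ```rust
         /// # use hypervisor::kvm::{KvmHypervisor, VcpuInit};
         /// # use std::sync::Arc;
         /// let kvm = KvmHypervisor::new().unwrap();
         /// let hv = Arc::new(kvm);
         /// let vm = hv.create_vm().expect("new VM fd creation failed");
         /// let vcpu = vm.create_vcpu(0, None).unwrap();
         /// let mut kvi = VcpuInit::default();
         /// vm.get_preferred_target(&mut kvi).unwrap();
         /// vcpu.vcpu_init(&kvi).unwrap();
         /// // vCPU 0 gets PC = kernel entry point and x0 = FDT address.
         /// vcpu.setup_regs(0, 0x8008_0000, 0xbfe0_0000).unwrap();
         /// ```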
1824     #[cfg(target_arch = "aarch64")]
1825     fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
1826         #[allow(non_upper_case_globals)]
1827         // PSR (Processor State Register) bits.
1828         // Taken from arch/arm64/include/uapi/asm/ptrace.h.
1829         const PSR_MODE_EL1h: u64 = 0x0000_0005;
1830         const PSR_F_BIT: u64 = 0x0000_0040;
1831         const PSR_I_BIT: u64 = 0x0000_0080;
1832         const PSR_A_BIT: u64 = 0x0000_0100;
1833         const PSR_D_BIT: u64 = 0x0000_0200;
1834         // Taken from arch/arm64/kvm/inject_fault.c.
1835         const PSTATE_FAULT_BITS_64: u64 =
1836             PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;
1837 
1838         let kreg_off = offset_of!(kvm_regs, regs);
1839 
1840         // Compute the offset of the PSTATE (Processor State) register.
1841         let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
1842         self.fd
1843             .set_one_reg(
1844                 arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
1845                 &PSTATE_FAULT_BITS_64.to_le_bytes(),
1846             )
1847             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1848 
1849         // Other vCPUs are powered off initially awaiting PSCI wakeup.
1850         if cpu_id == 0 {
1851             // Set the PC (Program Counter) to the current program address (kernel address).
1852             let pc = offset_of!(user_pt_regs, pc) + kreg_off;
1853             self.fd
1854                 .set_one_reg(
1855                     arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
1856                     &boot_ip.to_le_bytes(),
1857                 )
1858                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1859 
1860             // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
1861             // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
1862             // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
1863             // We choose to place it at the end of DRAM. See `get_fdt_addr`.
1864             let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
1865             self.fd
1866                 .set_one_reg(
1867                     arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
1868                     &fdt_start.to_le_bytes(),
1869                 )
1870                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1871         }
1872         Ok(())
1873     }
1874 
1875     #[cfg(target_arch = "x86_64")]
1876     ///
1877     /// Get the current CPU state
1878     ///
1879     /// Ordering requirements:
1880     ///
1881     /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
1882     /// vCPU/LAPIC state. As such, it must be done before almost everything
1883     /// else; otherwise we cannot restore everything and expect it to work.
1884     ///
1885     /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
1886     /// still running.
1887     ///
1888     /// KVM_GET_LAPIC may change state of LAPIC before returning it.
1889     ///
1890     /// GET_VCPU_EVENTS should probably be the last state to save, since it
1891     /// may well be affected by internal state modifications of the other
1892     /// GET ioctls.
1893     ///
1894     /// SREGS saves/restores a pending interrupt, similar to what
1895     /// VCPU_EVENTS also does.
1896     ///
1897     /// GET_MSRS requires a pre-populated data structure to do something
1898     /// meaningful. For SET_MSRS it will then contain good data.
1899     ///
1900     /// # Example
1901     ///
1902     /// ```rust
1903     /// # use hypervisor::kvm::KvmHypervisor;
1904     /// # use std::sync::Arc;
1905     /// let kvm = KvmHypervisor::new().unwrap();
1906     /// let hv = Arc::new(kvm);
1907     /// let vm = hv.create_vm().expect("new VM fd creation failed");
1908     /// vm.enable_split_irq().unwrap();
1909     /// let vcpu = vm.create_vcpu(0, None).unwrap();
1910     /// let state = vcpu.state().unwrap();
1911     /// ```
1912     fn state(&self) -> cpu::Result<CpuState> {
1913         let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
1914         let mp_state = self.get_mp_state()?.into();
1915         let regs = self.get_regs()?;
1916         let sregs = self.get_sregs()?;
1917         let xsave = self.get_xsave()?;
1918         let xcrs = self.get_xcrs()?;
1919         let lapic_state = self.get_lapic()?;
1920         let fpu = self.get_fpu()?;
1921 
1922         // Try to get all MSRs based on the list previously retrieved from KVM.
1923         // If the number of MSRs obtained from GET_MSRS is different from the
1924         // expected amount, we fall back to a slower method and get the MSRs
1925         // in chunks. This is the only way to make sure we try to get as many
1926         // MSRs as possible, even if some MSRs are not supported.
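             // For example: if 30 MSRs are requested and GET_MSRS returns 12,
             // the MSR at index 12 is unreadable. We keep entries 0..12, skip
             // index 12, and retry from index 13 onwards, until a whole chunk
             // is read successfully.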
1927         let mut msr_entries = self.msrs.clone();
1928 
1929         // Save extra MSRs if the Hyper-V synthetic interrupt controller is
1930         // emulated.
1931         if self.hyperv_synic.load(Ordering::Acquire) {
1932             let hyperv_synic_msrs = vec![
1933                 0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
1934                 0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
1935                 0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
1936                 0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
1937                 0x400000b5, 0x400000b6, 0x400000b7,
1938             ];
1939             for index in hyperv_synic_msrs {
1940                 let msr = kvm_msr_entry {
1941                     index,
1942                     ..Default::default()
1943                 };
1944                 msr_entries.push(msr.into());
1945             }
1946         }
1947 
1948         let expected_num_msrs = msr_entries.len();
1949         let num_msrs = self.get_msrs(&mut msr_entries)?;
1950         let msrs = if num_msrs != expected_num_msrs {
1951             let mut faulty_msr_index = num_msrs;
1952             let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();
1953 
1954             loop {
1955                 warn!(
1956                     "Detected faulty MSR 0x{:x} while getting MSRs",
1957                     msr_entries[faulty_msr_index].index
1958                 );
1959 
1960                 // Skip the first bad MSR
1961                 let start_pos = faulty_msr_index + 1;
1962 
1963                 let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
1964                 let num_msrs = self.get_msrs(&mut sub_msr_entries)?;
1965 
1966                 msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);
1967 
1968                 if num_msrs == sub_msr_entries.len() {
1969                     break;
1970                 }
1971 
1972                 faulty_msr_index = start_pos + num_msrs;
1973             }
1974 
1975             msr_entries_tmp
1976         } else {
1977             msr_entries
1978         };
1979 
1980         let vcpu_events = self.get_vcpu_events()?;
1981         let tsc_khz = self.tsc_khz()?;
1982 
1983         Ok(VcpuKvmState {
1984             cpuid,
1985             msrs,
1986             vcpu_events,
1987             regs: regs.into(),
1988             sregs: sregs.into(),
1989             fpu,
1990             lapic_state,
1991             xsave,
1992             xcrs,
1993             mp_state,
1994             tsc_khz,
1995         }
1996         .into())
1997     }
1998 
1999     ///
2000     /// Get the current AArch64 CPU state
2001     ///
2002     #[cfg(target_arch = "aarch64")]
2003     fn state(&self) -> cpu::Result<CpuState> {
2004         let mut state = VcpuKvmState {
2005             mp_state: self.get_mp_state()?.into(),
2006             ..Default::default()
2007         };
2008         // Get core registers
2009         state.core_regs = self.get_regs()?;
2010 
2011         // Get system registers.
2012         // Call KVM_GET_REG_LIST to get all registers available to the guest.
2013         // For Armv8 there are around 500 registers.
2014         let mut sys_regs: Vec<Register> = Vec::new();
2015         let mut reg_list = RegList::new(500).unwrap();
2016         self.fd
2017             .get_reg_list(&mut reg_list)
2018             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;
2019 
2020         // At this point reg_list should contain: core registers and system
2021         // registers.
2022         // The register list contains the number of registers and their ids. We
2023         // will need to call KVM_GET_ONE_REG on each id in order to save
2024         // all of them. We carve out from the list the core registers, which
2025         // are represented in the kernel by the kvm_regs structure and for
2026         // which we can calculate the id based on the offset in the structure.
2027         reg_list.retain(|regid| is_system_register(*regid));
2028 
2029         // Now, for the rest of the registers left in the previously fetched
2030         // register list, we simply call KVM_GET_ONE_REG.
2031         let indices = reg_list.as_slice();
2032         for index in indices.iter() {
2033             let mut bytes = [0_u8; 8];
2034             self.fd
2035                 .get_one_reg(*index, &mut bytes)
2036                 .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
2037             sys_regs.push(kvm_bindings::kvm_one_reg {
2038                 id: *index,
2039                 addr: u64::from_le_bytes(bytes),
2040             });
2041         }
2042 
2043         state.sys_regs = sys_regs;
2044 
2045         Ok(state.into())
2046     }
2047 
2048     #[cfg(target_arch = "x86_64")]
2049     ///
2050     /// Restore the previously saved CPU state
2051     ///
2052     /// Ordering requirements:
2053     ///
2054     /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
2055     /// still running.
2056     ///
2057     /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
2058     /// if we ever change the BSP, we have to do that before restoring anything.
2059     /// The same seems to be true for CPUID stuff.
2060     ///
2061     /// SREGS saves/restores a pending interrupt, similar to what
2062     /// VCPU_EVENTS also does.
2063     ///
2064     /// SET_REGS clears pending exceptions unconditionally, thus, it must be
2065     /// done before SET_VCPU_EVENTS, which restores it.
2066     ///
2067     /// SET_LAPIC must come after SET_SREGS, because the latter restores
2068     /// the apic base msr.
2069     ///
2070     /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
2071     /// only restores successfully, when the LAPIC is correctly configured.
2072     ///
2073     /// Arguments: CpuState
2074     /// # Example
2075     ///
2076     /// ```rust
2077     /// # use hypervisor::kvm::KvmHypervisor;
2078     /// # use std::sync::Arc;
2079     /// let kvm = KvmHypervisor::new().unwrap();
2080     /// let hv = Arc::new(kvm);
2081     /// let vm = hv.create_vm().expect("new VM fd creation failed");
2082     /// vm.enable_split_irq().unwrap();
2083     /// let vcpu = vm.create_vcpu(0, None).unwrap();
2084     /// let state = vcpu.state().unwrap();
2085     /// vcpu.set_state(&state).unwrap();
2086     /// ```
2087     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
2088         let state: VcpuKvmState = state.clone().into();
2089         self.set_cpuid2(&state.cpuid)?;
2090         self.set_mp_state(state.mp_state.into())?;
2091         self.set_regs(&state.regs.into())?;
2092         self.set_sregs(&state.sregs.into())?;
2093         self.set_xsave(&state.xsave)?;
2094         self.set_xcrs(&state.xcrs)?;
2095         self.set_lapic(&state.lapic_state)?;
2096         self.set_fpu(&state.fpu)?;
2097 
2098         if let Some(freq) = state.tsc_khz {
2099             self.set_tsc_khz(freq)?;
2100         }
2101 
2102         // Try to set all MSRs previously stored.
2103         // If the number of MSRs set from SET_MSRS is different from the
2104         // expected amount, we fall back to a slower method and set the MSRs
2105         // in chunks. This is the only way to make sure we try to set as many
2106         // MSRs as possible, even if some MSRs are not supported.
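             // The chunked-retry scheme mirrors the one used for GET_MSRS in
             // `state()` above.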
2107         let expected_num_msrs = state.msrs.len();
2108         let num_msrs = self.set_msrs(&state.msrs)?;
2109         if num_msrs != expected_num_msrs {
2110             let mut faulty_msr_index = num_msrs;
2111 
2112             loop {
2113                 warn!(
2114                     "Detected faulty MSR 0x{:x} while setting MSRs",
2115                     state.msrs[faulty_msr_index].index
2116                 );
2117 
2118                 // Skip the first bad MSR
2119                 let start_pos = faulty_msr_index + 1;
2120 
2121                 let sub_msr_entries = state.msrs[start_pos..].to_vec();
2122 
2123                 let num_msrs = self.set_msrs(&sub_msr_entries)?;
2124 
2125                 if num_msrs == sub_msr_entries.len() {
2126                     break;
2127                 }
2128 
2129                 faulty_msr_index = start_pos + num_msrs;
2130             }
2131         }
2132 
2133         self.set_vcpu_events(&state.vcpu_events)?;
2134 
2135         Ok(())
2136     }
2137 
2138     ///
2139     /// Restore the previously saved AArch64 CPU state
2140     ///
2141     #[cfg(target_arch = "aarch64")]
2142     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
2143         let state: VcpuKvmState = state.clone().into();
2144         // Set core registers
2145         self.set_regs(&state.core_regs)?;
2146         // Set system registers
2147         for reg in &state.sys_regs {
2148             self.fd
2149                 .set_one_reg(reg.id, &reg.addr.to_le_bytes())
2150                 .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
2151         }
2152 
2153         self.set_mp_state(state.mp_state.into())?;
2154 
2155         Ok(())
2156     }
2157 
2158     ///
2159     /// Initialize TDX for this CPU
2160     ///
2161     #[cfg(feature = "tdx")]
2162     fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
2163         tdx_command(&self.fd.as_raw_fd(), TdxCommand::InitVcpu, 0, hob_address)
2164             .map_err(cpu::HypervisorCpuError::InitializeTdx)
2165     }
2166 
2167     ///
2168     /// Set the "immediate_exit" state
2169     ///
2170     fn set_immediate_exit(&self, exit: bool) {
2171         self.fd.set_kvm_immediate_exit(exit.into());
2172     }
2173 
2174     ///
2175     /// Returns the details about TDX exit reason
2176     ///
2177     #[cfg(feature = "tdx")]
2178     fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
2179         let kvm_run = self.fd.get_kvm_run();
2180         // SAFETY: accessing a union field in a valid structure
2181         let tdx_vmcall = unsafe {
2182             &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
2183                 as *mut KvmTdxExit))
2184                 .u
2185                 .vmcall
2186         };
2187 
2188         tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;
2189 
2190         if tdx_vmcall.type_ != 0 {
2191             return Err(cpu::HypervisorCpuError::UnknownTdxVmCall);
2192         }
2193 
2194         match tdx_vmcall.subfunction {
2195             TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
2196             TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
2197                 Ok(TdxExitDetails::SetupEventNotifyInterrupt)
2198             }
2199             _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
2200         }
2201     }
2202 
2203     ///
2204     /// Set the status code for TDX exit
2205     ///
2206     #[cfg(feature = "tdx")]
2207     fn set_tdx_status(&mut self, status: TdxExitStatus) {
2208         let kvm_run = self.fd.get_kvm_run();
2209         // SAFETY: accessing a union field in a valid structure
2210         let tdx_vmcall = unsafe {
2211             &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
2212                 as *mut KvmTdxExit))
2213                 .u
2214                 .vmcall
2215         };
2216 
2217         tdx_vmcall.status_code = match status {
2218             TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
2219             TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
2220         };
2221     }
2222 
2223     #[cfg(target_arch = "x86_64")]
2224     ///
2225     /// Return the list of initial MSR entries for a VCPU
2226     ///
2227     fn boot_msr_entries(&self) -> Vec<MsrEntry> {
2228         use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};
2229 
2230         [
2231             msr!(msr_index::MSR_IA32_SYSENTER_CS),
2232             msr!(msr_index::MSR_IA32_SYSENTER_ESP),
2233             msr!(msr_index::MSR_IA32_SYSENTER_EIP),
2234             msr!(msr_index::MSR_STAR),
2235             msr!(msr_index::MSR_CSTAR),
2236             msr!(msr_index::MSR_LSTAR),
2237             msr!(msr_index::MSR_KERNEL_GS_BASE),
2238             msr!(msr_index::MSR_SYSCALL_MASK),
2239             msr!(msr_index::MSR_IA32_TSC),
2240             msr_data!(
2241                 msr_index::MSR_IA32_MISC_ENABLE,
2242                 msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
2243             ),
2244             msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
2245         ]
2246         .to_vec()
2247     }
2248 
2249     #[cfg(target_arch = "aarch64")]
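         ///
         /// Check whether the PMUv3 attribute (`KVM_ARM_VCPU_PMU_V3_INIT`) is
         /// supported for this vCPU, i.e. whether the host exposes a PMU.
         ///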
2250     fn has_pmu_support(&self) -> bool {
2251         let cpu_attr = kvm_bindings::kvm_device_attr {
2252             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
2253             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
2254             addr: 0x0,
2255             flags: 0,
2256         };
2257         self.fd.has_device_attr(&cpu_attr).is_ok()
2258     }
2259 
2260     #[cfg(target_arch = "aarch64")]
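         ///
         /// Initialize the PMU for this vCPU through the
         /// `KVM_ARM_VCPU_PMU_V3_CTRL` device attributes. The PMU interrupt
         /// line must be configured before the PMU itself is initialized,
         /// hence the ordering below.
         ///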
2261     fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
2262         let cpu_attr = kvm_bindings::kvm_device_attr {
2263             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
2264             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
2265             addr: 0x0,
2266             flags: 0,
2267         };
2268         let cpu_attr_irq = kvm_bindings::kvm_device_attr {
2269             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
2270             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
2271             addr: &irq as *const u32 as u64,
2272             flags: 0,
2273         };
2274         self.fd
2275             .set_device_attr(&cpu_attr_irq)
2276             .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
2277         self.fd
2278             .set_device_attr(&cpu_attr)
2279             .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
2280     }
2281 
2282     #[cfg(target_arch = "x86_64")]
2283     ///
2284     /// Get the frequency of the TSC if available
2285     ///
2286     fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
2287         match self.fd.get_tsc_khz() {
2288             Err(e) => {
2289                 if e.errno() == libc::EIO {
2290                     Ok(None)
2291                 } else {
2292                     Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
2293                 }
2294             }
2295             Ok(v) => Ok(Some(v)),
2296         }
2297     }
2298 
2299     #[cfg(target_arch = "x86_64")]
2300     ///
2301     /// Set the frequency of the TSC if available
2302     ///
2303     fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
2304         match self.fd.set_tsc_khz(freq) {
2305             Err(e) => {
2306                 if e.errno() == libc::EIO {
2307                     Ok(())
2308                 } else {
2309                     Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
2310                 }
2311             }
2312             Ok(_) => Ok(()),
2313         }
2314     }
2315 }
2316 
2317 impl KvmVcpu {
2318     #[cfg(target_arch = "x86_64")]
2319     ///
2320     /// X86 specific call that returns the vcpu's current "xsave struct".
2321     ///
2322     fn get_xsave(&self) -> cpu::Result<XsaveState> {
2323         Ok(self
2324             .fd
2325             .get_xsave()
2326             .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
2327             .into())
2328     }
2329 
2330     #[cfg(target_arch = "x86_64")]
2331     ///
2332     /// X86 specific call that sets the vcpu's current "xsave struct".
2333     ///
2334     fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
2335         let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
2336         self.fd
2337             .set_xsave(&xsave)
2338             .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
2339     }
2340 
2341     #[cfg(target_arch = "x86_64")]
2342     ///
2343     /// X86 specific call that returns the vcpu's current "xcrs".
2344     ///
2345     fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
2346         self.fd
2347             .get_xcrs()
2348             .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
2349     }
2350 
2351     #[cfg(target_arch = "x86_64")]
2352     ///
2353     /// X86 specific call that sets the vcpu's current "xcrs".
2354     ///
2355     fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
2356         self.fd
2357             .set_xcrs(xcrs)
2358             .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
2359     }
2360 
2361     #[cfg(target_arch = "x86_64")]
2362     ///
2363     /// Returns currently pending exceptions, interrupts, and NMIs as well as related
2364     /// states of the vcpu.
2365     ///
2366     fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
2367         self.fd
2368             .get_vcpu_events()
2369             .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
2370     }
2371 
2372     #[cfg(target_arch = "x86_64")]
2373     ///
2374     /// Sets pending exceptions, interrupts, and NMIs as well as related states
2375     /// of the vcpu.
2376     ///
2377     fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
2378         self.fd
2379             .set_vcpu_events(events)
2380             .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
2381     }
2382 }
2383