xref: /cloud-hypervisor/hypervisor/src/kvm/mod.rs (revision 1968805ba291ae08e07abf0ef8c0ade4cf11ab68)
1 // Copyright © 2024 Institute of Software, CAS. All rights reserved.
2 //
3 // Copyright © 2019 Intel Corporation
4 //
5 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
6 //
7 // Copyright © 2020, Microsoft Corporation
8 //
9 // Copyright 2018-2019 CrowdStrike, Inc.
10 //
11 //
12 
13 use std::any::Any;
14 use std::collections::HashMap;
15 #[cfg(target_arch = "x86_64")]
16 use std::fs::File;
17 #[cfg(target_arch = "x86_64")]
18 use std::os::unix::io::AsRawFd;
19 #[cfg(feature = "tdx")]
20 use std::os::unix::io::RawFd;
21 use std::result;
22 #[cfg(target_arch = "x86_64")]
23 use std::sync::atomic::{AtomicBool, Ordering};
24 use std::sync::{Arc, Mutex, RwLock};
25 
26 use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
27 use vmm_sys_util::eventfd::EventFd;
28 
29 #[cfg(target_arch = "aarch64")]
30 use crate::aarch64::gic::KvmGicV3Its;
31 #[cfg(target_arch = "aarch64")]
32 pub use crate::aarch64::{check_required_kvm_extensions, is_system_register, VcpuKvmState};
33 #[cfg(target_arch = "aarch64")]
34 use crate::arch::aarch64::gic::{Vgic, VgicConfig};
35 #[cfg(target_arch = "riscv64")]
36 use crate::arch::riscv64::aia::{Vaia, VaiaConfig};
37 #[cfg(target_arch = "riscv64")]
38 use crate::riscv64::aia::KvmAiaImsics;
39 #[cfg(target_arch = "riscv64")]
40 pub use crate::riscv64::{
41     aia::AiaImsicsState as AiaState, check_required_kvm_extensions, is_non_core_register,
42     VcpuKvmState,
43 };
44 use crate::vm::{self, InterruptSourceConfig, VmOps};
45 #[cfg(target_arch = "aarch64")]
46 use crate::{arm64_core_reg_id, offset_of};
47 use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType};
48 #[cfg(target_arch = "riscv64")]
49 use crate::{offset_of, riscv64_reg_id};
50 // x86_64 dependencies
51 #[cfg(target_arch = "x86_64")]
52 pub mod x86_64;
53 #[cfg(target_arch = "x86_64")]
54 use kvm_bindings::{
55     kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
56     KVM_GUESTDBG_USE_HW_BP,
57 };
58 #[cfg(target_arch = "x86_64")]
59 use x86_64::check_required_kvm_extensions;
60 #[cfg(target_arch = "x86_64")]
61 pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState};
62 
63 #[cfg(target_arch = "x86_64")]
64 use crate::arch::x86::{
65     CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, XsaveState, NUM_IOAPIC_PINS,
66 };
67 #[cfg(target_arch = "x86_64")]
68 use crate::ClockData;
69 use crate::{
70     CpuState, IoEventAddress, IrqRoutingEntry, MpState, StandardRegisters, UserMemoryRegion,
71     USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
72 };
73 // aarch64 dependencies
74 #[cfg(target_arch = "aarch64")]
75 pub mod aarch64;
76 // riscv64 dependencies
77 #[cfg(target_arch = "riscv64")]
78 pub mod riscv64;
79 #[cfg(target_arch = "aarch64")]
80 use std::mem;
81 
82 ///
83 /// Export generically-named wrappers of kvm-bindings for Unix-based platforms
84 ///
85 #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
86 pub use kvm_bindings::kvm_vcpu_events as VcpuEvents;
87 pub use kvm_bindings::{
88     kvm_clock_data, kvm_create_device, kvm_create_device as CreateDevice,
89     kvm_device_attr as DeviceAttr, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug,
90     kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_run, kvm_userspace_memory_region,
91     KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
92     KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
93 };
94 #[cfg(target_arch = "aarch64")]
95 use kvm_bindings::{
96     kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64,
97     KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK,
98     KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK,
99     KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
100 };
101 #[cfg(target_arch = "riscv64")]
102 use kvm_bindings::{kvm_riscv_core, user_regs_struct, KVM_REG_RISCV_CORE};
103 #[cfg(feature = "tdx")]
104 use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO};
105 pub use kvm_ioctls::{Cap, Kvm, VcpuExit};
106 use thiserror::Error;
107 use vfio_ioctls::VfioDeviceFd;
108 #[cfg(feature = "tdx")]
109 use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr};
110 pub use {kvm_bindings, kvm_ioctls};
111 
112 #[cfg(target_arch = "aarch64")]
113 use crate::arch::aarch64::regs;
114 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
115 use crate::RegList;
116 
117 #[cfg(target_arch = "x86_64")]
118 const KVM_CAP_SGX_ATTRIBUTE: u32 = 196;
119 
120 #[cfg(target_arch = "x86_64")]
121 use vmm_sys_util::ioctl_io_nr;
122 #[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))]
123 use vmm_sys_util::ioctl_ioc_nr;
124 
125 #[cfg(target_arch = "x86_64")]
126 ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a);
127 
128 #[cfg(feature = "tdx")]
129 const KVM_EXIT_TDX: u32 = 50;
130 #[cfg(feature = "tdx")]
131 const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002;
132 #[cfg(feature = "tdx")]
133 const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004;
134 #[cfg(feature = "tdx")]
135 const TDG_VP_VMCALL_SUCCESS: u64 = 0;
136 #[cfg(feature = "tdx")]
137 const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000;
138 
139 #[cfg(feature = "tdx")]
140 ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);
141 
142 #[cfg(feature = "tdx")]
143 #[repr(u32)]
144 enum TdxCommand {
145     Capabilities = 0,
146     InitVm,
147     InitVcpu,
148     InitMemRegion,
149     Finalize,
150 }
151 
152 #[cfg(feature = "tdx")]
153 pub enum TdxExitDetails {
154     GetQuote,
155     SetupEventNotifyInterrupt,
156 }
157 
158 #[cfg(feature = "tdx")]
159 pub enum TdxExitStatus {
160     Success,
161     InvalidOperand,
162 }
163 
164 #[cfg(feature = "tdx")]
165 const TDX_MAX_NR_CPUID_CONFIGS: usize = 6;
166 
167 #[cfg(feature = "tdx")]
168 #[repr(C)]
169 #[derive(Debug, Default)]
170 pub struct TdxCpuidConfig {
171     pub leaf: u32,
172     pub sub_leaf: u32,
173     pub eax: u32,
174     pub ebx: u32,
175     pub ecx: u32,
176     pub edx: u32,
177 }
178 
179 #[cfg(feature = "tdx")]
180 #[repr(C)]
181 #[derive(Debug, Default)]
182 pub struct TdxCapabilities {
183     pub attrs_fixed0: u64,
184     pub attrs_fixed1: u64,
185     pub xfam_fixed0: u64,
186     pub xfam_fixed1: u64,
187     pub nr_cpuid_configs: u32,
188     pub padding: u32,
189     pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS],
190 }
191 
192 #[cfg(feature = "tdx")]
193 #[derive(Copy, Clone)]
194 pub struct KvmTdxExit {
195     pub type_: u32,
196     pub pad: u32,
197     pub u: KvmTdxExitU,
198 }
199 
200 #[cfg(feature = "tdx")]
201 #[repr(C)]
202 #[derive(Copy, Clone)]
203 pub union KvmTdxExitU {
204     pub vmcall: KvmTdxExitVmcall,
205 }
206 
207 #[cfg(feature = "tdx")]
208 #[repr(C)]
209 #[derive(Debug, Default, Copy, Clone, PartialEq)]
210 pub struct KvmTdxExitVmcall {
211     pub type_: u64,
212     pub subfunction: u64,
213     pub reg_mask: u64,
214     pub in_r12: u64,
215     pub in_r13: u64,
216     pub in_r14: u64,
217     pub in_r15: u64,
218     pub in_rbx: u64,
219     pub in_rdi: u64,
220     pub in_rsi: u64,
221     pub in_r8: u64,
222     pub in_r9: u64,
223     pub in_rdx: u64,
224     pub status_code: u64,
225     pub out_r11: u64,
226     pub out_r12: u64,
227     pub out_r13: u64,
228     pub out_r14: u64,
229     pub out_r15: u64,
230     pub out_rbx: u64,
231     pub out_rdi: u64,
232     pub out_rsi: u64,
233     pub out_r8: u64,
234     pub out_r9: u64,
235     pub out_rdx: u64,
236 }
237 
238 impl From<kvm_userspace_memory_region> for UserMemoryRegion {
239     fn from(region: kvm_userspace_memory_region) -> Self {
240         let mut flags = USER_MEMORY_REGION_READ;
241         if region.flags & KVM_MEM_READONLY == 0 {
242             flags |= USER_MEMORY_REGION_WRITE;
243         }
244         if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 {
245             flags |= USER_MEMORY_REGION_LOG_DIRTY;
246         }
247 
248         UserMemoryRegion {
249             slot: region.slot,
250             guest_phys_addr: region.guest_phys_addr,
251             memory_size: region.memory_size,
252             userspace_addr: region.userspace_addr,
253             flags,
254         }
255     }
256 }
257 
258 impl From<UserMemoryRegion> for kvm_userspace_memory_region {
259     fn from(region: UserMemoryRegion) -> Self {
260         assert!(
261             region.flags & USER_MEMORY_REGION_READ != 0,
262             "KVM mapped memory is always readable"
263         );
264 
265         let mut flags = 0;
266         if region.flags & USER_MEMORY_REGION_WRITE == 0 {
267             flags |= KVM_MEM_READONLY;
268         }
269         if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
270             flags |= KVM_MEM_LOG_DIRTY_PAGES;
271         }
272 
273         kvm_userspace_memory_region {
274             slot: region.slot,
275             guest_phys_addr: region.guest_phys_addr,
276             memory_size: region.memory_size,
277             userspace_addr: region.userspace_addr,
278             flags,
279         }
280     }
281 }
282 
283 impl From<kvm_mp_state> for MpState {
284     fn from(s: kvm_mp_state) -> Self {
285         MpState::Kvm(s)
286     }
287 }
288 
289 impl From<MpState> for kvm_mp_state {
290     fn from(ms: MpState) -> Self {
291         match ms {
292             MpState::Kvm(s) => s,
293             /* Needed in case other hypervisors are enabled */
294             #[allow(unreachable_patterns)]
295             _ => panic!("CpuState is not valid"),
296         }
297     }
298 }
299 
300 impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
301     fn from(a: kvm_ioctls::IoEventAddress) -> Self {
302         match a {
303             kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
304             kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
305         }
306     }
307 }
308 
309 impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
310     fn from(a: IoEventAddress) -> Self {
311         match a {
312             IoEventAddress::Pio(x) => Self::Pio(x),
313             IoEventAddress::Mmio(x) => Self::Mmio(x),
314         }
315     }
316 }
317 
318 impl From<VcpuKvmState> for CpuState {
319     fn from(s: VcpuKvmState) -> Self {
320         CpuState::Kvm(s)
321     }
322 }
323 
324 impl From<CpuState> for VcpuKvmState {
325     fn from(s: CpuState) -> Self {
326         match s {
327             CpuState::Kvm(s) => s,
328             /* Needed in case other hypervisors are enabled */
329             #[allow(unreachable_patterns)]
330             _ => panic!("CpuState is not valid"),
331         }
332     }
333 }
334 
335 #[cfg(target_arch = "x86_64")]
336 impl From<kvm_clock_data> for ClockData {
337     fn from(d: kvm_clock_data) -> Self {
338         ClockData::Kvm(d)
339     }
340 }
341 
342 #[cfg(target_arch = "x86_64")]
343 impl From<ClockData> for kvm_clock_data {
344     fn from(ms: ClockData) -> Self {
345         match ms {
346             ClockData::Kvm(s) => s,
347             /* Needed in case other hypervisors are enabled */
348             #[allow(unreachable_patterns)]
349             _ => panic!("CpuState is not valid"),
350         }
351     }
352 }
353 
354 impl From<kvm_bindings::kvm_one_reg> for crate::Register {
355     fn from(s: kvm_bindings::kvm_one_reg) -> Self {
356         crate::Register::Kvm(s)
357     }
358 }
359 
360 impl From<crate::Register> for kvm_bindings::kvm_one_reg {
361     fn from(e: crate::Register) -> Self {
362         match e {
363             crate::Register::Kvm(e) => e,
364             /* Needed in case other hypervisors are enabled */
365             #[allow(unreachable_patterns)]
366             _ => panic!("Register is not valid"),
367         }
368     }
369 }
370 
371 #[cfg(target_arch = "aarch64")]
372 impl From<kvm_bindings::kvm_vcpu_init> for crate::VcpuInit {
373     fn from(s: kvm_bindings::kvm_vcpu_init) -> Self {
374         crate::VcpuInit::Kvm(s)
375     }
376 }
377 
378 #[cfg(target_arch = "aarch64")]
379 impl From<crate::VcpuInit> for kvm_bindings::kvm_vcpu_init {
380     fn from(e: crate::VcpuInit) -> Self {
381         match e {
382             crate::VcpuInit::Kvm(e) => e,
383             /* Needed in case other hypervisors are enabled */
384             #[allow(unreachable_patterns)]
385             _ => panic!("VcpuInit is not valid"),
386         }
387     }
388 }
389 
390 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
391 impl From<kvm_bindings::RegList> for crate::RegList {
392     fn from(s: kvm_bindings::RegList) -> Self {
393         crate::RegList::Kvm(s)
394     }
395 }
396 
397 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
398 impl From<crate::RegList> for kvm_bindings::RegList {
399     fn from(e: crate::RegList) -> Self {
400         match e {
401             crate::RegList::Kvm(e) => e,
402             /* Needed in case other hypervisors are enabled */
403             #[allow(unreachable_patterns)]
404             _ => panic!("RegList is not valid"),
405         }
406     }
407 }
408 
409 #[cfg(not(target_arch = "riscv64"))]
410 impl From<kvm_bindings::kvm_regs> for crate::StandardRegisters {
411     fn from(s: kvm_bindings::kvm_regs) -> Self {
412         crate::StandardRegisters::Kvm(s)
413     }
414 }
415 
416 #[cfg(not(target_arch = "riscv64"))]
417 impl From<crate::StandardRegisters> for kvm_bindings::kvm_regs {
418     fn from(e: crate::StandardRegisters) -> Self {
419         match e {
420             crate::StandardRegisters::Kvm(e) => e,
421             /* Needed in case other hypervisors are enabled */
422             #[allow(unreachable_patterns)]
423             _ => panic!("StandardRegisters are not valid"),
424         }
425     }
426 }
427 
428 #[cfg(target_arch = "riscv64")]
429 impl From<kvm_bindings::kvm_riscv_core> for crate::StandardRegisters {
430     fn from(s: kvm_bindings::kvm_riscv_core) -> Self {
431         crate::StandardRegisters::Kvm(s)
432     }
433 }
434 
435 #[cfg(target_arch = "riscv64")]
436 impl From<crate::StandardRegisters> for kvm_bindings::kvm_riscv_core {
437     fn from(e: crate::StandardRegisters) -> Self {
438         match e {
439             crate::StandardRegisters::Kvm(e) => e,
440             /* Needed in case other hypervisors are enabled */
441             #[allow(unreachable_patterns)]
442             _ => panic!("StandardRegisters are not valid"),
443         }
444     }
445 }
446 
447 impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
448     fn from(s: kvm_irq_routing_entry) -> Self {
449         IrqRoutingEntry::Kvm(s)
450     }
451 }
452 
453 impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
454     fn from(e: IrqRoutingEntry) -> Self {
455         match e {
456             IrqRoutingEntry::Kvm(e) => e,
457             /* Needed in case other hypervisors are enabled */
458             #[allow(unreachable_patterns)]
459             _ => panic!("IrqRoutingEntry is not valid"),
460         }
461     }
462 }
463 
464 struct KvmDirtyLogSlot {
465     slot: u32,
466     guest_phys_addr: u64,
467     memory_size: u64,
468     userspace_addr: u64,
469 }
470 
471 /// Wrapper over KVM VM ioctls.
472 pub struct KvmVm {
473     fd: Arc<VmFd>,
474     #[cfg(target_arch = "x86_64")]
475     msrs: Vec<MsrEntry>,
476     dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>,
477 }
478 
479 impl KvmVm {
480     ///
481     /// Creates an emulated device in the kernel.
482     ///
483     /// See the documentation for `KVM_CREATE_DEVICE`.
484     fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> {
485         let device_fd = self
486             .fd
487             .create_device(device)
488             .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
489         Ok(VfioDeviceFd::new_from_kvm(device_fd))
490     }
491     /// Checks if a particular `Cap` is available.
492     pub fn check_extension(&self, c: Cap) -> bool {
493         self.fd.check_extension(c)
494     }
495 }
496 
497 /// Implementation of Vm trait for KVM
498 ///
499 /// # Examples
500 ///
501 /// ```
502 /// # use hypervisor::kvm::KvmHypervisor;
503 /// # use std::sync::Arc;
504 /// let kvm = KvmHypervisor::new().unwrap();
505 /// let hypervisor = Arc::new(kvm);
506 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
507 /// ```
508 impl vm::Vm for KvmVm {
509     #[cfg(target_arch = "x86_64")]
510     ///
511     /// Sets the address of the one-page region in the VM's address space.
512     ///
513     fn set_identity_map_address(&self, address: u64) -> vm::Result<()> {
514         self.fd
515             .set_identity_map_address(address)
516             .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into()))
517     }
518 
519     #[cfg(target_arch = "x86_64")]
520     ///
521     /// Sets the address of the three-page region in the VM's address space.
522     ///
523     fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
524         self.fd
525             .set_tss_address(offset)
526             .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
527     }
528 
529     #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
530     ///
531     /// Creates an in-kernel interrupt controller.
532     ///
533     fn create_irq_chip(&self) -> vm::Result<()> {
534         self.fd
535             .create_irq_chip()
536             .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
537     }
538 
539     ///
540     /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
541     ///
542     fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
543         self.fd
544             .register_irqfd(fd, gsi)
545             .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
546     }
547 
548     ///
549     /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
550     ///
551     fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
552         self.fd
553             .unregister_irqfd(fd, gsi)
554             .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
555     }
556 
557     ///
558     /// Creates a VcpuFd object from a vcpu RawFd.
559     ///
560     fn create_vcpu(
561         &self,
562         id: u8,
563         vm_ops: Option<Arc<dyn VmOps>>,
564     ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
565         let fd = self
566             .fd
567             .create_vcpu(id as u64)
568             .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
569         let vcpu = KvmVcpu {
570             fd: Arc::new(Mutex::new(fd)),
571             #[cfg(target_arch = "x86_64")]
572             msrs: self.msrs.clone(),
573             vm_ops,
574             #[cfg(target_arch = "x86_64")]
575             hyperv_synic: AtomicBool::new(false),
576         };
577         Ok(Arc::new(vcpu))
578     }
579 
580     #[cfg(target_arch = "aarch64")]
581     ///
582     /// Creates a virtual GIC device.
583     ///
584     fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
585         let gic_device = KvmGicV3Its::new(self, config)
586             .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?;
587         Ok(Arc::new(Mutex::new(gic_device)))
588     }
589 
590     #[cfg(target_arch = "riscv64")]
591     ///
592     /// Creates a virtual AIA device.
593     ///
594     fn create_vaia(&self, config: VaiaConfig) -> vm::Result<Arc<Mutex<dyn Vaia>>> {
595         let aia_device = KvmAiaImsics::new(self, config)
596             .map_err(|e| vm::HypervisorVmError::CreateVaia(anyhow!("Vaia error {:?}", e)))?;
597         Ok(Arc::new(Mutex::new(aia_device)))
598     }
599 
600     ///
601     /// Registers an event to be signaled whenever a certain address is written to.
602     ///
603     fn register_ioevent(
604         &self,
605         fd: &EventFd,
606         addr: &IoEventAddress,
607         datamatch: Option<vm::DataMatch>,
608     ) -> vm::Result<()> {
609         let addr = &kvm_ioctls::IoEventAddress::from(*addr);
610         if let Some(dm) = datamatch {
611             match dm {
612                 vm::DataMatch::DataMatch32(kvm_dm32) => self
613                     .fd
614                     .register_ioevent(fd, addr, kvm_dm32)
615                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
616                 vm::DataMatch::DataMatch64(kvm_dm64) => self
617                     .fd
618                     .register_ioevent(fd, addr, kvm_dm64)
619                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
620             }
621         } else {
622             self.fd
623                 .register_ioevent(fd, addr, NoDatamatch)
624                 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
625         }
626     }
627 
628     ///
629     /// Unregisters an event from a certain address it has been previously registered to.
630     ///
631     fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
632         let addr = &kvm_ioctls::IoEventAddress::from(*addr);
633         self.fd
634             .unregister_ioevent(fd, addr, NoDatamatch)
635             .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
636     }
637 
638     ///
639     /// Constructs a routing entry
640     ///
641     fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
642         match &config {
643             InterruptSourceConfig::MsiIrq(cfg) => {
644                 let mut kvm_route = kvm_irq_routing_entry {
645                     gsi,
646                     type_: KVM_IRQ_ROUTING_MSI,
647                     ..Default::default()
648                 };
649 
650                 kvm_route.u.msi.address_lo = cfg.low_addr;
651                 kvm_route.u.msi.address_hi = cfg.high_addr;
652                 kvm_route.u.msi.data = cfg.data;
653 
654                 if self.check_extension(crate::kvm::Cap::MsiDevid) {
655                     // On AArch64, there is limitation on the range of the 'devid',
656                     // it cannot be greater than 65536 (the max of u16).
657                     //
658                     // BDF cannot be used directly, because 'segment' is in high
659                     // 16 bits. The layout of the u32 BDF is:
660                     // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --|
661                     // |      segment    |     bus    |   device   |  function  |
662                     //
663                     // Now that we support 1 bus only in a segment, we can build a
664                     // 'devid' by replacing the 'bus' bits with the low 8 bits of
665                     // 'segment' data.
666                     // This way we can resolve the range checking problem and give
667                     // different `devid` to all the devices. Limitation is that at
668                     // most 256 segments can be supported.
669                     //
670                     let modified_devid = ((cfg.devid & 0x00ff_0000) >> 8) | cfg.devid & 0xff;
671 
672                     kvm_route.flags = KVM_MSI_VALID_DEVID;
673                     kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid;
674                 }
675                 kvm_route.into()
676             }
677             InterruptSourceConfig::LegacyIrq(cfg) => {
678                 let mut kvm_route = kvm_irq_routing_entry {
679                     gsi,
680                     type_: KVM_IRQ_ROUTING_IRQCHIP,
681                     ..Default::default()
682                 };
683                 kvm_route.u.irqchip.irqchip = cfg.irqchip;
684                 kvm_route.u.irqchip.pin = cfg.pin;
685 
686                 kvm_route.into()
687             }
688         }
689     }
690 
691     ///
692     /// Sets the GSI routing table entries, overwriting any previously set
693     /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
694     ///
695     fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
696         let mut irq_routing =
697             vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
698         irq_routing[0].nr = entries.len() as u32;
699         irq_routing[0].flags = 0;
700         let entries: Vec<kvm_irq_routing_entry> = entries
701             .iter()
702             .map(|entry| match entry {
703                 IrqRoutingEntry::Kvm(e) => *e,
704                 #[allow(unreachable_patterns)]
705                 _ => panic!("IrqRoutingEntry type is wrong"),
706             })
707             .collect();
708 
709         // SAFETY: irq_routing initialized with entries.len() and now it is being turned into
710         // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
711         // everything from entries.
712         unsafe {
713             let entries_slice: &mut [kvm_irq_routing_entry] =
714                 irq_routing[0].entries.as_mut_slice(entries.len());
715             entries_slice.copy_from_slice(&entries);
716         }
717 
718         self.fd
719             .set_gsi_routing(&irq_routing[0])
720             .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
721     }
722 
723     ///
724     /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
725     ///
726     fn make_user_memory_region(
727         &self,
728         slot: u32,
729         guest_phys_addr: u64,
730         memory_size: u64,
731         userspace_addr: u64,
732         readonly: bool,
733         log_dirty_pages: bool,
734     ) -> UserMemoryRegion {
735         kvm_userspace_memory_region {
736             slot,
737             guest_phys_addr,
738             memory_size,
739             userspace_addr,
740             flags: if readonly { KVM_MEM_READONLY } else { 0 }
741                 | if log_dirty_pages {
742                     KVM_MEM_LOG_DIRTY_PAGES
743                 } else {
744                     0
745                 },
746         }
747         .into()
748     }
749 
750     ///
751     /// Creates a guest physical memory region.
752     ///
753     fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
754         let mut region: kvm_userspace_memory_region = user_memory_region.into();
755 
756         if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 {
757             if (region.flags & KVM_MEM_READONLY) != 0 {
758                 return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!(
759                     "Error creating regions with both 'dirty-pages-log' and 'read-only'."
760                 )));
761             }
762 
763             // Keep track of the regions that need dirty pages log
764             self.dirty_log_slots.write().unwrap().insert(
765                 region.slot,
766                 KvmDirtyLogSlot {
767                     slot: region.slot,
768                     guest_phys_addr: region.guest_phys_addr,
769                     memory_size: region.memory_size,
770                     userspace_addr: region.userspace_addr,
771                 },
772             );
773 
774             // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`.
775             // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`.
776             region.flags = 0;
777         }
778 
779         // SAFETY: Safe because guest regions are guaranteed not to overlap.
780         unsafe {
781             self.fd
782                 .set_user_memory_region(region)
783                 .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
784         }
785     }
786 
787     ///
788     /// Removes a guest physical memory region.
789     ///
790     fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
791         let mut region: kvm_userspace_memory_region = user_memory_region.into();
792 
793         // Remove the corresponding entry from "self.dirty_log_slots" if needed
794         self.dirty_log_slots.write().unwrap().remove(&region.slot);
795 
796         // Setting the size to 0 means "remove"
797         region.memory_size = 0;
798         // SAFETY: Safe because guest regions are guaranteed not to overlap.
799         unsafe {
800             self.fd
801                 .set_user_memory_region(region)
802                 .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))
803         }
804     }
805 
806     ///
807     /// Returns the preferred CPU target type which can be emulated by KVM on underlying host.
808     ///
809     #[cfg(target_arch = "aarch64")]
810     fn get_preferred_target(&self, kvi: &mut crate::VcpuInit) -> vm::Result<()> {
811         let mut kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();
812         self.fd
813             .get_preferred_target(&mut kvm_kvi)
814             .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))?;
815         *kvi = kvm_kvi.into();
816         Ok(())
817     }
818 
819     #[cfg(target_arch = "x86_64")]
820     fn enable_split_irq(&self) -> vm::Result<()> {
821         // Create split irqchip
822         // Only the local APIC is emulated in kernel, both PICs and IOAPIC
823         // are not.
824         let mut cap = kvm_enable_cap {
825             cap: KVM_CAP_SPLIT_IRQCHIP,
826             ..Default::default()
827         };
828         cap.args[0] = NUM_IOAPIC_PINS as u64;
829         self.fd
830             .enable_cap(&cap)
831             .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
832         Ok(())
833     }
834 
835     #[cfg(target_arch = "x86_64")]
836     fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> {
837         let mut cap = kvm_enable_cap {
838             cap: KVM_CAP_SGX_ATTRIBUTE,
839             ..Default::default()
840         };
841         cap.args[0] = file.as_raw_fd() as u64;
842         self.fd
843             .enable_cap(&cap)
844             .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?;
845         Ok(())
846     }
847 
848     /// Retrieve guest clock.
849     #[cfg(target_arch = "x86_64")]
850     fn get_clock(&self) -> vm::Result<ClockData> {
851         Ok(self
852             .fd
853             .get_clock()
854             .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?
855             .into())
856     }
857 
858     /// Set guest clock.
859     #[cfg(target_arch = "x86_64")]
860     fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
861         let data = (*data).into();
862         self.fd
863             .set_clock(&data)
864             .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
865     }
866 
867     /// Create a device that is used for passthrough
868     fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
869         let mut vfio_dev = kvm_create_device {
870             type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
871             fd: 0,
872             flags: 0,
873         };
874 
875         self.create_device(&mut vfio_dev)
876             .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
877     }
878 
879     ///
880     /// Start logging dirty pages
881     ///
882     fn start_dirty_log(&self) -> vm::Result<()> {
883         let dirty_log_slots = self.dirty_log_slots.read().unwrap();
884         for (_, s) in dirty_log_slots.iter() {
885             let region = kvm_userspace_memory_region {
886                 slot: s.slot,
887                 guest_phys_addr: s.guest_phys_addr,
888                 memory_size: s.memory_size,
889                 userspace_addr: s.userspace_addr,
890                 flags: KVM_MEM_LOG_DIRTY_PAGES,
891             };
892             // SAFETY: Safe because guest regions are guaranteed not to overlap.
893             unsafe {
894                 self.fd
895                     .set_user_memory_region(region)
896                     .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
897             }
898         }
899 
900         Ok(())
901     }
902 
903     ///
904     /// Stop logging dirty pages
905     ///
906     fn stop_dirty_log(&self) -> vm::Result<()> {
907         let dirty_log_slots = self.dirty_log_slots.read().unwrap();
908         for (_, s) in dirty_log_slots.iter() {
909             let region = kvm_userspace_memory_region {
910                 slot: s.slot,
911                 guest_phys_addr: s.guest_phys_addr,
912                 memory_size: s.memory_size,
913                 userspace_addr: s.userspace_addr,
914                 flags: 0,
915             };
916             // SAFETY: Safe because guest regions are guaranteed not to overlap.
917             unsafe {
918                 self.fd
919                     .set_user_memory_region(region)
920                     .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
921             }
922         }
923 
924         Ok(())
925     }
926 
927     ///
928     /// Get dirty pages bitmap (one bit per page)
929     ///
930     fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
931         self.fd
932             .get_dirty_log(slot, memory_size as usize)
933             .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
934     }
935 
936     ///
937     /// Initialize TDX for this VM
938     ///
939     #[cfg(feature = "tdx")]
940     fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> {
941         const TDX_ATTR_SEPT_VE_DISABLE: usize = 28;
942 
943         let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
944             cpuid.iter().map(|e| (*e).into()).collect();
945         cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default());
946 
947         #[repr(C)]
948         struct TdxInitVm {
949             attributes: u64,
950             max_vcpus: u32,
951             padding: u32,
952             mrconfigid: [u64; 6],
953             mrowner: [u64; 6],
954             mrownerconfig: [u64; 6],
955             cpuid_nent: u32,
956             cpuid_padding: u32,
957             cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256],
958         }
959         let data = TdxInitVm {
960             attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE,
961             max_vcpus,
962             padding: 0,
963             mrconfigid: [0; 6],
964             mrowner: [0; 6],
965             mrownerconfig: [0; 6],
966             cpuid_nent: cpuid.len() as u32,
967             cpuid_padding: 0,
968             cpuid_entries: cpuid.as_slice().try_into().unwrap(),
969         };
970 
971         tdx_command(
972             &self.fd.as_raw_fd(),
973             TdxCommand::InitVm,
974             0,
975             &data as *const _ as u64,
976         )
977         .map_err(vm::HypervisorVmError::InitializeTdx)
978     }
979 
980     ///
981     /// Finalize the TDX setup for this VM
982     ///
983     #[cfg(feature = "tdx")]
984     fn tdx_finalize(&self) -> vm::Result<()> {
985         tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
986             .map_err(vm::HypervisorVmError::FinalizeTdx)
987     }
988 
989     ///
990     /// Initialize memory regions for the TDX VM
991     ///
992     #[cfg(feature = "tdx")]
993     fn tdx_init_memory_region(
994         &self,
995         host_address: u64,
996         guest_address: u64,
997         size: u64,
998         measure: bool,
999     ) -> vm::Result<()> {
1000         #[repr(C)]
1001         struct TdxInitMemRegion {
1002             host_address: u64,
1003             guest_address: u64,
1004             pages: u64,
1005         }
1006         let data = TdxInitMemRegion {
1007             host_address,
1008             guest_address,
1009             pages: size / 4096,
1010         };
1011 
1012         tdx_command(
1013             &self.fd.as_raw_fd(),
1014             TdxCommand::InitMemRegion,
1015             u32::from(measure),
1016             &data as *const _ as u64,
1017         )
1018         .map_err(vm::HypervisorVmError::InitMemRegionTdx)
1019     }
1020 
1021     /// Downcast to the underlying KvmVm type
1022     fn as_any(&self) -> &dyn Any {
1023         self
1024     }
1025 }
1026 
1027 #[cfg(feature = "tdx")]
1028 fn tdx_command(
1029     fd: &RawFd,
1030     command: TdxCommand,
1031     flags: u32,
1032     data: u64,
1033 ) -> std::result::Result<(), std::io::Error> {
1034     #[repr(C)]
1035     struct TdxIoctlCmd {
1036         command: TdxCommand,
1037         flags: u32,
1038         data: u64,
1039         error: u64,
1040         unused: u64,
1041     }
1042     let cmd = TdxIoctlCmd {
1043         command,
1044         flags,
1045         data,
1046         error: 0,
1047         unused: 0,
1048     };
1049     // SAFETY: FFI call. All input parameters are valid.
1050     let ret = unsafe {
1051         ioctl_with_val(
1052             fd,
1053             KVM_MEMORY_ENCRYPT_OP(),
1054             &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong,
1055         )
1056     };
1057 
1058     if ret < 0 {
1059         return Err(std::io::Error::last_os_error());
1060     }
1061     Ok(())
1062 }
1063 
1064 /// Wrapper over KVM system ioctls.
1065 pub struct KvmHypervisor {
1066     kvm: Kvm,
1067 }
1068 
1069 impl KvmHypervisor {
1070     #[cfg(target_arch = "x86_64")]
1071     ///
1072     /// Retrieve the list of MSRs supported by the hypervisor.
1073     ///
1074     fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
1075         self.kvm
1076             .get_msr_index_list()
1077             .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
1078     }
1079 }
1080 
1081 /// Enum for KVM related error
1082 #[derive(Debug, Error)]
1083 pub enum KvmError {
1084     #[error("Capability missing: {0:?}")]
1085     CapabilityMissing(Cap),
1086 }
1087 
1088 pub type KvmResult<T> = result::Result<T, KvmError>;
1089 
1090 impl KvmHypervisor {
1091     /// Create a hypervisor based on Kvm
1092     #[allow(clippy::new_ret_no_self)]
1093     pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
1094         let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
1095         let api_version = kvm_obj.get_api_version();
1096 
1097         if api_version != kvm_bindings::KVM_API_VERSION as i32 {
1098             return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
1099         }
1100 
1101         Ok(Arc::new(KvmHypervisor { kvm: kvm_obj }))
1102     }
1103 
1104     /// Check if the hypervisor is available
1105     pub fn is_available() -> hypervisor::Result<bool> {
1106         match std::fs::metadata("/dev/kvm") {
1107             Ok(_) => Ok(true),
1108             Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
1109             Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
1110                 err.into(),
1111             )),
1112         }
1113     }
1114 }
1115 
1116 /// Implementation of Hypervisor trait for KVM
1117 ///
1118 /// # Examples
1119 ///
1120 /// ```
1121 /// # use hypervisor::kvm::KvmHypervisor;
1122 /// # use std::sync::Arc;
1123 /// let kvm = KvmHypervisor::new().unwrap();
1124 /// let hypervisor = Arc::new(kvm);
1125 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
1126 /// ```
1127 impl hypervisor::Hypervisor for KvmHypervisor {
1128     ///
1129     /// Returns the type of the hypervisor
1130     ///
1131     fn hypervisor_type(&self) -> HypervisorType {
1132         HypervisorType::Kvm
1133     }
1134 
1135     ///
1136     /// Create a Vm of a specific type using the underlying hypervisor, passing memory size
1137     /// Return a hypervisor-agnostic Vm trait object
1138     ///
1139     /// # Examples
1140     ///
1141     /// ```
1142     /// # use hypervisor::kvm::KvmHypervisor;
1143     /// use hypervisor::kvm::KvmVm;
1144     /// let hypervisor = KvmHypervisor::new().unwrap();
1145     /// let vm = hypervisor.create_vm_with_type_and_memory(0).unwrap();
1146     /// ```
1147     fn create_vm_with_type_and_memory(
1148         &self,
1149         vm_type: u64,
1150         #[cfg(feature = "sev_snp")] _mem_size: u64,
1151     ) -> hypervisor::Result<Arc<dyn vm::Vm>> {
1152         self.create_vm_with_type(vm_type)
1153     }
1154 
1155     /// Create a KVM vm object of a specific VM type and return the object as Vm trait object
1156     ///
1157     /// # Examples
1158     ///
1159     /// ```
1160     /// # use hypervisor::kvm::KvmHypervisor;
1161     /// use hypervisor::kvm::KvmVm;
1162     /// let hypervisor = KvmHypervisor::new().unwrap();
1163     /// let vm = hypervisor.create_vm_with_type(0).unwrap();
1164     /// ```
1165     fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> {
1166         let fd: VmFd;
1167         loop {
1168             match self.kvm.create_vm_with_type(vm_type) {
1169                 Ok(res) => fd = res,
1170                 Err(e) => {
1171                     if e.errno() == libc::EINTR {
1172                         // If the error returned is EINTR, which means the
1173                         // ioctl has been interrupted, we have to retry as
1174                         // this can't be considered as a regular error.
1175                         continue;
1176                     } else {
1177                         return Err(hypervisor::HypervisorError::VmCreate(e.into()));
1178                     }
1179                 }
1180             }
1181             break;
1182         }
1183 
1184         let vm_fd = Arc::new(fd);
1185 
1186         #[cfg(target_arch = "x86_64")]
1187         {
1188             let msr_list = self.get_msr_list()?;
1189             let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
1190             let mut msrs: Vec<MsrEntry> = vec![
1191                 MsrEntry {
1192                     ..Default::default()
1193                 };
1194                 num_msrs
1195             ];
1196             let indices = msr_list.as_slice();
1197             for (pos, index) in indices.iter().enumerate() {
1198                 msrs[pos].index = *index;
1199             }
1200 
1201             Ok(Arc::new(KvmVm {
1202                 fd: vm_fd,
1203                 msrs,
1204                 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
1205             }))
1206         }
1207 
1208         #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
1209         {
1210             Ok(Arc::new(KvmVm {
1211                 fd: vm_fd,
1212                 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
1213             }))
1214         }
1215     }
1216 
1217     /// Create a KVM vm object and return the object as Vm trait object
1218     ///
1219     /// # Examples
1220     ///
1221     /// ```
1222     /// # use hypervisor::kvm::KvmHypervisor;
1223     /// use hypervisor::kvm::KvmVm;
1224     /// let hypervisor = KvmHypervisor::new().unwrap();
1225     /// let vm = hypervisor.create_vm().unwrap();
1226     /// ```
1227     fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
1228         #[allow(unused_mut)]
1229         let mut vm_type: u64 = 0; // Create with default platform type
1230 
1231         // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA
1232         // size from the host and use that when creating the VM, which may
1233         // avoid unnecessary VM creation failures.
1234         #[cfg(target_arch = "aarch64")]
1235         if self.kvm.check_extension(Cap::ArmVmIPASize) {
1236             vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap();
1237         }
1238 
1239         self.create_vm_with_type(vm_type)
1240     }
1241 
1242     fn check_required_extensions(&self) -> hypervisor::Result<()> {
1243         check_required_kvm_extensions(&self.kvm)
1244             .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into()))
1245     }
1246 
1247     #[cfg(target_arch = "x86_64")]
1248     ///
1249     /// X86 specific call to get the system supported CPUID values.
1250     ///
1251     fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
1252         let kvm_cpuid = self
1253             .kvm
1254             .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
1255             .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?;
1256 
1257         let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();
1258 
1259         Ok(v)
1260     }
1261 
1262     #[cfg(target_arch = "aarch64")]
1263     ///
1264     /// Retrieve AArch64 host maximum IPA size supported by KVM.
1265     ///
1266     fn get_host_ipa_limit(&self) -> i32 {
1267         self.kvm.get_host_ipa_limit()
1268     }
1269 
1270     ///
1271     /// Retrieve TDX capabilities
1272     ///
1273     #[cfg(feature = "tdx")]
1274     fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> {
1275         let data = TdxCapabilities {
1276             nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32,
1277             ..Default::default()
1278         };
1279 
1280         tdx_command(
1281             &self.kvm.as_raw_fd(),
1282             TdxCommand::Capabilities,
1283             0,
1284             &data as *const _ as u64,
1285         )
1286         .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?;
1287 
1288         Ok(data)
1289     }
1290 
1291     #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
1292     ///
1293     /// Get the number of supported hardware breakpoints
1294     ///
1295     fn get_guest_debug_hw_bps(&self) -> usize {
1296         #[cfg(target_arch = "x86_64")]
1297         {
1298             4
1299         }
1300         #[cfg(target_arch = "aarch64")]
1301         {
1302             self.kvm.get_guest_debug_hw_bps() as usize
1303         }
1304     }
1305 
1306     /// Get maximum number of vCPUs
1307     fn get_max_vcpus(&self) -> u32 {
1308         self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32
1309     }
1310 }
1311 
1312 /// Vcpu struct for KVM
1313 pub struct KvmVcpu {
1314     fd: Arc<Mutex<VcpuFd>>,
1315     #[cfg(target_arch = "x86_64")]
1316     msrs: Vec<MsrEntry>,
1317     vm_ops: Option<Arc<dyn vm::VmOps>>,
1318     #[cfg(target_arch = "x86_64")]
1319     hyperv_synic: AtomicBool,
1320 }
1321 
1322 /// Implementation of Vcpu trait for KVM
1323 ///
1324 /// # Examples
1325 ///
1326 /// ```
1327 /// # use hypervisor::kvm::KvmHypervisor;
1328 /// # use std::sync::Arc;
1329 /// let kvm = KvmHypervisor::new().unwrap();
1330 /// let hypervisor = Arc::new(kvm);
1331 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
1332 /// let vcpu = vm.create_vcpu(0, None).unwrap();
1333 /// ```
1334 impl cpu::Vcpu for KvmVcpu {
1335     ///
1336     /// Returns StandardRegisters with default value set
1337     ///
1338     fn create_standard_regs(&self) -> StandardRegisters {
1339         #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
1340         {
1341             kvm_bindings::kvm_regs::default().into()
1342         }
1343         #[cfg(target_arch = "riscv64")]
1344         {
1345             kvm_bindings::kvm_riscv_core::default().into()
1346         }
1347     }
1348     #[cfg(target_arch = "x86_64")]
1349     ///
1350     /// Returns the vCPU general purpose registers.
1351     ///
1352     fn get_regs(&self) -> cpu::Result<StandardRegisters> {
1353         Ok(self
1354             .fd
1355             .lock()
1356             .unwrap()
1357             .get_regs()
1358             .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
1359             .into())
1360     }
1361 
1362     ///
1363     /// Returns the vCPU general purpose registers.
1364     /// The `KVM_GET_REGS` ioctl is not available on AArch64, `KVM_GET_ONE_REG`
1365     /// is used to get registers one by one.
1366     ///
1367     #[cfg(target_arch = "aarch64")]
1368     fn get_regs(&self) -> cpu::Result<StandardRegisters> {
1369         let mut state = kvm_regs::default();
1370         let mut off = offset_of!(user_pt_regs, regs);
1371         // There are 31 user_pt_regs:
1372         // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
1373         // These actually are the general-purpose registers of the Armv8-a
1374         // architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register).
1375         for i in 0..31 {
1376             let mut bytes = [0_u8; 8];
1377             self.fd
1378                 .lock()
1379                 .unwrap()
1380                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1381                 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1382             state.regs.regs[i] = u64::from_le_bytes(bytes);
1383             off += std::mem::size_of::<u64>();
1384         }
1385 
1386         // We are now entering the "Other register" section of the ARMv8-a architecture.
1387         // First one, stack pointer.
1388         let off = offset_of!(user_pt_regs, sp);
1389         let mut bytes = [0_u8; 8];
1390         self.fd
1391             .lock()
1392             .unwrap()
1393             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1394             .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1395         state.regs.sp = u64::from_le_bytes(bytes);
1396 
1397         // Second one, the program counter.
1398         let off = offset_of!(user_pt_regs, pc);
1399         let mut bytes = [0_u8; 8];
1400         self.fd
1401             .lock()
1402             .unwrap()
1403             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1404             .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1405         state.regs.pc = u64::from_le_bytes(bytes);
1406 
1407         // Next is the processor state.
1408         let off = offset_of!(user_pt_regs, pstate);
1409         let mut bytes = [0_u8; 8];
1410         self.fd
1411             .lock()
1412             .unwrap()
1413             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1414             .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1415         state.regs.pstate = u64::from_le_bytes(bytes);
1416 
1417         // The stack pointer associated with EL1
1418         let off = offset_of!(kvm_regs, sp_el1);
1419         let mut bytes = [0_u8; 8];
1420         self.fd
1421             .lock()
1422             .unwrap()
1423             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1424             .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1425         state.sp_el1 = u64::from_le_bytes(bytes);
1426 
1427         // Exception Link Register for EL1, when taking an exception to EL1, this register
1428         // holds the address to which to return afterwards.
1429         let off = offset_of!(kvm_regs, elr_el1);
1430         let mut bytes = [0_u8; 8];
1431         self.fd
1432             .lock()
1433             .unwrap()
1434             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1435             .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1436         state.elr_el1 = u64::from_le_bytes(bytes);
1437 
1438         // Saved Program Status Registers, there are 5 of them used in the kernel.
1439         let mut off = offset_of!(kvm_regs, spsr);
1440         for i in 0..KVM_NR_SPSR as usize {
1441             let mut bytes = [0_u8; 8];
1442             self.fd
1443                 .lock()
1444                 .unwrap()
1445                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1446                 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1447             state.spsr[i] = u64::from_le_bytes(bytes);
1448             off += std::mem::size_of::<u64>();
1449         }
1450 
1451         // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel:
1452         // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
1453         let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
1454         for i in 0..32 {
1455             let mut bytes = [0_u8; 16];
1456             self.fd
1457                 .lock()
1458                 .unwrap()
1459                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes)
1460                 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1461             state.fp_regs.vregs[i] = u128::from_le_bytes(bytes);
1462             off += mem::size_of::<u128>();
1463         }
1464 
1465         // Floating-point Status Register
1466         let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
1467         let mut bytes = [0_u8; 4];
1468         self.fd
1469             .lock()
1470             .unwrap()
1471             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
1472             .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1473         state.fp_regs.fpsr = u32::from_le_bytes(bytes);
1474 
1475         // Floating-point Control Register
1476         let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
1477         let mut bytes = [0_u8; 4];
1478         self.fd
1479             .lock()
1480             .unwrap()
1481             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
1482             .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1483         state.fp_regs.fpcr = u32::from_le_bytes(bytes);
1484         Ok(state.into())
1485     }
1486 
1487     #[cfg(target_arch = "riscv64")]
1488     ///
1489     /// Returns the RISC-V vCPU core registers.
1490     /// The `KVM_GET_REGS` ioctl is not available on RISC-V 64-bit,
1491     /// `KVM_GET_ONE_REG` is used to get registers one by one.
1492     ///
1493     fn get_regs(&self) -> cpu::Result<StandardRegisters> {
1494         let mut state = kvm_riscv_core::default();
1495 
1496         /// Macro used to extract RISC-V register data from KVM Vcpu according
1497         /// to `$reg_name` provided to `state`.
1498         macro_rules! riscv64_get_one_reg_from_vcpu {
1499             (mode) => {
1500                 let off = offset_of!(kvm_riscv_core, mode);
1501                 let mut bytes = [0_u8; 8];
1502                 self.fd
1503                     .lock()
1504                     .unwrap()
1505                     .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1506                     .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1507                 state.mode = u64::from_le_bytes(bytes);
1508             };
1509             ($reg_name:ident) => {
1510                 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, $reg_name);
1511                 let mut bytes = [0_u8; 8];
1512                 self.fd
1513                     .lock()
1514                     .unwrap()
1515                     .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1516                     .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1517                 state.regs.$reg_name = u64::from_le_bytes(bytes);
1518             };
1519         }
1520 
1521         riscv64_get_one_reg_from_vcpu!(pc);
1522         riscv64_get_one_reg_from_vcpu!(ra);
1523         riscv64_get_one_reg_from_vcpu!(sp);
1524         riscv64_get_one_reg_from_vcpu!(gp);
1525         riscv64_get_one_reg_from_vcpu!(tp);
1526         riscv64_get_one_reg_from_vcpu!(t0);
1527         riscv64_get_one_reg_from_vcpu!(t1);
1528         riscv64_get_one_reg_from_vcpu!(t2);
1529         riscv64_get_one_reg_from_vcpu!(s0);
1530         riscv64_get_one_reg_from_vcpu!(s1);
1531         riscv64_get_one_reg_from_vcpu!(a0);
1532         riscv64_get_one_reg_from_vcpu!(a1);
1533         riscv64_get_one_reg_from_vcpu!(a2);
1534         riscv64_get_one_reg_from_vcpu!(a3);
1535         riscv64_get_one_reg_from_vcpu!(a4);
1536         riscv64_get_one_reg_from_vcpu!(a5);
1537         riscv64_get_one_reg_from_vcpu!(a6);
1538         riscv64_get_one_reg_from_vcpu!(a7);
1539         riscv64_get_one_reg_from_vcpu!(s2);
1540         riscv64_get_one_reg_from_vcpu!(s3);
1541         riscv64_get_one_reg_from_vcpu!(s4);
1542         riscv64_get_one_reg_from_vcpu!(s5);
1543         riscv64_get_one_reg_from_vcpu!(s6);
1544         riscv64_get_one_reg_from_vcpu!(s7);
1545         riscv64_get_one_reg_from_vcpu!(s8);
1546         riscv64_get_one_reg_from_vcpu!(s9);
1547         riscv64_get_one_reg_from_vcpu!(s10);
1548         riscv64_get_one_reg_from_vcpu!(s11);
1549         riscv64_get_one_reg_from_vcpu!(t3);
1550         riscv64_get_one_reg_from_vcpu!(t4);
1551         riscv64_get_one_reg_from_vcpu!(t5);
1552         riscv64_get_one_reg_from_vcpu!(t6);
1553         riscv64_get_one_reg_from_vcpu!(mode);
1554 
1555         Ok(state.into())
1556     }
1557 
1558     #[cfg(target_arch = "x86_64")]
1559     ///
1560     /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
1561     ///
1562     fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
1563         let regs = (*regs).into();
1564         self.fd
1565             .lock()
1566             .unwrap()
1567             .set_regs(&regs)
1568             .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
1569     }
1570 
1571     ///
1572     /// Sets the vCPU general purpose registers.
1573     /// The `KVM_SET_REGS` ioctl is not available on AArch64, so
1574     /// `KVM_SET_ONE_REG` is used to set registers one by one.
1575     ///
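    /// # Example
    ///
    /// A minimal round-trip sketch. On AArch64 the vCPU must be initialized
    /// (here via the `Vm`-level `get_preferred_target()` helper followed by
    /// `vcpu_init()`) before its registers can be accessed:
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let mut kvi = vcpu.create_vcpu_init();
    /// vm.get_preferred_target(&mut kvi).unwrap();
    /// vcpu.vcpu_init(&kvi).unwrap();
    /// let regs = vcpu.get_regs().unwrap();
    /// vcpu.set_regs(&regs).unwrap();
    /// ```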
1576     #[cfg(target_arch = "aarch64")]
1577     fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
1578         // The function sets the registers in the exact order they appear
1579         // in `state`. Look there for additional info on the registers.
1580         let kvm_regs_state: kvm_regs = (*state).into();
1581         let mut off = offset_of!(user_pt_regs, regs);
1582         for i in 0..31 {
1583             self.fd
1584                 .lock()
1585                 .unwrap()
1586                 .set_one_reg(
1587                     arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1588                     &kvm_regs_state.regs.regs[i].to_le_bytes(),
1589                 )
1590                 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1591             off += std::mem::size_of::<u64>();
1592         }
1593 
1594         let off = offset_of!(user_pt_regs, sp);
1595         self.fd
1596             .lock()
1597             .unwrap()
1598             .set_one_reg(
1599                 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1600                 &kvm_regs_state.regs.sp.to_le_bytes(),
1601             )
1602             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1603 
1604         let off = offset_of!(user_pt_regs, pc);
1605         self.fd
1606             .lock()
1607             .unwrap()
1608             .set_one_reg(
1609                 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1610                 &kvm_regs_state.regs.pc.to_le_bytes(),
1611             )
1612             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1613 
1614         let off = offset_of!(user_pt_regs, pstate);
1615         self.fd
1616             .lock()
1617             .unwrap()
1618             .set_one_reg(
1619                 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1620                 &kvm_regs_state.regs.pstate.to_le_bytes(),
1621             )
1622             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1623 
1624         let off = offset_of!(kvm_regs, sp_el1);
1625         self.fd
1626             .lock()
1627             .unwrap()
1628             .set_one_reg(
1629                 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1630                 &kvm_regs_state.sp_el1.to_le_bytes(),
1631             )
1632             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1633 
1634         let off = offset_of!(kvm_regs, elr_el1);
1635         self.fd
1636             .lock()
1637             .unwrap()
1638             .set_one_reg(
1639                 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1640                 &kvm_regs_state.elr_el1.to_le_bytes(),
1641             )
1642             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1643 
1644         let mut off = offset_of!(kvm_regs, spsr);
1645         for i in 0..KVM_NR_SPSR as usize {
1646             self.fd
1647                 .lock()
1648                 .unwrap()
1649                 .set_one_reg(
1650                     arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1651                     &kvm_regs_state.spsr[i].to_le_bytes(),
1652                 )
1653                 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1654             off += std::mem::size_of::<u64>();
1655         }
1656 
1657         let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
1658         for i in 0..32 {
1659             self.fd
1660                 .lock()
1661                 .unwrap()
1662                 .set_one_reg(
1663                     arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
1664                     &kvm_regs_state.fp_regs.vregs[i].to_le_bytes(),
1665                 )
1666                 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1667             off += mem::size_of::<u128>();
1668         }
1669 
1670         let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
1671         self.fd
1672             .lock()
1673             .unwrap()
1674             .set_one_reg(
1675                 arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
1676                 &kvm_regs_state.fp_regs.fpsr.to_le_bytes(),
1677             )
1678             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1679 
1680         let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
1681         self.fd
1682             .lock()
1683             .unwrap()
1684             .set_one_reg(
1685                 arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
1686                 &kvm_regs_state.fp_regs.fpcr.to_le_bytes(),
1687             )
1688             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1689         Ok(())
1690     }
1691 
1692     #[cfg(target_arch = "riscv64")]
1693     ///
1694     /// Sets the RISC-V vCPU core registers.
1695     /// The `KVM_SET_REGS` ioctl is not available on RISC-V 64-bit, so
1696     /// `KVM_SET_ONE_REG` is used to set registers one by one.
1697     ///
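    /// # Example
    ///
    /// A minimal round-trip sketch, mirroring the unit test at the bottom of
    /// this module:
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let regs = vcpu.get_regs().unwrap();
    /// vcpu.set_regs(&regs).unwrap();
    /// ```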
1698     fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
1699         // The function sets the registers in the exact order they appear
1700         // in `state`. Look there for additional info on the registers.
1701         let kvm_regs_state: kvm_riscv_core = (*state).into();
1702 
1703         /// Macro used to write the value of the RISC-V register selected
1704         /// by `$reg_name` from `state` to the KVM vCPU.
1705         macro_rules! riscv64_set_one_reg_to_vcpu {
1706             (mode) => {
1707                 let off = offset_of!(kvm_riscv_core, mode);
1708                 self.fd
1709                     .lock()
1710                     .unwrap()
1711                     .set_one_reg(
1712                         riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
1713                         &kvm_regs_state.mode.to_le_bytes(),
1714                     )
1715                     .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
1716             };
1717             ($reg_name:ident) => {
1718                 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, $reg_name);
1719                 self.fd
1720                     .lock()
1721                     .unwrap()
1722                     .set_one_reg(
1723                         riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
1724                         &kvm_regs_state.regs.$reg_name.to_le_bytes(),
1725                     )
1726                     .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
1727             };
1728         }
1729 
1730         riscv64_set_one_reg_to_vcpu!(pc);
1731         riscv64_set_one_reg_to_vcpu!(ra);
1732         riscv64_set_one_reg_to_vcpu!(sp);
1733         riscv64_set_one_reg_to_vcpu!(gp);
1734         riscv64_set_one_reg_to_vcpu!(tp);
1735         riscv64_set_one_reg_to_vcpu!(t0);
1736         riscv64_set_one_reg_to_vcpu!(t1);
1737         riscv64_set_one_reg_to_vcpu!(t2);
1738         riscv64_set_one_reg_to_vcpu!(s0);
1739         riscv64_set_one_reg_to_vcpu!(s1);
1740         riscv64_set_one_reg_to_vcpu!(a0);
1741         riscv64_set_one_reg_to_vcpu!(a1);
1742         riscv64_set_one_reg_to_vcpu!(a2);
1743         riscv64_set_one_reg_to_vcpu!(a3);
1744         riscv64_set_one_reg_to_vcpu!(a4);
1745         riscv64_set_one_reg_to_vcpu!(a5);
1746         riscv64_set_one_reg_to_vcpu!(a6);
1747         riscv64_set_one_reg_to_vcpu!(a7);
1748         riscv64_set_one_reg_to_vcpu!(s2);
1749         riscv64_set_one_reg_to_vcpu!(s3);
1750         riscv64_set_one_reg_to_vcpu!(s4);
1751         riscv64_set_one_reg_to_vcpu!(s5);
1752         riscv64_set_one_reg_to_vcpu!(s6);
1753         riscv64_set_one_reg_to_vcpu!(s7);
1754         riscv64_set_one_reg_to_vcpu!(s8);
1755         riscv64_set_one_reg_to_vcpu!(s9);
1756         riscv64_set_one_reg_to_vcpu!(s10);
1757         riscv64_set_one_reg_to_vcpu!(s11);
1758         riscv64_set_one_reg_to_vcpu!(t3);
1759         riscv64_set_one_reg_to_vcpu!(t4);
1760         riscv64_set_one_reg_to_vcpu!(t5);
1761         riscv64_set_one_reg_to_vcpu!(t6);
1762         riscv64_set_one_reg_to_vcpu!(mode);
1763 
1764         Ok(())
1765     }
1766 
1767     #[cfg(target_arch = "x86_64")]
1768     ///
1769     /// Returns the vCPU special registers.
1770     ///
1771     fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
1772         Ok(self
1773             .fd
1774             .lock()
1775             .unwrap()
1776             .get_sregs()
1777             .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
1778             .into())
1779     }
1780 
1781     #[cfg(target_arch = "x86_64")]
1782     ///
1783     /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
1784     ///
1785     fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
1786         let sregs = (*sregs).into();
1787         self.fd
1788             .lock()
1789             .unwrap()
1790             .set_sregs(&sregs)
1791             .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
1792     }
1793 
1794     #[cfg(target_arch = "x86_64")]
1795     ///
1796     /// Returns the floating point state (FPU) from the vCPU.
1797     ///
1798     fn get_fpu(&self) -> cpu::Result<FpuState> {
1799         Ok(self
1800             .fd
1801             .lock()
1802             .unwrap()
1803             .get_fpu()
1804             .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
1805             .into())
1806     }
1807 
1808     #[cfg(target_arch = "x86_64")]
1809     ///
1810     /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
1811     ///
1812     fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
1813         let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into();
1814         self.fd
1815             .lock()
1816             .unwrap()
1817             .set_fpu(&fpu)
1818             .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
1819     }
1820 
1821     #[cfg(target_arch = "x86_64")]
1822     ///
1823     /// X86 specific call to set up the CPUID registers.
1824     ///
1825     fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
1826         let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
1827             cpuid.iter().map(|e| (*e).into()).collect();
1828         let kvm_cpuid = <CpuId>::from_entries(&cpuid)
1829             .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;
1830 
1831         self.fd
1832             .lock()
1833             .unwrap()
1834             .set_cpuid2(&kvm_cpuid)
1835             .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
1836     }
1837 
1838     #[cfg(target_arch = "x86_64")]
1839     ///
1840     /// X86 specific call to enable HyperV SynIC
1841     ///
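    /// # Example
    ///
    /// A minimal sketch; SynIC requires the split irqchip to be enabled
    /// first, and the host kernel must expose `KVM_CAP_HYPERV_SYNIC`:
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// vcpu.enable_hyperv_synic().unwrap();
    /// ```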
1842     fn enable_hyperv_synic(&self) -> cpu::Result<()> {
1843         // Record that the Hyper-V SynIC is enabled and emulated, as this
1844         // will later influence which MSRs need to be saved.
1845         self.hyperv_synic.store(true, Ordering::Release);
1846 
1847         let cap = kvm_enable_cap {
1848             cap: KVM_CAP_HYPERV_SYNIC,
1849             ..Default::default()
1850         };
1851         self.fd
1852             .lock()
1853             .unwrap()
1854             .enable_cap(&cap)
1855             .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into()))
1856     }
1857 
1858     ///
1859     /// X86 specific call to retrieve the CPUID registers.
1860     ///
1861     #[cfg(target_arch = "x86_64")]
1862     fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
1863         let kvm_cpuid = self
1864             .fd
1865             .lock()
1866             .unwrap()
1867             .get_cpuid2(num_entries)
1868             .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?;
1869 
1870         let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();
1871 
1872         Ok(v)
1873     }
1874 
1875     #[cfg(target_arch = "x86_64")]
1876     ///
1877     /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1878     ///
1879     fn get_lapic(&self) -> cpu::Result<LapicState> {
1880         Ok(self
1881             .fd
1882             .lock()
1883             .unwrap()
1884             .get_lapic()
1885             .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
1886             .into())
1887     }
1888 
1889     #[cfg(target_arch = "x86_64")]
1890     ///
1891     /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1892     ///
1893     fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
1894         let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into();
1895         self.fd
1896             .lock()
1897             .unwrap()
1898             .set_lapic(&klapic)
1899             .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
1900     }
1901 
1902     #[cfg(target_arch = "x86_64")]
1903     ///
1904     /// Returns the model-specific registers (MSR) for this vCPU.
1905     ///
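    /// # Example
    ///
    /// A minimal sketch; `KVM_GET_MSRS` only fills in entries whose indices
    /// were prepopulated, here using the boot MSR list:
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let mut msrs = vcpu.boot_msr_entries();
    /// let read = vcpu.get_msrs(&mut msrs).unwrap();
    /// assert!(read <= msrs.len());
    /// ```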
1906     fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
1907         let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
1908         let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
1909         let succ = self
1910             .fd
1911             .lock()
1912             .unwrap()
1913             .get_msrs(&mut kvm_msrs)
1914             .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;
1915 
1916         msrs[..succ].copy_from_slice(
1917             &kvm_msrs.as_slice()[..succ]
1918                 .iter()
1919                 .map(|e| (*e).into())
1920                 .collect::<Vec<MsrEntry>>(),
1921         );
1922 
1923         Ok(succ)
1924     }
1925 
1926     #[cfg(target_arch = "x86_64")]
1927     ///
1928     /// Set up the model-specific registers (MSR) for this vCPU.
1929     /// Returns the number of MSR entries actually written.
1930     ///
1931     fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
1932         let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
1933         let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
1934         self.fd
1935             .lock()
1936             .unwrap()
1937             .set_msrs(&kvm_msrs)
1938             .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
1939     }
1940 
1941     ///
1942     /// Returns the vcpu's current "multiprocessing state".
1943     ///
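    /// # Example
    ///
    /// A minimal round-trip sketch (shown for x86_64; on other architectures
    /// the vCPU may need to be initialized first):
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let mp_state = vcpu.get_mp_state().unwrap();
    /// vcpu.set_mp_state(mp_state).unwrap();
    /// ```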
1944     fn get_mp_state(&self) -> cpu::Result<MpState> {
1945         Ok(self
1946             .fd
1947             .lock()
1948             .unwrap()
1949             .get_mp_state()
1950             .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))?
1951             .into())
1952     }
1953 
1954     ///
1955     /// Sets the vcpu's current "multiprocessing state".
1956     ///
1957     fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
1958         self.fd
1959             .lock()
1960             .unwrap()
1961             .set_mp_state(mp_state.into())
1962             .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
1963     }
1964 
1965     #[cfg(target_arch = "x86_64")]
1966     ///
1967     /// Translates guest virtual address to guest physical address using the `KVM_TRANSLATE` ioctl.
1968     ///
1969     fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> {
1970         let tr = self
1971             .fd
1972             .lock()
1973             .unwrap()
1974             .translate_gva(gva)
1975             .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
1976         // tr.valid is set if the GVA is mapped to valid GPA.
1977         match tr.valid {
1978             0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!(
1979                 "Invalid GVA: {:#x}",
1980                 gva
1981             ))),
1982             _ => Ok((tr.physical_address, 0)),
1983         }
1984     }
1985 
1986     ///
1987     /// Triggers the running of the current virtual CPU returning an exit reason.
1988     ///
1989     fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
1990         match self.fd.lock().unwrap().run() {
1991             Ok(run) => match run {
1992                 #[cfg(target_arch = "x86_64")]
1993                 VcpuExit::IoIn(addr, data) => {
1994                     if let Some(vm_ops) = &self.vm_ops {
1995                         return vm_ops
1996                             .pio_read(addr.into(), data)
1997                             .map(|_| cpu::VmExit::Ignore)
1998                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1999                     }
2000 
2001                     Ok(cpu::VmExit::Ignore)
2002                 }
2003                 #[cfg(target_arch = "x86_64")]
2004                 VcpuExit::IoOut(addr, data) => {
2005                     if let Some(vm_ops) = &self.vm_ops {
2006                         return vm_ops
2007                             .pio_write(addr.into(), data)
2008                             .map(|_| cpu::VmExit::Ignore)
2009                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
2010                     }
2011 
2012                     Ok(cpu::VmExit::Ignore)
2013                 }
2014                 #[cfg(target_arch = "x86_64")]
2015                 VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
2016                 #[cfg(target_arch = "x86_64")]
2017                 VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),
2018 
2019                 #[cfg(target_arch = "aarch64")]
2020                 VcpuExit::SystemEvent(event_type, flags) => {
2021                     use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
2022                     // On AArch64, when the VM is shut down, run() returns
2023                     // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN.
2024                     if event_type == KVM_SYSTEM_EVENT_RESET {
2025                         Ok(cpu::VmExit::Reset)
2026                     } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
2027                         Ok(cpu::VmExit::Shutdown)
2028                     } else {
2029                         Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
2030                             "Unexpected system event with type 0x{:x}, flags 0x{:x?}",
2031                             event_type,
2032                             flags
2033                         )))
2034                     }
2035                 }
2036 
2037                 VcpuExit::MmioRead(addr, data) => {
2038                     if let Some(vm_ops) = &self.vm_ops {
2039                         return vm_ops
2040                             .mmio_read(addr, data)
2041                             .map(|_| cpu::VmExit::Ignore)
2042                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
2043                     }
2044 
2045                     Ok(cpu::VmExit::Ignore)
2046                 }
2047                 VcpuExit::MmioWrite(addr, data) => {
2048                     if let Some(vm_ops) = &self.vm_ops {
2049                         return vm_ops
2050                             .mmio_write(addr, data)
2051                             .map(|_| cpu::VmExit::Ignore)
2052                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
2053                     }
2054 
2055                     Ok(cpu::VmExit::Ignore)
2056                 }
2057                 VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),
2058                 #[cfg(feature = "tdx")]
2059                 VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx),
2060                 VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug),
2061 
2062                 r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
2063                     "Unexpected exit reason on vcpu run: {:?}",
2064                     r
2065                 ))),
2066             },
2067 
2068             Err(ref e) => match e.errno() {
2069                 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
2070                 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
2071                     "VCPU error {:?}",
2072                     e
2073                 ))),
2074             },
2075         }
2076     }
2077 
2078     #[cfg(target_arch = "x86_64")]
2079     ///
2080     /// Let the guest know that it has been paused, which prevents
2081     /// potential soft lockups when it is resumed.
2082     ///
2083     fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
2084         if let Err(e) = self.fd.lock().unwrap().kvmclock_ctrl() {
2085             // Linux kernel returns -EINVAL if the PV clock isn't yet initialised
2086             // which could be because we're still in firmware or the guest doesn't
2087             // use KVM clock.
2088             if e.errno() != libc::EINVAL {
2089                 return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()));
2090             }
2091         }
2092 
2093         Ok(())
2094     }
2095 
2096     #[cfg(not(target_arch = "riscv64"))]
2097     ///
2098     /// Sets debug registers to set hardware breakpoints and/or enable single step.
2099     ///
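    /// # Example
    ///
    /// A minimal sketch placing one hardware breakpoint and enabling
    /// single-step; `0x1000` is an arbitrary illustrative guest address:
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// # use vm_memory::GuestAddress;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// vcpu.set_guest_debug(&[GuestAddress(0x1000)], true).unwrap();
    /// ```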
2100     fn set_guest_debug(
2101         &self,
2102         addrs: &[vm_memory::GuestAddress],
2103         singlestep: bool,
2104     ) -> cpu::Result<()> {
2105         let mut dbg = kvm_guest_debug {
2106             #[cfg(target_arch = "x86_64")]
2107             control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
2108             #[cfg(target_arch = "aarch64")]
2109             control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW,
2110             ..Default::default()
2111         };
2112         if singlestep {
2113             dbg.control |= KVM_GUESTDBG_SINGLESTEP;
2114         }
2115 
2116         // Set the debug registers.
2117         // Here we assume that the number of addresses does not exceed what
2118         // `Hypervisor::get_guest_debug_hw_bps()` specifies.
2119         #[cfg(target_arch = "x86_64")]
2120         {
2121             // Set bits 9 and 10.
2122             // bit 9: GE (global exact breakpoint enable) flag.
2123             // bit 10: always 1.
2124             dbg.arch.debugreg[7] = 0x0600;
2125 
2126             for (i, addr) in addrs.iter().enumerate() {
2127                 dbg.arch.debugreg[i] = addr.0;
2128                 // Set global breakpoint enable flag
2129                 dbg.arch.debugreg[7] |= 2 << (i * 2);
2130             }
2131         }
2132         #[cfg(target_arch = "aarch64")]
2133         {
2134             for (i, addr) in addrs.iter().enumerate() {
2135                 // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2):
2136                 // bit 0: 1 (Enabled)
2137                 // bit 1~2: 0b11 (PMC = EL1/EL0)
2138                 // bit 5~8: 0b1111 (BAS = AArch64)
2139                 // others: 0
2140                 dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64;
2141                 // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3):
2142                 // bit 2~52: VA[2:52]
2143                 dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0;
2144             }
2145         }
2146         self.fd
2147             .lock()
2148             .unwrap()
2149             .set_guest_debug(&dbg)
2150             .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))
2151     }
2152 
2153     #[cfg(target_arch = "aarch64")]
2154     fn vcpu_get_finalized_features(&self) -> i32 {
2155         kvm_bindings::KVM_ARM_VCPU_SVE as i32
2156     }
2157 
2158     #[cfg(target_arch = "aarch64")]
2159     fn vcpu_set_processor_features(
2160         &self,
2161         vm: &Arc<dyn crate::Vm>,
2162         kvi: &mut crate::VcpuInit,
2163         id: u8,
2164     ) -> cpu::Result<()> {
2165         use std::arch::is_aarch64_feature_detected;
2166         #[allow(clippy::nonminimal_bool)]
2167         let sve_supported =
2168             is_aarch64_feature_detected!("sve") || is_aarch64_feature_detected!("sve2");
2169 
2170         let mut kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();
2171 
2172         // We already checked that the capability is supported.
2173         kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2;
2174         if vm
2175             .as_any()
2176             .downcast_ref::<crate::kvm::KvmVm>()
2177             .unwrap()
2178             .check_extension(Cap::ArmPmuV3)
2179         {
2180             kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PMU_V3;
2181         }
2182 
2183         if sve_supported
2184             && vm
2185                 .as_any()
2186                 .downcast_ref::<crate::kvm::KvmVm>()
2187                 .unwrap()
2188                 .check_extension(Cap::ArmSve)
2189         {
2190             kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_SVE;
2191         }
2192 
2193         // Non-boot cpus are powered off initially.
2194         if id > 0 {
2195             kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_POWER_OFF;
2196         }
2197 
2198         *kvi = kvm_kvi.into();
2199 
2200         Ok(())
2201     }
2202 
2203     ///
2204     /// Return a VcpuInit with default values set
2205     ///
2206     #[cfg(target_arch = "aarch64")]
2207     fn create_vcpu_init(&self) -> crate::VcpuInit {
2208         kvm_bindings::kvm_vcpu_init::default().into()
2209     }
2210 
2211     #[cfg(target_arch = "aarch64")]
2212     fn vcpu_init(&self, kvi: &crate::VcpuInit) -> cpu::Result<()> {
2213         let kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();
2214         self.fd
2215             .lock()
2216             .unwrap()
2217             .vcpu_init(&kvm_kvi)
2218             .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
2219     }
2220 
2221     #[cfg(target_arch = "aarch64")]
2222     fn vcpu_finalize(&self, feature: i32) -> cpu::Result<()> {
2223         self.fd
2224             .lock()
2225             .unwrap()
2226             .vcpu_finalize(&feature)
2227             .map_err(|e| cpu::HypervisorCpuError::VcpuFinalize(e.into()))
2228     }
2229 
2230     #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
2231     ///
2232     /// Gets a list of the guest registers that are supported for the
2233     /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
2234     ///
2235     fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
2236         let mut kvm_reg_list: kvm_bindings::RegList = reg_list.clone().into();
2237         self.fd
2238             .lock()
2239             .unwrap()
2240             .get_reg_list(&mut kvm_reg_list)
2241             .map_err(|e: kvm_ioctls::Error| cpu::HypervisorCpuError::GetRegList(e.into()))?;
2242         *reg_list = kvm_reg_list.into();
2243         Ok(())
2244     }
2245 
2246     ///
2247     /// Gets the value of a system register
2248     ///
2249     #[cfg(target_arch = "aarch64")]
2250     fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
2251         //
2252         // The Arm Architecture Reference Manual defines the encoding of
2253         // AArch64 system registers, see
2254         // https://developer.arm.com/documentation/ddi0487 (chapter D12).
2255         // KVM defines its own ID for each AArch64 system register, which is
2256         // the ID used when calling `KVM_GET/SET_ONE_REG` to access a system
2257         // register of a guest.
2258         // A mapping exists between the Arm standard encoding and the KVM ID.
2259         // This function takes the standard u32 encoding as input, converts
2260         // it to the corresponding KVM ID, and calls `KVM_GET_ONE_REG` to
2261         // read the value of the system register.
2262         //
2263         let id: u64 = KVM_REG_ARM64
2264             | KVM_REG_SIZE_U64
2265             | KVM_REG_ARM64_SYSREG as u64
2266             | ((((sys_reg) >> 5)
2267                 & (KVM_REG_ARM64_SYSREG_OP0_MASK
2268                     | KVM_REG_ARM64_SYSREG_OP1_MASK
2269                     | KVM_REG_ARM64_SYSREG_CRN_MASK
2270                     | KVM_REG_ARM64_SYSREG_CRM_MASK
2271                     | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
2272         let mut bytes = [0_u8; 8];
2273         self.fd
2274             .lock()
2275             .unwrap()
2276             .get_one_reg(id, &mut bytes)
2277             .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
2278         Ok(u64::from_le_bytes(bytes))
2279     }
2280 
2281     ///
2282     /// Gets the value of a non-core register
2283     ///
2284     #[cfg(target_arch = "riscv64")]
2285     fn get_non_core_reg(&self, _non_core_reg: u32) -> cpu::Result<u64> {
2286         unimplemented!()
2287     }
2288 
2289     ///
2290     /// Configure core registers for a given CPU.
2291     ///
2292     #[cfg(target_arch = "aarch64")]
2293     fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
2294         let kreg_off = offset_of!(kvm_regs, regs);
2295 
2296         // Get the register index of the PSTATE (Processor State) register.
2297         let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
2298         self.fd
2299             .lock()
2300             .unwrap()
2301             .set_one_reg(
2302                 arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
2303                 &regs::PSTATE_FAULT_BITS_64.to_le_bytes(),
2304             )
2305             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
2306 
2307         // Other vCPUs are powered off initially awaiting PSCI wakeup.
2308         if cpu_id == 0 {
2309             // Setting the PC (Program Counter) to the current program address (kernel address).
2310             let pc = offset_of!(user_pt_regs, pc) + kreg_off;
2311             self.fd
2312                 .lock()
2313                 .unwrap()
2314                 .set_one_reg(
2315                     arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
2316                     &boot_ip.to_le_bytes(),
2317                 )
2318                 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
2319 
2320             // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
2321             // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
2322             // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
2323             // We are choosing to place it at the end of DRAM. See `get_fdt_addr`.
2324             let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
2325             self.fd
2326                 .lock()
2327                 .unwrap()
2328                 .set_one_reg(
2329                     arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
2330                     &fdt_start.to_le_bytes(),
2331                 )
2332                 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
2333         }
2334         Ok(())
2335     }
2336 
2337     #[cfg(target_arch = "riscv64")]
2338     ///
2339     /// Configure registers for a given RISC-V CPU.
2340     ///
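    /// # Example
    ///
    /// A minimal sketch; the boot and FDT addresses below are illustrative
    /// placeholders, not values mandated by this crate:
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// // hart 0, kernel at 0x8020_0000, FDT at 0x8220_0000 (illustrative).
    /// vcpu.setup_regs(0, 0x8020_0000, 0x8220_0000).unwrap();
    /// ```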
2341     fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
2342         // Setting the A0 register to the hartid of this CPU.
2343         let a0 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a0);
2344         self.fd
2345             .lock()
2346             .unwrap()
2347             .set_one_reg(
2348                 riscv64_reg_id!(KVM_REG_RISCV_CORE, a0),
2349                 &u64::from(cpu_id).to_le_bytes(),
2350             )
2351             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2352 
2353         // Setting the PC (Program Counter) to the current program address (kernel address).
2354         let pc = offset_of!(kvm_riscv_core, regs, user_regs_struct, pc);
2355         self.fd
2356             .lock()
2357             .unwrap()
2358             .set_one_reg(
2359                 riscv64_reg_id!(KVM_REG_RISCV_CORE, pc),
2360                 &boot_ip.to_le_bytes(),
2361             )
2362             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2363 
2364         // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
2365         // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
2366         // not exceed 64 kilobytes in size." -> https://www.kernel.org/doc/Documentation/arch/riscv/boot.txt.
2367         let a1 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a1);
2368         self.fd
2369             .lock()
2370             .unwrap()
2371             .set_one_reg(
2372                 riscv64_reg_id!(KVM_REG_RISCV_CORE, a1),
2373                 &fdt_start.to_le_bytes(),
2374             )
2375             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2376 
2377         Ok(())
2378     }
2379 
2380     #[cfg(target_arch = "x86_64")]
2381     ///
2382     /// Get the current CPU state
2383     ///
2384     /// Ordering requirements:
2385     ///
2386     /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
2387     /// vCPU/LAPIC state. As such, it must be done before almost everything
2388     /// else, otherwise we cannot restore everything and expect it to work.
2389     ///
2390     /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
2391     /// still running.
2392     ///
2393     /// KVM_GET_LAPIC may change state of LAPIC before returning it.
2394     ///
2395     /// GET_VCPU_EVENTS should probably be last to save. The code looks as
2396     /// if it might be affected by internal state modifications of the
2397     /// GET ioctls.
2398     ///
2399     /// SREGS saves/restores a pending interrupt, similar to what
2400     /// VCPU_EVENTS also does.
2401     ///
2402     /// GET_MSRS requires a prepopulated data structure to do something
2403     /// meaningful. For SET_MSRS it will then contain good data.
2404     ///
2405     /// # Example
2406     ///
2407     /// ```rust
2408     /// # use hypervisor::kvm::KvmHypervisor;
2409     /// # use std::sync::Arc;
2410     /// let kvm = KvmHypervisor::new().unwrap();
2411     /// let hv = Arc::new(kvm);
2412     /// let vm = hv.create_vm().expect("new VM fd creation failed");
2413     /// vm.enable_split_irq().unwrap();
2414     /// let vcpu = vm.create_vcpu(0, None).unwrap();
2415     /// let state = vcpu.state().unwrap();
2416     /// ```
2417     fn state(&self) -> cpu::Result<CpuState> {
2418         let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
2419         let mp_state = self.get_mp_state()?.into();
2420         let regs = self.get_regs()?;
2421         let sregs = self.get_sregs()?;
2422         let xsave = self.get_xsave()?;
2423         let xcrs = self.get_xcrs()?;
2424         let lapic_state = self.get_lapic()?;
2425         let fpu = self.get_fpu()?;
2426 
2427         // Try to get all MSRs based on the list previously retrieved from KVM.
2428         // If the number of MSRs obtained from GET_MSRS is different from the
2429         // expected amount, we fall back on a slower method, getting MSRs
2430         // in chunks. This is the only way to make sure we try to get as many
2431         // MSRs as possible, even if some MSRs are not supported.
2432         let mut msr_entries = self.msrs.clone();
2433 
2434         // Save extra MSRs if the Hyper-V synthetic interrupt controller is
2435         // emulated.
2436         if self.hyperv_synic.load(Ordering::Acquire) {
2437             let hyperv_synic_msrs = vec![
2438                 0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
2439                 0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
2440                 0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
2441                 0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
2442                 0x400000b5, 0x400000b6, 0x400000b7,
2443             ];
2444             for index in hyperv_synic_msrs {
2445                 let msr = kvm_msr_entry {
2446                     index,
2447                     ..Default::default()
2448                 };
2449                 msr_entries.push(msr.into());
2450             }
2451         }
2452 
2453         let expected_num_msrs = msr_entries.len();
2454         let num_msrs = self.get_msrs(&mut msr_entries)?;
2455         let msrs = if num_msrs != expected_num_msrs {
2456             let mut faulty_msr_index = num_msrs;
2457             let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();
2458 
2459             loop {
2460                 warn!(
2461                     "Detected faulty MSR 0x{:x} while getting MSRs",
2462                     msr_entries[faulty_msr_index].index
2463                 );
2464 
2465                 // Skip the first bad MSR
2466                 let start_pos = faulty_msr_index + 1;
2467 
2468                 let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
2469                 let num_msrs = self.get_msrs(&mut sub_msr_entries)?;
2470 
2471                 msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);
2472 
2473                 if num_msrs == sub_msr_entries.len() {
2474                     break;
2475                 }
2476 
2477                 faulty_msr_index = start_pos + num_msrs;
2478             }
2479 
2480             msr_entries_tmp
2481         } else {
2482             msr_entries
2483         };
2484 
2485         let vcpu_events = self.get_vcpu_events()?;
2486         let tsc_khz = self.tsc_khz()?;
2487 
2488         Ok(VcpuKvmState {
2489             cpuid,
2490             msrs,
2491             vcpu_events,
2492             regs: regs.into(),
2493             sregs: sregs.into(),
2494             fpu,
2495             lapic_state,
2496             xsave,
2497             xcrs,
2498             mp_state,
2499             tsc_khz,
2500         }
2501         .into())
2502     }
2503 
2504     ///
2505     /// Get the current AArch64 CPU state
2506     ///
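    /// # Example
    ///
    /// A minimal sketch; the vCPU must be initialized before its state can
    /// be read (using the same `get_preferred_target()`/`vcpu_init()`
    /// sequence as elsewhere in this crate):
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let mut kvi = vcpu.create_vcpu_init();
    /// vm.get_preferred_target(&mut kvi).unwrap();
    /// vcpu.vcpu_init(&kvi).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```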
2507     #[cfg(target_arch = "aarch64")]
2508     fn state(&self) -> cpu::Result<CpuState> {
2509         let mut state = VcpuKvmState {
2510             mp_state: self.get_mp_state()?.into(),
2511             ..Default::default()
2512         };
2513         // Get core registers
2514         state.core_regs = self.get_regs()?.into();
2515 
2516         // Get system registers.
2517         // Call KVM_GET_REG_LIST to get all registers available to the guest.
2518         // For Armv8 there are around 500 registers.
2519         let mut sys_regs: Vec<kvm_bindings::kvm_one_reg> = Vec::new();
2520         let mut reg_list = kvm_bindings::RegList::new(500).unwrap();
2521         self.fd
2522             .lock()
2523             .unwrap()
2524             .get_reg_list(&mut reg_list)
2525             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;
2526 
2527         // At this point reg_list should contain: core registers and system
2528         // registers.
2529         // The register list contains the number of registers and their ids. We
2530         // will need to call KVM_GET_ONE_REG on each id in order to save
2531         // all of them. We carve out from the list the core registers, which
2532         // are represented in the kernel by the kvm_regs structure and for
2533         // which we can calculate the id based on the offset in the structure.
2534         reg_list.retain(|regid| is_system_register(*regid));
2535 
2536         // Now, for the rest of the registers left in the previously fetched
2537         // register list, we are simply calling KVM_GET_ONE_REG.
2538         let indices = reg_list.as_slice();
2539         for index in indices.iter() {
2540             let mut bytes = [0_u8; 8];
2541             self.fd
2542                 .lock()
2543                 .unwrap()
2544                 .get_one_reg(*index, &mut bytes)
2545                 .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
2546             sys_regs.push(kvm_bindings::kvm_one_reg {
2547                 id: *index,
2548                 addr: u64::from_le_bytes(bytes),
2549             });
2550         }
2551 
2552         state.sys_regs = sys_regs;
2553 
2554         Ok(state.into())
2555     }
2556 
2557     #[cfg(target_arch = "riscv64")]
2558     ///
2559     /// Get the current RISC-V 64-bit CPU state
2560     ///
2561     fn state(&self) -> cpu::Result<CpuState> {
2562         let mut state = VcpuKvmState {
2563             mp_state: self.get_mp_state()?.into(),
2564             ..Default::default()
2565         };
2566         // Get core registers
2567         state.core_regs = self.get_regs()?.into();
2568 
2569         // Get non-core registers.
2570         // Call KVM_GET_REG_LIST to get all registers available to the guest.
2571         // For RISC-V 64-bit there are around 200 registers.
2572         let mut sys_regs: Vec<kvm_bindings::kvm_one_reg> = Vec::new();
2573         let mut reg_list = kvm_bindings::RegList::new(200).unwrap();
2574         self.fd
2575             .lock()
2576             .unwrap()
2577             .get_reg_list(&mut reg_list)
2578             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;
2579 
2580         // At this point reg_list should contain:
2581         // - core registers
2582         // - config registers
2583         // - timer registers
2584         // - control and status registers
2585         // - AIA control and status registers
2586         // - smstateen control and status registers
2587         // - sbi_sta control and status registers.
2588         //
2589         // The register list contains the number of registers and their ids. We
2590         // will need to call KVM_GET_ONE_REG on each id in order to save
2591         // all of them. We carve out from the list the core registers, which
2592         // are represented in the kernel by the `kvm_riscv_core` structure and
2593         // for which we can calculate the id based on the offset in the structure.
2594         reg_list.retain(|regid| is_non_core_register(*regid));
2595 
2596         // Now, for the rest of the registers left in the previously fetched
2597         // register list, we are simply calling KVM_GET_ONE_REG.
2598         let indices = reg_list.as_slice();
2599         for index in indices.iter() {
2600             let mut bytes = [0_u8; 8];
2601             self.fd
2602                 .lock()
2603                 .unwrap()
2604                 .get_one_reg(*index, &mut bytes)
2605                 .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
2606             sys_regs.push(kvm_bindings::kvm_one_reg {
2607                 id: *index,
2608                 addr: u64::from_le_bytes(bytes),
2609             });
2610         }
2611 
2612         state.non_core_regs = sys_regs;
2613 
2614         Ok(state.into())
2615     }
2616 
2617     #[cfg(target_arch = "x86_64")]
2618     ///
2619     /// Restore the previously saved CPU state
2620     ///
2621     /// Ordering requirements:
2622     ///
2623     /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
2624     /// still running.
2625     ///
2626     /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
2627     /// if we ever change the BSP, we have to do that before restoring anything.
2628     /// The same seems to be true for CPUID stuff.
2629     ///
2630     /// SREGS saves/restores a pending interrupt, similar to what
2631     /// VCPU_EVENTS also does.
2632     ///
2633     /// SET_REGS clears pending exceptions unconditionally, thus, it must be
2634     /// done before SET_VCPU_EVENTS, which restores it.
2635     ///
2636     /// SET_LAPIC must come after SET_SREGS, because the latter restores
2637     /// the apic base msr.
2638     ///
2639     /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
2640     /// only restores successfully, when the LAPIC is correctly configured.
2641     ///
2642     /// Arguments: CpuState
2643     /// # Example
2644     ///
2645     /// ```rust
2646     /// # use hypervisor::kvm::KvmHypervisor;
2647     /// # use std::sync::Arc;
2648     /// let kvm = KvmHypervisor::new().unwrap();
2649     /// let hv = Arc::new(kvm);
2650     /// let vm = hv.create_vm().expect("new VM fd creation failed");
2651     /// vm.enable_split_irq().unwrap();
2652     /// let vcpu = vm.create_vcpu(0, None).unwrap();
2653     /// let state = vcpu.state().unwrap();
2654     /// vcpu.set_state(&state).unwrap();
2655     /// ```
2656     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
2657         let state: VcpuKvmState = state.clone().into();
2658         self.set_cpuid2(&state.cpuid)?;
2659         self.set_mp_state(state.mp_state.into())?;
2660         self.set_regs(&state.regs.into())?;
2661         self.set_sregs(&state.sregs.into())?;
2662         self.set_xsave(&state.xsave)?;
2663         self.set_xcrs(&state.xcrs)?;
2664         self.set_lapic(&state.lapic_state)?;
2665         self.set_fpu(&state.fpu)?;
2666 
2667         if let Some(freq) = state.tsc_khz {
2668             self.set_tsc_khz(freq)?;
2669         }
2670 
2671         // Try to set all MSRs previously stored.
2672         // If the number of MSRs set from SET_MSRS is different from the
2673         // expected amount, we fall back on a slower method, setting MSRs
2674         // in chunks. This is the only way to make sure we try to set as many
2675         // MSRs as possible, even if some MSRs are not supported.
2676         let expected_num_msrs = state.msrs.len();
2677         let num_msrs = self.set_msrs(&state.msrs)?;
2678         if num_msrs != expected_num_msrs {
2679             let mut faulty_msr_index = num_msrs;
2680 
2681             loop {
2682                 warn!(
2683                     "Detected faulty MSR 0x{:x} while setting MSRs",
2684                     state.msrs[faulty_msr_index].index
2685                 );
2686 
2687                 // Skip the first bad MSR
2688                 let start_pos = faulty_msr_index + 1;
2689 
2690                 let sub_msr_entries = state.msrs[start_pos..].to_vec();
2691 
2692                 let num_msrs = self.set_msrs(&sub_msr_entries)?;
2693 
2694                 if num_msrs == sub_msr_entries.len() {
2695                     break;
2696                 }
2697 
2698                 faulty_msr_index = start_pos + num_msrs;
2699             }
2700         }
2701 
2702         self.set_vcpu_events(&state.vcpu_events)?;
2703 
2704         Ok(())
2705     }
2706 
2707     ///
2708     /// Restore the previously saved AArch64 CPU state
2709     ///
2710     #[cfg(target_arch = "aarch64")]
2711     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
2712         let state: VcpuKvmState = state.clone().into();
2713         // Set core registers
2714         self.set_regs(&state.core_regs.into())?;
2715         // Set system registers
2716         for reg in &state.sys_regs {
2717             self.fd
2718                 .lock()
2719                 .unwrap()
2720                 .set_one_reg(reg.id, &reg.addr.to_le_bytes())
2721                 .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
2722         }
2723 
2724         self.set_mp_state(state.mp_state.into())?;
2725 
2726         Ok(())
2727     }
2728 
2729     #[cfg(target_arch = "riscv64")]
2730     ///
2731     /// Restore the previously saved RISC-V 64-bit CPU state
2732     ///
2733     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
2734         let state: VcpuKvmState = state.clone().into();
2735         // Set core registers
2736         self.set_regs(&state.core_regs.into())?;
2737         // Set non-core registers
2738         for reg in &state.non_core_regs {
2739             self.fd
2740                 .lock()
2741                 .unwrap()
2742                 .set_one_reg(reg.id, &reg.addr.to_le_bytes())
2743                 .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
2744         }
2745 
2746         self.set_mp_state(state.mp_state.into())?;
2747 
2748         Ok(())
2749     }
2750 
2751     ///
2752     /// Initialize TDX for this CPU
2753     ///
2754     #[cfg(feature = "tdx")]
2755     fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
2756         tdx_command(
2757             &self.fd.lock().unwrap().as_raw_fd(),
2758             TdxCommand::InitVcpu,
2759             0,
2760             hob_address,
2761         )
2762         .map_err(cpu::HypervisorCpuError::InitializeTdx)
2763     }
2764 
2765     ///
2766     /// Set the "immediate_exit" state
2767     ///
2768     fn set_immediate_exit(&self, exit: bool) {
2769         self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into());
2770     }
2771 
2772     ///
2773     /// Returns the details about TDX exit reason
2774     ///
2775     #[cfg(feature = "tdx")]
2776     fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
2777         let mut fd = self.fd.as_ref().lock().unwrap();
2778         let kvm_run = fd.get_kvm_run();
2779         // SAFETY: accessing a union field in a valid structure
2780         let tdx_vmcall = unsafe {
2781             &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
2782                 as *mut KvmTdxExit))
2783                 .u
2784                 .vmcall
2785         };
2786 
2787         tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;
2788 
2789         if tdx_vmcall.type_ != 0 {
2790             return Err(cpu::HypervisorCpuError::UnknownTdxVmCall);
2791         }
2792 
2793         match tdx_vmcall.subfunction {
2794             TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
2795             TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
2796                 Ok(TdxExitDetails::SetupEventNotifyInterrupt)
2797             }
2798             _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
2799         }
2800     }
2801 
2802     ///
2803     /// Set the status code for TDX exit
2804     ///
2805     #[cfg(feature = "tdx")]
2806     fn set_tdx_status(&mut self, status: TdxExitStatus) {
2807         let mut fd = self.fd.as_ref().lock().unwrap();
2808         let kvm_run = fd.get_kvm_run();
2809         // SAFETY: accessing a union field in a valid structure
2810         let tdx_vmcall = unsafe {
2811             &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
2812                 as *mut KvmTdxExit))
2813                 .u
2814                 .vmcall
2815         };
2816 
2817         tdx_vmcall.status_code = match status {
2818             TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
2819             TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
2820         };
2821     }
2822 
2823     #[cfg(target_arch = "x86_64")]
2824     ///
2825     /// Return the list of initial MSR entries for a VCPU
2826     ///
2827     fn boot_msr_entries(&self) -> Vec<MsrEntry> {
2828         use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};
2829 
2830         [
2831             msr!(msr_index::MSR_IA32_SYSENTER_CS),
2832             msr!(msr_index::MSR_IA32_SYSENTER_ESP),
2833             msr!(msr_index::MSR_IA32_SYSENTER_EIP),
2834             msr!(msr_index::MSR_STAR),
2835             msr!(msr_index::MSR_CSTAR),
2836             msr!(msr_index::MSR_LSTAR),
2837             msr!(msr_index::MSR_KERNEL_GS_BASE),
2838             msr!(msr_index::MSR_SYSCALL_MASK),
2839             msr!(msr_index::MSR_IA32_TSC),
2840             msr_data!(
2841                 msr_index::MSR_IA32_MISC_ENABLE,
2842                 msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
2843             ),
2844             msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
2845         ]
2846         .to_vec()
2847     }
2848 
2849     #[cfg(target_arch = "aarch64")]
2850     fn has_pmu_support(&self) -> bool {
2851         let cpu_attr = kvm_bindings::kvm_device_attr {
2852             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
2853             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
2854             addr: 0x0,
2855             flags: 0,
2856         };
2857         self.fd.lock().unwrap().has_device_attr(&cpu_attr).is_ok()
2858     }
2859 
2860     #[cfg(target_arch = "aarch64")]
2861     fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
2862         let cpu_attr = kvm_bindings::kvm_device_attr {
2863             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
2864             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
2865             addr: 0x0,
2866             flags: 0,
2867         };
2868         let cpu_attr_irq = kvm_bindings::kvm_device_attr {
2869             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
2870             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
2871             addr: &irq as *const u32 as u64,
2872             flags: 0,
2873         };
2874         self.fd
2875             .lock()
2876             .unwrap()
2877             .set_device_attr(&cpu_attr_irq)
2878             .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
2879         self.fd
2880             .lock()
2881             .unwrap()
2882             .set_device_attr(&cpu_attr)
2883             .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
2884     }
2885 
2886     #[cfg(target_arch = "x86_64")]
2887     ///
2888     /// Get the frequency of the TSC if available
2889     ///
2890     fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
2891         match self.fd.lock().unwrap().get_tsc_khz() {
2892             Err(e) => {
2893                 if e.errno() == libc::EIO {
2894                     Ok(None)
2895                 } else {
2896                     Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
2897                 }
2898             }
2899             Ok(v) => Ok(Some(v)),
2900         }
2901     }
2902 
2903     #[cfg(target_arch = "x86_64")]
2904     ///
2905     /// Set the frequency of the TSC if available
2906     ///
2907     fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
2908         match self.fd.lock().unwrap().set_tsc_khz(freq) {
2909             Err(e) => {
2910                 if e.errno() == libc::EIO {
2911                     Ok(())
2912                 } else {
2913                     Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
2914                 }
2915             }
2916             Ok(_) => Ok(()),
2917         }
2918     }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Trigger an NMI (non-maskable interrupt) on the vCPU
    ///
    fn nmi(&self) -> cpu::Result<()> {
        match self.fd.lock().unwrap().nmi() {
            Err(e) => {
                // Tolerate EIO here as well; any other error is surfaced.
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::Nmi(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }
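
    // A usage sketch (hypothetical caller): an NMI can be injected into a
    // running guest vCPU, e.g. to trigger a guest crash dump on request.
    //
    //     vcpu.nmi().expect("failed to inject NMI");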
}

impl KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<XsaveState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
        let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_xsave(&xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .lock()
            .unwrap()
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .lock()
            .unwrap()
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
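
    // A save/restore sketch (hypothetical caller): these accessors are
    // typically used in get/set pairs when snapshotting and restoring vCPU
    // state. The ordering below is illustrative, not a requirement stated by
    // this file.
    //
    //     let xsave = vcpu.get_xsave()?;
    //     let xcrs = vcpu.get_xcrs()?;
    //     let events = vcpu.get_vcpu_events()?;
    //     // ... later, on the restore path ...
    //     vcpu.set_xsave(&xsave)?;
    //     vcpu.set_xcrs(&xcrs)?;
    //     vcpu.set_vcpu_events(&events)?;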
}

#[cfg(test)]
mod tests {
    #[test]
    #[cfg(target_arch = "riscv64")]
    fn test_get_and_set_regs() {
        use super::*;

        let kvm = KvmHypervisor::new().unwrap();
        let hypervisor = Arc::new(kvm);
        let vm = hypervisor.create_vm().expect("new VM fd creation failed");
        let vcpu0 = vm.create_vcpu(0, None).unwrap();

        let core_regs = StandardRegisters::from(kvm_riscv_core {
            regs: user_regs_struct {
                pc: 0x00,
                ra: 0x01,
                sp: 0x02,
                gp: 0x03,
                tp: 0x04,
                t0: 0x05,
                t1: 0x06,
                t2: 0x07,
                s0: 0x08,
                s1: 0x09,
                a0: 0x0a,
                a1: 0x0b,
                a2: 0x0c,
                a3: 0x0d,
                a4: 0x0e,
                a5: 0x0f,
                a6: 0x10,
                a7: 0x11,
                s2: 0x12,
                s3: 0x13,
                s4: 0x14,
                s5: 0x15,
                s6: 0x16,
                s7: 0x17,
                s8: 0x18,
                s9: 0x19,
                s10: 0x1a,
                s11: 0x1b,
                t3: 0x1c,
                t4: 0x1d,
                t5: 0x1e,
                t6: 0x1f,
            },
            mode: 0x00,
        });

        vcpu0.set_regs(&core_regs).unwrap();
        assert_eq!(vcpu0.get_regs().unwrap(), core_regs);
    }
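
    // A minimal x86_64 sketch exercising the TSC frequency accessors above.
    // Like the riscv64 test, it assumes /dev/kvm is usable in the test
    // environment; hosts without TSC frequency control may reject the write,
    // so that result is deliberately ignored.
    #[test]
    #[cfg(target_arch = "x86_64")]
    fn test_get_and_set_tsc_khz() {
        use super::*;

        let kvm = KvmHypervisor::new().unwrap();
        let hypervisor = Arc::new(kvm);
        let vm = hypervisor.create_vm().expect("new VM fd creation failed");
        let vcpu0 = vm.create_vcpu(0, None).unwrap();

        // tsc_khz() maps EIO to Ok(None), so unwrap() is safe here.
        if let Some(khz) = vcpu0.tsc_khz().unwrap() {
            let _ = vcpu0.set_tsc_khz(khz);
        }
    }

    // An aarch64 smoke-test sketch for the PMU probe: has_pmu_support() only
    // queries a device attribute, so calling it is harmless whether or not
    // the host exposes a PMU. No particular result is asserted.
    #[test]
    #[cfg(target_arch = "aarch64")]
    fn test_has_pmu_support() {
        use super::*;

        let kvm = KvmHypervisor::new().unwrap();
        let hypervisor = Arc::new(kvm);
        let vm = hypervisor.create_vm().expect("new VM fd creation failed");
        let vcpu0 = vm.create_vcpu(0, None).unwrap();

        let _ = vcpu0.has_pmu_support();
    }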
}