xref: /cloud-hypervisor/hypervisor/src/kvm/mod.rs (revision 655d512523353961a67cf19cf3bc227d403898f0)
1 // Copyright © 2024 Institute of Software, CAS. All rights reserved.
2 //
3 // Copyright © 2019 Intel Corporation
4 //
5 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
6 //
7 // Copyright © 2020, Microsoft Corporation
8 //
9 // Copyright 2018-2019 CrowdStrike, Inc.
10 //
11 //
12 
13 use std::any::Any;
14 use std::collections::HashMap;
15 #[cfg(target_arch = "x86_64")]
16 use std::fs::File;
17 #[cfg(target_arch = "x86_64")]
18 use std::os::unix::io::AsRawFd;
19 #[cfg(feature = "tdx")]
20 use std::os::unix::io::RawFd;
21 use std::result;
22 #[cfg(target_arch = "x86_64")]
23 use std::sync::atomic::{AtomicBool, Ordering};
24 use std::sync::{Arc, Mutex, RwLock};
25 
26 use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
27 use vmm_sys_util::eventfd::EventFd;
28 
29 #[cfg(target_arch = "aarch64")]
30 use crate::aarch64::gic::KvmGicV3Its;
31 #[cfg(target_arch = "aarch64")]
32 pub use crate::aarch64::{
33     check_required_kvm_extensions, gic::Gicv3ItsState as GicState, is_system_register, VcpuKvmState,
34 };
35 #[cfg(target_arch = "aarch64")]
36 use crate::arch::aarch64::gic::{Vgic, VgicConfig};
37 #[cfg(target_arch = "riscv64")]
38 use crate::arch::riscv64::aia::{Vaia, VaiaConfig};
39 #[cfg(target_arch = "riscv64")]
40 use crate::riscv64::aia::KvmAiaImsics;
41 #[cfg(target_arch = "riscv64")]
42 pub use crate::riscv64::{
43     aia::AiaImsicsState as AiaState, check_required_kvm_extensions, is_non_core_register,
44     VcpuKvmState,
45 };
46 use crate::vm::{self, InterruptSourceConfig, VmOps};
47 #[cfg(target_arch = "aarch64")]
48 use crate::{arm64_core_reg_id, offset_of};
49 use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType};
50 #[cfg(target_arch = "riscv64")]
51 use crate::{offset_of, riscv64_reg_id};
52 // x86_64 dependencies
53 #[cfg(target_arch = "x86_64")]
54 pub mod x86_64;
55 #[cfg(target_arch = "x86_64")]
56 use kvm_bindings::{
57     kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
58     KVM_GUESTDBG_USE_HW_BP,
59 };
60 #[cfg(target_arch = "x86_64")]
61 use x86_64::check_required_kvm_extensions;
62 #[cfg(target_arch = "x86_64")]
63 pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState};
64 
65 #[cfg(target_arch = "x86_64")]
66 use crate::arch::x86::{
67     CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, XsaveState, NUM_IOAPIC_PINS,
68 };
69 #[cfg(target_arch = "x86_64")]
70 use crate::ClockData;
71 use crate::{
72     CpuState, IoEventAddress, IrqRoutingEntry, MpState, StandardRegisters, UserMemoryRegion,
73     USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
74 };
75 // aarch64 dependencies
76 #[cfg(target_arch = "aarch64")]
77 pub mod aarch64;
78 // riscv64 dependencies
79 #[cfg(target_arch = "riscv64")]
80 pub mod riscv64;
81 #[cfg(target_arch = "aarch64")]
82 use std::mem;
83 
84 ///
85 /// Export generically-named wrappers of kvm-bindings for Unix-based platforms
86 ///
87 #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
88 pub use kvm_bindings::kvm_vcpu_events as VcpuEvents;
89 pub use kvm_bindings::{
90     kvm_clock_data, kvm_create_device, kvm_create_device as CreateDevice,
91     kvm_device_attr as DeviceAttr, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug,
92     kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_run, kvm_userspace_memory_region,
93     KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
94     KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
95 };
96 #[cfg(target_arch = "aarch64")]
97 use kvm_bindings::{
98     kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64,
99     KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK,
100     KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK,
101     KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
102 };
103 #[cfg(target_arch = "riscv64")]
104 use kvm_bindings::{kvm_riscv_core, user_regs_struct, KVM_REG_RISCV_CORE};
105 #[cfg(feature = "tdx")]
106 use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO};
107 pub use kvm_ioctls::{Cap, Kvm, VcpuExit};
108 use thiserror::Error;
109 use vfio_ioctls::VfioDeviceFd;
110 #[cfg(feature = "tdx")]
111 use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr};
112 pub use {kvm_bindings, kvm_ioctls};
113 
114 #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
115 use crate::RegList;
116 
// Capability number of KVM_CAP_SGX_ATTRIBUTE; defined here because it is
// not (yet) exported by the kvm-bindings crate.
#[cfg(target_arch = "x86_64")]
const KVM_CAP_SGX_ATTRIBUTE: u32 = 196;

#[cfg(target_arch = "x86_64")]
use vmm_sys_util::ioctl_io_nr;
#[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))]
use vmm_sys_util::ioctl_ioc_nr;

// Ioctl number for injecting an NMI into a vCPU (KVM ioctl 0x9a).
#[cfg(target_arch = "x86_64")]
ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a);

// Exit reason and TDG.VP.VMCALL leaf/status numbers used by the TDX-enabled
// KVM ABI. NOTE(review): these mirror the out-of-tree KVM TDX patches —
// confirm against the kernel headers in use.
#[cfg(feature = "tdx")]
const KVM_EXIT_TDX: u32 = 50;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SUCCESS: u64 = 0;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000;

// The TDX ioctl is multiplexed through KVM_MEMORY_ENCRYPT_OP (0xba).
#[cfg(feature = "tdx")]
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);
141 
/// Sub-commands multiplexed through the `KVM_MEMORY_ENCRYPT_OP` ioctl to
/// drive TDX setup. Discriminants are part of the kernel ABI, so they are
/// spelled out explicitly.
#[cfg(feature = "tdx")]
#[repr(u32)]
enum TdxCommand {
    /// Query TDX capabilities from KVM.
    Capabilities = 0,
    /// Initialize the TD VM.
    InitVm = 1,
    /// Initialize a TD vCPU.
    InitVcpu = 2,
    /// Register a guest memory region with the TD.
    InitMemRegion = 3,
    /// Finalize the TD measurement.
    Finalize = 4,
}
151 
/// Which TDG.VP.VMCALL sub-function triggered a TDX exit
/// (cf. `TDG_VP_VMCALL_GET_QUOTE` and
/// `TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT`).
#[cfg(feature = "tdx")]
pub enum TdxExitDetails {
    GetQuote,
    SetupEventNotifyInterrupt,
}
157 
/// Completion status the caller reports back for a handled TDX vmcall
/// (mapped to `TDG_VP_VMCALL_SUCCESS` / `TDG_VP_VMCALL_INVALID_OPERAND`).
#[cfg(feature = "tdx")]
pub enum TdxExitStatus {
    Success,
    InvalidOperand,
}
163 
// Fixed capacity of `TdxCapabilities::cpuid_configs`; must match the
// kernel's definition of the capabilities structure.
#[cfg(feature = "tdx")]
const TDX_MAX_NR_CPUID_CONFIGS: usize = 6;

/// One directly-configurable CPUID leaf/sub-leaf as reported by the TDX
/// module; C layout to match the KVM TDX ABI.
#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCpuidConfig {
    pub leaf: u32,
    pub sub_leaf: u32,
    pub eax: u32,
    pub ebx: u32,
    pub ecx: u32,
    pub edx: u32,
}
178 
/// TDX capabilities returned by the `TdxCommand::Capabilities` ioctl;
/// C layout to match the KVM TDX ABI.
#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCapabilities {
    // Attribute bits that must be 0 / must be 1 respectively.
    pub attrs_fixed0: u64,
    pub attrs_fixed1: u64,
    // XFAM (extended features) bits that must be 0 / must be 1.
    pub xfam_fixed0: u64,
    pub xfam_fixed1: u64,
    // Number of valid entries in `cpuid_configs`.
    pub nr_cpuid_configs: u32,
    pub padding: u32,
    pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS],
}
191 
/// TDX-specific vCPU exit information delivered with a `KVM_EXIT_TDX`
/// exit reason.
///
/// `#[repr(C)]` is required (it was missing): this struct is overlaid on
/// data laid out by the kernel, so its field order and padding must follow
/// the C rules — the nested union `KvmTdxExitU` and all sibling TDX structs
/// are already `#[repr(C)]`, and Rust's default layout is unspecified.
#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Copy, Clone)]
pub struct KvmTdxExit {
    pub type_: u32,
    pub pad: u32,
    pub u: KvmTdxExitU,
}
199 
/// Union payload of a TDX exit; currently only the vmcall variant is
/// defined. C layout to match the kernel's definition.
#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Copy, Clone)]
pub union KvmTdxExitU {
    pub vmcall: KvmTdxExitVmcall,
}
206 
/// Register file of a TDG.VP.VMCALL as exposed by KVM on a TDX exit:
/// `in_*` fields carry guest arguments, `status_code`/`out_*` carry the
/// host's reply. C layout to match the kernel's definition.
#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct KvmTdxExitVmcall {
    pub type_: u64,
    pub subfunction: u64,
    // Bitmask of which guest registers below are valid.
    pub reg_mask: u64,
    pub in_r12: u64,
    pub in_r13: u64,
    pub in_r14: u64,
    pub in_r15: u64,
    pub in_rbx: u64,
    pub in_rdi: u64,
    pub in_rsi: u64,
    pub in_r8: u64,
    pub in_r9: u64,
    pub in_rdx: u64,
    pub status_code: u64,
    pub out_r11: u64,
    pub out_r12: u64,
    pub out_r13: u64,
    pub out_r14: u64,
    pub out_r15: u64,
    pub out_rbx: u64,
    pub out_rdi: u64,
    pub out_rsi: u64,
    pub out_r8: u64,
    pub out_r9: u64,
    pub out_rdx: u64,
}
237 
238 impl From<kvm_userspace_memory_region> for UserMemoryRegion {
239     fn from(region: kvm_userspace_memory_region) -> Self {
240         let mut flags = USER_MEMORY_REGION_READ;
241         if region.flags & KVM_MEM_READONLY == 0 {
242             flags |= USER_MEMORY_REGION_WRITE;
243         }
244         if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 {
245             flags |= USER_MEMORY_REGION_LOG_DIRTY;
246         }
247 
248         UserMemoryRegion {
249             slot: region.slot,
250             guest_phys_addr: region.guest_phys_addr,
251             memory_size: region.memory_size,
252             userspace_addr: region.userspace_addr,
253             flags,
254         }
255     }
256 }
257 
258 impl From<UserMemoryRegion> for kvm_userspace_memory_region {
259     fn from(region: UserMemoryRegion) -> Self {
260         assert!(
261             region.flags & USER_MEMORY_REGION_READ != 0,
262             "KVM mapped memory is always readable"
263         );
264 
265         let mut flags = 0;
266         if region.flags & USER_MEMORY_REGION_WRITE == 0 {
267             flags |= KVM_MEM_READONLY;
268         }
269         if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
270             flags |= KVM_MEM_LOG_DIRTY_PAGES;
271         }
272 
273         kvm_userspace_memory_region {
274             slot: region.slot,
275             guest_phys_addr: region.guest_phys_addr,
276             memory_size: region.memory_size,
277             userspace_addr: region.userspace_addr,
278             flags,
279         }
280     }
281 }
282 
283 impl From<kvm_mp_state> for MpState {
284     fn from(s: kvm_mp_state) -> Self {
285         MpState::Kvm(s)
286     }
287 }
288 
289 impl From<MpState> for kvm_mp_state {
290     fn from(ms: MpState) -> Self {
291         match ms {
292             MpState::Kvm(s) => s,
293             /* Needed in case other hypervisors are enabled */
294             #[allow(unreachable_patterns)]
295             _ => panic!("CpuState is not valid"),
296         }
297     }
298 }
299 
300 impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
301     fn from(a: kvm_ioctls::IoEventAddress) -> Self {
302         match a {
303             kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
304             kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
305         }
306     }
307 }
308 
309 impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
310     fn from(a: IoEventAddress) -> Self {
311         match a {
312             IoEventAddress::Pio(x) => Self::Pio(x),
313             IoEventAddress::Mmio(x) => Self::Mmio(x),
314         }
315     }
316 }
317 
318 impl From<VcpuKvmState> for CpuState {
319     fn from(s: VcpuKvmState) -> Self {
320         CpuState::Kvm(s)
321     }
322 }
323 
324 impl From<CpuState> for VcpuKvmState {
325     fn from(s: CpuState) -> Self {
326         match s {
327             CpuState::Kvm(s) => s,
328             /* Needed in case other hypervisors are enabled */
329             #[allow(unreachable_patterns)]
330             _ => panic!("CpuState is not valid"),
331         }
332     }
333 }
334 
335 #[cfg(target_arch = "x86_64")]
336 impl From<kvm_clock_data> for ClockData {
337     fn from(d: kvm_clock_data) -> Self {
338         ClockData::Kvm(d)
339     }
340 }
341 
342 #[cfg(target_arch = "x86_64")]
343 impl From<ClockData> for kvm_clock_data {
344     fn from(ms: ClockData) -> Self {
345         match ms {
346             ClockData::Kvm(s) => s,
347             /* Needed in case other hypervisors are enabled */
348             #[allow(unreachable_patterns)]
349             _ => panic!("CpuState is not valid"),
350         }
351     }
352 }
353 
354 impl From<kvm_bindings::kvm_one_reg> for crate::Register {
355     fn from(s: kvm_bindings::kvm_one_reg) -> Self {
356         crate::Register::Kvm(s)
357     }
358 }
359 
360 impl From<crate::Register> for kvm_bindings::kvm_one_reg {
361     fn from(e: crate::Register) -> Self {
362         match e {
363             crate::Register::Kvm(e) => e,
364             /* Needed in case other hypervisors are enabled */
365             #[allow(unreachable_patterns)]
366             _ => panic!("Register is not valid"),
367         }
368     }
369 }
370 
#[cfg(target_arch = "aarch64")]
impl From<kvm_bindings::kvm_vcpu_init> for crate::VcpuInit {
    /// Wrap a raw KVM vCPU init structure.
    fn from(init: kvm_bindings::kvm_vcpu_init) -> Self {
        crate::VcpuInit::Kvm(init)
    }
}

#[cfg(target_arch = "aarch64")]
impl From<crate::VcpuInit> for kvm_bindings::kvm_vcpu_init {
    /// Extract the raw KVM vCPU init structure.
    ///
    /// # Panics
    /// Panics for non-KVM variants.
    fn from(init: crate::VcpuInit) -> Self {
        match init {
            crate::VcpuInit::Kvm(raw) => raw,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("VcpuInit is not valid"),
        }
    }
}
389 
#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
impl From<kvm_bindings::RegList> for crate::RegList {
    /// Wrap a raw KVM register list.
    fn from(list: kvm_bindings::RegList) -> Self {
        crate::RegList::Kvm(list)
    }
}

#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
impl From<crate::RegList> for kvm_bindings::RegList {
    /// Extract the raw KVM register list.
    ///
    /// # Panics
    /// Panics for non-KVM variants.
    fn from(list: crate::RegList) -> Self {
        match list {
            crate::RegList::Kvm(raw) => raw,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("RegList is not valid"),
        }
    }
}
408 
409 #[cfg(not(target_arch = "riscv64"))]
410 impl From<kvm_bindings::kvm_regs> for crate::StandardRegisters {
411     fn from(s: kvm_bindings::kvm_regs) -> Self {
412         crate::StandardRegisters::Kvm(s)
413     }
414 }
415 
416 #[cfg(not(target_arch = "riscv64"))]
417 impl From<crate::StandardRegisters> for kvm_bindings::kvm_regs {
418     fn from(e: crate::StandardRegisters) -> Self {
419         match e {
420             crate::StandardRegisters::Kvm(e) => e,
421             /* Needed in case other hypervisors are enabled */
422             #[allow(unreachable_patterns)]
423             _ => panic!("StandardRegisters are not valid"),
424         }
425     }
426 }
427 
#[cfg(target_arch = "riscv64")]
impl From<kvm_bindings::kvm_riscv_core> for crate::StandardRegisters {
    /// Wrap raw KVM RISC-V core registers.
    fn from(regs: kvm_bindings::kvm_riscv_core) -> Self {
        crate::StandardRegisters::Kvm(regs)
    }
}

#[cfg(target_arch = "riscv64")]
impl From<crate::StandardRegisters> for kvm_bindings::kvm_riscv_core {
    /// Extract the raw KVM RISC-V core registers.
    ///
    /// # Panics
    /// Panics for non-KVM variants.
    fn from(regs: crate::StandardRegisters) -> Self {
        match regs {
            crate::StandardRegisters::Kvm(raw) => raw,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}
446 
447 impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
448     fn from(s: kvm_irq_routing_entry) -> Self {
449         IrqRoutingEntry::Kvm(s)
450     }
451 }
452 
453 impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
454     fn from(e: IrqRoutingEntry) -> Self {
455         match e {
456             IrqRoutingEntry::Kvm(e) => e,
457             /* Needed in case other hypervisors are enabled */
458             #[allow(unreachable_patterns)]
459             _ => panic!("IrqRoutingEntry is not valid"),
460         }
461     }
462 }
463 
// Cached parameters of a memory slot that requested dirty-page logging;
// used by `start_dirty_log`/`stop_dirty_log` to re-register the slot with
// the logging flag toggled.
struct KvmDirtyLogSlot {
    // KVM memory-slot index.
    slot: u32,
    // Guest physical address of the slot.
    guest_phys_addr: u64,
    // Slot size in bytes.
    memory_size: u64,
    // Host virtual address backing the slot.
    userspace_addr: u64,
}
470 
/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    // Shared handle to the kernel VM file descriptor.
    fd: Arc<VmFd>,
    // MSR entries cloned into every vCPU created from this VM.
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    // Slots registered with dirty-page logging requested, keyed by slot id.
    dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>,
}
478 
479 impl KvmVm {
480     ///
481     /// Creates an emulated device in the kernel.
482     ///
483     /// See the documentation for `KVM_CREATE_DEVICE`.
484     fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> {
485         let device_fd = self
486             .fd
487             .create_device(device)
488             .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
489         Ok(VfioDeviceFd::new_from_kvm(device_fd))
490     }
491     /// Checks if a particular `Cap` is available.
492     pub fn check_extension(&self, c: Cap) -> bool {
493         self.fd.check_extension(c)
494     }
495 }
496 
497 /// Implementation of Vm trait for KVM
498 ///
499 /// # Examples
500 ///
501 /// ```
502 /// # use hypervisor::kvm::KvmHypervisor;
503 /// # use std::sync::Arc;
504 /// let kvm = KvmHypervisor::new().unwrap();
505 /// let hypervisor = Arc::new(kvm);
506 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
507 /// ```
508 impl vm::Vm for KvmVm {
509     #[cfg(target_arch = "x86_64")]
510     ///
511     /// Sets the address of the one-page region in the VM's address space.
512     ///
513     fn set_identity_map_address(&self, address: u64) -> vm::Result<()> {
514         self.fd
515             .set_identity_map_address(address)
516             .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into()))
517     }
518 
519     #[cfg(target_arch = "x86_64")]
520     ///
521     /// Sets the address of the three-page region in the VM's address space.
522     ///
523     fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
524         self.fd
525             .set_tss_address(offset)
526             .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
527     }
528 
529     #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
530     ///
531     /// Creates an in-kernel interrupt controller.
532     ///
533     fn create_irq_chip(&self) -> vm::Result<()> {
534         self.fd
535             .create_irq_chip()
536             .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
537     }
538 
539     ///
540     /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
541     ///
542     fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
543         self.fd
544             .register_irqfd(fd, gsi)
545             .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
546     }
547 
548     ///
549     /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
550     ///
551     fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
552         self.fd
553             .unregister_irqfd(fd, gsi)
554             .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
555     }
556 
557     ///
558     /// Creates a VcpuFd object from a vcpu RawFd.
559     ///
560     fn create_vcpu(
561         &self,
562         id: u8,
563         vm_ops: Option<Arc<dyn VmOps>>,
564     ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
565         let fd = self
566             .fd
567             .create_vcpu(id as u64)
568             .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
569         let vcpu = KvmVcpu {
570             fd: Arc::new(Mutex::new(fd)),
571             #[cfg(target_arch = "x86_64")]
572             msrs: self.msrs.clone(),
573             vm_ops,
574             #[cfg(target_arch = "x86_64")]
575             hyperv_synic: AtomicBool::new(false),
576         };
577         Ok(Arc::new(vcpu))
578     }
579 
580     #[cfg(target_arch = "aarch64")]
581     ///
582     /// Creates a virtual GIC device.
583     ///
584     fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
585         let gic_device = KvmGicV3Its::new(self, config)
586             .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?;
587         Ok(Arc::new(Mutex::new(gic_device)))
588     }
589 
590     #[cfg(target_arch = "riscv64")]
591     ///
592     /// Creates a virtual AIA device.
593     ///
594     fn create_vaia(&self, config: VaiaConfig) -> vm::Result<Arc<Mutex<dyn Vaia>>> {
595         let aia_device = KvmAiaImsics::new(self, config)
596             .map_err(|e| vm::HypervisorVmError::CreateVaia(anyhow!("Vaia error {:?}", e)))?;
597         Ok(Arc::new(Mutex::new(aia_device)))
598     }
599 
600     ///
601     /// Registers an event to be signaled whenever a certain address is written to.
602     ///
603     fn register_ioevent(
604         &self,
605         fd: &EventFd,
606         addr: &IoEventAddress,
607         datamatch: Option<vm::DataMatch>,
608     ) -> vm::Result<()> {
609         let addr = &kvm_ioctls::IoEventAddress::from(*addr);
610         if let Some(dm) = datamatch {
611             match dm {
612                 vm::DataMatch::DataMatch32(kvm_dm32) => self
613                     .fd
614                     .register_ioevent(fd, addr, kvm_dm32)
615                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
616                 vm::DataMatch::DataMatch64(kvm_dm64) => self
617                     .fd
618                     .register_ioevent(fd, addr, kvm_dm64)
619                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
620             }
621         } else {
622             self.fd
623                 .register_ioevent(fd, addr, NoDatamatch)
624                 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
625         }
626     }
627 
628     ///
629     /// Unregisters an event from a certain address it has been previously registered to.
630     ///
631     fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
632         let addr = &kvm_ioctls::IoEventAddress::from(*addr);
633         self.fd
634             .unregister_ioevent(fd, addr, NoDatamatch)
635             .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
636     }
637 
638     ///
639     /// Constructs a routing entry
640     ///
641     fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
642         match &config {
643             InterruptSourceConfig::MsiIrq(cfg) => {
644                 let mut kvm_route = kvm_irq_routing_entry {
645                     gsi,
646                     type_: KVM_IRQ_ROUTING_MSI,
647                     ..Default::default()
648                 };
649 
650                 kvm_route.u.msi.address_lo = cfg.low_addr;
651                 kvm_route.u.msi.address_hi = cfg.high_addr;
652                 kvm_route.u.msi.data = cfg.data;
653 
654                 if self.check_extension(crate::kvm::Cap::MsiDevid) {
655                     // On AArch64, there is limitation on the range of the 'devid',
656                     // it cannot be greater than 65536 (the max of u16).
657                     //
658                     // BDF cannot be used directly, because 'segment' is in high
659                     // 16 bits. The layout of the u32 BDF is:
660                     // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --|
661                     // |      segment    |     bus    |   device   |  function  |
662                     //
663                     // Now that we support 1 bus only in a segment, we can build a
664                     // 'devid' by replacing the 'bus' bits with the low 8 bits of
665                     // 'segment' data.
666                     // This way we can resolve the range checking problem and give
667                     // different `devid` to all the devices. Limitation is that at
668                     // most 256 segments can be supported.
669                     //
670                     let modified_devid = ((cfg.devid & 0x00ff_0000) >> 8) | cfg.devid & 0xff;
671 
672                     kvm_route.flags = KVM_MSI_VALID_DEVID;
673                     kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid;
674                 }
675                 kvm_route.into()
676             }
677             InterruptSourceConfig::LegacyIrq(cfg) => {
678                 let mut kvm_route = kvm_irq_routing_entry {
679                     gsi,
680                     type_: KVM_IRQ_ROUTING_IRQCHIP,
681                     ..Default::default()
682                 };
683                 kvm_route.u.irqchip.irqchip = cfg.irqchip;
684                 kvm_route.u.irqchip.pin = cfg.pin;
685 
686                 kvm_route.into()
687             }
688         }
689     }
690 
    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
    ///
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        // kvm_irq_routing ends in a flexible array member, so allocate a
        // buffer sized for the header plus `entries.len()` routing entries.
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
        irq_routing[0].nr = entries.len() as u32;
        irq_routing[0].flags = 0;
        // Unwrap the hypervisor-agnostic entries into raw KVM entries.
        let entries: Vec<kvm_irq_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Kvm(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: irq_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [kvm_irq_routing_entry] =
                irq_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_gsi_routing(&irq_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
722 
723     ///
724     /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
725     ///
726     fn make_user_memory_region(
727         &self,
728         slot: u32,
729         guest_phys_addr: u64,
730         memory_size: u64,
731         userspace_addr: u64,
732         readonly: bool,
733         log_dirty_pages: bool,
734     ) -> UserMemoryRegion {
735         kvm_userspace_memory_region {
736             slot,
737             guest_phys_addr,
738             memory_size,
739             userspace_addr,
740             flags: if readonly { KVM_MEM_READONLY } else { 0 }
741                 | if log_dirty_pages {
742                     KVM_MEM_LOG_DIRTY_PAGES
743                 } else {
744                     0
745                 },
746         }
747         .into()
748     }
749 
750     ///
751     /// Creates a guest physical memory region.
752     ///
753     fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
754         let mut region: kvm_userspace_memory_region = user_memory_region.into();
755 
756         if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 {
757             if (region.flags & KVM_MEM_READONLY) != 0 {
758                 return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!(
759                     "Error creating regions with both 'dirty-pages-log' and 'read-only'."
760                 )));
761             }
762 
763             // Keep track of the regions that need dirty pages log
764             self.dirty_log_slots.write().unwrap().insert(
765                 region.slot,
766                 KvmDirtyLogSlot {
767                     slot: region.slot,
768                     guest_phys_addr: region.guest_phys_addr,
769                     memory_size: region.memory_size,
770                     userspace_addr: region.userspace_addr,
771                 },
772             );
773 
774             // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`.
775             // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`.
776             region.flags = 0;
777         }
778 
779         // SAFETY: Safe because guest regions are guaranteed not to overlap.
780         unsafe {
781             self.fd
782                 .set_user_memory_region(region)
783                 .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
784         }
785     }
786 
787     ///
788     /// Removes a guest physical memory region.
789     ///
790     fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
791         let mut region: kvm_userspace_memory_region = user_memory_region.into();
792 
793         // Remove the corresponding entry from "self.dirty_log_slots" if needed
794         self.dirty_log_slots.write().unwrap().remove(&region.slot);
795 
796         // Setting the size to 0 means "remove"
797         region.memory_size = 0;
798         // SAFETY: Safe because guest regions are guaranteed not to overlap.
799         unsafe {
800             self.fd
801                 .set_user_memory_region(region)
802                 .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))
803         }
804     }
805 
806     ///
807     /// Returns the preferred CPU target type which can be emulated by KVM on underlying host.
808     ///
809     #[cfg(target_arch = "aarch64")]
810     fn get_preferred_target(&self, kvi: &mut crate::VcpuInit) -> vm::Result<()> {
811         let mut kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();
812         self.fd
813             .get_preferred_target(&mut kvm_kvi)
814             .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))?;
815         *kvi = kvm_kvi.into();
816         Ok(())
817     }
818 
819     #[cfg(target_arch = "x86_64")]
820     fn enable_split_irq(&self) -> vm::Result<()> {
821         // Create split irqchip
822         // Only the local APIC is emulated in kernel, both PICs and IOAPIC
823         // are not.
824         let mut cap = kvm_enable_cap {
825             cap: KVM_CAP_SPLIT_IRQCHIP,
826             ..Default::default()
827         };
828         cap.args[0] = NUM_IOAPIC_PINS as u64;
829         self.fd
830             .enable_cap(&cap)
831             .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
832         Ok(())
833     }
834 
835     #[cfg(target_arch = "x86_64")]
836     fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> {
837         let mut cap = kvm_enable_cap {
838             cap: KVM_CAP_SGX_ATTRIBUTE,
839             ..Default::default()
840         };
841         cap.args[0] = file.as_raw_fd() as u64;
842         self.fd
843             .enable_cap(&cap)
844             .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?;
845         Ok(())
846     }
847 
848     /// Retrieve guest clock.
849     #[cfg(target_arch = "x86_64")]
850     fn get_clock(&self) -> vm::Result<ClockData> {
851         Ok(self
852             .fd
853             .get_clock()
854             .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?
855             .into())
856     }
857 
858     /// Set guest clock.
859     #[cfg(target_arch = "x86_64")]
860     fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
861         let data = (*data).into();
862         self.fd
863             .set_clock(&data)
864             .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
865     }
866 
867     /// Create a device that is used for passthrough
868     fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
869         let mut vfio_dev = kvm_create_device {
870             type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
871             fd: 0,
872             flags: 0,
873         };
874 
875         self.create_device(&mut vfio_dev)
876             .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
877     }
878 
879     ///
880     /// Start logging dirty pages
881     ///
882     fn start_dirty_log(&self) -> vm::Result<()> {
883         let dirty_log_slots = self.dirty_log_slots.read().unwrap();
884         for (_, s) in dirty_log_slots.iter() {
885             let region = kvm_userspace_memory_region {
886                 slot: s.slot,
887                 guest_phys_addr: s.guest_phys_addr,
888                 memory_size: s.memory_size,
889                 userspace_addr: s.userspace_addr,
890                 flags: KVM_MEM_LOG_DIRTY_PAGES,
891             };
892             // SAFETY: Safe because guest regions are guaranteed not to overlap.
893             unsafe {
894                 self.fd
895                     .set_user_memory_region(region)
896                     .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
897             }
898         }
899 
900         Ok(())
901     }
902 
903     ///
904     /// Stop logging dirty pages
905     ///
906     fn stop_dirty_log(&self) -> vm::Result<()> {
907         let dirty_log_slots = self.dirty_log_slots.read().unwrap();
908         for (_, s) in dirty_log_slots.iter() {
909             let region = kvm_userspace_memory_region {
910                 slot: s.slot,
911                 guest_phys_addr: s.guest_phys_addr,
912                 memory_size: s.memory_size,
913                 userspace_addr: s.userspace_addr,
914                 flags: 0,
915             };
916             // SAFETY: Safe because guest regions are guaranteed not to overlap.
917             unsafe {
918                 self.fd
919                     .set_user_memory_region(region)
920                     .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
921             }
922         }
923 
924         Ok(())
925     }
926 
927     ///
928     /// Get dirty pages bitmap (one bit per page)
929     ///
930     fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
931         self.fd
932             .get_dirty_log(slot, memory_size as usize)
933             .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
934     }
935 
936     ///
937     /// Initialize TDX for this VM
938     ///
939     #[cfg(feature = "tdx")]
940     fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> {
941         const TDX_ATTR_SEPT_VE_DISABLE: usize = 28;
942 
943         let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
944             cpuid.iter().map(|e| (*e).into()).collect();
945         cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default());
946 
947         #[repr(C)]
948         struct TdxInitVm {
949             attributes: u64,
950             max_vcpus: u32,
951             padding: u32,
952             mrconfigid: [u64; 6],
953             mrowner: [u64; 6],
954             mrownerconfig: [u64; 6],
955             cpuid_nent: u32,
956             cpuid_padding: u32,
957             cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256],
958         }
959         let data = TdxInitVm {
960             attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE,
961             max_vcpus,
962             padding: 0,
963             mrconfigid: [0; 6],
964             mrowner: [0; 6],
965             mrownerconfig: [0; 6],
966             cpuid_nent: cpuid.len() as u32,
967             cpuid_padding: 0,
968             cpuid_entries: cpuid.as_slice().try_into().unwrap(),
969         };
970 
971         tdx_command(
972             &self.fd.as_raw_fd(),
973             TdxCommand::InitVm,
974             0,
975             &data as *const _ as u64,
976         )
977         .map_err(vm::HypervisorVmError::InitializeTdx)
978     }
979 
980     ///
981     /// Finalize the TDX setup for this VM
982     ///
983     #[cfg(feature = "tdx")]
984     fn tdx_finalize(&self) -> vm::Result<()> {
985         tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
986             .map_err(vm::HypervisorVmError::FinalizeTdx)
987     }
988 
989     ///
990     /// Initialize memory regions for the TDX VM
991     ///
992     #[cfg(feature = "tdx")]
993     fn tdx_init_memory_region(
994         &self,
995         host_address: u64,
996         guest_address: u64,
997         size: u64,
998         measure: bool,
999     ) -> vm::Result<()> {
1000         #[repr(C)]
1001         struct TdxInitMemRegion {
1002             host_address: u64,
1003             guest_address: u64,
1004             pages: u64,
1005         }
1006         let data = TdxInitMemRegion {
1007             host_address,
1008             guest_address,
1009             pages: size / 4096,
1010         };
1011 
1012         tdx_command(
1013             &self.fd.as_raw_fd(),
1014             TdxCommand::InitMemRegion,
1015             u32::from(measure),
1016             &data as *const _ as u64,
1017         )
1018         .map_err(vm::HypervisorVmError::InitMemRegionTdx)
1019     }
1020 
    /// Downcast to the underlying KvmVm type
    fn as_any(&self) -> &dyn Any {
        // Lets holders of a `dyn Vm` recover the concrete KvmVm via downcast.
        self
    }
1025 }
1026 
/// Issue a TDX command through the KVM_MEMORY_ENCRYPT_OP ioctl on `fd`.
///
/// `flags` and `data` are command-specific; `data` is either an immediate
/// value or a pointer to a command payload cast to `u64` (the caller must
/// keep any pointed-to payload alive across this call).
///
/// # Errors
/// Returns the last OS error when the ioctl returns a negative value.
#[cfg(feature = "tdx")]
fn tdx_command(
    fd: &RawFd,
    command: TdxCommand,
    flags: u32,
    data: u64,
) -> std::result::Result<(), std::io::Error> {
    // In-memory layout of the command block handed to the kernel.
    // NOTE(review): assumes `TdxCommand` has a 32-bit representation matching
    // the kernel's command field — confirm at the enum's definition.
    #[repr(C)]
    struct TdxIoctlCmd {
        command: TdxCommand,
        flags: u32,
        data: u64,
        error: u64,
        unused: u64,
    }
    let cmd = TdxIoctlCmd {
        command,
        flags,
        data,
        error: 0,
        unused: 0,
    };
    // SAFETY: FFI call. All input parameters are valid.
    let ret = unsafe {
        ioctl_with_val(
            fd,
            KVM_MEMORY_ENCRYPT_OP(),
            &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong,
        )
    };

    if ret < 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}
1063 
/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    // Handle to the KVM subsystem (/dev/kvm), used for system-scoped ioctls
    // such as VM creation and capability queries.
    kvm: Kvm,
}
1068 
1069 impl KvmHypervisor {
1070     #[cfg(target_arch = "x86_64")]
1071     ///
1072     /// Retrieve the list of MSRs supported by the hypervisor.
1073     ///
1074     fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
1075         self.kvm
1076             .get_msr_index_list()
1077             .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
1078     }
1079 }
1080 
/// Enum for KVM related error
#[derive(Debug, Error)]
pub enum KvmError {
    /// A required KVM capability/extension is not supported by the host.
    #[error("Capability missing: {0:?}")]
    CapabilityMissing(Cap),
}

/// Shorthand result type for KVM-specific operations.
pub type KvmResult<T> = result::Result<T, KvmError>;
1089 
1090 impl KvmHypervisor {
1091     /// Create a hypervisor based on Kvm
1092     #[allow(clippy::new_ret_no_self)]
1093     pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
1094         let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
1095         let api_version = kvm_obj.get_api_version();
1096 
1097         if api_version != kvm_bindings::KVM_API_VERSION as i32 {
1098             return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
1099         }
1100 
1101         Ok(Arc::new(KvmHypervisor { kvm: kvm_obj }))
1102     }
1103 
1104     /// Check if the hypervisor is available
1105     pub fn is_available() -> hypervisor::Result<bool> {
1106         match std::fs::metadata("/dev/kvm") {
1107             Ok(_) => Ok(true),
1108             Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
1109             Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
1110                 err.into(),
1111             )),
1112         }
1113     }
1114 }
1115 
1116 /// Implementation of Hypervisor trait for KVM
1117 ///
1118 /// # Examples
1119 ///
1120 /// ```
1121 /// # use hypervisor::kvm::KvmHypervisor;
1122 /// # use std::sync::Arc;
1123 /// let kvm = KvmHypervisor::new().unwrap();
1124 /// let hypervisor = Arc::new(kvm);
1125 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
1126 /// ```
impl hypervisor::Hypervisor for KvmHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Kvm
    }

    ///
    /// Create a Vm of a specific type using the underlying hypervisor, passing memory size
    /// Return a hypervisor-agnostic Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type_and_memory(0).unwrap();
    /// ```
    fn create_vm_with_type_and_memory(
        &self,
        vm_type: u64,
        // The memory size is only relevant for SEV-SNP; ignored for plain KVM.
        #[cfg(feature = "sev_snp")] _mem_size: u64,
    ) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        self.create_vm_with_type(vm_type)
    }

    /// Create a KVM vm object of a specific VM type and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type(0).unwrap();
    /// ```
    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        // Retry KVM_CREATE_VM until it either succeeds or fails with a
        // non-EINTR error.
        loop {
            match self.kvm.create_vm_with_type(vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            // Pre-build an MsrEntry list seeded with every MSR index the host
            // supports; it is stored on the VM for later use (KvmVm::msrs).
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs: Vec<MsrEntry> = vec![
                MsrEntry {
                    ..Default::default()
                };
                num_msrs
            ];
            let indices = msr_list.as_slice();
            for (pos, index) in indices.iter().enumerate() {
                msrs[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }

        #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }

    /// Create a KVM vm object and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// ```
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        #[allow(unused_mut)]
        let mut vm_type: u64 = 0; // Create with default platform type

        // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA
        // size from the host and use that when creating the VM, which may
        // avoid unnecessary VM creation failures.
        #[cfg(target_arch = "aarch64")]
        if self.kvm.check_extension(Cap::ArmVmIPASize) {
            vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap();
        }

        self.create_vm_with_type(vm_type)
    }

    /// Verify that all KVM extensions this VMM depends on are present.
    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        check_required_kvm_extensions(&self.kvm)
            .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?;

        // Convert each raw kvm_cpuid_entry2 into the hypervisor-agnostic type.
        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve AArch64 host maximum IPA size supported by KVM.
    ///
    fn get_host_ipa_limit(&self) -> i32 {
        self.kvm.get_host_ipa_limit()
    }

    ///
    /// Retrieve TDX capabilities
    ///
    #[cfg(feature = "tdx")]
    fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> {
        // The kernel fills `data` in place through the raw pointer passed to
        // the Capabilities command; only the CPUID config capacity is preset.
        let data = TdxCapabilities {
            nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32,
            ..Default::default()
        };

        tdx_command(
            &self.kvm.as_raw_fd(),
            TdxCommand::Capabilities,
            0,
            &data as *const _ as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?;

        Ok(data)
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    ///
    /// Get the number of supported hardware breakpoints
    ///
    fn get_guest_debug_hw_bps(&self) -> usize {
        #[cfg(target_arch = "x86_64")]
        {
            // x86 provides 4 hardware debug registers (DR0-DR3).
            4
        }
        #[cfg(target_arch = "aarch64")]
        {
            // On AArch64 the count is queried from KVM.
            self.kvm.get_guest_debug_hw_bps() as usize
        }
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        // Clamp to the u32 range before narrowing the usize.
        self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32
    }
}
1311 
/// Vcpu struct for KVM
pub struct KvmVcpu {
    // Lock-protected, shared handle to the KVM vCPU file descriptor.
    fd: Arc<Mutex<VcpuFd>>,
    #[cfg(target_arch = "x86_64")]
    // MSR entry buffer — presumably seeded with the host-supported MSR
    // indices like `KvmVm::msrs`; confirm at the vCPU construction site.
    msrs: Vec<MsrEntry>,
    // Optional callbacks into the VMM (see `vm::VmOps`).
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    #[cfg(target_arch = "x86_64")]
    // Flag tracking Hyper-V SynIC state for this vCPU — set where the
    // corresponding capability is enabled (outside this chunk).
    hyperv_synic: AtomicBool,
}
1321 
1322 /// Implementation of Vcpu trait for KVM
1323 ///
1324 /// # Examples
1325 ///
1326 /// ```
1327 /// # use hypervisor::kvm::KvmHypervisor;
1328 /// # use std::sync::Arc;
1329 /// let kvm = KvmHypervisor::new().unwrap();
1330 /// let hypervisor = Arc::new(kvm);
1331 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
1332 /// let vcpu = vm.create_vcpu(0, None).unwrap();
1333 /// ```
1334 impl cpu::Vcpu for KvmVcpu {
1335     ///
1336     /// Returns StandardRegisters with default value set
1337     ///
1338     fn create_standard_regs(&self) -> StandardRegisters {
1339         #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
1340         {
1341             kvm_bindings::kvm_regs::default().into()
1342         }
1343         #[cfg(target_arch = "riscv64")]
1344         {
1345             kvm_bindings::kvm_riscv_core::default().into()
1346         }
1347     }
1348     #[cfg(target_arch = "x86_64")]
1349     ///
1350     /// Returns the vCPU general purpose registers.
1351     ///
1352     fn get_regs(&self) -> cpu::Result<StandardRegisters> {
1353         Ok(self
1354             .fd
1355             .lock()
1356             .unwrap()
1357             .get_regs()
1358             .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
1359             .into())
1360     }
1361 
1362     ///
1363     /// Returns the vCPU general purpose registers.
1364     /// The `KVM_GET_REGS` ioctl is not available on AArch64, `KVM_GET_ONE_REG`
1365     /// is used to get registers one by one.
1366     ///
1367     #[cfg(target_arch = "aarch64")]
1368     fn get_regs(&self) -> cpu::Result<StandardRegisters> {
1369         let mut state = kvm_regs::default();
1370         let mut off = offset_of!(user_pt_regs, regs);
1371         // There are 31 user_pt_regs:
1372         // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
1373         // These actually are the general-purpose registers of the Armv8-a
1374         // architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register).
1375         for i in 0..31 {
1376             let mut bytes = [0_u8; 8];
1377             self.fd
1378                 .lock()
1379                 .unwrap()
1380                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1381                 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1382             state.regs.regs[i] = u64::from_le_bytes(bytes);
1383             off += std::mem::size_of::<u64>();
1384         }
1385 
1386         // We are now entering the "Other register" section of the ARMv8-a architecture.
1387         // First one, stack pointer.
1388         let off = offset_of!(user_pt_regs, sp);
1389         let mut bytes = [0_u8; 8];
1390         self.fd
1391             .lock()
1392             .unwrap()
1393             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1394             .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1395         state.regs.sp = u64::from_le_bytes(bytes);
1396 
1397         // Second one, the program counter.
1398         let off = offset_of!(user_pt_regs, pc);
1399         let mut bytes = [0_u8; 8];
1400         self.fd
1401             .lock()
1402             .unwrap()
1403             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1404             .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1405         state.regs.pc = u64::from_le_bytes(bytes);
1406 
1407         // Next is the processor state.
1408         let off = offset_of!(user_pt_regs, pstate);
1409         let mut bytes = [0_u8; 8];
1410         self.fd
1411             .lock()
1412             .unwrap()
1413             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1414             .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1415         state.regs.pstate = u64::from_le_bytes(bytes);
1416 
1417         // The stack pointer associated with EL1
1418         let off = offset_of!(kvm_regs, sp_el1);
1419         let mut bytes = [0_u8; 8];
1420         self.fd
1421             .lock()
1422             .unwrap()
1423             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1424             .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1425         state.sp_el1 = u64::from_le_bytes(bytes);
1426 
1427         // Exception Link Register for EL1, when taking an exception to EL1, this register
1428         // holds the address to which to return afterwards.
1429         let off = offset_of!(kvm_regs, elr_el1);
1430         let mut bytes = [0_u8; 8];
1431         self.fd
1432             .lock()
1433             .unwrap()
1434             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1435             .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1436         state.elr_el1 = u64::from_le_bytes(bytes);
1437 
1438         // Saved Program Status Registers, there are 5 of them used in the kernel.
1439         let mut off = offset_of!(kvm_regs, spsr);
1440         for i in 0..KVM_NR_SPSR as usize {
1441             let mut bytes = [0_u8; 8];
1442             self.fd
1443                 .lock()
1444                 .unwrap()
1445                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1446                 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1447             state.spsr[i] = u64::from_le_bytes(bytes);
1448             off += std::mem::size_of::<u64>();
1449         }
1450 
1451         // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel:
1452         // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
1453         let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
1454         for i in 0..32 {
1455             let mut bytes = [0_u8; 16];
1456             self.fd
1457                 .lock()
1458                 .unwrap()
1459                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes)
1460                 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1461             state.fp_regs.vregs[i] = u128::from_le_bytes(bytes);
1462             off += mem::size_of::<u128>();
1463         }
1464 
1465         // Floating-point Status Register
1466         let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
1467         let mut bytes = [0_u8; 4];
1468         self.fd
1469             .lock()
1470             .unwrap()
1471             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
1472             .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1473         state.fp_regs.fpsr = u32::from_le_bytes(bytes);
1474 
1475         // Floating-point Control Register
1476         let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
1477         let mut bytes = [0_u8; 4];
1478         self.fd
1479             .lock()
1480             .unwrap()
1481             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
1482             .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
1483         state.fp_regs.fpcr = u32::from_le_bytes(bytes);
1484         Ok(state.into())
1485     }
1486 
1487     #[cfg(target_arch = "riscv64")]
1488     ///
1489     /// Returns the RISC-V vCPU core registers.
1490     /// The `KVM_GET_REGS` ioctl is not available on RISC-V 64-bit,
1491     /// `KVM_GET_ONE_REG` is used to get registers one by one.
1492     ///
1493     fn get_regs(&self) -> cpu::Result<StandardRegisters> {
1494         let mut state = kvm_riscv_core::default();
1495 
1496         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, pc);
1497         let mut bytes = [0_u8; 8];
1498         self.fd
1499             .lock()
1500             .unwrap()
1501             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1502             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1503         state.regs.pc = u64::from_le_bytes(bytes);
1504 
1505         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, ra);
1506         let mut bytes = [0_u8; 8];
1507         self.fd
1508             .lock()
1509             .unwrap()
1510             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1511             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1512         state.regs.ra = u64::from_le_bytes(bytes);
1513 
1514         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, sp);
1515         let mut bytes = [0_u8; 8];
1516         self.fd
1517             .lock()
1518             .unwrap()
1519             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1520             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1521         state.regs.sp = u64::from_le_bytes(bytes);
1522 
1523         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, gp);
1524         let mut bytes = [0_u8; 8];
1525         self.fd
1526             .lock()
1527             .unwrap()
1528             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1529             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1530         state.regs.gp = u64::from_le_bytes(bytes);
1531 
1532         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, tp);
1533         let mut bytes = [0_u8; 8];
1534         self.fd
1535             .lock()
1536             .unwrap()
1537             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1538             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1539         state.regs.tp = u64::from_le_bytes(bytes);
1540 
1541         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t0);
1542         let mut bytes = [0_u8; 8];
1543         self.fd
1544             .lock()
1545             .unwrap()
1546             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1547             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1548         state.regs.t0 = u64::from_le_bytes(bytes);
1549 
1550         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t1);
1551         let mut bytes = [0_u8; 8];
1552         self.fd
1553             .lock()
1554             .unwrap()
1555             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1556             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1557         state.regs.t1 = u64::from_le_bytes(bytes);
1558 
1559         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t2);
1560         let mut bytes = [0_u8; 8];
1561         self.fd
1562             .lock()
1563             .unwrap()
1564             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1565             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1566         state.regs.t2 = u64::from_le_bytes(bytes);
1567 
1568         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s0);
1569         let mut bytes = [0_u8; 8];
1570         self.fd
1571             .lock()
1572             .unwrap()
1573             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1574             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1575         state.regs.s0 = u64::from_le_bytes(bytes);
1576 
1577         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s1);
1578         let mut bytes = [0_u8; 8];
1579         self.fd
1580             .lock()
1581             .unwrap()
1582             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1583             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1584         state.regs.s1 = u64::from_le_bytes(bytes);
1585 
1586         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a0);
1587         let mut bytes = [0_u8; 8];
1588         self.fd
1589             .lock()
1590             .unwrap()
1591             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1592             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1593         state.regs.a0 = u64::from_le_bytes(bytes);
1594 
1595         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a1);
1596         let mut bytes = [0_u8; 8];
1597         self.fd
1598             .lock()
1599             .unwrap()
1600             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1601             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1602         state.regs.a1 = u64::from_le_bytes(bytes);
1603 
1604         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a2);
1605         let mut bytes = [0_u8; 8];
1606         self.fd
1607             .lock()
1608             .unwrap()
1609             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1610             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1611         state.regs.a2 = u64::from_le_bytes(bytes);
1612 
1613         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a3);
1614         let mut bytes = [0_u8; 8];
1615         self.fd
1616             .lock()
1617             .unwrap()
1618             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1619             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1620         state.regs.a3 = u64::from_le_bytes(bytes);
1621 
1622         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a4);
1623         let mut bytes = [0_u8; 8];
1624         self.fd
1625             .lock()
1626             .unwrap()
1627             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1628             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1629         state.regs.a4 = u64::from_le_bytes(bytes);
1630 
1631         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a5);
1632         let mut bytes = [0_u8; 8];
1633         self.fd
1634             .lock()
1635             .unwrap()
1636             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1637             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1638         state.regs.a5 = u64::from_le_bytes(bytes);
1639 
1640         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a6);
1641         let mut bytes = [0_u8; 8];
1642         self.fd
1643             .lock()
1644             .unwrap()
1645             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1646             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1647         state.regs.a6 = u64::from_le_bytes(bytes);
1648 
1649         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a7);
1650         let mut bytes = [0_u8; 8];
1651         self.fd
1652             .lock()
1653             .unwrap()
1654             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1655             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1656         state.regs.a7 = u64::from_le_bytes(bytes);
1657 
1658         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s2);
1659         let mut bytes = [0_u8; 8];
1660         self.fd
1661             .lock()
1662             .unwrap()
1663             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1664             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1665         state.regs.s2 = u64::from_le_bytes(bytes);
1666 
1667         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s3);
1668         let mut bytes = [0_u8; 8];
1669         self.fd
1670             .lock()
1671             .unwrap()
1672             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1673             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1674         state.regs.s3 = u64::from_le_bytes(bytes);
1675 
1676         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s4);
1677         let mut bytes = [0_u8; 8];
1678         self.fd
1679             .lock()
1680             .unwrap()
1681             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1682             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1683         state.regs.s4 = u64::from_le_bytes(bytes);
1684 
1685         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s5);
1686         let mut bytes = [0_u8; 8];
1687         self.fd
1688             .lock()
1689             .unwrap()
1690             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1691             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1692         state.regs.s5 = u64::from_le_bytes(bytes);
1693 
1694         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s6);
1695         let mut bytes = [0_u8; 8];
1696         self.fd
1697             .lock()
1698             .unwrap()
1699             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1700             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1701         state.regs.s6 = u64::from_le_bytes(bytes);
1702 
1703         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s7);
1704         let mut bytes = [0_u8; 8];
1705         self.fd
1706             .lock()
1707             .unwrap()
1708             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1709             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1710         state.regs.s7 = u64::from_le_bytes(bytes);
1711 
1712         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s8);
1713         let mut bytes = [0_u8; 8];
1714         self.fd
1715             .lock()
1716             .unwrap()
1717             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1718             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1719         state.regs.s8 = u64::from_le_bytes(bytes);
1720 
1721         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s9);
1722         let mut bytes = [0_u8; 8];
1723         self.fd
1724             .lock()
1725             .unwrap()
1726             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1727             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1728         state.regs.s9 = u64::from_le_bytes(bytes);
1729 
1730         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s10);
1731         let mut bytes = [0_u8; 8];
1732         self.fd
1733             .lock()
1734             .unwrap()
1735             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1736             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1737         state.regs.s10 = u64::from_le_bytes(bytes);
1738 
1739         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s11);
1740         let mut bytes = [0_u8; 8];
1741         self.fd
1742             .lock()
1743             .unwrap()
1744             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1745             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1746         state.regs.s11 = u64::from_le_bytes(bytes);
1747 
1748         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t3);
1749         let mut bytes = [0_u8; 8];
1750         self.fd
1751             .lock()
1752             .unwrap()
1753             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1754             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1755         state.regs.t3 = u64::from_le_bytes(bytes);
1756 
1757         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t4);
1758         let mut bytes = [0_u8; 8];
1759         self.fd
1760             .lock()
1761             .unwrap()
1762             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1763             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1764         state.regs.t4 = u64::from_le_bytes(bytes);
1765 
1766         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t5);
1767         let mut bytes = [0_u8; 8];
1768         self.fd
1769             .lock()
1770             .unwrap()
1771             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1772             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1773         state.regs.t5 = u64::from_le_bytes(bytes);
1774 
1775         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t6);
1776         let mut bytes = [0_u8; 8];
1777         self.fd
1778             .lock()
1779             .unwrap()
1780             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1781             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1782         state.regs.t6 = u64::from_le_bytes(bytes);
1783 
1784         let off = offset_of!(kvm_riscv_core, mode);
1785         let mut bytes = [0_u8; 8];
1786         self.fd
1787             .lock()
1788             .unwrap()
1789             .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
1790             .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
1791         state.mode = u64::from_le_bytes(bytes);
1792 
1793         Ok(state.into())
1794     }
1795 
1796     #[cfg(target_arch = "x86_64")]
1797     ///
1798     /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
1799     ///
1800     fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
1801         let regs = (*regs).into();
1802         self.fd
1803             .lock()
1804             .unwrap()
1805             .set_regs(&regs)
1806             .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
1807     }
1808 
1809     ///
1810     /// Sets the vCPU general purpose registers.
1811     /// The `KVM_SET_REGS` ioctl is not available on AArch64, `KVM_SET_ONE_REG`
1812     /// is used to set registers one by one.
1813     ///
1814     #[cfg(target_arch = "aarch64")]
1815     fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
1816         // The function follows the exact identical order from `state`. Look there
1817         // for some additional info on registers.
1818         let kvm_regs_state: kvm_regs = (*state).into();
1819         let mut off = offset_of!(user_pt_regs, regs);
1820         for i in 0..31 {
1821             self.fd
1822                 .lock()
1823                 .unwrap()
1824                 .set_one_reg(
1825                     arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1826                     &kvm_regs_state.regs.regs[i].to_le_bytes(),
1827                 )
1828                 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1829             off += std::mem::size_of::<u64>();
1830         }
1831 
1832         let off = offset_of!(user_pt_regs, sp);
1833         self.fd
1834             .lock()
1835             .unwrap()
1836             .set_one_reg(
1837                 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1838                 &kvm_regs_state.regs.sp.to_le_bytes(),
1839             )
1840             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1841 
1842         let off = offset_of!(user_pt_regs, pc);
1843         self.fd
1844             .lock()
1845             .unwrap()
1846             .set_one_reg(
1847                 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1848                 &kvm_regs_state.regs.pc.to_le_bytes(),
1849             )
1850             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1851 
1852         let off = offset_of!(user_pt_regs, pstate);
1853         self.fd
1854             .lock()
1855             .unwrap()
1856             .set_one_reg(
1857                 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1858                 &kvm_regs_state.regs.pstate.to_le_bytes(),
1859             )
1860             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1861 
1862         let off = offset_of!(kvm_regs, sp_el1);
1863         self.fd
1864             .lock()
1865             .unwrap()
1866             .set_one_reg(
1867                 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1868                 &kvm_regs_state.sp_el1.to_le_bytes(),
1869             )
1870             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1871 
1872         let off = offset_of!(kvm_regs, elr_el1);
1873         self.fd
1874             .lock()
1875             .unwrap()
1876             .set_one_reg(
1877                 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1878                 &kvm_regs_state.elr_el1.to_le_bytes(),
1879             )
1880             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1881 
1882         let mut off = offset_of!(kvm_regs, spsr);
1883         for i in 0..KVM_NR_SPSR as usize {
1884             self.fd
1885                 .lock()
1886                 .unwrap()
1887                 .set_one_reg(
1888                     arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1889                     &kvm_regs_state.spsr[i].to_le_bytes(),
1890                 )
1891                 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1892             off += std::mem::size_of::<u64>();
1893         }
1894 
1895         let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
1896         for i in 0..32 {
1897             self.fd
1898                 .lock()
1899                 .unwrap()
1900                 .set_one_reg(
1901                     arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
1902                     &kvm_regs_state.fp_regs.vregs[i].to_le_bytes(),
1903                 )
1904                 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1905             off += mem::size_of::<u128>();
1906         }
1907 
1908         let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
1909         self.fd
1910             .lock()
1911             .unwrap()
1912             .set_one_reg(
1913                 arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
1914                 &kvm_regs_state.fp_regs.fpsr.to_le_bytes(),
1915             )
1916             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1917 
1918         let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
1919         self.fd
1920             .lock()
1921             .unwrap()
1922             .set_one_reg(
1923                 arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
1924                 &kvm_regs_state.fp_regs.fpcr.to_le_bytes(),
1925             )
1926             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
1927         Ok(())
1928     }
1929 
1930     #[cfg(target_arch = "riscv64")]
1931     ///
1932     /// Sets the RISC-V vCPU core registers.
1933     /// The `KVM_SET_REGS` ioctl is not available on RISC-V 64-bit,
1934     /// `KVM_SET_ONE_REG` is used to set registers one by one.
1935     ///
1936     fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
1937         // The function follows the exact identical order from `state`. Look there
1938         // for some additional info on registers.
1939         let kvm_regs_state: kvm_riscv_core = (*state).into();
1940 
1941         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, pc);
1942         self.fd
1943             .lock()
1944             .unwrap()
1945             .set_one_reg(
1946                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
1947                 &kvm_regs_state.regs.pc.to_le_bytes(),
1948             )
1949             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
1950 
1951         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, ra);
1952         self.fd
1953             .lock()
1954             .unwrap()
1955             .set_one_reg(
1956                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
1957                 &kvm_regs_state.regs.ra.to_le_bytes(),
1958             )
1959             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
1960 
1961         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, sp);
1962         self.fd
1963             .lock()
1964             .unwrap()
1965             .set_one_reg(
1966                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
1967                 &kvm_regs_state.regs.sp.to_le_bytes(),
1968             )
1969             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
1970 
1971         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, gp);
1972         self.fd
1973             .lock()
1974             .unwrap()
1975             .set_one_reg(
1976                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
1977                 &kvm_regs_state.regs.gp.to_le_bytes(),
1978             )
1979             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
1980 
1981         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, tp);
1982         self.fd
1983             .lock()
1984             .unwrap()
1985             .set_one_reg(
1986                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
1987                 &kvm_regs_state.regs.tp.to_le_bytes(),
1988             )
1989             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
1990 
1991         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t0);
1992         self.fd
1993             .lock()
1994             .unwrap()
1995             .set_one_reg(
1996                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
1997                 &kvm_regs_state.regs.t0.to_le_bytes(),
1998             )
1999             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2000 
2001         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t1);
2002         self.fd
2003             .lock()
2004             .unwrap()
2005             .set_one_reg(
2006                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2007                 &kvm_regs_state.regs.t1.to_le_bytes(),
2008             )
2009             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2010 
2011         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t2);
2012         self.fd
2013             .lock()
2014             .unwrap()
2015             .set_one_reg(
2016                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2017                 &kvm_regs_state.regs.t2.to_le_bytes(),
2018             )
2019             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2020 
2021         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s0);
2022         self.fd
2023             .lock()
2024             .unwrap()
2025             .set_one_reg(
2026                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2027                 &kvm_regs_state.regs.s0.to_le_bytes(),
2028             )
2029             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2030 
2031         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s1);
2032         self.fd
2033             .lock()
2034             .unwrap()
2035             .set_one_reg(
2036                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2037                 &kvm_regs_state.regs.s1.to_le_bytes(),
2038             )
2039             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2040 
2041         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a0);
2042         self.fd
2043             .lock()
2044             .unwrap()
2045             .set_one_reg(
2046                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2047                 &kvm_regs_state.regs.a0.to_le_bytes(),
2048             )
2049             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2050 
2051         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a1);
2052         self.fd
2053             .lock()
2054             .unwrap()
2055             .set_one_reg(
2056                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2057                 &kvm_regs_state.regs.a1.to_le_bytes(),
2058             )
2059             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2060 
2061         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a2);
2062         self.fd
2063             .lock()
2064             .unwrap()
2065             .set_one_reg(
2066                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2067                 &kvm_regs_state.regs.a2.to_le_bytes(),
2068             )
2069             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2070 
2071         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a3);
2072         self.fd
2073             .lock()
2074             .unwrap()
2075             .set_one_reg(
2076                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2077                 &kvm_regs_state.regs.a3.to_le_bytes(),
2078             )
2079             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2080 
2081         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a4);
2082         self.fd
2083             .lock()
2084             .unwrap()
2085             .set_one_reg(
2086                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2087                 &kvm_regs_state.regs.a4.to_le_bytes(),
2088             )
2089             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2090 
2091         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a5);
2092         self.fd
2093             .lock()
2094             .unwrap()
2095             .set_one_reg(
2096                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2097                 &kvm_regs_state.regs.a5.to_le_bytes(),
2098             )
2099             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2100 
2101         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a6);
2102         self.fd
2103             .lock()
2104             .unwrap()
2105             .set_one_reg(
2106                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2107                 &kvm_regs_state.regs.a6.to_le_bytes(),
2108             )
2109             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2110 
2111         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a7);
2112         self.fd
2113             .lock()
2114             .unwrap()
2115             .set_one_reg(
2116                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2117                 &kvm_regs_state.regs.a7.to_le_bytes(),
2118             )
2119             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2120 
2121         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s2);
2122         self.fd
2123             .lock()
2124             .unwrap()
2125             .set_one_reg(
2126                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2127                 &kvm_regs_state.regs.s2.to_le_bytes(),
2128             )
2129             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2130 
2131         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s3);
2132         self.fd
2133             .lock()
2134             .unwrap()
2135             .set_one_reg(
2136                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2137                 &kvm_regs_state.regs.s3.to_le_bytes(),
2138             )
2139             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2140 
2141         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s4);
2142         self.fd
2143             .lock()
2144             .unwrap()
2145             .set_one_reg(
2146                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2147                 &kvm_regs_state.regs.s4.to_le_bytes(),
2148             )
2149             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2150 
2151         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s5);
2152         self.fd
2153             .lock()
2154             .unwrap()
2155             .set_one_reg(
2156                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2157                 &kvm_regs_state.regs.s5.to_le_bytes(),
2158             )
2159             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2160 
2161         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s6);
2162         self.fd
2163             .lock()
2164             .unwrap()
2165             .set_one_reg(
2166                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2167                 &kvm_regs_state.regs.s6.to_le_bytes(),
2168             )
2169             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2170 
2171         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s7);
2172         self.fd
2173             .lock()
2174             .unwrap()
2175             .set_one_reg(
2176                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2177                 &kvm_regs_state.regs.s7.to_le_bytes(),
2178             )
2179             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2180 
2181         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s8);
2182         self.fd
2183             .lock()
2184             .unwrap()
2185             .set_one_reg(
2186                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2187                 &kvm_regs_state.regs.s8.to_le_bytes(),
2188             )
2189             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2190 
2191         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s9);
2192         self.fd
2193             .lock()
2194             .unwrap()
2195             .set_one_reg(
2196                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2197                 &kvm_regs_state.regs.s9.to_le_bytes(),
2198             )
2199             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2200 
2201         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s10);
2202         self.fd
2203             .lock()
2204             .unwrap()
2205             .set_one_reg(
2206                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2207                 &kvm_regs_state.regs.s10.to_le_bytes(),
2208             )
2209             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2210 
2211         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s11);
2212         self.fd
2213             .lock()
2214             .unwrap()
2215             .set_one_reg(
2216                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2217                 &kvm_regs_state.regs.s11.to_le_bytes(),
2218             )
2219             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2220 
2221         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t3);
2222         self.fd
2223             .lock()
2224             .unwrap()
2225             .set_one_reg(
2226                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2227                 &kvm_regs_state.regs.t3.to_le_bytes(),
2228             )
2229             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2230 
2231         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t4);
2232         self.fd
2233             .lock()
2234             .unwrap()
2235             .set_one_reg(
2236                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2237                 &kvm_regs_state.regs.t4.to_le_bytes(),
2238             )
2239             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2240 
2241         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t5);
2242         self.fd
2243             .lock()
2244             .unwrap()
2245             .set_one_reg(
2246                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2247                 &kvm_regs_state.regs.t5.to_le_bytes(),
2248             )
2249             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2250 
2251         let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t6);
2252         self.fd
2253             .lock()
2254             .unwrap()
2255             .set_one_reg(
2256                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2257                 &kvm_regs_state.regs.t6.to_le_bytes(),
2258             )
2259             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2260 
2261         let off = offset_of!(kvm_riscv_core, mode);
2262         self.fd
2263             .lock()
2264             .unwrap()
2265             .set_one_reg(
2266                 riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
2267                 &kvm_regs_state.mode.to_le_bytes(),
2268             )
2269             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2270 
2271         Ok(())
2272     }
2273 
2274     #[cfg(target_arch = "x86_64")]
2275     ///
2276     /// Returns the vCPU special registers.
2277     ///
2278     fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
2279         Ok(self
2280             .fd
2281             .lock()
2282             .unwrap()
2283             .get_sregs()
2284             .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
2285             .into())
2286     }
2287 
2288     #[cfg(target_arch = "x86_64")]
2289     ///
2290     /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
2291     ///
2292     fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
2293         let sregs = (*sregs).into();
2294         self.fd
2295             .lock()
2296             .unwrap()
2297             .set_sregs(&sregs)
2298             .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
2299     }
2300 
2301     #[cfg(target_arch = "x86_64")]
2302     ///
2303     /// Returns the floating point state (FPU) from the vCPU.
2304     ///
2305     fn get_fpu(&self) -> cpu::Result<FpuState> {
2306         Ok(self
2307             .fd
2308             .lock()
2309             .unwrap()
2310             .get_fpu()
2311             .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
2312             .into())
2313     }
2314 
2315     #[cfg(target_arch = "x86_64")]
2316     ///
2317     /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
2318     ///
2319     fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
2320         let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into();
2321         self.fd
2322             .lock()
2323             .unwrap()
2324             .set_fpu(&fpu)
2325             .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
2326     }
2327 
2328     #[cfg(target_arch = "x86_64")]
2329     ///
2330     /// X86 specific call to setup the CPUID registers.
2331     ///
2332     fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
2333         let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
2334             cpuid.iter().map(|e| (*e).into()).collect();
2335         let kvm_cpuid = <CpuId>::from_entries(&cpuid)
2336             .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;
2337 
2338         self.fd
2339             .lock()
2340             .unwrap()
2341             .set_cpuid2(&kvm_cpuid)
2342             .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
2343     }
2344 
2345     #[cfg(target_arch = "x86_64")]
2346     ///
2347     /// X86 specific call to enable HyperV SynIC
2348     ///
2349     fn enable_hyperv_synic(&self) -> cpu::Result<()> {
2350         // Update the information about Hyper-V SynIC being enabled and
2351         // emulated as it will influence later which MSRs should be saved.
2352         self.hyperv_synic.store(true, Ordering::Release);
2353 
2354         let cap = kvm_enable_cap {
2355             cap: KVM_CAP_HYPERV_SYNIC,
2356             ..Default::default()
2357         };
2358         self.fd
2359             .lock()
2360             .unwrap()
2361             .enable_cap(&cap)
2362             .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into()))
2363     }
2364 
2365     ///
2366     /// X86 specific call to retrieve the CPUID registers.
2367     ///
2368     #[cfg(target_arch = "x86_64")]
2369     fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
2370         let kvm_cpuid = self
2371             .fd
2372             .lock()
2373             .unwrap()
2374             .get_cpuid2(num_entries)
2375             .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?;
2376 
2377         let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();
2378 
2379         Ok(v)
2380     }
2381 
2382     #[cfg(target_arch = "x86_64")]
2383     ///
2384     /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
2385     ///
2386     fn get_lapic(&self) -> cpu::Result<LapicState> {
2387         Ok(self
2388             .fd
2389             .lock()
2390             .unwrap()
2391             .get_lapic()
2392             .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
2393             .into())
2394     }
2395 
2396     #[cfg(target_arch = "x86_64")]
2397     ///
2398     /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
2399     ///
2400     fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
2401         let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into();
2402         self.fd
2403             .lock()
2404             .unwrap()
2405             .set_lapic(&klapic)
2406             .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
2407     }
2408 
2409     #[cfg(target_arch = "x86_64")]
2410     ///
2411     /// Returns the model-specific registers (MSR) for this vCPU.
2412     ///
2413     fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
2414         let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
2415         let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
2416         let succ = self
2417             .fd
2418             .lock()
2419             .unwrap()
2420             .get_msrs(&mut kvm_msrs)
2421             .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;
2422 
2423         msrs[..succ].copy_from_slice(
2424             &kvm_msrs.as_slice()[..succ]
2425                 .iter()
2426                 .map(|e| (*e).into())
2427                 .collect::<Vec<MsrEntry>>(),
2428         );
2429 
2430         Ok(succ)
2431     }
2432 
2433     #[cfg(target_arch = "x86_64")]
2434     ///
2435     /// Setup the model-specific registers (MSR) for this vCPU.
2436     /// Returns the number of MSR entries actually written.
2437     ///
2438     fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
2439         let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
2440         let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
2441         self.fd
2442             .lock()
2443             .unwrap()
2444             .set_msrs(&kvm_msrs)
2445             .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
2446     }
2447 
2448     ///
2449     /// Returns the vcpu's current "multiprocessing state".
2450     ///
2451     fn get_mp_state(&self) -> cpu::Result<MpState> {
2452         Ok(self
2453             .fd
2454             .lock()
2455             .unwrap()
2456             .get_mp_state()
2457             .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))?
2458             .into())
2459     }
2460 
2461     ///
2462     /// Sets the vcpu's current "multiprocessing state".
2463     ///
2464     fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
2465         self.fd
2466             .lock()
2467             .unwrap()
2468             .set_mp_state(mp_state.into())
2469             .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
2470     }
2471 
2472     #[cfg(target_arch = "x86_64")]
2473     ///
2474     /// Translates guest virtual address to guest physical address using the `KVM_TRANSLATE` ioctl.
2475     ///
2476     fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> {
2477         let tr = self
2478             .fd
2479             .lock()
2480             .unwrap()
2481             .translate_gva(gva)
2482             .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
2483         // tr.valid is set if the GVA is mapped to valid GPA.
2484         match tr.valid {
2485             0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!(
2486                 "Invalid GVA: {:#x}",
2487                 gva
2488             ))),
2489             _ => Ok((tr.physical_address, 0)),
2490         }
2491     }
2492 
2493     ///
2494     /// Triggers the running of the current virtual CPU returning an exit reason.
2495     ///
2496     fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
2497         match self.fd.lock().unwrap().run() {
2498             Ok(run) => match run {
2499                 #[cfg(target_arch = "x86_64")]
2500                 VcpuExit::IoIn(addr, data) => {
2501                     if let Some(vm_ops) = &self.vm_ops {
2502                         return vm_ops
2503                             .pio_read(addr.into(), data)
2504                             .map(|_| cpu::VmExit::Ignore)
2505                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
2506                     }
2507 
2508                     Ok(cpu::VmExit::Ignore)
2509                 }
2510                 #[cfg(target_arch = "x86_64")]
2511                 VcpuExit::IoOut(addr, data) => {
2512                     if let Some(vm_ops) = &self.vm_ops {
2513                         return vm_ops
2514                             .pio_write(addr.into(), data)
2515                             .map(|_| cpu::VmExit::Ignore)
2516                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
2517                     }
2518 
2519                     Ok(cpu::VmExit::Ignore)
2520                 }
2521                 #[cfg(target_arch = "x86_64")]
2522                 VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
2523                 #[cfg(target_arch = "x86_64")]
2524                 VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),
2525 
2526                 #[cfg(target_arch = "aarch64")]
2527                 VcpuExit::SystemEvent(event_type, flags) => {
2528                     use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
2529                     // On Aarch64, when the VM is shutdown, run() returns
2530                     // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN
2531                     if event_type == KVM_SYSTEM_EVENT_RESET {
2532                         Ok(cpu::VmExit::Reset)
2533                     } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
2534                         Ok(cpu::VmExit::Shutdown)
2535                     } else {
2536                         Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
2537                             "Unexpected system event with type 0x{:x}, flags 0x{:x?}",
2538                             event_type,
2539                             flags
2540                         )))
2541                     }
2542                 }
2543 
2544                 VcpuExit::MmioRead(addr, data) => {
2545                     if let Some(vm_ops) = &self.vm_ops {
2546                         return vm_ops
2547                             .mmio_read(addr, data)
2548                             .map(|_| cpu::VmExit::Ignore)
2549                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
2550                     }
2551 
2552                     Ok(cpu::VmExit::Ignore)
2553                 }
2554                 VcpuExit::MmioWrite(addr, data) => {
2555                     if let Some(vm_ops) = &self.vm_ops {
2556                         return vm_ops
2557                             .mmio_write(addr, data)
2558                             .map(|_| cpu::VmExit::Ignore)
2559                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
2560                     }
2561 
2562                     Ok(cpu::VmExit::Ignore)
2563                 }
2564                 VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),
2565                 #[cfg(feature = "tdx")]
2566                 VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx),
2567                 VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug),
2568 
2569                 r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
2570                     "Unexpected exit reason on vcpu run: {:?}",
2571                     r
2572                 ))),
2573             },
2574 
2575             Err(ref e) => match e.errno() {
2576                 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
2577                 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
2578                     "VCPU error {:?}",
2579                     e
2580                 ))),
2581             },
2582         }
2583     }
2584 
2585     #[cfg(target_arch = "x86_64")]
2586     ///
2587     /// Let the guest know that it has been paused, which prevents from
2588     /// potential soft lockups when being resumed.
2589     ///
2590     fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
2591         if let Err(e) = self.fd.lock().unwrap().kvmclock_ctrl() {
2592             // Linux kernel returns -EINVAL if the PV clock isn't yet initialised
2593             // which could be because we're still in firmware or the guest doesn't
2594             // use KVM clock.
2595             if e.errno() != libc::EINVAL {
2596                 return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()));
2597             }
2598         }
2599 
2600         Ok(())
2601     }
2602 
    #[cfg(not(target_arch = "riscv64"))]
    ///
    /// Sets debug registers to set hardware breakpoints and/or enable single step.
    ///
    /// `addrs` holds the guest addresses to install hardware breakpoints on;
    /// `singlestep` additionally enables single-step execution.
    ///
    fn set_guest_debug(
        &self,
        addrs: &[vm_memory::GuestAddress],
        singlestep: bool,
    ) -> cpu::Result<()> {
        // Enable guest debugging with hardware breakpoints; the control flag
        // name differs between architectures.
        let mut dbg = kvm_guest_debug {
            #[cfg(target_arch = "x86_64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
            #[cfg(target_arch = "aarch64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW,
            ..Default::default()
        };
        if singlestep {
            dbg.control |= KVM_GUESTDBG_SINGLESTEP;
        }

        // Set the debug registers.
        // Here we assume that the number of addresses do not exceed what
        // `Hypervisor::get_guest_debug_hw_bps()` specifies.
        #[cfg(target_arch = "x86_64")]
        {
            // Set bits 9 and 10 of DR7.
            // bit 9: GE (global exact breakpoint enable) flag.
            // bit 10: always 1.
            dbg.arch.debugreg[7] = 0x0600;

            for (i, addr) in addrs.iter().enumerate() {
                // DR0..DR3 hold the breakpoint linear addresses.
                dbg.arch.debugreg[i] = addr.0;
                // Set global breakpoint enable flag (bit 2*i+1 of DR7).
                dbg.arch.debugreg[7] |= 2 << (i * 2);
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            for (i, addr) in addrs.iter().enumerate() {
                // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2):
                // bit 0: 1 (Enabled)
                // bit 1~2: 0b11 (PMC = EL1/EL0)
                // bit 5~8: 0b1111 (BAS = AArch64)
                // others: 0
                dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64;
                // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3):
                // bit 2~52: VA[2:52]
                dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0;
            }
        }
        self.fd
            .lock()
            .unwrap()
            .set_guest_debug(&dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))
    }
2659 
    #[cfg(target_arch = "aarch64")]
    /// Returns the vCPU feature id to pass to `vcpu_finalize`: the SVE
    /// feature id.
    fn vcpu_get_finalized_features(&self) -> i32 {
        kvm_bindings::KVM_ARM_VCPU_SVE as i32
    }
2664 
2665     #[cfg(target_arch = "aarch64")]
2666     fn vcpu_set_processor_features(
2667         &self,
2668         vm: &Arc<dyn crate::Vm>,
2669         kvi: &mut crate::VcpuInit,
2670         id: u8,
2671     ) -> cpu::Result<()> {
2672         use std::arch::is_aarch64_feature_detected;
2673         #[allow(clippy::nonminimal_bool)]
2674         let sve_supported =
2675             is_aarch64_feature_detected!("sve") || is_aarch64_feature_detected!("sve2");
2676 
2677         let mut kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();
2678 
2679         // We already checked that the capability is supported.
2680         kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2;
2681         if vm
2682             .as_any()
2683             .downcast_ref::<crate::kvm::KvmVm>()
2684             .unwrap()
2685             .check_extension(Cap::ArmPmuV3)
2686         {
2687             kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PMU_V3;
2688         }
2689 
2690         if sve_supported
2691             && vm
2692                 .as_any()
2693                 .downcast_ref::<crate::kvm::KvmVm>()
2694                 .unwrap()
2695                 .check_extension(Cap::ArmSve)
2696         {
2697             kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_SVE;
2698         }
2699 
2700         // Non-boot cpus are powered off initially.
2701         if id > 0 {
2702             kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_POWER_OFF;
2703         }
2704 
2705         *kvi = kvm_kvi.into();
2706 
2707         Ok(())
2708     }
2709 
2710     ///
2711     /// Return VcpuInit with default value set
2712     ///
2713     #[cfg(target_arch = "aarch64")]
2714     fn create_vcpu_init(&self) -> crate::VcpuInit {
2715         kvm_bindings::kvm_vcpu_init::default().into()
2716     }
2717 
2718     #[cfg(target_arch = "aarch64")]
2719     fn vcpu_init(&self, kvi: &crate::VcpuInit) -> cpu::Result<()> {
2720         let kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();
2721         self.fd
2722             .lock()
2723             .unwrap()
2724             .vcpu_init(&kvm_kvi)
2725             .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
2726     }
2727 
2728     #[cfg(target_arch = "aarch64")]
2729     fn vcpu_finalize(&self, feature: i32) -> cpu::Result<()> {
2730         self.fd
2731             .lock()
2732             .unwrap()
2733             .vcpu_finalize(&feature)
2734             .map_err(|e| cpu::HypervisorCpuError::VcpuFinalize(e.into()))
2735     }
2736 
2737     #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
2738     ///
2739     /// Gets a list of the guest registers that are supported for the
2740     /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
2741     ///
2742     fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
2743         let mut kvm_reg_list: kvm_bindings::RegList = reg_list.clone().into();
2744         self.fd
2745             .lock()
2746             .unwrap()
2747             .get_reg_list(&mut kvm_reg_list)
2748             .map_err(|e: kvm_ioctls::Error| cpu::HypervisorCpuError::GetRegList(e.into()))?;
2749         *reg_list = kvm_reg_list.into();
2750         Ok(())
2751     }
2752 
2753     ///
2754     /// Gets the value of a system register
2755     ///
2756     #[cfg(target_arch = "aarch64")]
2757     fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
2758         //
2759         // Arm Architecture Reference Manual defines the encoding of
2760         // AArch64 system registers, see
2761         // https://developer.arm.com/documentation/ddi0487 (chapter D12).
2762         // While KVM defines another ID for each AArch64 system register,
2763         // which is used in calling `KVM_G/SET_ONE_REG` to access a system
2764         // register of a guest.
2765         // A mapping exists between the Arm standard encoding and the KVM ID.
2766         // This function takes the standard u32 ID as input parameter, converts
2767         // it to the corresponding KVM ID, and call `KVM_GET_ONE_REG` API to
2768         // get the value of the system parameter.
2769         //
2770         let id: u64 = KVM_REG_ARM64
2771             | KVM_REG_SIZE_U64
2772             | KVM_REG_ARM64_SYSREG as u64
2773             | ((((sys_reg) >> 5)
2774                 & (KVM_REG_ARM64_SYSREG_OP0_MASK
2775                     | KVM_REG_ARM64_SYSREG_OP1_MASK
2776                     | KVM_REG_ARM64_SYSREG_CRN_MASK
2777                     | KVM_REG_ARM64_SYSREG_CRM_MASK
2778                     | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
2779         let mut bytes = [0_u8; 8];
2780         self.fd
2781             .lock()
2782             .unwrap()
2783             .get_one_reg(id, &mut bytes)
2784             .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
2785         Ok(u64::from_le_bytes(bytes))
2786     }
2787 
2788     ///
2789     /// Gets the value of a non-core register
2790     ///
2791     #[cfg(target_arch = "riscv64")]
2792     fn get_non_core_reg(&self, _non_core_reg: u32) -> cpu::Result<u64> {
2793         unimplemented!()
2794     }
2795 
2796     ///
2797     /// Configure core registers for a given CPU.
2798     ///
2799     #[cfg(target_arch = "aarch64")]
2800     fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
2801         #[allow(non_upper_case_globals)]
2802         // PSR (Processor State Register) bits.
2803         // Taken from arch/arm64/include/uapi/asm/ptrace.h.
2804         const PSR_MODE_EL1h: u64 = 0x0000_0005;
2805         const PSR_F_BIT: u64 = 0x0000_0040;
2806         const PSR_I_BIT: u64 = 0x0000_0080;
2807         const PSR_A_BIT: u64 = 0x0000_0100;
2808         const PSR_D_BIT: u64 = 0x0000_0200;
2809         // Taken from arch/arm64/kvm/inject_fault.c.
2810         const PSTATE_FAULT_BITS_64: u64 =
2811             PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;
2812 
2813         let kreg_off = offset_of!(kvm_regs, regs);
2814 
2815         // Get the register index of the PSTATE (Processor State) register.
2816         let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
2817         self.fd
2818             .lock()
2819             .unwrap()
2820             .set_one_reg(
2821                 arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
2822                 &PSTATE_FAULT_BITS_64.to_le_bytes(),
2823             )
2824             .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
2825 
2826         // Other vCPUs are powered off initially awaiting PSCI wakeup.
2827         if cpu_id == 0 {
2828             // Setting the PC (Processor Counter) to the current program address (kernel address).
2829             let pc = offset_of!(user_pt_regs, pc) + kreg_off;
2830             self.fd
2831                 .lock()
2832                 .unwrap()
2833                 .set_one_reg(
2834                     arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
2835                     &boot_ip.to_le_bytes(),
2836                 )
2837                 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
2838 
2839             // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
2840             // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
2841             // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
2842             // We are choosing to place it the end of DRAM. See `get_fdt_addr`.
2843             let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
2844             self.fd
2845                 .lock()
2846                 .unwrap()
2847                 .set_one_reg(
2848                     arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
2849                     &fdt_start.to_le_bytes(),
2850                 )
2851                 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
2852         }
2853         Ok(())
2854     }
2855 
2856     #[cfg(target_arch = "riscv64")]
2857     ///
2858     /// Configure registers for a given RISC-V CPU.
2859     ///
2860     fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
2861         // Setting the A0 () to the hartid of this CPU.
2862         let a0 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a0);
2863         self.fd
2864             .lock()
2865             .unwrap()
2866             .set_one_reg(
2867                 riscv64_reg_id!(KVM_REG_RISCV_CORE, a0),
2868                 &u64::from(cpu_id).to_le_bytes(),
2869             )
2870             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2871 
2872         // Setting the PC (Processor Counter) to the current program address (kernel address).
2873         let pc = offset_of!(kvm_riscv_core, regs, user_regs_struct, pc);
2874         self.fd
2875             .lock()
2876             .unwrap()
2877             .set_one_reg(
2878                 riscv64_reg_id!(KVM_REG_RISCV_CORE, pc),
2879                 &boot_ip.to_le_bytes(),
2880             )
2881             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2882 
2883         // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
2884         // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
2885         // not exceed 64 kilobytes in size." -> https://www.kernel.org/doc/Documentation/arch/riscv/boot.txt.
2886         let a1 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a1);
2887         self.fd
2888             .lock()
2889             .unwrap()
2890             .set_one_reg(
2891                 riscv64_reg_id!(KVM_REG_RISCV_CORE, a1),
2892                 &fdt_start.to_le_bytes(),
2893             )
2894             .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
2895 
2896         Ok(())
2897     }
2898 
    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before most everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change state of LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save. The code looks as
    /// it might as well be affected by internal state modifications of the
    /// GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a prepopulated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        // Collect state in the order required by the constraints documented
        // above (MP state first, vCPU events last).
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?.into();
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fallback onto a slower method by getting MSRs
        // by chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
                0x400000b5, 0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr.into());
            }
        }

        let expected_num_msrs = msr_entries.len();
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            // `num_msrs` entries were read successfully, so the entry at that
            // position is the first one that failed.
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                // Keep whatever was successfully read from this chunk.
                msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);

                // The whole remainder was read: nothing left to retry.
                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;
        let tsc_khz = self.tsc_khz()?;

        Ok(VcpuKvmState {
            cpuid,
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
            tsc_khz,
        }
        .into())
    }
3022 
3023     ///
3024     /// Get the current AArch64 CPU state
3025     ///
3026     #[cfg(target_arch = "aarch64")]
3027     fn state(&self) -> cpu::Result<CpuState> {
3028         let mut state = VcpuKvmState {
3029             mp_state: self.get_mp_state()?.into(),
3030             ..Default::default()
3031         };
3032         // Get core registers
3033         state.core_regs = self.get_regs()?.into();
3034 
3035         // Get systerm register
3036         // Call KVM_GET_REG_LIST to get all registers available to the guest.
3037         // For ArmV8 there are around 500 registers.
3038         let mut sys_regs: Vec<kvm_bindings::kvm_one_reg> = Vec::new();
3039         let mut reg_list = kvm_bindings::RegList::new(500).unwrap();
3040         self.fd
3041             .lock()
3042             .unwrap()
3043             .get_reg_list(&mut reg_list)
3044             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;
3045 
3046         // At this point reg_list should contain: core registers and system
3047         // registers.
3048         // The register list contains the number of registers and their ids. We
3049         // will be needing to call KVM_GET_ONE_REG on each id in order to save
3050         // all of them. We carve out from the list  the core registers which are
3051         // represented in the kernel by kvm_regs structure and for which we can
3052         // calculate the id based on the offset in the structure.
3053         reg_list.retain(|regid| is_system_register(*regid));
3054 
3055         // Now, for the rest of the registers left in the previously fetched
3056         // register list, we are simply calling KVM_GET_ONE_REG.
3057         let indices = reg_list.as_slice();
3058         for index in indices.iter() {
3059             let mut bytes = [0_u8; 8];
3060             self.fd
3061                 .lock()
3062                 .unwrap()
3063                 .get_one_reg(*index, &mut bytes)
3064                 .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
3065             sys_regs.push(kvm_bindings::kvm_one_reg {
3066                 id: *index,
3067                 addr: u64::from_le_bytes(bytes),
3068             });
3069         }
3070 
3071         state.sys_regs = sys_regs;
3072 
3073         Ok(state.into())
3074     }
3075 
    #[cfg(target_arch = "riscv64")]
    ///
    /// Get the current RISC-V 64-bit CPU state
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = VcpuKvmState {
            mp_state: self.get_mp_state()?.into(),
            ..Default::default()
        };
        // Get core registers
        state.core_regs = self.get_regs()?.into();

        // Get non-core registers
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For RISC-V 64-bit there are around 200 registers.
        let mut sys_regs: Vec<kvm_bindings::kvm_one_reg> = Vec::new();
        let mut reg_list = kvm_bindings::RegList::new(200).unwrap();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain:
        // - core registers
        // - config registers
        // - timer registers
        // - control and status registers
        // - AIA control and status registers
        // - smstateen control and status registers
        // - sbi_sta control and status registers.
        //
        // The register list contains the number of registers and their ids. We
        // will be needing to call KVM_GET_ONE_REG on each id in order to save
        // all of them. We carve out from the list the core registers which are
        // represented in the kernel by `kvm_riscv_core` structure and for which
        // we can calculate the id based on the offset in the structure.
        reg_list.retain(|regid| is_non_core_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we are simply calling KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            // Each register value is read as 8 little-endian bytes and stored
            // alongside its id.
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(*index, &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
            sys_regs.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: u64::from_le_bytes(bytes),
            });
        }

        state.non_core_regs = sys_regs;

        Ok(state.into())
    }
3135 
    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully, when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        // Restore state in the order required by the constraints documented
        // above (CPUID/MP state first, vCPU events last).
        let state: VcpuKvmState = state.clone().into();
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state.into())?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        if let Some(freq) = state.tsc_khz {
            self.set_tsc_khz(freq)?;
        }

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fallback onto a slower method by setting MSRs
        // by chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.len();
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            // `num_msrs` entries were written successfully, so the entry at
            // that position is the first one that failed.
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let sub_msr_entries = state.msrs[start_pos..].to_vec();

                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                // The whole remainder was written: nothing left to retry.
                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }
3225 
3226     ///
3227     /// Restore the previously saved AArch64 CPU state
3228     ///
3229     #[cfg(target_arch = "aarch64")]
3230     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
3231         let state: VcpuKvmState = state.clone().into();
3232         // Set core registers
3233         self.set_regs(&state.core_regs.into())?;
3234         // Set system registers
3235         for reg in &state.sys_regs {
3236             self.fd
3237                 .lock()
3238                 .unwrap()
3239                 .set_one_reg(reg.id, &reg.addr.to_le_bytes())
3240                 .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
3241         }
3242 
3243         self.set_mp_state(state.mp_state.into())?;
3244 
3245         Ok(())
3246     }
3247 
3248     #[cfg(target_arch = "riscv64")]
3249     ///
3250     /// Restore the previously saved RISC-V 64-bit CPU state
3251     ///
3252     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
3253         let state: VcpuKvmState = state.clone().into();
3254         // Set core registers
3255         self.set_regs(&state.core_regs.into())?;
3256         // Set system registers
3257         for reg in &state.non_core_regs {
3258             self.fd
3259                 .lock()
3260                 .unwrap()
3261                 .set_one_reg(reg.id, &reg.addr.to_le_bytes())
3262                 .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
3263         }
3264 
3265         self.set_mp_state(state.mp_state.into())?;
3266 
3267         Ok(())
3268     }
3269 
3270     ///
3271     /// Initialize TDX for this CPU
3272     ///
3273     #[cfg(feature = "tdx")]
3274     fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
3275         tdx_command(
3276             &self.fd.lock().unwrap().as_raw_fd(),
3277             TdxCommand::InitVcpu,
3278             0,
3279             hob_address,
3280         )
3281         .map_err(cpu::HypervisorCpuError::InitializeTdx)
3282     }
3283 
3284     ///
3285     /// Set the "immediate_exit" state
3286     ///
3287     fn set_immediate_exit(&self, exit: bool) {
3288         self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into());
3289     }
3290 
3291     ///
3292     /// Returns the details about TDX exit reason
3293     ///
3294     #[cfg(feature = "tdx")]
3295     fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
3296         let mut fd = self.fd.as_ref().lock().unwrap();
3297         let kvm_run = fd.get_kvm_run();
3298         // SAFETY: accessing a union field in a valid structure
3299         let tdx_vmcall = unsafe {
3300             &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
3301                 as *mut KvmTdxExit))
3302                 .u
3303                 .vmcall
3304         };
3305 
3306         tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;
3307 
3308         if tdx_vmcall.type_ != 0 {
3309             return Err(cpu::HypervisorCpuError::UnknownTdxVmCall);
3310         }
3311 
3312         match tdx_vmcall.subfunction {
3313             TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
3314             TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
3315                 Ok(TdxExitDetails::SetupEventNotifyInterrupt)
3316             }
3317             _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
3318         }
3319     }
3320 
3321     ///
3322     /// Set the status code for TDX exit
3323     ///
3324     #[cfg(feature = "tdx")]
3325     fn set_tdx_status(&mut self, status: TdxExitStatus) {
3326         let mut fd = self.fd.as_ref().lock().unwrap();
3327         let kvm_run = fd.get_kvm_run();
3328         // SAFETY: accessing a union field in a valid structure
3329         let tdx_vmcall = unsafe {
3330             &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
3331                 as *mut KvmTdxExit))
3332                 .u
3333                 .vmcall
3334         };
3335 
3336         tdx_vmcall.status_code = match status {
3337             TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
3338             TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
3339         };
3340     }
3341 
3342     #[cfg(target_arch = "x86_64")]
3343     ///
3344     /// Return the list of initial MSR entries for a VCPU
3345     ///
3346     fn boot_msr_entries(&self) -> Vec<MsrEntry> {
3347         use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};
3348 
3349         [
3350             msr!(msr_index::MSR_IA32_SYSENTER_CS),
3351             msr!(msr_index::MSR_IA32_SYSENTER_ESP),
3352             msr!(msr_index::MSR_IA32_SYSENTER_EIP),
3353             msr!(msr_index::MSR_STAR),
3354             msr!(msr_index::MSR_CSTAR),
3355             msr!(msr_index::MSR_LSTAR),
3356             msr!(msr_index::MSR_KERNEL_GS_BASE),
3357             msr!(msr_index::MSR_SYSCALL_MASK),
3358             msr!(msr_index::MSR_IA32_TSC),
3359             msr_data!(
3360                 msr_index::MSR_IA32_MISC_ENABLE,
3361                 msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
3362             ),
3363             msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
3364         ]
3365         .to_vec()
3366     }
3367 
3368     #[cfg(target_arch = "aarch64")]
3369     fn has_pmu_support(&self) -> bool {
3370         let cpu_attr = kvm_bindings::kvm_device_attr {
3371             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
3372             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
3373             addr: 0x0,
3374             flags: 0,
3375         };
3376         self.fd.lock().unwrap().has_device_attr(&cpu_attr).is_ok()
3377     }
3378 
3379     #[cfg(target_arch = "aarch64")]
3380     fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
3381         let cpu_attr = kvm_bindings::kvm_device_attr {
3382             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
3383             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
3384             addr: 0x0,
3385             flags: 0,
3386         };
3387         let cpu_attr_irq = kvm_bindings::kvm_device_attr {
3388             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
3389             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
3390             addr: &irq as *const u32 as u64,
3391             flags: 0,
3392         };
3393         self.fd
3394             .lock()
3395             .unwrap()
3396             .set_device_attr(&cpu_attr_irq)
3397             .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
3398         self.fd
3399             .lock()
3400             .unwrap()
3401             .set_device_attr(&cpu_attr)
3402             .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
3403     }
3404 
3405     #[cfg(target_arch = "x86_64")]
3406     ///
3407     /// Get the frequency of the TSC if available
3408     ///
3409     fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
3410         match self.fd.lock().unwrap().get_tsc_khz() {
3411             Err(e) => {
3412                 if e.errno() == libc::EIO {
3413                     Ok(None)
3414                 } else {
3415                     Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
3416                 }
3417             }
3418             Ok(v) => Ok(Some(v)),
3419         }
3420     }
3421 
3422     #[cfg(target_arch = "x86_64")]
3423     ///
3424     /// Set the frequency of the TSC if available
3425     ///
3426     fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
3427         match self.fd.lock().unwrap().set_tsc_khz(freq) {
3428             Err(e) => {
3429                 if e.errno() == libc::EIO {
3430                     Ok(())
3431                 } else {
3432                     Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
3433                 }
3434             }
3435             Ok(_) => Ok(()),
3436         }
3437     }
3438 
3439     #[cfg(target_arch = "x86_64")]
3440     ///
3441     /// Trigger NMI interrupt
3442     ///
3443     fn nmi(&self) -> cpu::Result<()> {
3444         match self.fd.lock().unwrap().nmi() {
3445             Err(e) => {
3446                 if e.errno() == libc::EIO {
3447                     Ok(())
3448                 } else {
3449                     Err(cpu::HypervisorCpuError::Nmi(e.into()))
3450                 }
3451             }
3452             Ok(_) => Ok(()),
3453         }
3454     }
3455 }
3456 
3457 impl KvmVcpu {
3458     #[cfg(target_arch = "x86_64")]
3459     ///
3460     /// X86 specific call that returns the vcpu's current "xsave struct".
3461     ///
3462     fn get_xsave(&self) -> cpu::Result<XsaveState> {
3463         Ok(self
3464             .fd
3465             .lock()
3466             .unwrap()
3467             .get_xsave()
3468             .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
3469             .into())
3470     }
3471 
3472     #[cfg(target_arch = "x86_64")]
3473     ///
3474     /// X86 specific call that sets the vcpu's current "xsave struct".
3475     ///
3476     fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
3477         let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
3478         self.fd
3479             .lock()
3480             .unwrap()
3481             .set_xsave(&xsave)
3482             .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
3483     }
3484 
3485     #[cfg(target_arch = "x86_64")]
3486     ///
3487     /// X86 specific call that returns the vcpu's current "xcrs".
3488     ///
3489     fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
3490         self.fd
3491             .lock()
3492             .unwrap()
3493             .get_xcrs()
3494             .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
3495     }
3496 
3497     #[cfg(target_arch = "x86_64")]
3498     ///
3499     /// X86 specific call that sets the vcpu's current "xcrs".
3500     ///
3501     fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
3502         self.fd
3503             .lock()
3504             .unwrap()
3505             .set_xcrs(xcrs)
3506             .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
3507     }
3508 
3509     #[cfg(target_arch = "x86_64")]
3510     ///
3511     /// Returns currently pending exceptions, interrupts, and NMIs as well as related
3512     /// states of the vcpu.
3513     ///
3514     fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
3515         self.fd
3516             .lock()
3517             .unwrap()
3518             .get_vcpu_events()
3519             .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
3520     }
3521 
3522     #[cfg(target_arch = "x86_64")]
3523     ///
3524     /// Sets pending exceptions, interrupts, and NMIs as well as related states
3525     /// of the vcpu.
3526     ///
3527     fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
3528         self.fd
3529             .lock()
3530             .unwrap()
3531             .set_vcpu_events(events)
3532             .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
3533     }
3534 }
3535 
#[cfg(test)]
mod tests {
    #[test]
    #[cfg(target_arch = "riscv64")]
    fn test_get_and_set_regs() {
        use super::*;

        let hypervisor = Arc::new(KvmHypervisor::new().unwrap());
        let vm = hypervisor.create_vm().expect("new VM fd creation failed");
        let vcpu = vm.create_vcpu(0, None).unwrap();

        // Give every core register a distinct value so a mismatch on any
        // single register is detectable in the round-trip below.
        let expected = StandardRegisters::from(kvm_riscv_core {
            regs: user_regs_struct {
                pc: 0x00,
                ra: 0x01,
                sp: 0x02,
                gp: 0x03,
                tp: 0x04,
                t0: 0x05,
                t1: 0x06,
                t2: 0x07,
                s0: 0x08,
                s1: 0x09,
                a0: 0x0a,
                a1: 0x0b,
                a2: 0x0c,
                a3: 0x0d,
                a4: 0x0e,
                a5: 0x0f,
                a6: 0x10,
                a7: 0x11,
                s2: 0x12,
                s3: 0x13,
                s4: 0x14,
                s5: 0x15,
                s6: 0x16,
                s7: 0x17,
                s8: 0x18,
                s9: 0x19,
                s10: 0x1a,
                s11: 0x1b,
                t3: 0x1c,
                t4: 0x1d,
                t5: 0x1e,
                t6: 0x1f,
            },
            mode: 0x00,
        });

        // Round-trip: what we set must be exactly what we read back.
        vcpu.set_regs(&expected).unwrap();
        assert_eq!(vcpu.get_regs().unwrap(), expected);
    }
}
3590