// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//
// Copyright 2018-2019 CrowdStrike, Inc.
//
//

#[cfg(target_arch = "aarch64")]
use crate::aarch64::gic::KvmGicV3Its;
#[cfg(target_arch = "aarch64")]
pub use crate::aarch64::{
    check_required_kvm_extensions, gic::Gicv3ItsState as GicState, is_system_register, VcpuInit,
    VcpuKvmState,
};
#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::gic::{Vgic, VgicConfig};
use crate::cpu;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, InterruptSourceConfig, VmOps};
use crate::HypervisorType;
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset_of};
use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
use std::any::Any;
use std::collections::HashMap;
#[cfg(target_arch = "x86_64")]
use std::fs::File;
#[cfg(target_arch = "x86_64")]
use std::os::unix::io::AsRawFd;
#[cfg(feature = "tdx")]
use std::os::unix::io::RawFd;
use std::result;
#[cfg(target_arch = "x86_64")]
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Mutex;
use std::sync::{Arc, RwLock};
use vmm_sys_util::eventfd::EventFd;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{
    CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, XsaveState, NUM_IOAPIC_PINS,
};
#[cfg(target_arch = "x86_64")]
use crate::ClockData;
#[cfg(target_arch = "x86_64")]
use crate::StandardRegisters;
use crate::{
    CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
    USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
};
#[cfg(target_arch = "aarch64")]
use aarch64::{RegList, Register, StandardRegisters};
#[cfg(target_arch = "x86_64")]
use kvm_bindings::{
    kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
    KVM_GUESTDBG_USE_HW_BP,
};
#[cfg(target_arch = "x86_64")]
use x86_64::check_required_kvm_extensions;
#[cfg(target_arch = "x86_64")]
pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState};
// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
pub use kvm_bindings;
pub use kvm_bindings::{
    kvm_clock_data, kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug,
    kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_userspace_memory_region,
    KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
    KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
};
#[cfg(target_arch = "aarch64")]
use kvm_bindings::{
    kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64,
    KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK,
    KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK,
    KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
};
#[cfg(feature = "tdx")]
use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO};
pub use kvm_ioctls;
pub use kvm_ioctls::{Cap, Kvm};
#[cfg(target_arch = "aarch64")]
use std::mem;
use thiserror::Error;
use vfio_ioctls::VfioDeviceFd;
#[cfg(feature = "tdx")]
use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr};
///
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
///
pub use {
    kvm_bindings::kvm_create_device as CreateDevice, kvm_bindings::kvm_device_attr as DeviceAttr,
    kvm_bindings::kvm_run, kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::VcpuExit,
};

#[cfg(target_arch = "x86_64")]
const KVM_CAP_SGX_ATTRIBUTE: u32 = 196;

#[cfg(target_arch = "x86_64")]
use vmm_sys_util::ioctl_io_nr;

#[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))]
use vmm_sys_util::ioctl_ioc_nr;

#[cfg(target_arch = "x86_64")]
ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a);

#[cfg(feature = "tdx")]
const KVM_EXIT_TDX: u32 = 50;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SUCCESS: u64 = 0;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000;

#[cfg(feature = "tdx")]
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);

#[cfg(feature = "tdx")]
#[repr(u32)]
enum TdxCommand {
    Capabilities = 0,
    InitVm,
    InitVcpu,
    InitMemRegion,
    Finalize,
}

#[cfg(feature = "tdx")]
pub enum TdxExitDetails {
    GetQuote,
    SetupEventNotifyInterrupt,
}

#[cfg(feature = "tdx")]
pub enum TdxExitStatus {
    Success,
    InvalidOperand,
}

#[cfg(feature = "tdx")]
const TDX_MAX_NR_CPUID_CONFIGS: usize = 6;

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCpuidConfig {
    pub leaf: u32,
    pub sub_leaf: u32,
    pub eax: u32,
    pub ebx: u32,
    pub ecx: u32,
    pub edx: u32,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCapabilities {
    pub attrs_fixed0: u64,
    pub attrs_fixed1: u64,
    pub xfam_fixed0: u64,
    pub xfam_fixed1: u64,
    pub nr_cpuid_configs: u32,
    pub padding: u32,
    pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS],
}

#[cfg(feature = "tdx")]
#[derive(Copy, Clone)]
pub struct KvmTdxExit {
    pub type_: u32,
    pub pad: u32,
    pub u: KvmTdxExitU,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Copy, Clone)]
pub union KvmTdxExitU {
    pub vmcall: KvmTdxExitVmcall,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct KvmTdxExitVmcall {
    pub type_: u64,
    pub subfunction: u64,
    pub reg_mask: u64,
    pub in_r12: u64,
    pub in_r13: u64,
    pub in_r14: u64,
    pub in_r15: u64,
    pub in_rbx: u64,
    pub in_rdi: u64,
    pub in_rsi: u64,
    pub in_r8: u64,
    pub in_r9: u64,
    pub in_rdx: u64,
    pub status_code: u64,
    pub out_r11: u64,
    pub out_r12: u64,
    pub out_r13: u64,
    pub out_r14: u64,
    pub out_r15: u64,
    pub out_rbx: u64,
    pub out_rdi: u64,
    pub out_rsi: u64,
    pub out_r8: u64,
    pub out_r9: u64,
    pub out_rdx: u64,
}

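// The two `From` conversions below translate between the hypervisor-agnostic
// `UserMemoryRegion` flags and the KVM memory-slot flags. A minimal sketch of
// one direction (illustrative values only; KVM slots are always readable):
//
//     let kvm_region = kvm_userspace_memory_region {
//         slot: 0,
//         guest_phys_addr: 0x10_0000,
//         memory_size: 0x20_0000,
//         userspace_addr: 0x7f00_dead_0000, // hypothetical host mapping
//         flags: KVM_MEM_LOG_DIRTY_PAGES,
//     };
//     let region: UserMemoryRegion = kvm_region.into();
//     // region.flags == USER_MEMORY_REGION_READ | USER_MEMORY_REGION_WRITE
//     //     | USER_MEMORY_REGION_LOG_DIRTY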
impl From<kvm_userspace_memory_region> for UserMemoryRegion {
    fn from(region: kvm_userspace_memory_region) -> Self {
        let mut flags = USER_MEMORY_REGION_READ;
        if region.flags & KVM_MEM_READONLY == 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 {
            flags |= USER_MEMORY_REGION_LOG_DIRTY;
        }

        UserMemoryRegion {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<UserMemoryRegion> for kvm_userspace_memory_region {
    fn from(region: UserMemoryRegion) -> Self {
        assert!(
            region.flags & USER_MEMORY_REGION_READ != 0,
            "KVM mapped memory is always readable"
        );

        let mut flags = 0;
        if region.flags & USER_MEMORY_REGION_WRITE == 0 {
            flags |= KVM_MEM_READONLY;
        }
        if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
            flags |= KVM_MEM_LOG_DIRTY_PAGES;
        }

        kvm_userspace_memory_region {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<kvm_mp_state> for MpState {
    fn from(s: kvm_mp_state) -> Self {
        MpState::Kvm(s)
    }
}

impl From<MpState> for kvm_mp_state {
    fn from(ms: MpState) -> Self {
        match ms {
            MpState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("MpState is not valid"),
        }
    }
}

impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: kvm_ioctls::IoEventAddress) -> Self {
        match a {
            kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuKvmState> for CpuState {
    fn from(s: VcpuKvmState) -> Self {
        CpuState::Kvm(s)
    }
}

impl From<CpuState> for VcpuKvmState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<kvm_clock_data> for ClockData {
    fn from(d: kvm_clock_data) -> Self {
        ClockData::Kvm(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for kvm_clock_data {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("ClockData is not valid"),
        }
    }
}

impl From<kvm_bindings::kvm_regs> for crate::StandardRegisters {
    fn from(s: kvm_bindings::kvm_regs) -> Self {
        crate::StandardRegisters::Kvm(s)
    }
}

impl From<crate::StandardRegisters> for kvm_bindings::kvm_regs {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
    fn from(s: kvm_irq_routing_entry) -> Self {
        IrqRoutingEntry::Kvm(s)
    }
}

impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

struct KvmDirtyLogSlot {
    slot: u32,
    guest_phys_addr: u64,
    memory_size: u64,
    userspace_addr: u64,
}

/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    fd: Arc<VmFd>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>,
}

impl KvmVm {
    ///
    /// Creates an emulated device in the kernel.
    ///
    /// See the documentation for `KVM_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> {
        let device_fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        Ok(VfioDeviceFd::new_from_kvm(device_fd))
    }
    /// Checks if a particular `Cap` is available.
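    ///
    /// # Example
    ///
    /// A minimal sketch (assumes an existing `KvmVm` value named `vm`):
    ///
    /// ```ignore
    /// use kvm_ioctls::Cap;
    /// if vm.check_extension(Cap::MsiDevid) {
    ///     // The host KVM can route MSIs using a per-device identifier.
    /// }
    /// ```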
    pub fn check_extension(&self, c: Cap) -> bool {
        self.fd.check_extension(c)
    }
}

/// Implementation of Vm trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl vm::Vm for KvmVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    fn set_identity_map_address(&self, address: u64) -> vm::Result<()> {
        self.fd
            .set_identity_map_address(address)
            .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
        self.fd
            .set_tss_address(offset)
            .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
    }

    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        self.fd
            .create_irq_chip()
            .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
    }

    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
    }

    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
    }

    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let fd = self
            .fd
            .create_vcpu(id as u64)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = KvmVcpu {
            fd: Arc::new(Mutex::new(fd)),
            #[cfg(target_arch = "x86_64")]
            msrs: self.msrs.clone(),
            vm_ops,
            #[cfg(target_arch = "x86_64")]
            hyperv_synic: AtomicBool::new(false),
        };
        Ok(Arc::new(vcpu))
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Creates a virtual GIC device.
    ///
    fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
        let gic_device = KvmGicV3Its::new(self, config)
            .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?;
        Ok(Arc::new(Mutex::new(gic_device)))
    }

    ///
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
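    /// # Example
    ///
    /// A minimal sketch (hypothetical MMIO address; `vm` implements `vm::Vm`):
    ///
    /// ```ignore
    /// use vmm_sys_util::eventfd::EventFd;
    /// let event = EventFd::new(libc::EFD_NONBLOCK).unwrap();
    /// vm.register_ioevent(&event, &IoEventAddress::Mmio(0xd000_0000), None)
    ///     .unwrap();
    /// ```
    ///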
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<vm::DataMatch>,
    ) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(kvm_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(kvm_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }

    ///
    /// Unregisters an event from the address it was previously registered to.
    ///
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
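    /// # Example
    ///
    /// A minimal sketch for a legacy (IRQ chip) source; the config struct shape
    /// matches the `cfg.irqchip`/`cfg.pin` fields used below:
    ///
    /// ```ignore
    /// use crate::vm::{InterruptSourceConfig, LegacyIrqSourceConfig};
    /// let config = InterruptSourceConfig::LegacyIrq(LegacyIrqSourceConfig {
    ///     irqchip: 0,
    ///     pin: 4,
    /// });
    /// let entry = vm.make_routing_entry(4, &config);
    /// ```
    ///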
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match &config {
            InterruptSourceConfig::MsiIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_MSI,
                    ..Default::default()
                };

                kvm_route.u.msi.address_lo = cfg.low_addr;
                kvm_route.u.msi.address_hi = cfg.high_addr;
                kvm_route.u.msi.data = cfg.data;

                if self.check_extension(crate::kvm::Cap::MsiDevid) {
                    // On AArch64 there is a limitation on the range of the
                    // 'devid': it cannot exceed 65535 (the maximum of u16).
                    //
                    // The BDF cannot be used directly, because the 'segment' is
                    // in the high 16 bits. The layout of the u32 BDF is:
                    // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --|
                    // |      segment    |     bus    |   device   |  function  |
                    //
                    // Since we only support one bus per segment, we can build a
                    // 'devid' by replacing the 'bus' bits with the low 8 bits
                    // of the 'segment' data.
                    // This resolves the range-checking problem and gives a
                    // distinct 'devid' to every device. The limitation is that
                    // at most 256 segments can be supported.
                    //
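                    // For example (hypothetical BDF): segment 0x0002, bus 0x00,
                    // device 0x03, function 0x1 gives devid = 0x0002_0019, so
                    // modified_devid = (0x0002_0000 >> 8) | 0x19 = 0x0219.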
                    let modified_devid = (cfg.devid & 0x00ff_0000) >> 8 | cfg.devid & 0xff;

                    kvm_route.flags = KVM_MSI_VALID_DEVID;
                    kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid;
                }
                kvm_route.into()
            }
            InterruptSourceConfig::LegacyIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_IRQCHIP,
                    ..Default::default()
                };
                kvm_route.u.irqchip.irqchip = cfg.irqchip;
                kvm_route.u.irqchip.pin = cfg.pin;

                kvm_route.into()
            }
        }
    }

    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
    ///
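    /// # Example
    ///
    /// A minimal sketch (reuses an `entry` built with `make_routing_entry`):
    ///
    /// ```ignore
    /// vm.set_gsi_routing(&[entry]).unwrap();
    /// ```
    ///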
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
        irq_routing[0].nr = entries.len() as u32;
        irq_routing[0].flags = 0;
        let entries: Vec<kvm_irq_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Kvm(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: irq_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [kvm_irq_routing_entry] =
                irq_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_gsi_routing(&irq_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }

    ///
    /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
    ///
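    /// # Example
    ///
    /// A minimal sketch (hypothetical addresses; the slot number is
    /// caller-managed):
    ///
    /// ```ignore
    /// let region = vm.make_user_memory_region(
    ///     0,         // slot
    ///     0x10_0000, // guest_phys_addr
    ///     0x20_0000, // memory_size
    ///     host_addr, // userspace_addr of the host mapping
    ///     false,     // readonly
    ///     true,      // log_dirty_pages
    /// );
    /// vm.create_user_memory_region(region).unwrap();
    /// ```
    ///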
    fn make_user_memory_region(
        &self,
        slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        kvm_userspace_memory_region {
            slot,
            guest_phys_addr,
            memory_size,
            userspace_addr,
            flags: if readonly { KVM_MEM_READONLY } else { 0 }
                | if log_dirty_pages {
                    KVM_MEM_LOG_DIRTY_PAGES
                } else {
                    0
                },
        }
        .into()
    }

    ///
    /// Creates a guest physical memory region.
    ///
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 {
            if (region.flags & KVM_MEM_READONLY) != 0 {
                return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!(
                    "Error creating regions with both 'dirty-pages-log' and 'read-only'."
                )));
            }

            // Keep track of the regions that need dirty pages log
            self.dirty_log_slots.write().unwrap().insert(
                region.slot,
                KvmDirtyLogSlot {
                    slot: region.slot,
                    guest_phys_addr: region.guest_phys_addr,
                    memory_size: region.memory_size,
                    userspace_addr: region.userspace_addr,
                },
            );

            // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`.
            // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`.
            region.flags = 0;
        }

        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
        }
    }

    ///
    /// Removes a guest physical memory region.
    ///
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots.write().unwrap().remove(&region.slot);

        // Setting the size to 0 means "remove"
        region.memory_size = 0;
        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))
        }
    }

    ///
    /// Returns the preferred CPU target type which can be emulated by KVM on the underlying host.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
        self.fd
            .get_preferred_target(kvi)
            .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        // Create a split irqchip:
        // only the local APIC is emulated in-kernel; the PICs and the IOAPIC
        // are not.
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = NUM_IOAPIC_PINS as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SGX_ATTRIBUTE,
            ..Default::default()
        };
        cap.args[0] = file.as_raw_fd() as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?;
        Ok(())
    }

    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        Ok(self
            .fd
            .get_clock()
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?
            .into())
    }

    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        let data = (*data).into();
        self.fd
            .set_clock(&data)
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }

    /// Create a device that is used for passthrough
    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = kvm_create_device {
            type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: KVM_MEM_LOG_DIRTY_PAGES,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: 0,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
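    /// # Example
    ///
    /// A minimal sketch: each returned `u64` word carries the dirty bits of 64
    /// guest pages (illustrative slot and size values):
    ///
    /// ```ignore
    /// let bitmap = vm.get_dirty_log(0, 0x10_0000, 0x20_0000).unwrap();
    /// let dirty_pages: u32 = bitmap.iter().map(|w| w.count_ones()).sum();
    /// ```
    ///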
    fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(slot, memory_size as usize)
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }

    ///
    /// Initialize TDX for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> {
        const TDX_ATTR_SEPT_VE_DISABLE: usize = 28;

        let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default());

        #[repr(C)]
        struct TdxInitVm {
            attributes: u64,
            max_vcpus: u32,
            padding: u32,
            mrconfigid: [u64; 6],
            mrowner: [u64; 6],
            mrownerconfig: [u64; 6],
            cpuid_nent: u32,
            cpuid_padding: u32,
            cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256],
        }
        let data = TdxInitVm {
            attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE,
            max_vcpus,
            padding: 0,
            mrconfigid: [0; 6],
            mrowner: [0; 6],
            mrownerconfig: [0; 6],
            cpuid_nent: cpuid.len() as u32,
            cpuid_padding: 0,
            cpuid_entries: cpuid.as_slice().try_into().unwrap(),
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitVm,
            0,
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitializeTdx)
    }

    ///
    /// Finalize the TDX setup for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_finalize(&self) -> vm::Result<()> {
        tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
            .map_err(vm::HypervisorVmError::FinalizeTdx)
    }

    ///
    /// Initialize memory regions for the TDX VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init_memory_region(
        &self,
        host_address: u64,
        guest_address: u64,
        size: u64,
        measure: bool,
    ) -> vm::Result<()> {
        #[repr(C)]
        struct TdxInitMemRegion {
            host_address: u64,
            guest_address: u64,
            pages: u64,
        }
        let data = TdxInitMemRegion {
            host_address,
            guest_address,
            pages: size / 4096,
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitMemRegion,
            u32::from(measure),
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitMemRegionTdx)
    }

    /// Downcast to the underlying KvmVm type
    fn as_any(&self) -> &dyn Any {
        self
    }
}

#[cfg(feature = "tdx")]
fn tdx_command(
    fd: &RawFd,
    command: TdxCommand,
    flags: u32,
    data: u64,
) -> std::result::Result<(), std::io::Error> {
    #[repr(C)]
    struct TdxIoctlCmd {
        command: TdxCommand,
        flags: u32,
        data: u64,
        error: u64,
        unused: u64,
    }
    let cmd = TdxIoctlCmd {
        command,
        flags,
        data,
        error: 0,
        unused: 0,
    };
    // SAFETY: FFI call. All input parameters are valid.
    let ret = unsafe {
        ioctl_with_val(
            fd,
            KVM_MEMORY_ENCRYPT_OP(),
            &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong,
        )
    };

    if ret < 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}

/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    kvm: Kvm,
}

impl KvmHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by the hypervisor.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

/// Enum for KVM related error
#[derive(Debug, Error)]
pub enum KvmError {
    #[error("Capability missing: {0:?}")]
    CapabilityMissing(Cap),
}

pub type KvmResult<T> = result::Result<T, KvmError>;

impl KvmHypervisor {
    /// Create a hypervisor based on Kvm
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
        let api_version = kvm_obj.get_api_version();

        if api_version != kvm_bindings::KVM_API_VERSION as i32 {
            return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
        }

        Ok(Arc::new(KvmHypervisor { kvm: kvm_obj }))
    }

    /// Check if the hypervisor is available
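    ///
    /// # Example
    ///
    /// A minimal sketch (checks for `/dev/kvm` before constructing):
    ///
    /// ```ignore
    /// use hypervisor::kvm::KvmHypervisor;
    /// if KvmHypervisor::is_available().unwrap() {
    ///     let hypervisor = KvmHypervisor::new().unwrap();
    /// }
    /// ```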
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/kvm") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}

/// Implementation of Hypervisor trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl hypervisor::Hypervisor for KvmHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Kvm
    }

    /// Create a KVM vm object of a specific VM type and return the object as a Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type(0).unwrap();
    /// ```
    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm_with_type(vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, the ioctl was
                        // interrupted and we have to retry, as this cannot be
                        // considered a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs: Vec<MsrEntry> = vec![
                MsrEntry {
                    ..Default::default()
                };
                num_msrs
            ];
            let indices = msr_list.as_slice();
            for (pos, index) in indices.iter().enumerate() {
                msrs[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }

        #[cfg(target_arch = "aarch64")]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }

    /// Create a KVM vm object and return the object as a Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// ```
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        #[allow(unused_mut)]
        let mut vm_type: u64 = 0; // Create with default platform type

        // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA
        // size from the host and use that when creating the VM, which may
        // avoid unnecessary VM creation failures.
        #[cfg(target_arch = "aarch64")]
        if self.kvm.check_extension(Cap::ArmVmIPASize) {
            vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap();
        }

        self.create_vm_with_type(vm_type)
    }

    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        check_required_kvm_extensions(&self.kvm)
            .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve AArch64 host maximum IPA size supported by KVM.
    ///
    fn get_host_ipa_limit(&self) -> i32 {
        self.kvm.get_host_ipa_limit()
    }

    ///
    /// Retrieve TDX capabilities
    ///
    #[cfg(feature = "tdx")]
    fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> {
        let data = TdxCapabilities {
            nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32,
            ..Default::default()
        };

        tdx_command(
            &self.kvm.as_raw_fd(),
            TdxCommand::Capabilities,
            0,
            &data as *const _ as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?;

        Ok(data)
    }

    ///
    /// Get the number of supported hardware breakpoints
    ///
    fn get_guest_debug_hw_bps(&self) -> usize {
        #[cfg(target_arch = "x86_64")]
        {
            4
        }
        #[cfg(target_arch = "aarch64")]
        {
            self.kvm.get_guest_debug_hw_bps() as usize
        }
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32
    }
}

/// Vcpu struct for KVM
pub struct KvmVcpu {
    fd: Arc<Mutex<VcpuFd>>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    #[cfg(target_arch = "x86_64")]
    hyperv_synic: AtomicBool,
}

/// Implementation of Vcpu trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// ```
impl cpu::Vcpu for KvmVcpu {
    ///
    /// Returns StandardRegisters with default value set
    ///
    #[cfg(target_arch = "x86_64")]
    fn create_standard_regs(&self) -> StandardRegisters {
        kvm_bindings::kvm_regs::default().into()
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }

    ///
    /// Returns the vCPU general purpose registers.
    /// The `KVM_GET_REGS` ioctl is not available on AArch64, `KVM_GET_ONE_REG`
    /// is used to get registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        let mut state: StandardRegisters = kvm_regs::default();
        let mut off = offset_of!(user_pt_regs, regs);
        // There are 31 user_pt_regs:
        // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
        // These are the general-purpose registers of the Armv8-A architecture
        // (i.e. x0-x30 when used as 64-bit registers, or w0-w30 when used as 32-bit registers).
        for i in 0..31 {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            state.regs.regs[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // We are now entering the "Other register" section of the ARMv8-a architecture.
        // First one, stack pointer.
        let off = offset_of!(user_pt_regs, sp);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.regs.sp = u64::from_le_bytes(bytes);

        // Second one, the program counter.
        let off = offset_of!(user_pt_regs, pc);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.regs.pc = u64::from_le_bytes(bytes);

        // Next is the processor state.
        let off = offset_of!(user_pt_regs, pstate);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.regs.pstate = u64::from_le_bytes(bytes);

        // The stack pointer associated with EL1
        let off = offset_of!(kvm_regs, sp_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.sp_el1 = u64::from_le_bytes(bytes);

        // Exception Link Register for EL1, when taking an exception to EL1, this register
        // holds the address to which to return afterwards.
        let off = offset_of!(kvm_regs, elr_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.elr_el1 = u64::from_le_bytes(bytes);

        // Saved Program Status Registers, there are 5 of them used in the kernel.
        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            state.spsr[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel:
        // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            let mut bytes = [0_u8; 16];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            state.fp_regs.vregs[i] = u128::from_le_bytes(bytes);
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        let mut bytes = [0_u8; 4];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.fp_regs.fpsr = u32::from_le_bytes(bytes);

        // Floating-point Control Register
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        let mut bytes = [0_u8; 4];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.fp_regs.fpcr = u32::from_le_bytes(bytes);
        Ok(state)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .lock()
            .unwrap()
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }

    ///
    /// Sets the vCPU general purpose registers.
    /// The `KVM_SET_REGS` ioctl is not available on AArch64, `KVM_SET_ONE_REG`
    /// is used to set registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // This function writes the registers in the same order as `get_regs`
        // reads them; see that function for additional notes on each register.
        let mut off = offset_of!(user_pt_regs, regs);
        for i in 0..31 {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &state.regs.regs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let off = offset_of!(user_pt_regs, sp);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.regs.sp.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pc);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.regs.pc.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pstate);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.regs.pstate.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, sp_el1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.sp_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, elr_el1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.elr_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &state.spsr[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
                    &state.fp_regs.vregs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &state.fp_regs.fpsr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &state.fp_regs.fpcr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .lock()
            .unwrap()
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
        let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        let kvm_cpuid = <CpuId>::from_entries(&cpuid)
            .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;

        self.fd
            .lock()
            .unwrap()
            .set_cpuid2(&kvm_cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
1563     /// X86 specific call to enable the Hyper-V SynIC.
1564     ///
1565     fn enable_hyperv_synic(&self) -> cpu::Result<()> {
1566         // Update the information about Hyper-V SynIC being enabled and
1567         // emulated as it will influence later which MSRs should be saved.
1568         self.hyperv_synic.store(true, Ordering::Release);
1569 
1570         let cap = kvm_enable_cap {
1571             cap: KVM_CAP_HYPERV_SYNIC,
1572             ..Default::default()
1573         };
1574         self.fd
1575             .lock()
1576             .unwrap()
1577             .enable_cap(&cap)
1578             .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into()))
1579     }
1580 
1581     ///
1582     /// X86 specific call to retrieve the CPUID registers.
1583     ///
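    /// A usage sketch (assumes a `vcpu` handle):
    ///
    /// ```ignore
    /// let entries = vcpu
    ///     .get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
    ///     .unwrap();
    /// for e in &entries {
    ///     println!("leaf {:#x}: eax = {:#x}", e.function, e.eax);
    /// }
    /// ```
    ///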
1584     #[cfg(target_arch = "x86_64")]
1585     fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
1586         let kvm_cpuid = self
1587             .fd
1588             .lock()
1589             .unwrap()
1590             .get_cpuid2(num_entries)
1591             .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?;
1592 
1593         let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();
1594 
1595         Ok(v)
1596     }
1597 
1598     #[cfg(target_arch = "x86_64")]
1599     ///
1600     /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1601     ///
1602     fn get_lapic(&self) -> cpu::Result<LapicState> {
1603         Ok(self
1604             .fd
1605             .lock()
1606             .unwrap()
1607             .get_lapic()
1608             .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
1609             .into())
1610     }
1611 
1612     #[cfg(target_arch = "x86_64")]
1613     ///
1614     /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1615     ///
1616     fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
1617         let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into();
1618         self.fd
1619             .lock()
1620             .unwrap()
1621             .set_lapic(&klapic)
1622             .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
1623     }
1624 
1625     #[cfg(target_arch = "x86_64")]
1626     ///
1627     /// Returns the model-specific registers (MSRs) for this vCPU.
1628     ///
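    /// A usage sketch (assumes a `vcpu` handle; `MSR_IA32_TSC` comes from this
    /// crate's `arch::x86::msr_index` module):
    ///
    /// ```ignore
    /// let msr = kvm_bindings::kvm_msr_entry {
    ///     index: msr_index::MSR_IA32_TSC,
    ///     ..Default::default()
    /// };
    /// let mut msrs: Vec<MsrEntry> = vec![msr.into()];
    /// let read = vcpu.get_msrs(&mut msrs).unwrap();
    /// assert_eq!(read, 1);
    /// println!("TSC: {:#x}", msrs[0].data);
    /// ```
    ///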
1629     fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
1630         let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
1631         let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
1632         let succ = self
1633             .fd
1634             .lock()
1635             .unwrap()
1636             .get_msrs(&mut kvm_msrs)
1637             .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;
1638 
1639         msrs[..succ].copy_from_slice(
1640             &kvm_msrs.as_slice()[..succ]
1641                 .iter()
1642                 .map(|e| (*e).into())
1643                 .collect::<Vec<MsrEntry>>(),
1644         );
1645 
1646         Ok(succ)
1647     }
1648 
1649     #[cfg(target_arch = "x86_64")]
1650     ///
1651     /// Set up the model-specific registers (MSRs) for this vCPU.
1652     /// Returns the number of MSR entries actually written.
1653     ///
1654     fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
1655         let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
1656         let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
1657         self.fd
1658             .lock()
1659             .unwrap()
1660             .set_msrs(&kvm_msrs)
1661             .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
1662     }
1663 
1664     ///
1665     /// Returns the vcpu's current "multiprocessing state".
1666     ///
1667     fn get_mp_state(&self) -> cpu::Result<MpState> {
1668         Ok(self
1669             .fd
1670             .lock()
1671             .unwrap()
1672             .get_mp_state()
1673             .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))?
1674             .into())
1675     }
1676 
1677     ///
1678     /// Sets the vcpu's current "multiprocessing state".
1679     ///
1680     fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
1681         self.fd
1682             .lock()
1683             .unwrap()
1684             .set_mp_state(mp_state.into())
1685             .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
1686     }
1687 
1688     #[cfg(target_arch = "x86_64")]
1689     ///
1690     /// Translates guest virtual address to guest physical address using the `KVM_TRANSLATE` ioctl.
1691     ///
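    /// A minimal usage sketch (not a compiled doc test; assumes a `vcpu`
    /// created as in the `state()` example further below):
    ///
    /// ```ignore
    /// let (gpa, _flags) = vcpu.translate_gva(0xffff_8000_0000_0000, 0).unwrap();
    /// println!("GPA: {:#x}", gpa);
    /// ```
    ///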
1692     fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> {
1693         let tr = self
1694             .fd
1695             .lock()
1696             .unwrap()
1697             .translate_gva(gva)
1698             .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
1699         // tr.valid is set if the GVA is mapped to a valid GPA.
1700         match tr.valid {
1701             0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!(
1702                 "Invalid GVA: {:#x}",
1703                 gva
1704             ))),
1705             _ => Ok((tr.physical_address, 0)),
1706         }
1707     }
1708 
1709     ///
1710     /// Triggers the running of the current virtual CPU, returning an exit reason.
1711     ///
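    /// A hypothetical dispatch loop over this method (a sketch, not this
    /// crate's actual vCPU thread):
    ///
    /// ```ignore
    /// use hypervisor::cpu::VmExit;
    ///
    /// loop {
    ///     match vcpu.run() {
    ///         Ok(VmExit::Reset) => break, // guest requested a reset
    ///         Ok(_) => continue,          // exit already handled internally
    ///         Err(e) => panic!("vCPU run error: {e:?}"),
    ///     }
    /// }
    /// ```
    ///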
1712     fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
1713         match self.fd.lock().unwrap().run() {
1714             Ok(run) => match run {
1715                 #[cfg(target_arch = "x86_64")]
1716                 VcpuExit::IoIn(addr, data) => {
1717                     if let Some(vm_ops) = &self.vm_ops {
1718                         return vm_ops
1719                             .pio_read(addr.into(), data)
1720                             .map(|_| cpu::VmExit::Ignore)
1721                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1722                     }
1723 
1724                     Ok(cpu::VmExit::Ignore)
1725                 }
1726                 #[cfg(target_arch = "x86_64")]
1727                 VcpuExit::IoOut(addr, data) => {
1728                     if let Some(vm_ops) = &self.vm_ops {
1729                         return vm_ops
1730                             .pio_write(addr.into(), data)
1731                             .map(|_| cpu::VmExit::Ignore)
1732                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1733                     }
1734 
1735                     Ok(cpu::VmExit::Ignore)
1736                 }
1737                 #[cfg(target_arch = "x86_64")]
1738                 VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
1739                 #[cfg(target_arch = "x86_64")]
1740                 VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),
1741 
1742                 #[cfg(target_arch = "aarch64")]
1743                 VcpuExit::SystemEvent(event_type, flags) => {
1744                     use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
1745                     // On AArch64, when the VM is shut down, run() returns
1746                     // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN.
1747                     if event_type == KVM_SYSTEM_EVENT_RESET {
1748                         Ok(cpu::VmExit::Reset)
1749                     } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
1750                         Ok(cpu::VmExit::Shutdown)
1751                     } else {
1752                         Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1753                             "Unexpected system event with type 0x{:x}, flags 0x{:x?}",
1754                             event_type,
1755                             flags
1756                         )))
1757                     }
1758                 }
1759 
1760                 VcpuExit::MmioRead(addr, data) => {
1761                     if let Some(vm_ops) = &self.vm_ops {
1762                         return vm_ops
1763                             .mmio_read(addr, data)
1764                             .map(|_| cpu::VmExit::Ignore)
1765                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1766                     }
1767 
1768                     Ok(cpu::VmExit::Ignore)
1769                 }
1770                 VcpuExit::MmioWrite(addr, data) => {
1771                     if let Some(vm_ops) = &self.vm_ops {
1772                         return vm_ops
1773                             .mmio_write(addr, data)
1774                             .map(|_| cpu::VmExit::Ignore)
1775                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1776                     }
1777 
1778                     Ok(cpu::VmExit::Ignore)
1779                 }
1780                 VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),
1781                 #[cfg(feature = "tdx")]
1782                 VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx),
1783                 VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug),
1784 
1785                 r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1786                     "Unexpected exit reason on vcpu run: {:?}",
1787                     r
1788                 ))),
1789             },
1790 
1791             Err(ref e) => match e.errno() {
1792                 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
1793                 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1794                     "VCPU error {:?}",
1795                     e
1796                 ))),
1797             },
1798         }
1799     }
1800 
1801     #[cfg(target_arch = "x86_64")]
1802     ///
1803     /// Let the guest know that it has been paused, which prevents
1804     /// potential soft lockups when it is resumed.
1805     ///
1806     fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
1807         if let Err(e) = self.fd.lock().unwrap().kvmclock_ctrl() {
1808             // Linux kernel returns -EINVAL if the PV clock isn't yet initialised
1809             // which could be because we're still in firmware or the guest doesn't
1810             // use KVM clock.
1811             if e.errno() != libc::EINVAL {
1812                 return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()));
1813             }
1814         }
1815 
1816         Ok(())
1817     }
1818 
1819     ///
1820     /// Sets debug registers to set hardware breakpoints and/or enable single step.
1821     ///
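    /// A usage sketch (hypothetical breakpoint address):
    ///
    /// ```ignore
    /// // One hardware breakpoint, with single-stepping enabled.
    /// vcpu.set_guest_debug(&[vm_memory::GuestAddress(0x10_0000)], true)
    ///     .unwrap();
    /// ```
    ///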
1822     fn set_guest_debug(
1823         &self,
1824         addrs: &[vm_memory::GuestAddress],
1825         singlestep: bool,
1826     ) -> cpu::Result<()> {
1827         let mut dbg = kvm_guest_debug {
1828             #[cfg(target_arch = "x86_64")]
1829             control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
1830             #[cfg(target_arch = "aarch64")]
1831             control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW,
1832             ..Default::default()
1833         };
1834         if singlestep {
1835             dbg.control |= KVM_GUESTDBG_SINGLESTEP;
1836         }
1837 
1838         // Set the debug registers.
1839         // Here we assume that the number of addresses does not exceed what
1840         // `Hypervisor::get_guest_debug_hw_bps()` specifies.
1841         #[cfg(target_arch = "x86_64")]
1842         {
1843             // Set bits 9 and 10.
1844             // bit 9: GE (global exact breakpoint enable) flag.
1845             // bit 10: always 1.
1846             dbg.arch.debugreg[7] = 0x0600;
1847 
1848             for (i, addr) in addrs.iter().enumerate() {
1849                 dbg.arch.debugreg[i] = addr.0;
1850                 // Set global breakpoint enable flag
1851                 dbg.arch.debugreg[7] |= 2 << (i * 2);
1852             }
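            // Illustration: with two breakpoint addresses, the loop above sets
            // the G0 (bit 1) and G1 (bit 3) enable flags, leaving DR7 at
            // 0x0600 | 0b1010 = 0x060a.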
1853         }
1854         #[cfg(target_arch = "aarch64")]
1855         {
1856             for (i, addr) in addrs.iter().enumerate() {
1857                 // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2):
1858                 // bit 0: 1 (Enabled)
1859                 // bit 1~2: 0b11 (PMC = EL1/EL0)
1860                 // bit 5~8: 0b1111 (BAS = AArch64)
1861                 // others: 0
1862                 dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64;
1863                 // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3):
1864                 // bit 2~52: VA[2:52]
1865                 dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0;
1866             }
1867         }
1868         self.fd
1869             .lock()
1870             .unwrap()
1871             .set_guest_debug(&dbg)
1872             .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))
1873     }
1874 
1875     #[cfg(target_arch = "aarch64")]
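    ///
    /// Initialize an aarch64 vCPU with the target and features carried in
    /// `kvi`, via the KVM_ARM_VCPU_INIT ioctl.
    ///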
1876     fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
1877         self.fd
1878             .lock()
1879             .unwrap()
1880             .vcpu_init(kvi)
1881             .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
1882     }
1883 
1884     ///
1885     /// Gets a list of the guest registers that are supported for the
1886     /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
1887     ///
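    /// A sketch (assumes an aarch64 `vcpu`; 500 is the same size hint that
    /// `state()` below uses):
    ///
    /// ```ignore
    /// let mut reg_list = RegList::new(500).unwrap();
    /// vcpu.get_reg_list(&mut reg_list).unwrap();
    /// println!("{} registers available", reg_list.as_slice().len());
    /// ```
    ///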
1888     #[cfg(target_arch = "aarch64")]
1889     fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
1890         self.fd
1891             .lock()
1892             .unwrap()
1893             .get_reg_list(reg_list)
1894             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
1895     }
1896 
1897     ///
1898     /// Gets the value of a system register
1899     /// Gets the value of a system register.
1900     #[cfg(target_arch = "aarch64")]
1901     fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
1902         //
1903         // The Arm Architecture Reference Manual defines the encoding of
1904         // AArch64 system registers, see
1905         // https://developer.arm.com/documentation/ddi0487 (chapter D12).
1906         // KVM defines its own ID for each AArch64 system register, which
1907         // is used when calling `KVM_GET/SET_ONE_REG` to access a system
1908         // register of a guest.
1909         // A mapping exists between the Arm standard encoding and the KVM ID.
1910         // This function takes the standard u32 encoding as input, converts
1911         // it to the corresponding KVM ID, and calls the `KVM_GET_ONE_REG` API
1912         // to get the value of the system register.
1913         //
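        // Illustration (assuming the caller encodes `sys_reg` with the Linux
        // sys_reg() layout: op0 at bit 19, op1 at bit 16, CRn at bit 12, CRm
        // at bit 8, op2 at bit 5): MPIDR_EL1 is (op0,op1,CRn,CRm,op2) =
        // (3,0,0,0,5), i.e. (3 << 19) | (5 << 5) = 0x18_00a0. Shifting right
        // by 5 lines the fields up with the KVM_REG_ARM64_SYSREG_*_MASK
        // values below, giving 0xc005.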
1914         let id: u64 = KVM_REG_ARM64
1915             | KVM_REG_SIZE_U64
1916             | KVM_REG_ARM64_SYSREG as u64
1917             | ((((sys_reg) >> 5)
1918                 & (KVM_REG_ARM64_SYSREG_OP0_MASK
1919                     | KVM_REG_ARM64_SYSREG_OP1_MASK
1920                     | KVM_REG_ARM64_SYSREG_CRN_MASK
1921                     | KVM_REG_ARM64_SYSREG_CRM_MASK
1922                     | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
1923         let mut bytes = [0_u8; 8];
1924         self.fd
1925             .lock()
1926             .unwrap()
1927             .get_one_reg(id, &mut bytes)
1928             .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
1929         Ok(u64::from_le_bytes(bytes))
1930     }
1931 
1932     ///
1933     /// Configure core registers for a given CPU.
1934     ///
1935     #[cfg(target_arch = "aarch64")]
1936     fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
1937         #[allow(non_upper_case_globals)]
1938         // PSR (Processor State Register) bits.
1939         // Taken from arch/arm64/include/uapi/asm/ptrace.h.
1940         const PSR_MODE_EL1h: u64 = 0x0000_0005;
1941         const PSR_F_BIT: u64 = 0x0000_0040;
1942         const PSR_I_BIT: u64 = 0x0000_0080;
1943         const PSR_A_BIT: u64 = 0x0000_0100;
1944         const PSR_D_BIT: u64 = 0x0000_0200;
1945         // Taken from arch/arm64/kvm/inject_fault.c.
1946         const PSTATE_FAULT_BITS_64: u64 =
1947             PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;
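        // i.e. 0x3c5: EL1h mode with the D, A, I and F exception bits masked.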
1948 
1949         let kreg_off = offset_of!(kvm_regs, regs);
1950 
1951         // Get the register index of the PSTATE (Processor State) register.
1952         let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
1953         self.fd
1954             .lock()
1955             .unwrap()
1956             .set_one_reg(
1957                 arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
1958                 &PSTATE_FAULT_BITS_64.to_le_bytes(),
1959             )
1960             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1961 
1962         // Other vCPUs are powered off initially awaiting PSCI wakeup.
1963         if cpu_id == 0 {
1964             // Set the PC (Program Counter) to the current program address (kernel address).
1965             let pc = offset_of!(user_pt_regs, pc) + kreg_off;
1966             self.fd
1967                 .lock()
1968                 .unwrap()
1969                 .set_one_reg(
1970                     arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
1971                     &boot_ip.to_le_bytes(),
1972                 )
1973                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1974 
1975             // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
1976             // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
1977             // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
1978             // We are choosing to place it at the end of DRAM. See `get_fdt_addr`.
1979             let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
1980             self.fd
1981                 .lock()
1982                 .unwrap()
1983                 .set_one_reg(
1984                     arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
1985                     &fdt_start.to_le_bytes(),
1986                 )
1987                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1988         }
1989         Ok(())
1990     }
1991 
1992     #[cfg(target_arch = "x86_64")]
1993     ///
1994     /// Get the current CPU state
1995     ///
1996     /// Ordering requirements:
1997     ///
1998     /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
1999     /// vCPU/LAPIC state. As such, it must be done before most everything
2000     /// else, otherwise we cannot restore everything and expect it to work.
2001     ///
2002     /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
2003     /// still running.
2004     ///
2005     /// KVM_GET_LAPIC may change state of LAPIC before returning it.
2006     ///
2007     /// GET_VCPU_EVENTS should probably be last to save. The code looks
2008     /// like it might be affected by internal state modifications of the
2009     /// GET ioctls.
2010     ///
2011     /// SREGS saves/restores a pending interrupt, similar to what
2012     /// VCPU_EVENTS also does.
2013     ///
2014     /// GET_MSRS requires a prepopulated data structure to do something
2015     /// meaningful. For SET_MSRS it will then contain good data.
2016     ///
2017     /// # Example
2018     ///
2019     /// ```rust
2020     /// # use hypervisor::kvm::KvmHypervisor;
2021     /// # use std::sync::Arc;
2022     /// let kvm = KvmHypervisor::new().unwrap();
2023     /// let hv = Arc::new(kvm);
2024     /// let vm = hv.create_vm().expect("new VM fd creation failed");
2025     /// vm.enable_split_irq().unwrap();
2026     /// let vcpu = vm.create_vcpu(0, None).unwrap();
2027     /// let state = vcpu.state().unwrap();
2028     /// ```
2029     fn state(&self) -> cpu::Result<CpuState> {
2030         let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
2031         let mp_state = self.get_mp_state()?.into();
2032         let regs = self.get_regs()?;
2033         let sregs = self.get_sregs()?;
2034         let xsave = self.get_xsave()?;
2035         let xcrs = self.get_xcrs()?;
2036         let lapic_state = self.get_lapic()?;
2037         let fpu = self.get_fpu()?;
2038 
2039         // Try to get all MSRs based on the list previously retrieved from KVM.
2040         // If the number of MSRs obtained from GET_MSRS is different from the
2041         // expected amount, we fall back on a slower method of getting MSRs
2042         // in chunks. This is the only way to make sure we try to get as many
2043         // MSRs as possible, even if some MSRs are not supported.
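        // For example, with entries [A, B, C, D] where B cannot be read:
        // GET_MSRS reports one entry read, so A is kept, B is skipped, and the
        // loop below retries with [C, D] until a tail read succeeds in full.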
2044         let mut msr_entries = self.msrs.clone();
2045 
2046         // Save extra MSRs if the Hyper-V synthetic interrupt controller is
2047         // emulated.
2048         if self.hyperv_synic.load(Ordering::Acquire) {
2049             let hyperv_synic_msrs = vec![
2050                 0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
2051                 0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
2052                 0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
2053                 0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
2054                 0x400000b5, 0x400000b6, 0x400000b7,
2055             ];
2056             for index in hyperv_synic_msrs {
2057                 let msr = kvm_msr_entry {
2058                     index,
2059                     ..Default::default()
2060                 };
2061                 msr_entries.push(msr.into());
2062             }
2063         }
2064 
2065         let expected_num_msrs = msr_entries.len();
2066         let num_msrs = self.get_msrs(&mut msr_entries)?;
2067         let msrs = if num_msrs != expected_num_msrs {
2068             let mut faulty_msr_index = num_msrs;
2069             let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();
2070 
2071             loop {
2072                 warn!(
2073                     "Detected faulty MSR 0x{:x} while getting MSRs",
2074                     msr_entries[faulty_msr_index].index
2075                 );
2076 
2077                 // Skip the first bad MSR
2078                 let start_pos = faulty_msr_index + 1;
2079 
2080                 let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
2081                 let num_msrs = self.get_msrs(&mut sub_msr_entries)?;
2082 
2083                 msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);
2084 
2085                 if num_msrs == sub_msr_entries.len() {
2086                     break;
2087                 }
2088 
2089                 faulty_msr_index = start_pos + num_msrs;
2090             }
2091 
2092             msr_entries_tmp
2093         } else {
2094             msr_entries
2095         };
2096 
2097         let vcpu_events = self.get_vcpu_events()?;
2098         let tsc_khz = self.tsc_khz()?;
2099 
2100         Ok(VcpuKvmState {
2101             cpuid,
2102             msrs,
2103             vcpu_events,
2104             regs: regs.into(),
2105             sregs: sregs.into(),
2106             fpu,
2107             lapic_state,
2108             xsave,
2109             xcrs,
2110             mp_state,
2111             tsc_khz,
2112         }
2113         .into())
2114     }
2115 
2116     ///
2117     /// Get the current AArch64 CPU state
2118     ///
2119     #[cfg(target_arch = "aarch64")]
2120     fn state(&self) -> cpu::Result<CpuState> {
2121         let mut state = VcpuKvmState {
2122             mp_state: self.get_mp_state()?.into(),
2123             ..Default::default()
2124         };
2125         // Get core registers
2126         state.core_regs = self.get_regs()?;
2127 
2128         // Get system registers.
2129         // Call KVM_GET_REG_LIST to get all registers available to the guest.
2130         // For Armv8 there are around 500 registers.
2131         let mut sys_regs: Vec<Register> = Vec::new();
2132         let mut reg_list = RegList::new(500).unwrap();
2133         self.fd
2134             .lock()
2135             .unwrap()
2136             .get_reg_list(&mut reg_list)
2137             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;
2138 
2139         // At this point reg_list should contain: core registers and system
2140         // registers.
2141         // The register list contains the number of registers and their ids. We
2142         // will need to call KVM_GET_ONE_REG on each id in order to save all of
2143         // them. We carve out from the list the core registers, which are
2144         // represented in the kernel by the kvm_regs structure and for which we
2145         // can calculate the id based on the offset in the structure.
2146         reg_list.retain(|regid| is_system_register(*regid));
2147 
2148         // Now, for the rest of the registers left in the previously fetched
2149         // register list, we are simply calling KVM_GET_ONE_REG.
2150         let indices = reg_list.as_slice();
2151         for index in indices.iter() {
2152             let mut bytes = [0_u8; 8];
2153             self.fd
2154                 .lock()
2155                 .unwrap()
2156                 .get_one_reg(*index, &mut bytes)
2157                 .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
2158             sys_regs.push(kvm_bindings::kvm_one_reg {
2159                 id: *index,
2160                 addr: u64::from_le_bytes(bytes),
2161             });
2162         }
2163 
2164         state.sys_regs = sys_regs;
2165 
2166         Ok(state.into())
2167     }
2168 
2169     #[cfg(target_arch = "x86_64")]
2170     ///
2171     /// Restore the previously saved CPU state
2172     ///
2173     /// Ordering requirements:
2174     ///
2175     /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
2176     /// still running.
2177     ///
2178     /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
2179     /// if we ever change the BSP, we have to do that before restoring anything.
2180     /// The same seems to be true for CPUID stuff.
2181     ///
2182     /// SREGS saves/restores a pending interrupt, similar to what
2183     /// VCPU_EVENTS also does.
2184     ///
2185     /// SET_REGS clears pending exceptions unconditionally, thus, it must be
2186     /// done before SET_VCPU_EVENTS, which restores it.
2187     ///
2188     /// SET_LAPIC must come after SET_SREGS, because the latter restores
2189     /// the apic base msr.
2190     ///
2191     /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
2192     /// only restores successfully when the LAPIC is correctly configured.
2193     ///
2194     /// Arguments: CpuState
2195     /// # Example
2196     ///
2197     /// ```rust
2198     /// # use hypervisor::kvm::KvmHypervisor;
2199     /// # use std::sync::Arc;
2200     /// let kvm = KvmHypervisor::new().unwrap();
2201     /// let hv = Arc::new(kvm);
2202     /// let vm = hv.create_vm().expect("new VM fd creation failed");
2203     /// vm.enable_split_irq().unwrap();
2204     /// let vcpu = vm.create_vcpu(0, None).unwrap();
2205     /// let state = vcpu.state().unwrap();
2206     /// vcpu.set_state(&state).unwrap();
2207     /// ```
2208     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
2209         let state: VcpuKvmState = state.clone().into();
2210         self.set_cpuid2(&state.cpuid)?;
2211         self.set_mp_state(state.mp_state.into())?;
2212         self.set_regs(&state.regs.into())?;
2213         self.set_sregs(&state.sregs.into())?;
2214         self.set_xsave(&state.xsave)?;
2215         self.set_xcrs(&state.xcrs)?;
2216         self.set_lapic(&state.lapic_state)?;
2217         self.set_fpu(&state.fpu)?;
2218 
2219         if let Some(freq) = state.tsc_khz {
2220             self.set_tsc_khz(freq)?;
2221         }
2222 
2223         // Try to set all MSRs previously stored.
2224         // If the number of MSRs set from SET_MSRS is different from the
2225         // expected amount, we fall back on a slower method of setting MSRs
2226         // in chunks. This is the only way to make sure we try to set as many
2227         // MSRs as possible, even if some MSRs are not supported.
2228         let expected_num_msrs = state.msrs.len();
2229         let num_msrs = self.set_msrs(&state.msrs)?;
2230         if num_msrs != expected_num_msrs {
2231             let mut faulty_msr_index = num_msrs;
2232 
2233             loop {
2234                 warn!(
2235                     "Detected faulty MSR 0x{:x} while setting MSRs",
2236                     state.msrs[faulty_msr_index].index
2237                 );
2238 
2239                 // Skip the first bad MSR
2240                 let start_pos = faulty_msr_index + 1;
2241 
2242                 let sub_msr_entries = state.msrs[start_pos..].to_vec();
2243 
2244                 let num_msrs = self.set_msrs(&sub_msr_entries)?;
2245 
2246                 if num_msrs == sub_msr_entries.len() {
2247                     break;
2248                 }
2249 
2250                 faulty_msr_index = start_pos + num_msrs;
2251             }
2252         }
2253 
2254         self.set_vcpu_events(&state.vcpu_events)?;
2255 
2256         Ok(())
2257     }
2258 
2259     ///
2260     /// Restore the previously saved AArch64 CPU state
2261     ///
2262     #[cfg(target_arch = "aarch64")]
2263     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
2264         let state: VcpuKvmState = state.clone().into();
2265         // Set core registers
2266         self.set_regs(&state.core_regs)?;
2267         // Set system registers
2268         for reg in &state.sys_regs {
2269             self.fd
2270                 .lock()
2271                 .unwrap()
2272                 .set_one_reg(reg.id, &reg.addr.to_le_bytes())
2273                 .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
2274         }
2275 
2276         self.set_mp_state(state.mp_state.into())?;
2277 
2278         Ok(())
2279     }
2280 
2281     ///
2282     /// Initialize TDX for this CPU
2283     ///
2284     #[cfg(feature = "tdx")]
2285     fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
2286         tdx_command(
2287             &self.fd.lock().unwrap().as_raw_fd(),
2288             TdxCommand::InitVcpu,
2289             0,
2290             hob_address,
2291         )
2292         .map_err(cpu::HypervisorCpuError::InitializeTdx)
2293     }
2294 
2295     ///
2296     /// Set the "immediate_exit" state
2297     ///
2298     fn set_immediate_exit(&self, exit: bool) {
2299         self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into());
2300     }
2301 
2302     ///
2303     /// Returns the details about TDX exit reason
2304     ///
2305     #[cfg(feature = "tdx")]
2306     fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
2307         let mut fd = self.fd.as_ref().lock().unwrap();
2308         let kvm_run = fd.get_kvm_run();
2309         // SAFETY: accessing a union field in a valid structure
2310         let tdx_vmcall = unsafe {
2311             &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
2312                 as *mut KvmTdxExit))
2313                 .u
2314                 .vmcall
2315         };
2316 
2317         tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;
2318 
2319         if tdx_vmcall.type_ != 0 {
2320             return Err(cpu::HypervisorCpuError::UnknownTdxVmCall);
2321         }
2322 
2323         match tdx_vmcall.subfunction {
2324             TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
2325             TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
2326                 Ok(TdxExitDetails::SetupEventNotifyInterrupt)
2327             }
2328             _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
2329         }
2330     }
2331 
2332     ///
2333     /// Set the status code for TDX exit
2334     ///
2335     #[cfg(feature = "tdx")]
2336     fn set_tdx_status(&mut self, status: TdxExitStatus) {
2337         let mut fd = self.fd.as_ref().lock().unwrap();
2338         let kvm_run = fd.get_kvm_run();
2339         // SAFETY: accessing a union field in a valid structure
2340         let tdx_vmcall = unsafe {
2341             &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
2342                 as *mut KvmTdxExit))
2343                 .u
2344                 .vmcall
2345         };
2346 
2347         tdx_vmcall.status_code = match status {
2348             TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
2349             TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
2350         };
2351     }
2352 
2353     #[cfg(target_arch = "x86_64")]
2354     ///
2355     /// Return the list of initial MSR entries for a VCPU
2356     ///
2357     fn boot_msr_entries(&self) -> Vec<MsrEntry> {
2358         use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};
2359 
2360         [
2361             msr!(msr_index::MSR_IA32_SYSENTER_CS),
2362             msr!(msr_index::MSR_IA32_SYSENTER_ESP),
2363             msr!(msr_index::MSR_IA32_SYSENTER_EIP),
2364             msr!(msr_index::MSR_STAR),
2365             msr!(msr_index::MSR_CSTAR),
2366             msr!(msr_index::MSR_LSTAR),
2367             msr!(msr_index::MSR_KERNEL_GS_BASE),
2368             msr!(msr_index::MSR_SYSCALL_MASK),
2369             msr!(msr_index::MSR_IA32_TSC),
2370             msr_data!(
2371                 msr_index::MSR_IA32_MISC_ENABLE,
2372                 msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
2373             ),
2374             msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
2375         ]
2376         .to_vec()
2377     }
2378 
2379     #[cfg(target_arch = "aarch64")]
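    ///
    /// Check whether KVM can expose a PMUv3 to the guest, by probing this
    /// vCPU for the KVM_ARM_VCPU_PMU_V3_INIT device attribute.
    ///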
2380     fn has_pmu_support(&self) -> bool {
2381         let cpu_attr = kvm_bindings::kvm_device_attr {
2382             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
2383             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
2384             addr: 0x0,
2385             flags: 0,
2386         };
2387         self.fd.lock().unwrap().has_device_attr(&cpu_attr).is_ok()
2388     }
2389 
2390     #[cfg(target_arch = "aarch64")]
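    ///
    /// Initialize the vCPU PMUv3 with the given interrupt number. The IRQ
    /// attribute (KVM_ARM_VCPU_PMU_V3_IRQ) is set before the init attribute
    /// (KVM_ARM_VCPU_PMU_V3_INIT), which is the order KVM expects.
    ///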
2391     fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
2392         let cpu_attr = kvm_bindings::kvm_device_attr {
2393             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
2394             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
2395             addr: 0x0,
2396             flags: 0,
2397         };
2398         let cpu_attr_irq = kvm_bindings::kvm_device_attr {
2399             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
2400             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
2401             addr: &irq as *const u32 as u64,
2402             flags: 0,
2403         };
2404         self.fd
2405             .lock()
2406             .unwrap()
2407             .set_device_attr(&cpu_attr_irq)
2408             .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
2409         self.fd
2410             .lock()
2411             .unwrap()
2412             .set_device_attr(&cpu_attr)
2413             .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
2414     }
2415 
2416     #[cfg(target_arch = "x86_64")]
2417     ///
2418     /// Get the frequency of the TSC if available
2419     ///
2420     fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
2421         match self.fd.lock().unwrap().get_tsc_khz() {
2422             Err(e) => {
2423                 if e.errno() == libc::EIO {
2424                     Ok(None)
2425                 } else {
2426                     Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
2427                 }
2428             }
2429             Ok(v) => Ok(Some(v)),
2430         }
2431     }
2432 
2433     #[cfg(target_arch = "x86_64")]
2434     ///
2435     /// Set the frequency of the TSC if available
2436     ///
2437     fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
2438         match self.fd.lock().unwrap().set_tsc_khz(freq) {
2439             Err(e) => {
2440                 if e.errno() == libc::EIO {
2441                     Ok(())
2442                 } else {
2443                     Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
2444                 }
2445             }
2446             Ok(_) => Ok(()),
2447         }
2448     }
2449 
2450     #[cfg(target_arch = "x86_64")]
2451     ///
2452     /// Trigger NMI interrupt
2453     ///
2454     fn nmi(&self) -> cpu::Result<()> {
2455         match self.fd.lock().unwrap().nmi() {
2456             Err(e) => {
2457                 if e.errno() == libc::EIO {
2458                     Ok(())
2459                 } else {
2460                     Err(cpu::HypervisorCpuError::Nmi(e.into()))
2461                 }
2462             }
2463             Ok(_) => Ok(()),
2464         }
2465     }
2466 }
2467 
2468 impl KvmVcpu {
2469     #[cfg(target_arch = "x86_64")]
2470     ///
2471     /// X86 specific call that returns the vcpu's current "xsave struct".
2472     ///
2473     fn get_xsave(&self) -> cpu::Result<XsaveState> {
2474         Ok(self
2475             .fd
2476             .lock()
2477             .unwrap()
2478             .get_xsave()
2479             .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
2480             .into())
2481     }
2482 
2483     #[cfg(target_arch = "x86_64")]
2484     ///
2485     /// X86 specific call that sets the vcpu's current "xsave struct".
2486     ///
2487     fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
2488         let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
2489         self.fd
2490             .lock()
2491             .unwrap()
2492             .set_xsave(&xsave)
2493             .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
2494     }
2495 
2496     #[cfg(target_arch = "x86_64")]
2497     ///
2498     /// X86 specific call that returns the vcpu's current "xcrs".
2499     ///
2500     fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
2501         self.fd
2502             .lock()
2503             .unwrap()
2504             .get_xcrs()
2505             .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
2506     }
2507 
2508     #[cfg(target_arch = "x86_64")]
2509     ///
2510     /// X86 specific call that sets the vcpu's current "xcrs".
2511     ///
2512     fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
2513         self.fd
2514             .lock()
2515             .unwrap()
2516             .set_xcrs(xcrs)
2517             .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
2518     }
2519 
2520     #[cfg(target_arch = "x86_64")]
2521     ///
2522     /// Returns currently pending exceptions, interrupts, and NMIs as well as related
2523     /// states of the vcpu.
2524     ///
2525     fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
2526         self.fd
2527             .lock()
2528             .unwrap()
2529             .get_vcpu_events()
2530             .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
2531     }
2532 
2533     #[cfg(target_arch = "x86_64")]
2534     ///
2535     /// Sets pending exceptions, interrupts, and NMIs as well as related states
2536     /// of the vcpu.
2537     ///
2538     fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
2539         self.fd
2540             .lock()
2541             .unwrap()
2542             .set_vcpu_events(events)
2543             .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
2544     }
2545 }
2546