xref: /cloud-hypervisor/hypervisor/src/kvm/mod.rs (revision d10f20eb718023742143fa847a37f3d6114ead52)
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//
// Copyright 2018-2019 CrowdStrike, Inc.
//
//

#[cfg(target_arch = "aarch64")]
use crate::aarch64::gic::KvmGicV3Its;
#[cfg(target_arch = "aarch64")]
pub use crate::aarch64::{
    check_required_kvm_extensions, gic::Gicv3ItsState as GicState, is_system_register, VcpuInit,
    VcpuKvmState,
};
#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::gic::{Vgic, VgicConfig};
use crate::cpu;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, InterruptSourceConfig, VmOps};
use crate::HypervisorType;
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset_of};
use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
use std::any::Any;
use std::collections::HashMap;
#[cfg(target_arch = "x86_64")]
use std::fs::File;
#[cfg(target_arch = "x86_64")]
use std::os::unix::io::AsRawFd;
#[cfg(feature = "tdx")]
use std::os::unix::io::RawFd;
use std::result;
#[cfg(target_arch = "x86_64")]
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Mutex;
use std::sync::{Arc, RwLock};
use vmm_sys_util::eventfd::EventFd;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{
    CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, StandardRegisters, XsaveState,
    NUM_IOAPIC_PINS,
};
#[cfg(target_arch = "x86_64")]
use crate::ClockData;
use crate::{
    CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
    USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
};
#[cfg(target_arch = "aarch64")]
use aarch64::{RegList, Register, StandardRegisters};
#[cfg(target_arch = "x86_64")]
use kvm_bindings::{
    kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
    KVM_GUESTDBG_USE_HW_BP,
};
#[cfg(target_arch = "x86_64")]
use x86_64::check_required_kvm_extensions;
#[cfg(target_arch = "x86_64")]
pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState};
// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
pub use kvm_bindings;
pub use kvm_bindings::{
    kvm_clock_data, kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug,
    kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_userspace_memory_region,
    KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
    KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
};
#[cfg(target_arch = "aarch64")]
use kvm_bindings::{
    kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64,
    KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK,
    KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK,
    KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
};
#[cfg(feature = "tdx")]
use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO};
pub use kvm_ioctls;
pub use kvm_ioctls::{Cap, Kvm};
#[cfg(target_arch = "aarch64")]
use std::mem;
use thiserror::Error;
use vfio_ioctls::VfioDeviceFd;
#[cfg(feature = "tdx")]
use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr};
///
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
///
pub use {
    kvm_bindings::kvm_create_device as CreateDevice, kvm_bindings::kvm_device_attr as DeviceAttr,
    kvm_bindings::kvm_run, kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::VcpuExit,
};

#[cfg(target_arch = "x86_64")]
const KVM_CAP_SGX_ATTRIBUTE: u32 = 196;

#[cfg(target_arch = "x86_64")]
use vmm_sys_util::ioctl_io_nr;

#[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))]
use vmm_sys_util::ioctl_ioc_nr;

#[cfg(target_arch = "x86_64")]
ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a);

#[cfg(feature = "tdx")]
const KVM_EXIT_TDX: u32 = 50;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SUCCESS: u64 = 0;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000;

#[cfg(feature = "tdx")]
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);

#[cfg(feature = "tdx")]
#[repr(u32)]
enum TdxCommand {
    Capabilities = 0,
    InitVm,
    InitVcpu,
    InitMemRegion,
    Finalize,
}

#[cfg(feature = "tdx")]
pub enum TdxExitDetails {
    GetQuote,
    SetupEventNotifyInterrupt,
}

#[cfg(feature = "tdx")]
pub enum TdxExitStatus {
    Success,
    InvalidOperand,
}

#[cfg(feature = "tdx")]
const TDX_MAX_NR_CPUID_CONFIGS: usize = 6;

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCpuidConfig {
    pub leaf: u32,
    pub sub_leaf: u32,
    pub eax: u32,
    pub ebx: u32,
    pub ecx: u32,
    pub edx: u32,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCapabilities {
    pub attrs_fixed0: u64,
    pub attrs_fixed1: u64,
    pub xfam_fixed0: u64,
    pub xfam_fixed1: u64,
    pub nr_cpuid_configs: u32,
    pub padding: u32,
    pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS],
}

#[cfg(feature = "tdx")]
#[derive(Copy, Clone)]
pub struct KvmTdxExit {
    pub type_: u32,
    pub pad: u32,
    pub u: KvmTdxExitU,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Copy, Clone)]
pub union KvmTdxExitU {
    pub vmcall: KvmTdxExitVmcall,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct KvmTdxExitVmcall {
    pub type_: u64,
    pub subfunction: u64,
    pub reg_mask: u64,
    pub in_r12: u64,
    pub in_r13: u64,
    pub in_r14: u64,
    pub in_r15: u64,
    pub in_rbx: u64,
    pub in_rdi: u64,
    pub in_rsi: u64,
    pub in_r8: u64,
    pub in_r9: u64,
    pub in_rdx: u64,
    pub status_code: u64,
    pub out_r11: u64,
    pub out_r12: u64,
    pub out_r13: u64,
    pub out_r14: u64,
    pub out_r15: u64,
    pub out_rbx: u64,
    pub out_rdi: u64,
    pub out_rsi: u64,
    pub out_r8: u64,
    pub out_r9: u64,
    pub out_rdx: u64,
}

impl From<kvm_userspace_memory_region> for UserMemoryRegion {
    fn from(region: kvm_userspace_memory_region) -> Self {
        let mut flags = USER_MEMORY_REGION_READ;
        if region.flags & KVM_MEM_READONLY == 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 {
            flags |= USER_MEMORY_REGION_LOG_DIRTY;
        }

        UserMemoryRegion {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<UserMemoryRegion> for kvm_userspace_memory_region {
    fn from(region: UserMemoryRegion) -> Self {
        assert!(
            region.flags & USER_MEMORY_REGION_READ != 0,
            "KVM mapped memory is always readable"
        );

        let mut flags = 0;
        if region.flags & USER_MEMORY_REGION_WRITE == 0 {
            flags |= KVM_MEM_READONLY;
        }
        if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
            flags |= KVM_MEM_LOG_DIRTY_PAGES;
        }

        kvm_userspace_memory_region {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}
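
// Illustrative round-trip for the two conversions above (a sketch; the
// addresses are hypothetical values, not taken from real code):
//
//     let generic = UserMemoryRegion {
//         slot: 0,
//         guest_phys_addr: 0x1_0000,
//         memory_size: 0x1000,
//         userspace_addr: 0x7f00_0000_0000,
//         flags: USER_MEMORY_REGION_READ
//             | USER_MEMORY_REGION_WRITE
//             | USER_MEMORY_REGION_LOG_DIRTY,
//     };
//     let kvm_region: kvm_userspace_memory_region = generic.into();
//     assert_eq!(kvm_region.flags, KVM_MEM_LOG_DIRTY_PAGES);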

impl From<kvm_mp_state> for MpState {
    fn from(s: kvm_mp_state) -> Self {
        MpState::Kvm(s)
    }
}

impl From<MpState> for kvm_mp_state {
    fn from(ms: MpState) -> Self {
        match ms {
            MpState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("MpState is not valid"),
        }
    }
}

impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: kvm_ioctls::IoEventAddress) -> Self {
        match a {
            kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuKvmState> for CpuState {
    fn from(s: VcpuKvmState) -> Self {
        CpuState::Kvm(s)
    }
}

impl From<CpuState> for VcpuKvmState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<kvm_clock_data> for ClockData {
    fn from(d: kvm_clock_data) -> Self {
        ClockData::Kvm(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for kvm_clock_data {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("ClockData is not valid"),
        }
    }
}

impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
    fn from(s: kvm_irq_routing_entry) -> Self {
        IrqRoutingEntry::Kvm(s)
    }
}

impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

struct KvmDirtyLogSlot {
    slot: u32,
    guest_phys_addr: u64,
    memory_size: u64,
    userspace_addr: u64,
}

/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    fd: Arc<VmFd>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>,
}

impl KvmVm {
    ///
    /// Creates an emulated device in the kernel.
    ///
    /// See the documentation for `KVM_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> {
        let device_fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        Ok(VfioDeviceFd::new_from_kvm(device_fd))
    }
    /// Checks if a particular `Cap` is available.
    pub fn check_extension(&self, c: Cap) -> bool {
        self.fd.check_extension(c)
    }
}

/// Implementation of Vm trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl vm::Vm for KvmVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    fn set_identity_map_address(&self, address: u64) -> vm::Result<()> {
        self.fd
            .set_identity_map_address(address)
            .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
        self.fd
            .set_tss_address(offset)
            .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
    }

    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        self.fd
            .create_irq_chip()
            .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
    }

    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
    }

    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
    }

    ///
    /// Creates a vCPU for the given id and returns it as a `cpu::Vcpu` trait object.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let fd = self
            .fd
            .create_vcpu(id as u64)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = KvmVcpu {
            fd: Arc::new(Mutex::new(fd)),
            #[cfg(target_arch = "x86_64")]
            msrs: self.msrs.clone(),
            vm_ops,
            #[cfg(target_arch = "x86_64")]
            hyperv_synic: AtomicBool::new(false),
        };
        Ok(Arc::new(vcpu))
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Creates a virtual GIC device.
    ///
    fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
        let gic_device = KvmGicV3Its::new(self, config)
            .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?;
        Ok(Arc::new(Mutex::new(gic_device)))
    }

    ///
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
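    /// # Examples
    ///
    /// ```ignore
    /// // A sketch (not a doc-test): signal `evt` on 4-byte writes of 0x42 to
    /// // MMIO address 0x1000.
    /// let evt = EventFd::new(libc::EFD_NONBLOCK).unwrap();
    /// vm.register_ioevent(
    ///     &evt,
    ///     &IoEventAddress::Mmio(0x1000),
    ///     Some(vm::DataMatch::DataMatch32(0x42)),
    /// )
    /// .unwrap();
    /// ```
    ///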
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<vm::DataMatch>,
    ) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(kvm_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(kvm_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }

    ///
    /// Unregisters an event from a certain address it has been previously registered to.
    ///
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match &config {
            InterruptSourceConfig::MsiIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_MSI,
                    ..Default::default()
                };

                kvm_route.u.msi.address_lo = cfg.low_addr;
                kvm_route.u.msi.address_hi = cfg.high_addr;
                kvm_route.u.msi.data = cfg.data;

                if self.check_extension(crate::kvm::Cap::MsiDevid) {
                    // On AArch64 there is a limitation on the range of the
                    // 'devid': it must fit in 16 bits, i.e. be at most 65535.
                    //
                    // The BDF cannot be used directly, because the 'segment'
                    // sits in the high 16 bits. The layout of the u32 BDF is:
                    // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --|
                    // |      segment    |     bus    |   device   |  function  |
                    //
                    // Since we support only one bus per segment, we can build
                    // a 'devid' by replacing the 'bus' bits with the low 8
                    // bits of the 'segment' data.
                    // This resolves the range-checking problem and gives a
                    // different 'devid' to every device. The limitation is
                    // that at most 256 segments can be supported.
                    //
                    let modified_devid = (cfg.devid & 0x00ff_0000) >> 8 | cfg.devid & 0xff;
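                    // Worked example (illustrative): segment 0x0001, bus 0x00,
                    // device 0x02, function 0x0 encode to BDF 0x0001_0010, so
                    // the transform yields (0x0001_0000 >> 8) | 0x10 = 0x0110.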

                    kvm_route.flags = KVM_MSI_VALID_DEVID;
                    kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid;
                }
                kvm_route.into()
            }
            InterruptSourceConfig::LegacyIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_IRQCHIP,
                    ..Default::default()
                };
                kvm_route.u.irqchip.irqchip = cfg.irqchip;
                kvm_route.u.irqchip.pin = cfg.pin;

                kvm_route.into()
            }
        }
    }

    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
    ///
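    /// # Examples
    ///
    /// ```ignore
    /// // A sketch (not a doc-test), assuming the `LegacyIrqSourceConfig`
    /// // shape from `crate::vm`: route GSI 4 through irqchip 0, pin 4, then
    /// // install the table.
    /// let entry = vm.make_routing_entry(
    ///     4,
    ///     &InterruptSourceConfig::LegacyIrq(LegacyIrqSourceConfig {
    ///         irqchip: 0,
    ///         pin: 4,
    ///     }),
    /// );
    /// vm.set_gsi_routing(&[entry]).unwrap();
    /// ```
    ///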
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
        irq_routing[0].nr = entries.len() as u32;
        irq_routing[0].flags = 0;
        let entries: Vec<kvm_irq_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Kvm(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: irq_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [kvm_irq_routing_entry] =
                irq_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_gsi_routing(&irq_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }

    ///
    /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
    ///
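    /// # Examples
    ///
    /// ```ignore
    /// // A sketch (not a doc-test): `host_addr` stands in for a valid host
    /// // mapping obtained elsewhere. One writable 4 KiB page with dirty
    /// // logging enabled.
    /// let region = vm.make_user_memory_region(0, 0x10000, 0x1000, host_addr, false, true);
    /// vm.create_user_memory_region(region).unwrap();
    /// ```
    ///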
    fn make_user_memory_region(
        &self,
        slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        kvm_userspace_memory_region {
            slot,
            guest_phys_addr,
            memory_size,
            userspace_addr,
            flags: if readonly { KVM_MEM_READONLY } else { 0 }
                | if log_dirty_pages {
                    KVM_MEM_LOG_DIRTY_PAGES
                } else {
                    0
                },
        }
        .into()
    }

    ///
    /// Creates a guest physical memory region.
    ///
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 {
            if (region.flags & KVM_MEM_READONLY) != 0 {
                return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!(
                    "Error creating regions with both 'dirty-pages-log' and 'read-only'."
                )));
            }

            // Keep track of the regions that need dirty pages log
            self.dirty_log_slots.write().unwrap().insert(
                region.slot,
                KvmDirtyLogSlot {
                    slot: region.slot,
                    guest_phys_addr: region.guest_phys_addr,
                    memory_size: region.memory_size,
                    userspace_addr: region.userspace_addr,
                },
            );

            // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`.
            // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`.
            region.flags = 0;
        }

        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
        }
    }

    ///
    /// Removes a guest physical memory region.
    ///
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots.write().unwrap().remove(&region.slot);

        // Setting the size to 0 means "remove"
        region.memory_size = 0;
        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))
        }
    }

    ///
    /// Returns the preferred CPU target type which can be emulated by KVM on underlying host.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
        self.fd
            .get_preferred_target(kvi)
            .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        // Create a split irqchip: only the local APIC is emulated in the
        // kernel; the PICs and the IOAPIC are not.
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = NUM_IOAPIC_PINS as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SGX_ATTRIBUTE,
            ..Default::default()
        };
        cap.args[0] = file.as_raw_fd() as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?;
        Ok(())
    }

    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        Ok(self
            .fd
            .get_clock()
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?
            .into())
    }

    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        let data = (*data).into();
        self.fd
            .set_clock(&data)
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }

    /// Create a device that is used for passthrough
    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = kvm_create_device {
            type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: KVM_MEM_LOG_DIRTY_PAGES,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: 0,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

    ///
    /// Get dirty pages bitmap (one bit per page)
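    /// Each `u64` in the returned vector covers 64 guest pages: page `i` of
    /// the slot is dirty when `bitmap[i / 64] & (1 << (i % 64))` is set.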
    ///
    fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(slot, memory_size as usize)
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }

    ///
    /// Initialize TDX for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> {
        const TDX_ATTR_SEPT_VE_DISABLE: usize = 28;

        let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default());

        #[repr(C)]
        struct TdxInitVm {
            attributes: u64,
            max_vcpus: u32,
            padding: u32,
            mrconfigid: [u64; 6],
            mrowner: [u64; 6],
            mrownerconfig: [u64; 6],
            cpuid_nent: u32,
            cpuid_padding: u32,
            cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256],
        }
        let data = TdxInitVm {
            attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE,
            max_vcpus,
            padding: 0,
            mrconfigid: [0; 6],
            mrowner: [0; 6],
            mrownerconfig: [0; 6],
            cpuid_nent: cpuid.len() as u32,
            cpuid_padding: 0,
            cpuid_entries: cpuid.as_slice().try_into().unwrap(),
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitVm,
            0,
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitializeTdx)
    }

    ///
    /// Finalize the TDX setup for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_finalize(&self) -> vm::Result<()> {
        tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
            .map_err(vm::HypervisorVmError::FinalizeTdx)
    }

    ///
    /// Initialize memory regions for the TDX VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init_memory_region(
        &self,
        host_address: u64,
        guest_address: u64,
        size: u64,
        measure: bool,
    ) -> vm::Result<()> {
        #[repr(C)]
        struct TdxInitMemRegion {
            host_address: u64,
            guest_address: u64,
            pages: u64,
        }
        let data = TdxInitMemRegion {
            host_address,
            guest_address,
            pages: size / 4096,
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitMemRegion,
            u32::from(measure),
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitMemRegionTdx)
    }

    /// Downcast to the underlying KvmVm type
    fn as_any(&self) -> &dyn Any {
        self
    }
}

#[cfg(feature = "tdx")]
fn tdx_command(
    fd: &RawFd,
    command: TdxCommand,
    flags: u32,
    data: u64,
) -> std::result::Result<(), std::io::Error> {
    #[repr(C)]
    struct TdxIoctlCmd {
        command: TdxCommand,
        flags: u32,
        data: u64,
        error: u64,
        unused: u64,
    }
    let cmd = TdxIoctlCmd {
        command,
        flags,
        data,
        error: 0,
        unused: 0,
    };
    // SAFETY: FFI call. All input parameters are valid.
    let ret = unsafe {
        ioctl_with_val(
            fd,
            KVM_MEMORY_ENCRYPT_OP(),
            &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong,
        )
    };

    if ret < 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}

/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    kvm: Kvm,
}

impl KvmHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by the hypervisor.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

/// Enum for KVM related error
#[derive(Debug, Error)]
pub enum KvmError {
    #[error("Capability missing: {0:?}")]
    CapabilityMissing(Cap),
}

pub type KvmResult<T> = result::Result<T, KvmError>;

impl KvmHypervisor {
    /// Create a hypervisor based on Kvm
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
        let api_version = kvm_obj.get_api_version();

        if api_version != kvm_bindings::KVM_API_VERSION as i32 {
            return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
        }

        Ok(Arc::new(KvmHypervisor { kvm: kvm_obj }))
    }

    /// Check if the hypervisor is available
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/kvm") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}

/// Implementation of Hypervisor trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl hypervisor::Hypervisor for KvmHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Kvm
    }

    /// Create a KVM vm object of a specific VM type and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type(0).unwrap();
    /// ```
    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm_with_type(vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the returned error is EINTR, the ioctl was
                        // interrupted and we have to retry, as this cannot be
                        // considered a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs: Vec<MsrEntry> = vec![
                MsrEntry {
                    ..Default::default()
                };
                num_msrs
            ];
            let indices = msr_list.as_slice();
            for (pos, index) in indices.iter().enumerate() {
                msrs[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }

        #[cfg(target_arch = "aarch64")]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }

    /// Create a KVM vm object and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// ```
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        #[allow(unused_mut)]
        let mut vm_type: u64 = 0; // Create with default platform type

        // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA
        // size from the host and use that when creating the VM, which may
        // avoid unnecessary VM creation failures.
        #[cfg(target_arch = "aarch64")]
        if self.kvm.check_extension(Cap::ArmVmIPASize) {
            vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap();
        }

        self.create_vm_with_type(vm_type)
    }

    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        check_required_kvm_extensions(&self.kvm)
            .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve AArch64 host maximum IPA size supported by KVM.
    ///
    fn get_host_ipa_limit(&self) -> i32 {
        self.kvm.get_host_ipa_limit()
    }

    ///
    /// Retrieve TDX capabilities
    ///
    #[cfg(feature = "tdx")]
    fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> {
        let data = TdxCapabilities {
            nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32,
            ..Default::default()
        };

        tdx_command(
            &self.kvm.as_raw_fd(),
            TdxCommand::Capabilities,
            0,
            &data as *const _ as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?;

        Ok(data)
    }

    ///
    /// Get the number of supported hardware breakpoints
    ///
    fn get_guest_debug_hw_bps(&self) -> usize {
        #[cfg(target_arch = "x86_64")]
        {
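            // x86_64 exposes four hardware debug registers (DR0-DR3), hence
            // four hardware breakpoints.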
            4
        }
        #[cfg(target_arch = "aarch64")]
        {
            self.kvm.get_guest_debug_hw_bps() as usize
        }
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32
    }
}

/// Vcpu struct for KVM
pub struct KvmVcpu {
    fd: Arc<Mutex<VcpuFd>>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    #[cfg(target_arch = "x86_64")]
    hyperv_synic: AtomicBool,
}

/// Implementation of Vcpu trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// ```
impl cpu::Vcpu for KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }

    ///
    /// Returns the vCPU general purpose registers.
    /// The `KVM_GET_REGS` ioctl is not available on AArch64, `KVM_GET_ONE_REG`
    /// is used to get registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        let mut state: StandardRegisters = kvm_regs::default();
        let mut off = offset_of!(user_pt_regs, regs);
        // There are 31 user_pt_regs:
        // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
        // These are the general-purpose registers of the Armv8-A architecture
        // (i.e. x0-x30 when used as 64-bit registers, or w0-w30 when used as 32-bit registers).
        for i in 0..31 {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            state.regs.regs[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // We are now entering the "Other register" section of the ARMv8-a architecture.
        // First one, stack pointer.
        let off = offset_of!(user_pt_regs, sp);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.regs.sp = u64::from_le_bytes(bytes);

        // Second one, the program counter.
        let off = offset_of!(user_pt_regs, pc);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.regs.pc = u64::from_le_bytes(bytes);

        // Next is the processor state.
        let off = offset_of!(user_pt_regs, pstate);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.regs.pstate = u64::from_le_bytes(bytes);

        // The stack pointer associated with EL1
        let off = offset_of!(kvm_regs, sp_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.sp_el1 = u64::from_le_bytes(bytes);

        // Exception Link Register for EL1, when taking an exception to EL1, this register
        // holds the address to which to return afterwards.
        let off = offset_of!(kvm_regs, elr_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.elr_el1 = u64::from_le_bytes(bytes);

        // Saved Program Status Registers, there are 5 of them used in the kernel.
        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            state.spsr[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel:
        // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            let mut bytes = [0_u8; 16];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            state.fp_regs.vregs[i] = u128::from_le_bytes(bytes);
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        let mut bytes = [0_u8; 4];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.fp_regs.fpsr = u32::from_le_bytes(bytes);

        // Floating-point Control Register
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        let mut bytes = [0_u8; 4];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.fp_regs.fpcr = u32::from_le_bytes(bytes);
        Ok(state)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .lock()
            .unwrap()
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }

    ///
    /// Sets the vCPU general purpose registers.
    /// The `KVM_SET_REGS` ioctl is not available on AArch64, `KVM_SET_ONE_REG`
    /// is used to set registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // This follows the same register order as `state`; see `get_regs`
        // above for additional info on the registers.
        let mut off = offset_of!(user_pt_regs, regs);
        for i in 0..31 {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &state.regs.regs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let off = offset_of!(user_pt_regs, sp);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.regs.sp.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pc);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.regs.pc.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pstate);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.regs.pstate.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, sp_el1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.sp_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, elr_el1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.elr_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &state.spsr[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
                    &state.fp_regs.vregs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &state.fp_regs.fpsr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &state.fp_regs.fpcr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .lock()
            .unwrap()
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
1509     ///
1510     fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
1511         let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into();
1512         self.fd
1513             .lock()
1514             .unwrap()
1515             .set_fpu(&fpu)
1516             .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
1517     }
1518 
1519     #[cfg(target_arch = "x86_64")]
1520     ///
1521     /// X86 specific call to setup the CPUID registers.
1522     ///
1523     fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
1524         let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
1525             cpuid.iter().map(|e| (*e).into()).collect();
1526         let kvm_cpuid = <CpuId>::from_entries(&cpuid)
1527             .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;
1528 
1529         self.fd
1530             .lock()
1531             .unwrap()
1532             .set_cpuid2(&kvm_cpuid)
1533             .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
1534     }
1535 
1536     #[cfg(target_arch = "x86_64")]
1537     ///
1538     /// X86 specific call to enable HyperV SynIC
1539     ///
1540     fn enable_hyperv_synic(&self) -> cpu::Result<()> {
1541         // Update the information about Hyper-V SynIC being enabled and
1542         // emulated as it will influence later which MSRs should be saved.
1543         self.hyperv_synic.store(true, Ordering::Release);
1544 
1545         let cap = kvm_enable_cap {
1546             cap: KVM_CAP_HYPERV_SYNIC,
1547             ..Default::default()
1548         };
1549         self.fd
1550             .lock()
1551             .unwrap()
1552             .enable_cap(&cap)
1553             .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into()))
1554     }
1555 
1556     ///
1557     /// X86 specific call to retrieve the CPUID registers.
1558     ///
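    /// A minimal sketch; `kvm_bindings` is the re-export from this module,
    /// and the vCPU is otherwise left unconfigured:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::{kvm_bindings, KvmHypervisor};
    /// # use std::sync::Arc;
    /// let hv = Arc::new(KvmHypervisor::new().unwrap());
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let entries = vcpu
    ///     .get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
    ///     .unwrap();
    /// assert!(!entries.is_empty());
    /// ```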
1559     #[cfg(target_arch = "x86_64")]
1560     fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
1561         let kvm_cpuid = self
1562             .fd
1563             .lock()
1564             .unwrap()
1565             .get_cpuid2(num_entries)
1566             .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?;
1567 
1568         let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();
1569 
1570         Ok(v)
1571     }
1572 
1573     #[cfg(target_arch = "x86_64")]
1574     ///
1575     /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1576     ///
1577     fn get_lapic(&self) -> cpu::Result<LapicState> {
1578         Ok(self
1579             .fd
1580             .lock()
1581             .unwrap()
1582             .get_lapic()
1583             .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
1584             .into())
1585     }
1586 
1587     #[cfg(target_arch = "x86_64")]
1588     ///
1589     /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1590     ///
1591     fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
1592         let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into();
1593         self.fd
1594             .lock()
1595             .unwrap()
1596             .set_lapic(&klapic)
1597             .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
1598     }
1599 
1600     #[cfg(target_arch = "x86_64")]
1601     ///
1602     /// Returns the model-specific registers (MSR) for this vCPU.
1603     ///
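    /// `KVM_GET_MSRS` only fills in data for the indices it is handed, so the
    /// entries must be pre-populated. A minimal sketch, assuming `MsrEntry`
    /// is reachable at `hypervisor::arch::x86::MsrEntry` and derives
    /// `Default` (0x10 is the architectural `MSR_IA32_TSC` index):
    ///
    /// ```no_run
    /// # use hypervisor::arch::x86::MsrEntry;
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let hv = Arc::new(KvmHypervisor::new().unwrap());
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let mut msrs = vec![MsrEntry {
    ///     index: 0x10, // MSR_IA32_TSC
    ///     ..Default::default()
    /// }];
    /// let read = vcpu.get_msrs(&mut msrs).unwrap();
    /// assert_eq!(read, 1);
    /// ```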
1604     fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
1605         let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
1606         let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
1607         let succ = self
1608             .fd
1609             .lock()
1610             .unwrap()
1611             .get_msrs(&mut kvm_msrs)
1612             .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;
1613 
1614         msrs[..succ].copy_from_slice(
1615             &kvm_msrs.as_slice()[..succ]
1616                 .iter()
1617                 .map(|e| (*e).into())
1618                 .collect::<Vec<MsrEntry>>(),
1619         );
1620 
1621         Ok(succ)
1622     }
1623 
1624     #[cfg(target_arch = "x86_64")]
1625     ///
1626     /// Set up the model-specific registers (MSR) for this vCPU.
1627     /// Returns the number of MSR entries actually written.
1628     ///
1629     fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
1630         let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
1631         let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
1632         self.fd
1633             .lock()
1634             .unwrap()
1635             .set_msrs(&kvm_msrs)
1636             .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
1637     }
1638 
1639     ///
1640     /// Returns the vcpu's current "multiprocessing state".
1641     ///
1642     fn get_mp_state(&self) -> cpu::Result<MpState> {
1643         Ok(self
1644             .fd
1645             .lock()
1646             .unwrap()
1647             .get_mp_state()
1648             .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))?
1649             .into())
1650     }
1651 
1652     ///
1653     /// Sets the vcpu's current "multiprocessing state".
1654     ///
1655     fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
1656         self.fd
1657             .lock()
1658             .unwrap()
1659             .set_mp_state(mp_state.into())
1660             .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
1661     }
1662 
1663     #[cfg(target_arch = "x86_64")]
1664     ///
1665     /// Translates a guest virtual address to a guest physical address using the `KVM_TRANSLATE` ioctl.
1666     ///
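    /// A minimal sketch; 0x1000 is an arbitrary guest-virtual address, and
    /// the translation is only meaningful once the guest has set up paging
    /// for it:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let hv = Arc::new(KvmHypervisor::new().unwrap());
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let (gpa, _flags) = vcpu.translate_gva(0x1000, 0).unwrap();
    /// ```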
1667     fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> {
1668         let tr = self
1669             .fd
1670             .lock()
1671             .unwrap()
1672             .translate_gva(gva)
1673             .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
1674         // tr.valid is set if the GVA is mapped to a valid GPA.
1675         match tr.valid {
1676             0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!(
1677                 "Invalid GVA: {:#x}",
1678                 gva
1679             ))),
1680             _ => Ok((tr.physical_address, 0)),
1681         }
1682     }
1683 
1684     ///
1685     /// Triggers the running of the current virtual CPU, returning an exit reason.
1686     ///
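    /// A minimal dispatch-loop sketch (guest memory and register setup are
    /// elided); exits the VMM resolves internally, such as PIO and MMIO,
    /// surface here as `VmExit::Ignore`:
    ///
    /// ```no_run
    /// # use hypervisor::cpu::VmExit;
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let hv = Arc::new(KvmHypervisor::new().unwrap());
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// loop {
    ///     match vcpu.run().expect("vCPU run failed") {
    ///         VmExit::Reset | VmExit::Shutdown => break,
    ///         _ => {}
    ///     }
    /// }
    /// ```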
1687     fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
1688         match self.fd.lock().unwrap().run() {
1689             Ok(run) => match run {
1690                 #[cfg(target_arch = "x86_64")]
1691                 VcpuExit::IoIn(addr, data) => {
1692                     if let Some(vm_ops) = &self.vm_ops {
1693                         return vm_ops
1694                             .pio_read(addr.into(), data)
1695                             .map(|_| cpu::VmExit::Ignore)
1696                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1697                     }
1698 
1699                     Ok(cpu::VmExit::Ignore)
1700                 }
1701                 #[cfg(target_arch = "x86_64")]
1702                 VcpuExit::IoOut(addr, data) => {
1703                     if let Some(vm_ops) = &self.vm_ops {
1704                         return vm_ops
1705                             .pio_write(addr.into(), data)
1706                             .map(|_| cpu::VmExit::Ignore)
1707                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1708                     }
1709 
1710                     Ok(cpu::VmExit::Ignore)
1711                 }
1712                 #[cfg(target_arch = "x86_64")]
1713                 VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
1714                 #[cfg(target_arch = "x86_64")]
1715                 VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),
1716 
1717                 #[cfg(target_arch = "aarch64")]
1718                 VcpuExit::SystemEvent(event_type, flags) => {
1719                     use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
1720                     // On AArch64, when the VM is shut down, run() returns
1721                     // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN.
1722                     if event_type == KVM_SYSTEM_EVENT_RESET {
1723                         Ok(cpu::VmExit::Reset)
1724                     } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
1725                         Ok(cpu::VmExit::Shutdown)
1726                     } else {
1727                         Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1728                             "Unexpected system event with type 0x{:x}, flags 0x{:x?}",
1729                             event_type,
1730                             flags
1731                         )))
1732                     }
1733                 }
1734 
1735                 VcpuExit::MmioRead(addr, data) => {
1736                     if let Some(vm_ops) = &self.vm_ops {
1737                         return vm_ops
1738                             .mmio_read(addr, data)
1739                             .map(|_| cpu::VmExit::Ignore)
1740                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1741                     }
1742 
1743                     Ok(cpu::VmExit::Ignore)
1744                 }
1745                 VcpuExit::MmioWrite(addr, data) => {
1746                     if let Some(vm_ops) = &self.vm_ops {
1747                         return vm_ops
1748                             .mmio_write(addr, data)
1749                             .map(|_| cpu::VmExit::Ignore)
1750                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1751                     }
1752 
1753                     Ok(cpu::VmExit::Ignore)
1754                 }
1755                 VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),
1756                 #[cfg(feature = "tdx")]
1757                 VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx),
1758                 VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug),
1759 
1760                 r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1761                     "Unexpected exit reason on vcpu run: {:?}",
1762                     r
1763                 ))),
1764             },
1765 
1766             Err(ref e) => match e.errno() {
1767                 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
1768                 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1769                     "VCPU error {:?}",
1770                     e
1771                 ))),
1772             },
1773         }
1774     }
1775 
1776     #[cfg(target_arch = "x86_64")]
1777     ///
1778     /// Let the guest know that it has been paused, which prevents
1779     /// potential soft lockups when it is resumed.
1780     ///
1781     fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
1782         if let Err(e) = self.fd.lock().unwrap().kvmclock_ctrl() {
1783         // The Linux kernel returns -EINVAL if the PV clock isn't yet initialised,
1784         // which could be because we're still in firmware or the guest doesn't
1785         // use the KVM clock.
1786             if e.errno() != libc::EINVAL {
1787                 return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()));
1788             }
1789         }
1790 
1791         Ok(())
1792     }
1793 
1794     ///
1795     /// Sets debug registers to install hardware breakpoints and/or enable single-stepping.
1796     ///
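    /// A minimal sketch; 0x10_0000 is a hypothetical guest address, and the
    /// slice length must stay within the hardware breakpoint budget noted
    /// in the body below:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// # use vm_memory::GuestAddress;
    /// let hv = Arc::new(KvmHypervisor::new().unwrap());
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// // One hardware breakpoint, with single-stepping enabled.
    /// vcpu.set_guest_debug(&[GuestAddress(0x10_0000)], true).unwrap();
    /// ```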
1797     fn set_guest_debug(
1798         &self,
1799         addrs: &[vm_memory::GuestAddress],
1800         singlestep: bool,
1801     ) -> cpu::Result<()> {
1802         let mut dbg = kvm_guest_debug {
1803             #[cfg(target_arch = "x86_64")]
1804             control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
1805             #[cfg(target_arch = "aarch64")]
1806             control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW,
1807             ..Default::default()
1808         };
1809         if singlestep {
1810             dbg.control |= KVM_GUESTDBG_SINGLESTEP;
1811         }
1812 
1813         // Set the debug registers.
1814         // Here we assume that the number of addresses does not exceed what
1815         // `Hypervisor::get_guest_debug_hw_bps()` specifies.
1816         #[cfg(target_arch = "x86_64")]
1817         {
1818             // Set bits 9 and 10.
1819             // bit 9: GE (global exact breakpoint enable) flag.
1820             // bit 10: always 1.
1821             dbg.arch.debugreg[7] = 0x0600;
1822 
1823             for (i, addr) in addrs.iter().enumerate() {
1824                 dbg.arch.debugreg[i] = addr.0;
1825                 // Set global breakpoint enable flag
1826                 dbg.arch.debugreg[7] |= 2 << (i * 2);
1827             }
1828         }
1829         #[cfg(target_arch = "aarch64")]
1830         {
1831             for (i, addr) in addrs.iter().enumerate() {
1832                 // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2):
1833                 // bit 0: 1 (Enabled)
1834                 // bit 1~2: 0b11 (PMC = EL1/EL0)
1835                 // bit 5~8: 0b1111 (BAS = AArch64)
1836                 // others: 0
1837                 dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64;
1838                 // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3):
1839                 // bit 2~52: VA[2:52]
1840                 dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0;
1841             }
1842         }
1843         self.fd
1844             .lock()
1845             .unwrap()
1846             .set_guest_debug(&dbg)
1847             .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))
1848     }
1849 
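    ///
    /// Initializes an AArch64 vCPU with the target and features carried in
    /// `kvi`, wrapping the `KVM_ARM_VCPU_INIT` ioctl.
    ///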
1850     #[cfg(target_arch = "aarch64")]
1851     fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
1852         self.fd
1853             .lock()
1854             .unwrap()
1855             .vcpu_init(kvi)
1856             .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
1857     }
1858 
1859     ///
1860     /// Gets a list of the guest registers that are supported for the
1861     /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
1862     ///
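    /// A minimal sketch, assuming `RegList` is reachable at
    /// `hypervisor::kvm::aarch64::RegList`; the capacity of 500 mirrors what
    /// the AArch64 `state()` implementation uses:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::aarch64::RegList;
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let hv = Arc::new(KvmHypervisor::new().unwrap());
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let mut reg_list = RegList::new(500).unwrap();
    /// vcpu.get_reg_list(&mut reg_list).unwrap();
    /// ```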
1863     #[cfg(target_arch = "aarch64")]
1864     fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
1865         self.fd
1866             .lock()
1867             .unwrap()
1868             .get_reg_list(reg_list)
1869             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
1870     }
1871 
1872     ///
1873     /// Gets the value of a system register
1874     ///
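    /// A worked sketch reading `MPIDR_EL1`, whose standard encoding is
    /// op0=3, op1=0, CRn=0, CRm=0, op2=5; the shifts below follow the Linux
    /// `sys_reg` macro (arch/arm64/include/asm/sysreg.h):
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let hv = Arc::new(KvmHypervisor::new().unwrap());
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// // sys_reg(3, 0, 0, 0, 5) == (op0 << 19) | (op2 << 5)
    /// let mpidr_el1: u32 = (3 << 19) | (5 << 5);
    /// let mpidr = vcpu.get_sys_reg(mpidr_el1).unwrap();
    /// ```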
1875     #[cfg(target_arch = "aarch64")]
1876     fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
1877         //
1878         // The Arm Architecture Reference Manual defines the encoding of
1879         // AArch64 system registers, see
1880         // https://developer.arm.com/documentation/ddi0487 (chapter D12),
1881         // while KVM defines its own ID for each AArch64 system register,
1882         // which is used when calling `KVM_GET/SET_ONE_REG` to access a
1883         // system register of a guest.
1884         // A mapping exists between the Arm standard encoding and the KVM ID.
1885         // This function takes the standard u32 ID as input, converts it to
1886         // the corresponding KVM ID, and calls the `KVM_GET_ONE_REG` API to
1887         // get the value of the system register.
1888         //
1889         let id: u64 = KVM_REG_ARM64
1890             | KVM_REG_SIZE_U64
1891             | KVM_REG_ARM64_SYSREG as u64
1892             | ((((sys_reg) >> 5)
1893                 & (KVM_REG_ARM64_SYSREG_OP0_MASK
1894                     | KVM_REG_ARM64_SYSREG_OP1_MASK
1895                     | KVM_REG_ARM64_SYSREG_CRN_MASK
1896                     | KVM_REG_ARM64_SYSREG_CRM_MASK
1897                     | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
1898         let mut bytes = [0_u8; 8];
1899         self.fd
1900             .lock()
1901             .unwrap()
1902             .get_one_reg(id, &mut bytes)
1903             .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
1904         Ok(u64::from_le_bytes(bytes))
1905     }
1906 
1907     ///
1908     /// Configure core registers for a given CPU.
1909     ///
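    /// A minimal sketch for vCPU 0; 0x8008_0000 and 0x8000_0000 stand in
    /// for the kernel entry point and FDT address the VMM would have
    /// computed, and the vCPU is assumed to be already initialized:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let hv = Arc::new(KvmHypervisor::new().unwrap());
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// vcpu.setup_regs(0, 0x8008_0000, 0x8000_0000).unwrap();
    /// ```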
1910     #[cfg(target_arch = "aarch64")]
1911     fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
1912         #[allow(non_upper_case_globals)]
1913         // PSR (Program Status Register) bits.
1914         // Taken from arch/arm64/include/uapi/asm/ptrace.h.
1915         const PSR_MODE_EL1h: u64 = 0x0000_0005;
1916         const PSR_F_BIT: u64 = 0x0000_0040;
1917         const PSR_I_BIT: u64 = 0x0000_0080;
1918         const PSR_A_BIT: u64 = 0x0000_0100;
1919         const PSR_D_BIT: u64 = 0x0000_0200;
1920         // Taken from arch/arm64/kvm/inject_fault.c.
1921         const PSTATE_FAULT_BITS_64: u64 =
1922             PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;
1923 
1924         let kreg_off = offset_of!(kvm_regs, regs);
1925 
1926         // Get the register index of the PSTATE (Processor State) register.
1927         let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
1928         self.fd
1929             .lock()
1930             .unwrap()
1931             .set_one_reg(
1932                 arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
1933                 &PSTATE_FAULT_BITS_64.to_le_bytes(),
1934             )
1935             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1936 
1937         // Other vCPUs are initially powered off, awaiting PSCI wakeup.
1938         if cpu_id == 0 {
1939             // Setting the PC (Program Counter) to the current program address (kernel address).
1940             let pc = offset_of!(user_pt_regs, pc) + kreg_off;
1941             self.fd
1942                 .lock()
1943                 .unwrap()
1944                 .set_one_reg(
1945                     arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
1946                     &boot_ip.to_le_bytes(),
1947                 )
1948                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1949 
1950             // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
1951             // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
1952             // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
1953             // We are choosing to place it at the end of DRAM. See `get_fdt_addr`.
1954             let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
1955             self.fd
1956                 .lock()
1957                 .unwrap()
1958                 .set_one_reg(
1959                     arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
1960                     &fdt_start.to_le_bytes(),
1961                 )
1962                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1963         }
1964         Ok(())
1965     }
1966 
1967     #[cfg(target_arch = "x86_64")]
1968     ///
1969     /// Get the current CPU state
1970     ///
1971     /// Ordering requirements:
1972     ///
1973     /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
1974     /// vCPU/LAPIC state. As such, it must be done before almost everything
1975     /// else, otherwise we cannot restore everything and expect it to work.
1976     ///
1977     /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
1978     /// still running.
1979     ///
1980     /// KVM_GET_LAPIC may change state of LAPIC before returning it.
1981     ///
1982     /// GET_VCPU_EVENTS should probably be saved last, as it looks like it
1983     /// might be affected by internal state modifications performed by the
1984     /// other GET ioctls.
1985     ///
1986     /// SREGS saves/restores a pending interrupt, similar to what
1987     /// VCPU_EVENTS also does.
1988     ///
1989     /// GET_MSRS requires a pre-populated data structure to do something
1990     /// meaningful. For SET_MSRS it will then contain good data.
1991     ///
1992     /// # Example
1993     ///
1994     /// ```rust
1995     /// # use hypervisor::kvm::KvmHypervisor;
1996     /// # use std::sync::Arc;
1997     /// let kvm = KvmHypervisor::new().unwrap();
1998     /// let hv = Arc::new(kvm);
1999     /// let vm = hv.create_vm().expect("new VM fd creation failed");
2000     /// vm.enable_split_irq().unwrap();
2001     /// let vcpu = vm.create_vcpu(0, None).unwrap();
2002     /// let state = vcpu.state().unwrap();
2003     /// ```
2004     fn state(&self) -> cpu::Result<CpuState> {
2005         let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
2006         let mp_state = self.get_mp_state()?.into();
2007         let regs = self.get_regs()?;
2008         let sregs = self.get_sregs()?;
2009         let xsave = self.get_xsave()?;
2010         let xcrs = self.get_xcrs()?;
2011         let lapic_state = self.get_lapic()?;
2012         let fpu = self.get_fpu()?;
2013 
2014         // Try to get all MSRs based on the list previously retrieved from KVM.
2015         // If the number of MSRs obtained from GET_MSRS is different from the
2016         // expected amount, we fall back on a slower method, getting MSRs
2017         // in chunks. This is the only way to make sure we try to get as many
2018         // MSRs as possible, even if some MSRs are not supported.
2019         let mut msr_entries = self.msrs.clone();
2020 
2021         // Save extra MSRs if the Hyper-V synthetic interrupt controller is
2022         // emulated.
2023         if self.hyperv_synic.load(Ordering::Acquire) {
2024             let hyperv_synic_msrs = vec![
2025                 0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
2026                 0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
2027                 0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
2028                 0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
2029                 0x400000b5, 0x400000b6, 0x400000b7,
2030             ];
2031             for index in hyperv_synic_msrs {
2032                 let msr = kvm_msr_entry {
2033                     index,
2034                     ..Default::default()
2035                 };
2036                 msr_entries.push(msr.into());
2037             }
2038         }
2039 
2040         let expected_num_msrs = msr_entries.len();
2041         let num_msrs = self.get_msrs(&mut msr_entries)?;
2042         let msrs = if num_msrs != expected_num_msrs {
2043             let mut faulty_msr_index = num_msrs;
2044             let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();
2045 
2046             loop {
2047                 warn!(
2048                     "Detected faulty MSR 0x{:x} while getting MSRs",
2049                     msr_entries[faulty_msr_index].index
2050                 );
2051 
2052                 // Skip the first bad MSR
2053                 let start_pos = faulty_msr_index + 1;
2054 
2055                 let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
2056                 let num_msrs = self.get_msrs(&mut sub_msr_entries)?;
2057 
2058                 msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);
2059 
2060                 if num_msrs == sub_msr_entries.len() {
2061                     break;
2062                 }
2063 
2064                 faulty_msr_index = start_pos + num_msrs;
2065             }
2066 
2067             msr_entries_tmp
2068         } else {
2069             msr_entries
2070         };
2071 
2072         let vcpu_events = self.get_vcpu_events()?;
2073         let tsc_khz = self.tsc_khz()?;
2074 
2075         Ok(VcpuKvmState {
2076             cpuid,
2077             msrs,
2078             vcpu_events,
2079             regs: regs.into(),
2080             sregs: sregs.into(),
2081             fpu,
2082             lapic_state,
2083             xsave,
2084             xcrs,
2085             mp_state,
2086             tsc_khz,
2087         }
2088         .into())
2089     }
2090 
2091     ///
2092     /// Get the current AArch64 CPU state
2093     ///
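    /// A minimal sketch mirroring the x86_64 example; on AArch64 the vCPU
    /// must have been initialized with `vcpu_init` before its registers can
    /// be read:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let hv = Arc::new(KvmHypervisor::new().unwrap());
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```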
2094     #[cfg(target_arch = "aarch64")]
2095     fn state(&self) -> cpu::Result<CpuState> {
2096         let mut state = VcpuKvmState {
2097             mp_state: self.get_mp_state()?.into(),
2098             ..Default::default()
2099         };
2100         // Get core registers
2101         state.core_regs = self.get_regs()?;
2102 
2103         // Get system registers.
2104         // Call KVM_GET_REG_LIST to get all registers available to the guest.
2105         // For Armv8 there are around 500 registers.
2106         let mut sys_regs: Vec<Register> = Vec::new();
2107         let mut reg_list = RegList::new(500).unwrap();
2108         self.fd
2109             .lock()
2110             .unwrap()
2111             .get_reg_list(&mut reg_list)
2112             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;
2113 
2114         // At this point reg_list should contain both core registers and system
2115         // registers.
2116         // The register list contains the number of registers and their ids. We
2117         // need to call KVM_GET_ONE_REG on each id in order to save all of
2118         // them. We carve the core registers out of the list: they are
2119         // represented in the kernel by the kvm_regs structure, and we can
2120         // calculate their ids based on their offsets in that structure.
2121         reg_list.retain(|regid| is_system_register(*regid));
2122 
2123         // Now, for the rest of the registers left in the previously fetched
2124         // register list, we simply call KVM_GET_ONE_REG on each of them.
2125         let indices = reg_list.as_slice();
2126         for index in indices.iter() {
2127             let mut bytes = [0_u8; 8];
2128             self.fd
2129                 .lock()
2130                 .unwrap()
2131                 .get_one_reg(*index, &mut bytes)
2132                 .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
2133             sys_regs.push(kvm_bindings::kvm_one_reg {
2134                 id: *index,
2135                 addr: u64::from_le_bytes(bytes),
2136             });
2137         }
2138 
2139         state.sys_regs = sys_regs;
2140 
2141         Ok(state.into())
2142     }
2143 
2144     #[cfg(target_arch = "x86_64")]
2145     ///
2146     /// Restore the previously saved CPU state
2147     ///
2148     /// Ordering requirements:
2149     ///
2150     /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
2151     /// still running.
2152     ///
2153     /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
2154     /// if we ever change the BSP, we have to do that before restoring anything.
2155     /// The same seems to be true for CPUID stuff.
2156     ///
2157     /// SREGS saves/restores a pending interrupt, similar to what
2158     /// VCPU_EVENTS also does.
2159     ///
2160     /// SET_REGS clears pending exceptions unconditionally, thus, it must be
2161     /// done before SET_VCPU_EVENTS, which restores it.
2162     ///
2163     /// SET_LAPIC must come after SET_SREGS, because the latter restores
2164     /// the apic base msr.
2165     ///
2166     /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
2167     /// only restores successfully, when the LAPIC is correctly configured.
2168     ///
2169     /// Arguments: the `CpuState` to restore.
2170     /// # Example
2171     ///
2172     /// ```rust
2173     /// # use hypervisor::kvm::KvmHypervisor;
2174     /// # use std::sync::Arc;
2175     /// let kvm = KvmHypervisor::new().unwrap();
2176     /// let hv = Arc::new(kvm);
2177     /// let vm = hv.create_vm().expect("new VM fd creation failed");
2178     /// vm.enable_split_irq().unwrap();
2179     /// let vcpu = vm.create_vcpu(0, None).unwrap();
2180     /// let state = vcpu.state().unwrap();
2181     /// vcpu.set_state(&state).unwrap();
2182     /// ```
2183     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
2184         let state: VcpuKvmState = state.clone().into();
2185         self.set_cpuid2(&state.cpuid)?;
2186         self.set_mp_state(state.mp_state.into())?;
2187         self.set_regs(&state.regs.into())?;
2188         self.set_sregs(&state.sregs.into())?;
2189         self.set_xsave(&state.xsave)?;
2190         self.set_xcrs(&state.xcrs)?;
2191         self.set_lapic(&state.lapic_state)?;
2192         self.set_fpu(&state.fpu)?;
2193 
2194         if let Some(freq) = state.tsc_khz {
2195             self.set_tsc_khz(freq)?;
2196         }
2197 
2198         // Try to set all MSRs previously stored.
2199         // If the number of MSRs set from SET_MSRS is different from the
2200         // expected amount, we fall back on a slower method, setting MSRs
2201         // in chunks. This is the only way to make sure we try to set as many
2202         // MSRs as possible, even if some MSRs are not supported.
2203         let expected_num_msrs = state.msrs.len();
2204         let num_msrs = self.set_msrs(&state.msrs)?;
2205         if num_msrs != expected_num_msrs {
2206             let mut faulty_msr_index = num_msrs;
2207 
2208             loop {
2209                 warn!(
2210                     "Detected faulty MSR 0x{:x} while setting MSRs",
2211                     state.msrs[faulty_msr_index].index
2212                 );
2213 
2214                 // Skip the first bad MSR
2215                 let start_pos = faulty_msr_index + 1;
2216 
2217                 let sub_msr_entries = state.msrs[start_pos..].to_vec();
2218 
2219                 let num_msrs = self.set_msrs(&sub_msr_entries)?;
2220 
2221                 if num_msrs == sub_msr_entries.len() {
2222                     break;
2223                 }
2224 
2225                 faulty_msr_index = start_pos + num_msrs;
2226             }
2227         }
2228 
2229         self.set_vcpu_events(&state.vcpu_events)?;
2230 
2231         Ok(())
2232     }
2233 
2234     ///
2235     /// Restore the previously saved AArch64 CPU state
2236     ///
2237     #[cfg(target_arch = "aarch64")]
2238     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
2239         let state: VcpuKvmState = state.clone().into();
2240         // Set core registers
2241         self.set_regs(&state.core_regs)?;
2242         // Set system registers
2243         for reg in &state.sys_regs {
2244             self.fd
2245                 .lock()
2246                 .unwrap()
2247                 .set_one_reg(reg.id, &reg.addr.to_le_bytes())
2248                 .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
2249         }
2250 
2251         self.set_mp_state(state.mp_state.into())?;
2252 
2253         Ok(())
2254     }
2255 
2256     ///
2257     /// Initialize TDX for this CPU
2258     ///
2259     #[cfg(feature = "tdx")]
2260     fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
2261         tdx_command(
2262             &self.fd.lock().unwrap().as_raw_fd(),
2263             TdxCommand::InitVcpu,
2264             0,
2265             hob_address,
2266         )
2267         .map_err(cpu::HypervisorCpuError::InitializeTdx)
2268     }
2269 
2270     ///
2271     /// Set the "immediate_exit" state
2272     ///
2273     fn set_immediate_exit(&self, exit: bool) {
2274         self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into());
2275     }
2276 
2277     ///
2278     /// Returns the details about the TDX exit reason
2279     ///
2280     #[cfg(feature = "tdx")]
2281     fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
2282         let mut fd = self.fd.as_ref().lock().unwrap();
2283         let kvm_run = fd.get_kvm_run();
2284         // SAFETY: accessing a union field in a valid structure
2285         let tdx_vmcall = unsafe {
2286             &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
2287                 as *mut KvmTdxExit))
2288                 .u
2289                 .vmcall
2290         };
2291 
2292         tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;
2293 
2294         if tdx_vmcall.type_ != 0 {
2295             return Err(cpu::HypervisorCpuError::UnknownTdxVmCall);
2296         }
2297 
2298         match tdx_vmcall.subfunction {
2299             TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
2300             TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
2301                 Ok(TdxExitDetails::SetupEventNotifyInterrupt)
2302             }
2303             _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
2304         }
2305     }
2306 
2307     ///
2308     /// Set the status code for TDX exit
2309     ///
2310     #[cfg(feature = "tdx")]
2311     fn set_tdx_status(&mut self, status: TdxExitStatus) {
2312         let mut fd = self.fd.as_ref().lock().unwrap();
2313         let kvm_run = fd.get_kvm_run();
2314         // SAFETY: accessing a union field in a valid structure
2315         let tdx_vmcall = unsafe {
2316             &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
2317                 as *mut KvmTdxExit))
2318                 .u
2319                 .vmcall
2320         };
2321 
2322         tdx_vmcall.status_code = match status {
2323             TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
2324             TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
2325         };
2326     }
2327 
2328     #[cfg(target_arch = "x86_64")]
2329     ///
2330     /// Return the list of initial MSR entries for a vCPU
2331     ///
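    /// A minimal sketch of how this list is typically consumed; real boot
    /// code would configure the rest of the vCPU state as well:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let hv = Arc::new(KvmHypervisor::new().unwrap());
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let msrs = vcpu.boot_msr_entries();
    /// let written = vcpu.set_msrs(&msrs).unwrap();
    /// assert_eq!(written, msrs.len());
    /// ```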
2332     fn boot_msr_entries(&self) -> Vec<MsrEntry> {
2333         use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};
2334 
2335         [
2336             msr!(msr_index::MSR_IA32_SYSENTER_CS),
2337             msr!(msr_index::MSR_IA32_SYSENTER_ESP),
2338             msr!(msr_index::MSR_IA32_SYSENTER_EIP),
2339             msr!(msr_index::MSR_STAR),
2340             msr!(msr_index::MSR_CSTAR),
2341             msr!(msr_index::MSR_LSTAR),
2342             msr!(msr_index::MSR_KERNEL_GS_BASE),
2343             msr!(msr_index::MSR_SYSCALL_MASK),
2344             msr!(msr_index::MSR_IA32_TSC),
2345             msr_data!(
2346                 msr_index::MSR_IA32_MISC_ENABLE,
2347                 msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
2348             ),
2349             msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
2350         ]
2351         .to_vec()
2352     }
2353 
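    ///
    /// Returns true if KVM exposes the PMUv3 device attribute for this vCPU,
    /// i.e. if an in-guest PMU can be initialized via `init_pmu`.
    ///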
2354     #[cfg(target_arch = "aarch64")]
2355     fn has_pmu_support(&self) -> bool {
2356         let cpu_attr = kvm_bindings::kvm_device_attr {
2357             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
2358             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
2359             addr: 0x0,
2360             flags: 0,
2361         };
2362         self.fd.lock().unwrap().has_device_attr(&cpu_attr).is_ok()
2363     }
2364 
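    ///
    /// Initialize the PMU (PMUv3) for this vCPU and route its overflow
    /// interrupt.
    ///
    /// A minimal sketch; 23 is a hypothetical PPI number, and the vCPU is
    /// assumed to have been created with the PMU feature enabled:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let hv = Arc::new(KvmHypervisor::new().unwrap());
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// if vcpu.has_pmu_support() {
    ///     vcpu.init_pmu(23).unwrap();
    /// }
    /// ```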
2365     #[cfg(target_arch = "aarch64")]
2366     fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
2367         let cpu_attr = kvm_bindings::kvm_device_attr {
2368             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
2369             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
2370             addr: 0x0,
2371             flags: 0,
2372         };
2373         let cpu_attr_irq = kvm_bindings::kvm_device_attr {
2374             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
2375             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
2376             addr: &irq as *const u32 as u64,
2377             flags: 0,
2378         };
2379         self.fd
2380             .lock()
2381             .unwrap()
2382             .set_device_attr(&cpu_attr_irq)
2383             .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
2384         self.fd
2385             .lock()
2386             .unwrap()
2387             .set_device_attr(&cpu_attr)
2388             .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
2389     }
2390 
2391     #[cfg(target_arch = "x86_64")]
2392     ///
2393     /// Get the frequency of the TSC if available
2394     ///
2395     fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
2396         match self.fd.lock().unwrap().get_tsc_khz() {
2397             Err(e) => {
2398                 if e.errno() == libc::EIO {
2399                     Ok(None)
2400                 } else {
2401                     Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
2402                 }
2403             }
2404             Ok(v) => Ok(Some(v)),
2405         }
2406     }
2407 
2408     #[cfg(target_arch = "x86_64")]
2409     ///
2410     /// Set the frequency of the TSC if available
2411     ///
2412     fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
2413         match self.fd.lock().unwrap().set_tsc_khz(freq) {
2414             Err(e) => {
2415                 if e.errno() == libc::EIO {
2416                     Ok(())
2417                 } else {
2418                     Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
2419                 }
2420             }
2421             Ok(_) => Ok(()),
2422         }
2423     }
2424 
2425     #[cfg(target_arch = "x86_64")]
2426     ///
2427     /// Trigger an NMI (non-maskable interrupt) on the vCPU
2428     ///
2429     fn nmi(&self) -> cpu::Result<()> {
2430         match self.fd.lock().unwrap().nmi() {
2431             Err(e) => {
2432                 if e.errno() == libc::EIO {
2433                     Ok(())
2434                 } else {
2435                     Err(cpu::HypervisorCpuError::Nmi(e.into()))
2436                 }
2437             }
2438             Ok(_) => Ok(()),
2439         }
2440     }
2441 }
2442 
2443 impl KvmVcpu {
2444     #[cfg(target_arch = "x86_64")]
2445     ///
2446     /// X86 specific call that returns the vcpu's current "xsave struct".
2447     ///
2448     fn get_xsave(&self) -> cpu::Result<XsaveState> {
2449         Ok(self
2450             .fd
2451             .lock()
2452             .unwrap()
2453             .get_xsave()
2454             .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
2455             .into())
2456     }
2457 
2458     #[cfg(target_arch = "x86_64")]
2459     ///
2460     /// X86 specific call that sets the vcpu's current "xsave struct".
2461     ///
2462     fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
2463         let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
2464         self.fd
2465             .lock()
2466             .unwrap()
2467             .set_xsave(&xsave)
2468             .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
2469     }
2470 
2471     #[cfg(target_arch = "x86_64")]
2472     ///
2473     /// X86 specific call that returns the vcpu's current "xcrs".
2474     ///
2475     fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
2476         self.fd
2477             .lock()
2478             .unwrap()
2479             .get_xcrs()
2480             .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
2481     }
2482 
2483     #[cfg(target_arch = "x86_64")]
2484     ///
2485     /// X86 specific call that sets the vcpu's current "xcrs".
2486     ///
2487     fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
2488         self.fd
2489             .lock()
2490             .unwrap()
2491             .set_xcrs(xcrs)
2492             .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
2493     }
2494 
2495     #[cfg(target_arch = "x86_64")]
2496     ///
2497     /// Returns currently pending exceptions, interrupts, and NMIs as well as related
2498     /// states of the vcpu.
2499     ///
2500     fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
2501         self.fd
2502             .lock()
2503             .unwrap()
2504             .get_vcpu_events()
2505             .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
2506     }
2507 
2508     #[cfg(target_arch = "x86_64")]
2509     ///
2510     /// Sets pending exceptions, interrupts, and NMIs as well as related states
2511     /// of the vcpu.
2512     ///
2513     fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
2514         self.fd
2515             .lock()
2516             .unwrap()
2517             .set_vcpu_events(events)
2518             .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
2519     }
2520 }
2521