xref: /cloud-hypervisor/hypervisor/src/kvm/mod.rs (revision fa7a000dbe9637eb256af18ae8c3c4a8d5bf9c8f)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
4 //
5 // Copyright © 2020, Microsoft Corporation
6 //
7 // Copyright 2018-2019 CrowdStrike, Inc.
8 //
9 //
10 
11 #[cfg(target_arch = "aarch64")]
12 use crate::aarch64::gic::KvmGicV3Its;
13 #[cfg(target_arch = "aarch64")]
14 pub use crate::aarch64::{
15     check_required_kvm_extensions, gic::Gicv3ItsState as GicState, is_system_register, VcpuInit,
16     VcpuKvmState,
17 };
18 #[cfg(target_arch = "aarch64")]
19 use crate::arch::aarch64::gic::{Vgic, VgicConfig};
20 use crate::cpu;
21 use crate::hypervisor;
22 use crate::vec_with_array_field;
23 use crate::vm::{self, InterruptSourceConfig, VmOps};
24 use crate::HypervisorType;
25 #[cfg(target_arch = "aarch64")]
26 use crate::{arm64_core_reg_id, offset_of};
27 use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
28 use std::any::Any;
29 use std::collections::HashMap;
30 #[cfg(target_arch = "x86_64")]
31 use std::fs::File;
32 #[cfg(target_arch = "x86_64")]
33 use std::os::unix::io::AsRawFd;
34 #[cfg(feature = "tdx")]
35 use std::os::unix::io::RawFd;
36 use std::result;
37 #[cfg(target_arch = "x86_64")]
38 use std::sync::atomic::{AtomicBool, Ordering};
39 #[cfg(target_arch = "aarch64")]
40 use std::sync::Mutex;
41 use std::sync::{Arc, RwLock};
42 use vmm_sys_util::eventfd::EventFd;
43 // x86_64 dependencies
44 #[cfg(target_arch = "x86_64")]
45 pub mod x86_64;
46 #[cfg(target_arch = "x86_64")]
47 use crate::arch::x86::{
48     CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, StandardRegisters, XsaveState,
49     NUM_IOAPIC_PINS,
50 };
51 #[cfg(target_arch = "x86_64")]
52 use crate::ClockData;
53 use crate::{
54     CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
55     USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
56 };
57 #[cfg(target_arch = "aarch64")]
58 use aarch64::{RegList, Register, StandardRegisters};
59 #[cfg(target_arch = "x86_64")]
60 use kvm_bindings::{
61     kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
62     KVM_GUESTDBG_USE_HW_BP,
63 };
64 #[cfg(target_arch = "x86_64")]
65 use x86_64::check_required_kvm_extensions;
66 #[cfg(target_arch = "x86_64")]
67 pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState};
68 // aarch64 dependencies
69 #[cfg(target_arch = "aarch64")]
70 pub mod aarch64;
71 pub use kvm_bindings;
72 pub use kvm_bindings::{
73     kvm_clock_data, kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug,
74     kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_userspace_memory_region,
75     KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
76     KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
77 };
78 #[cfg(target_arch = "aarch64")]
79 use kvm_bindings::{
80     kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64,
81     KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK,
82     KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK,
83     KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
84 };
85 #[cfg(feature = "tdx")]
86 use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO};
87 pub use kvm_ioctls;
88 pub use kvm_ioctls::{Cap, Kvm};
89 #[cfg(target_arch = "aarch64")]
90 use std::mem;
91 use thiserror::Error;
92 use vfio_ioctls::VfioDeviceFd;
93 #[cfg(feature = "tdx")]
94 use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr};
95 ///
96 /// Export generically-named wrappers of kvm-bindings for Unix-based platforms
97 ///
98 pub use {
99     kvm_bindings::kvm_create_device as CreateDevice, kvm_bindings::kvm_device_attr as DeviceAttr,
100     kvm_bindings::kvm_run, kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::VcpuExit,
101 };
102 
103 #[cfg(target_arch = "x86_64")]
104 const KVM_CAP_SGX_ATTRIBUTE: u32 = 196;
105 
106 #[cfg(target_arch = "x86_64")]
107 use vmm_sys_util::ioctl_io_nr;
108 
109 #[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))]
110 use vmm_sys_util::ioctl_ioc_nr;
111 
112 #[cfg(target_arch = "x86_64")]
113 ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a);
114 
115 #[cfg(feature = "tdx")]
116 const KVM_EXIT_TDX: u32 = 50;
117 #[cfg(feature = "tdx")]
118 const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002;
119 #[cfg(feature = "tdx")]
120 const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004;
121 #[cfg(feature = "tdx")]
122 const TDG_VP_VMCALL_SUCCESS: u64 = 0;
123 #[cfg(feature = "tdx")]
124 const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000;
125 
126 #[cfg(feature = "tdx")]
127 ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);
128 
129 #[cfg(feature = "tdx")]
130 #[repr(u32)]
131 enum TdxCommand {
132     Capabilities = 0,
133     InitVm,
134     InitVcpu,
135     InitMemRegion,
136     Finalize,
137 }
138 
139 #[cfg(feature = "tdx")]
140 pub enum TdxExitDetails {
141     GetQuote,
142     SetupEventNotifyInterrupt,
143 }
144 
145 #[cfg(feature = "tdx")]
146 pub enum TdxExitStatus {
147     Success,
148     InvalidOperand,
149 }
150 
151 #[cfg(feature = "tdx")]
152 const TDX_MAX_NR_CPUID_CONFIGS: usize = 6;
153 
154 #[cfg(feature = "tdx")]
155 #[repr(C)]
156 #[derive(Debug, Default)]
157 pub struct TdxCpuidConfig {
158     pub leaf: u32,
159     pub sub_leaf: u32,
160     pub eax: u32,
161     pub ebx: u32,
162     pub ecx: u32,
163     pub edx: u32,
164 }
165 
166 #[cfg(feature = "tdx")]
167 #[repr(C)]
168 #[derive(Debug, Default)]
169 pub struct TdxCapabilities {
170     pub attrs_fixed0: u64,
171     pub attrs_fixed1: u64,
172     pub xfam_fixed0: u64,
173     pub xfam_fixed1: u64,
174     pub nr_cpuid_configs: u32,
175     pub padding: u32,
176     pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS],
177 }
178 
179 #[cfg(feature = "tdx")]
#[repr(C)]
180 #[derive(Copy, Clone)]
181 pub struct KvmTdxExit {
182     pub type_: u32,
183     pub pad: u32,
184     pub u: KvmTdxExitU,
185 }
186 
187 #[cfg(feature = "tdx")]
188 #[repr(C)]
189 #[derive(Copy, Clone)]
190 pub union KvmTdxExitU {
191     pub vmcall: KvmTdxExitVmcall,
192 }
193 
194 #[cfg(feature = "tdx")]
195 #[repr(C)]
196 #[derive(Debug, Default, Copy, Clone, PartialEq)]
197 pub struct KvmTdxExitVmcall {
198     pub type_: u64,
199     pub subfunction: u64,
200     pub reg_mask: u64,
201     pub in_r12: u64,
202     pub in_r13: u64,
203     pub in_r14: u64,
204     pub in_r15: u64,
205     pub in_rbx: u64,
206     pub in_rdi: u64,
207     pub in_rsi: u64,
208     pub in_r8: u64,
209     pub in_r9: u64,
210     pub in_rdx: u64,
211     pub status_code: u64,
212     pub out_r11: u64,
213     pub out_r12: u64,
214     pub out_r13: u64,
215     pub out_r14: u64,
216     pub out_r15: u64,
217     pub out_rbx: u64,
218     pub out_rdi: u64,
219     pub out_rsi: u64,
220     pub out_r8: u64,
221     pub out_r9: u64,
222     pub out_rdx: u64,
223 }
224 
225 impl From<kvm_userspace_memory_region> for UserMemoryRegion {
226     fn from(region: kvm_userspace_memory_region) -> Self {
227         let mut flags = USER_MEMORY_REGION_READ;
228         if region.flags & KVM_MEM_READONLY == 0 {
229             flags |= USER_MEMORY_REGION_WRITE;
230         }
231         if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 {
232             flags |= USER_MEMORY_REGION_LOG_DIRTY;
233         }
234 
235         UserMemoryRegion {
236             slot: region.slot,
237             guest_phys_addr: region.guest_phys_addr,
238             memory_size: region.memory_size,
239             userspace_addr: region.userspace_addr,
240             flags,
241         }
242     }
243 }
244 
245 impl From<UserMemoryRegion> for kvm_userspace_memory_region {
246     fn from(region: UserMemoryRegion) -> Self {
247         assert!(
248             region.flags & USER_MEMORY_REGION_READ != 0,
249             "KVM mapped memory is always readable"
250         );
251 
252         let mut flags = 0;
253         if region.flags & USER_MEMORY_REGION_WRITE == 0 {
254             flags |= KVM_MEM_READONLY;
255         }
256         if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
257             flags |= KVM_MEM_LOG_DIRTY_PAGES;
258         }
259 
260         kvm_userspace_memory_region {
261             slot: region.slot,
262             guest_phys_addr: region.guest_phys_addr,
263             memory_size: region.memory_size,
264             userspace_addr: region.userspace_addr,
265             flags,
266         }
267     }
268 }
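// A note on the mapping above (an illustrative summary, not upstream doc):
// KVM has no explicit "read" flag because mapped memory is always readable,
// so only the lack of write access and the dirty-page logging are encoded:
//
//   USER_MEMORY_REGION_READ only        <-> KVM_MEM_READONLY set
//   ..._READ | ..._WRITE                <-> no flag set
//   ... | USER_MEMORY_REGION_LOG_DIRTY  <-> KVM_MEM_LOG_DIRTY_PAGES set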
269 
270 impl From<kvm_mp_state> for MpState {
271     fn from(s: kvm_mp_state) -> Self {
272         MpState::Kvm(s)
273     }
274 }
275 
276 impl From<MpState> for kvm_mp_state {
277     fn from(ms: MpState) -> Self {
278         match ms {
279             MpState::Kvm(s) => s,
280             /* Needed in case other hypervisors are enabled */
281             #[allow(unreachable_patterns)]
282             _ => panic!("MpState is not valid"),
283         }
284     }
285 }
286 
287 impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
288     fn from(a: kvm_ioctls::IoEventAddress) -> Self {
289         match a {
290             kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
291             kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
292         }
293     }
294 }
295 
296 impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
297     fn from(a: IoEventAddress) -> Self {
298         match a {
299             IoEventAddress::Pio(x) => Self::Pio(x),
300             IoEventAddress::Mmio(x) => Self::Mmio(x),
301         }
302     }
303 }
304 
305 impl From<VcpuKvmState> for CpuState {
306     fn from(s: VcpuKvmState) -> Self {
307         CpuState::Kvm(s)
308     }
309 }
310 
311 impl From<CpuState> for VcpuKvmState {
312     fn from(s: CpuState) -> Self {
313         match s {
314             CpuState::Kvm(s) => s,
315             /* Needed in case other hypervisors are enabled */
316             #[allow(unreachable_patterns)]
317             _ => panic!("CpuState is not valid"),
318         }
319     }
320 }
321 
322 #[cfg(target_arch = "x86_64")]
323 impl From<kvm_clock_data> for ClockData {
324     fn from(d: kvm_clock_data) -> Self {
325         ClockData::Kvm(d)
326     }
327 }
328 
329 #[cfg(target_arch = "x86_64")]
330 impl From<ClockData> for kvm_clock_data {
331     fn from(ms: ClockData) -> Self {
332         match ms {
333             ClockData::Kvm(s) => s,
334             /* Needed in case other hypervisors are enabled */
335             #[allow(unreachable_patterns)]
336             _ => panic!("ClockData is not valid"),
337         }
338     }
339 }
340 
341 impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
342     fn from(s: kvm_irq_routing_entry) -> Self {
343         IrqRoutingEntry::Kvm(s)
344     }
345 }
346 
347 impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
348     fn from(e: IrqRoutingEntry) -> Self {
349         match e {
350             IrqRoutingEntry::Kvm(e) => e,
351             /* Needed in case other hypervisors are enabled */
352             #[allow(unreachable_patterns)]
353             _ => panic!("IrqRoutingEntry is not valid"),
354         }
355     }
356 }
357 
358 struct KvmDirtyLogSlot {
359     slot: u32,
360     guest_phys_addr: u64,
361     memory_size: u64,
362     userspace_addr: u64,
363 }
364 
365 /// Wrapper over KVM VM ioctls.
366 pub struct KvmVm {
367     fd: Arc<VmFd>,
368     #[cfg(target_arch = "x86_64")]
369     msrs: Vec<MsrEntry>,
370     dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>,
371 }
372 
373 impl KvmVm {
374     ///
375     /// Creates an emulated device in the kernel.
376     ///
377     /// See the documentation for `KVM_CREATE_DEVICE`.
378     fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> {
379         let device_fd = self
380             .fd
381             .create_device(device)
382             .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
383         Ok(VfioDeviceFd::new_from_kvm(device_fd))
384     }
385     /// Checks if a particular `Cap` is available.
386     pub fn check_extension(&self, c: Cap) -> bool {
387         self.fd.check_extension(c)
388     }
389 }
390 
391 /// Implementation of Vm trait for KVM
392 ///
393 /// # Examples
394 ///
395 /// ```
396 /// # use hypervisor::kvm::KvmHypervisor;
397 /// # use std::sync::Arc;
398 /// let kvm = KvmHypervisor::new().unwrap();
399 /// let hypervisor = Arc::new(kvm);
400 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
401 /// ```
402 impl vm::Vm for KvmVm {
403     #[cfg(target_arch = "x86_64")]
404     ///
405     /// Sets the address of the one-page region in the VM's address space.
406     ///
407     fn set_identity_map_address(&self, address: u64) -> vm::Result<()> {
408         self.fd
409             .set_identity_map_address(address)
410             .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into()))
411     }
412 
413     #[cfg(target_arch = "x86_64")]
414     ///
415     /// Sets the address of the three-page region in the VM's address space.
416     ///
417     fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
418         self.fd
419             .set_tss_address(offset)
420             .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
421     }
422 
423     ///
424     /// Creates an in-kernel interrupt controller.
425     ///
426     fn create_irq_chip(&self) -> vm::Result<()> {
427         self.fd
428             .create_irq_chip()
429             .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
430     }
431 
432     ///
433     /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
434     ///
435     fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
436         self.fd
437             .register_irqfd(fd, gsi)
438             .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
439     }
440 
441     ///
442     /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
443     ///
444     fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
445         self.fd
446             .unregister_irqfd(fd, gsi)
447             .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
448     }
449 
450     ///
451     /// Creates a vCPU for the given id, wrapping the underlying `VcpuFd` in a `Vcpu` trait object.
452     ///
453     fn create_vcpu(
454         &self,
455         id: u8,
456         vm_ops: Option<Arc<dyn VmOps>>,
457     ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
458         let vc = self
459             .fd
460             .create_vcpu(id as u64)
461             .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
462         let vcpu = KvmVcpu {
463             fd: vc,
464             #[cfg(target_arch = "x86_64")]
465             msrs: self.msrs.clone(),
466             vm_ops,
467             #[cfg(target_arch = "x86_64")]
468             hyperv_synic: AtomicBool::new(false),
469         };
470         Ok(Arc::new(vcpu))
471     }
472 
473     #[cfg(target_arch = "aarch64")]
474     ///
475     /// Creates a virtual GIC device.
476     ///
477     fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
478         let gic_device = KvmGicV3Its::new(self, config)
479             .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?;
480         Ok(Arc::new(Mutex::new(gic_device)))
481     }
482 
483     ///
484     /// Registers an event to be signaled whenever a certain address is written to.
485     ///
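    /// # Examples
    ///
    /// A minimal sketch, assuming `/dev/kvm` is usable; the MMIO address and
    /// the datamatch value are illustrative:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use hypervisor::vm::DataMatch;
    /// # use hypervisor::IoEventAddress;
    /// # use vmm_sys_util::eventfd::EventFd;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// let evt = EventFd::new(0).unwrap();
    /// let addr = IoEventAddress::Mmio(0xd000_0000);
    /// // Only signal `evt` when the guest writes 0x1234 to the address.
    /// vm.register_ioevent(&evt, &addr, Some(DataMatch::DataMatch32(0x1234)))
    ///     .unwrap();
    /// ```
    ///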
486     fn register_ioevent(
487         &self,
488         fd: &EventFd,
489         addr: &IoEventAddress,
490         datamatch: Option<vm::DataMatch>,
491     ) -> vm::Result<()> {
492         let addr = &kvm_ioctls::IoEventAddress::from(*addr);
493         if let Some(dm) = datamatch {
494             match dm {
495                 vm::DataMatch::DataMatch32(kvm_dm32) => self
496                     .fd
497                     .register_ioevent(fd, addr, kvm_dm32)
498                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
499                 vm::DataMatch::DataMatch64(kvm_dm64) => self
500                     .fd
501                     .register_ioevent(fd, addr, kvm_dm64)
502                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
503             }
504         } else {
505             self.fd
506                 .register_ioevent(fd, addr, NoDatamatch)
507                 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
508         }
509     }
510 
511     ///
512     /// Unregisters an event from the address it has previously been registered to.
513     ///
514     fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
515         let addr = &kvm_ioctls::IoEventAddress::from(*addr);
516         self.fd
517             .unregister_ioevent(fd, addr, NoDatamatch)
518             .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
519     }
520 
521     ///
522     /// Constructs a routing entry
523     ///
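    /// # Examples
    ///
    /// A minimal sketch, assuming `/dev/kvm` is usable; the MSI address/data
    /// values are illustrative and the config only sets the fields consumed
    /// below:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use hypervisor::vm::{InterruptSourceConfig, MsiIrqSourceConfig};
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// let config = InterruptSourceConfig::MsiIrq(MsiIrqSourceConfig {
    ///     high_addr: 0,
    ///     low_addr: 0xfee0_0000,
    ///     data: 0x20,
    ///     devid: 0,
    /// });
    /// let entry = vm.make_routing_entry(0, &config);
    /// // Entries built this way are installed with `set_gsi_routing`.
    /// vm.set_gsi_routing(&[entry]).unwrap();
    /// ```
    ///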
524     fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
525         match &config {
526             InterruptSourceConfig::MsiIrq(cfg) => {
527                 let mut kvm_route = kvm_irq_routing_entry {
528                     gsi,
529                     type_: KVM_IRQ_ROUTING_MSI,
530                     ..Default::default()
531                 };
532 
533                 kvm_route.u.msi.address_lo = cfg.low_addr;
534                 kvm_route.u.msi.address_hi = cfg.high_addr;
535                 kvm_route.u.msi.data = cfg.data;
536 
537                 if self.check_extension(crate::kvm::Cap::MsiDevid) {
538                     // On AArch64, there is a limitation on the range of the
539                     // 'devid': it cannot be greater than 65535 (the max of u16).
540                     //
541                     // The BDF cannot be used directly, because the 'segment' is
542                     // in its high 16 bits. The layout of the u32 BDF is:
543                     // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --|
544                     // |      segment    |     bus    |   device   |  function  |
545                     //
546                     // Since we only support 1 bus per segment, we can build a
547                     // 'devid' by replacing the 'bus' bits with the low 8 bits of
548                     // the 'segment' data.
549                     // This way we resolve the range checking problem and give a
550                     // different `devid` to every device. The limitation is that
551                     // at most 256 segments can be supported.
552                     //
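                    // For example (illustrative values): segment 0x0002, bus
                    // 0x00, device 0x00, function 0x5 give devid 0x0002_0005:
                    //   (0x0002_0005 & 0x00ff_0000) >> 8 = 0x0200
                    //    0x0002_0005 & 0xff              = 0x0005
                    // so the modified 'devid' is 0x0205, which fits in 16 bits.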
553                     let modified_devid = (cfg.devid & 0x00ff_0000) >> 8 | cfg.devid & 0xff;
554 
555                     kvm_route.flags = KVM_MSI_VALID_DEVID;
556                     kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid;
557                 }
558                 kvm_route.into()
559             }
560             InterruptSourceConfig::LegacyIrq(cfg) => {
561                 let mut kvm_route = kvm_irq_routing_entry {
562                     gsi,
563                     type_: KVM_IRQ_ROUTING_IRQCHIP,
564                     ..Default::default()
565                 };
566                 kvm_route.u.irqchip.irqchip = cfg.irqchip;
567                 kvm_route.u.irqchip.pin = cfg.pin;
568 
569                 kvm_route.into()
570             }
571         }
572     }
573 
574     ///
575     /// Sets the GSI routing table entries, overwriting any previously set
576     /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
577     ///
578     fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
579         let mut irq_routing =
580             vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
581         irq_routing[0].nr = entries.len() as u32;
582         irq_routing[0].flags = 0;
583         let entries: Vec<kvm_irq_routing_entry> = entries
584             .iter()
585             .map(|entry| match entry {
586                 IrqRoutingEntry::Kvm(e) => *e,
587                 #[allow(unreachable_patterns)]
588                 _ => panic!("IrqRoutingEntry type is wrong"),
589             })
590             .collect();
591 
592         // SAFETY: irq_routing was allocated with room for entries.len() entries, and
593         // entries_slice is created over those same entries.len() entries, so it is
594         // guaranteed to be large enough to hold everything from entries.
595         unsafe {
596             let entries_slice: &mut [kvm_irq_routing_entry] =
597                 irq_routing[0].entries.as_mut_slice(entries.len());
598             entries_slice.copy_from_slice(&entries);
599         }
600 
601         self.fd
602             .set_gsi_routing(&irq_routing[0])
603             .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
604     }
605 
606     ///
607     /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
608     ///
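    /// # Examples
    ///
    /// A minimal sketch, assuming `/dev/kvm` is usable; the slot, addresses
    /// and size are illustrative:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// // Host memory backing the guest region (would normally be mmap'ed).
    /// let mut backing = vec![0u8; 0x1000];
    /// let region = vm.make_user_memory_region(
    ///     0,                           // slot
    ///     0x1000,                      // guest_phys_addr
    ///     backing.len() as u64,        // memory_size
    ///     backing.as_mut_ptr() as u64, // userspace_addr
    ///     false,                       // readonly
    ///     false,                       // log_dirty_pages
    /// );
    /// vm.create_user_memory_region(region).unwrap();
    /// ```
    ///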
609     fn make_user_memory_region(
610         &self,
611         slot: u32,
612         guest_phys_addr: u64,
613         memory_size: u64,
614         userspace_addr: u64,
615         readonly: bool,
616         log_dirty_pages: bool,
617     ) -> UserMemoryRegion {
618         kvm_userspace_memory_region {
619             slot,
620             guest_phys_addr,
621             memory_size,
622             userspace_addr,
623             flags: if readonly { KVM_MEM_READONLY } else { 0 }
624                 | if log_dirty_pages {
625                     KVM_MEM_LOG_DIRTY_PAGES
626                 } else {
627                     0
628                 },
629         }
630         .into()
631     }
632 
633     ///
634     /// Creates a guest physical memory region.
635     ///
636     fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
637         let mut region: kvm_userspace_memory_region = user_memory_region.into();
638 
639         if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 {
640             if (region.flags & KVM_MEM_READONLY) != 0 {
641                 return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!(
642                     "Error creating regions with both 'dirty-pages-log' and 'read-only'."
643                 )));
644             }
645 
646             // Keep track of the regions that need dirty pages log
647             self.dirty_log_slots.write().unwrap().insert(
648                 region.slot,
649                 KvmDirtyLogSlot {
650                     slot: region.slot,
651                     guest_phys_addr: region.guest_phys_addr,
652                     memory_size: region.memory_size,
653                     userspace_addr: region.userspace_addr,
654                 },
655             );
656 
657             // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`.
658             // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`.
659             region.flags = 0;
660         }
661 
662         // SAFETY: Safe because guest regions are guaranteed not to overlap.
663         unsafe {
664             self.fd
665                 .set_user_memory_region(region)
666                 .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
667         }
668     }
669 
670     ///
671     /// Removes a guest physical memory region.
672     ///
673     fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
674         let mut region: kvm_userspace_memory_region = user_memory_region.into();
675 
676         // Remove the corresponding entry from "self.dirty_log_slots" if needed
677         self.dirty_log_slots.write().unwrap().remove(&region.slot);
678 
679         // Setting the size to 0 means "remove"
680         region.memory_size = 0;
681         // SAFETY: Safe because guest regions are guaranteed not to overlap.
682         unsafe {
683             self.fd
684                 .set_user_memory_region(region)
685                 .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))
686         }
687     }
688 
689     ///
690     /// Returns the preferred CPU target type which can be emulated by KVM on the underlying host.
691     ///
692     #[cfg(target_arch = "aarch64")]
693     fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
694         self.fd
695             .get_preferred_target(kvi)
696             .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))
697     }
698 
699     #[cfg(target_arch = "x86_64")]
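    /// Enables the split irqchip.
    ///
    /// # Examples
    ///
    /// A minimal usage sketch, assuming `/dev/kvm` is usable:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// vm.enable_split_irq().unwrap();
    /// ```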
700     fn enable_split_irq(&self) -> vm::Result<()> {
701         // Create split irqchip
702         // Only the local APIC is emulated in the kernel; both the PICs
703         // and the IOAPIC are not.
704         let mut cap = kvm_enable_cap {
705             cap: KVM_CAP_SPLIT_IRQCHIP,
706             ..Default::default()
707         };
708         cap.args[0] = NUM_IOAPIC_PINS as u64;
709         self.fd
710             .enable_cap(&cap)
711             .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
712         Ok(())
713     }
714 
715     #[cfg(target_arch = "x86_64")]
716     fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> {
717         let mut cap = kvm_enable_cap {
718             cap: KVM_CAP_SGX_ATTRIBUTE,
719             ..Default::default()
720         };
721         cap.args[0] = file.as_raw_fd() as u64;
722         self.fd
723             .enable_cap(&cap)
724             .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?;
725         Ok(())
726     }
727 
728     /// Retrieve guest clock.
729     #[cfg(target_arch = "x86_64")]
730     fn get_clock(&self) -> vm::Result<ClockData> {
731         Ok(self
732             .fd
733             .get_clock()
734             .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?
735             .into())
736     }
737 
738     /// Set guest clock.
739     #[cfg(target_arch = "x86_64")]
740     fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
741         let data = (*data).into();
742         self.fd
743             .set_clock(&data)
744             .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
745     }
746 
747     /// Create a device that is used for passthrough
748     fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
749         let mut vfio_dev = kvm_create_device {
750             type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
751             fd: 0,
752             flags: 0,
753         };
754 
755         self.create_device(&mut vfio_dev)
756             .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
757     }
758 
759     ///
760     /// Start logging dirty pages
761     ///
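    /// # Examples
    ///
    /// A minimal sketch of the whole dirty-log workflow, assuming `/dev/kvm`
    /// is usable and using an illustrative 4 KiB region:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// let mut backing = vec![0u8; 0x1000];
    /// let region =
    ///     vm.make_user_memory_region(0, 0, 0x1000, backing.as_mut_ptr() as u64, false, true);
    /// vm.create_user_memory_region(region).unwrap();
    /// // The region was installed without `KVM_MEM_LOG_DIRTY_PAGES`; turn
    /// // logging on, harvest the bitmap, then turn it off again.
    /// vm.start_dirty_log().unwrap();
    /// let bitmap = vm.get_dirty_log(0, 0, 0x1000).unwrap();
    /// let _dirty_pages: u32 = bitmap.iter().map(|w| w.count_ones()).sum();
    /// vm.stop_dirty_log().unwrap();
    /// ```
    ///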
762     fn start_dirty_log(&self) -> vm::Result<()> {
763         let dirty_log_slots = self.dirty_log_slots.read().unwrap();
764         for (_, s) in dirty_log_slots.iter() {
765             let region = kvm_userspace_memory_region {
766                 slot: s.slot,
767                 guest_phys_addr: s.guest_phys_addr,
768                 memory_size: s.memory_size,
769                 userspace_addr: s.userspace_addr,
770                 flags: KVM_MEM_LOG_DIRTY_PAGES,
771             };
772             // SAFETY: Safe because guest regions are guaranteed not to overlap.
773             unsafe {
774                 self.fd
775                     .set_user_memory_region(region)
776                     .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
777             }
778         }
779 
780         Ok(())
781     }
782 
783     ///
784     /// Stop logging dirty pages
785     ///
786     fn stop_dirty_log(&self) -> vm::Result<()> {
787         let dirty_log_slots = self.dirty_log_slots.read().unwrap();
788         for (_, s) in dirty_log_slots.iter() {
789             let region = kvm_userspace_memory_region {
790                 slot: s.slot,
791                 guest_phys_addr: s.guest_phys_addr,
792                 memory_size: s.memory_size,
793                 userspace_addr: s.userspace_addr,
794                 flags: 0,
795             };
796             // SAFETY: Safe because guest regions are guaranteed not to overlap.
797             unsafe {
798                 self.fd
799                     .set_user_memory_region(region)
800                     .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
801             }
802         }
803 
804         Ok(())
805     }
806 
807     ///
808     /// Get dirty pages bitmap (one bit per page)
809     ///
810     fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
811         self.fd
812             .get_dirty_log(slot, memory_size as usize)
813             .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
814     }
815 
816     ///
817     /// Initialize TDX for this VM
818     ///
819     #[cfg(feature = "tdx")]
820     fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> {
821         const TDX_ATTR_SEPT_VE_DISABLE: usize = 28;
822 
823         let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
824             cpuid.iter().map(|e| (*e).into()).collect();
825         cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default());
826 
827         #[repr(C)]
828         struct TdxInitVm {
829             attributes: u64,
830             max_vcpus: u32,
831             padding: u32,
832             mrconfigid: [u64; 6],
833             mrowner: [u64; 6],
834             mrownerconfig: [u64; 6],
835             cpuid_nent: u32,
836             cpuid_padding: u32,
837             cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256],
838         }
839         let data = TdxInitVm {
840             attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE,
841             max_vcpus,
842             padding: 0,
843             mrconfigid: [0; 6],
844             mrowner: [0; 6],
845             mrownerconfig: [0; 6],
846             cpuid_nent: cpuid.len() as u32,
847             cpuid_padding: 0,
848             cpuid_entries: cpuid.as_slice().try_into().unwrap(),
849         };
850 
851         tdx_command(
852             &self.fd.as_raw_fd(),
853             TdxCommand::InitVm,
854             0,
855             &data as *const _ as u64,
856         )
857         .map_err(vm::HypervisorVmError::InitializeTdx)
858     }
859 
860     ///
861     /// Finalize the TDX setup for this VM
862     ///
863     #[cfg(feature = "tdx")]
864     fn tdx_finalize(&self) -> vm::Result<()> {
865         tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
866             .map_err(vm::HypervisorVmError::FinalizeTdx)
867     }
868 
869     ///
870     /// Initialize memory regions for the TDX VM
871     ///
872     #[cfg(feature = "tdx")]
873     fn tdx_init_memory_region(
874         &self,
875         host_address: u64,
876         guest_address: u64,
877         size: u64,
878         measure: bool,
879     ) -> vm::Result<()> {
880         #[repr(C)]
881         struct TdxInitMemRegion {
882             host_address: u64,
883             guest_address: u64,
884             pages: u64,
885         }
886         let data = TdxInitMemRegion {
887             host_address,
888             guest_address,
889             pages: size / 4096,
890         };
891 
892         tdx_command(
893             &self.fd.as_raw_fd(),
894             TdxCommand::InitMemRegion,
895             u32::from(measure),
896             &data as *const _ as u64,
897         )
898         .map_err(vm::HypervisorVmError::InitMemRegionTdx)
899     }
900 
901     /// Downcast to the underlying KvmVm type
902     fn as_any(&self) -> &dyn Any {
903         self
904     }
905 }
906 
907 #[cfg(feature = "tdx")]
908 fn tdx_command(
909     fd: &RawFd,
910     command: TdxCommand,
911     flags: u32,
912     data: u64,
913 ) -> std::result::Result<(), std::io::Error> {
914     #[repr(C)]
915     struct TdxIoctlCmd {
916         command: TdxCommand,
917         flags: u32,
918         data: u64,
919         error: u64,
920         unused: u64,
921     }
922     let cmd = TdxIoctlCmd {
923         command,
924         flags,
925         data,
926         error: 0,
927         unused: 0,
928     };
929     // SAFETY: FFI call. All input parameters are valid.
930     let ret = unsafe {
931         ioctl_with_val(
932             fd,
933             KVM_MEMORY_ENCRYPT_OP(),
934             &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong,
935         )
936     };
937 
938     if ret < 0 {
939         return Err(std::io::Error::last_os_error());
940     }
941     Ok(())
942 }
943 
944 /// Wrapper over KVM system ioctls.
945 pub struct KvmHypervisor {
946     kvm: Kvm,
947 }
948 
949 impl KvmHypervisor {
950     #[cfg(target_arch = "x86_64")]
951     ///
952     /// Retrieve the list of MSRs supported by the hypervisor.
953     ///
954     fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
955         self.kvm
956             .get_msr_index_list()
957             .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
958     }
959 }
960 
961 /// Enum for KVM related error
962 #[derive(Debug, Error)]
963 pub enum KvmError {
964     #[error("Capability missing: {0:?}")]
965     CapabilityMissing(Cap),
966 }
967 
968 pub type KvmResult<T> = result::Result<T, KvmError>;
969 
970 impl KvmHypervisor {
971     /// Create a hypervisor based on Kvm
972     #[allow(clippy::new_ret_no_self)]
973     pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
974         let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
975         let api_version = kvm_obj.get_api_version();
976 
977         if api_version != kvm_bindings::KVM_API_VERSION as i32 {
978             return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
979         }
980 
981         Ok(Arc::new(KvmHypervisor { kvm: kvm_obj }))
982     }
983 
984     /// Check if the hypervisor is available
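    ///
    /// # Examples
    ///
    /// A short usage sketch (illustrative):
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// if KvmHypervisor::is_available().unwrap() {
    ///     let _hypervisor = KvmHypervisor::new().unwrap();
    /// }
    /// ```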
985     pub fn is_available() -> hypervisor::Result<bool> {
986         match std::fs::metadata("/dev/kvm") {
987             Ok(_) => Ok(true),
988             Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
989             Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
990                 err.into(),
991             )),
992         }
993     }
994 }
995 
996 /// Implementation of Hypervisor trait for KVM
997 ///
998 /// # Examples
999 ///
1000 /// ```
1001 /// # use hypervisor::kvm::KvmHypervisor;
1002 /// # use std::sync::Arc;
1003 /// let kvm = KvmHypervisor::new().unwrap();
1004 /// let hypervisor = Arc::new(kvm);
1005 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
1006 /// ```
1007 impl hypervisor::Hypervisor for KvmHypervisor {
1008     ///
1009     /// Returns the type of the hypervisor
1010     ///
1011     fn hypervisor_type(&self) -> HypervisorType {
1012         HypervisorType::Kvm
1013     }
1014 
1015     /// Create a KVM VM object of a specific VM type and return it as a Vm trait object
1016     ///
1017     /// # Examples
1018     ///
1019     /// ```
1020     /// # use hypervisor::kvm::KvmHypervisor;
1021     /// use hypervisor::kvm::KvmVm;
1022     /// let hypervisor = KvmHypervisor::new().unwrap();
1023     /// let vm = hypervisor.create_vm_with_type(0).unwrap();
1024     /// ```
1025     fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> {
1026         let fd: VmFd;
1027         loop {
1028             match self.kvm.create_vm_with_type(vm_type) {
1029                 Ok(res) => fd = res,
1030                 Err(e) => {
1031                     if e.errno() == libc::EINTR {
1032                         // If the error returned is EINTR, which means the
1033                         // ioctl has been interrupted, we have to retry as
1034                         // this can't be considered a regular error.
1035                         continue;
1036                     } else {
1037                         return Err(hypervisor::HypervisorError::VmCreate(e.into()));
1038                     }
1039                 }
1040             }
1041             break;
1042         }
1043 
1044         let vm_fd = Arc::new(fd);
1045 
1046         #[cfg(target_arch = "x86_64")]
1047         {
1048             let msr_list = self.get_msr_list()?;
1049             let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
1050             let mut msrs: Vec<MsrEntry> = vec![
1051                 MsrEntry {
1052                     ..Default::default()
1053                 };
1054                 num_msrs
1055             ];
1056             let indices = msr_list.as_slice();
1057             for (pos, index) in indices.iter().enumerate() {
1058                 msrs[pos].index = *index;
1059             }
1060 
1061             Ok(Arc::new(KvmVm {
1062                 fd: vm_fd,
1063                 msrs,
1064                 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
1065             }))
1066         }
1067 
1068         #[cfg(target_arch = "aarch64")]
1069         {
1070             Ok(Arc::new(KvmVm {
1071                 fd: vm_fd,
1072                 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
1073             }))
1074         }
1075     }
1076 
1077     /// Create a KVM VM object and return it as a Vm trait object
1078     ///
1079     /// # Examples
1080     ///
1081     /// ```
1082     /// # use hypervisor::kvm::KvmHypervisor;
1083     /// use hypervisor::kvm::KvmVm;
1084     /// let hypervisor = KvmHypervisor::new().unwrap();
1085     /// let vm = hypervisor.create_vm().unwrap();
1086     /// ```
1087     fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
1088         #[allow(unused_mut)]
1089         let mut vm_type: u64 = 0; // Create with default platform type
1090 
1091         // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA
1092         // size from the host and use that when creating the VM, which may
1093         // avoid unnecessary VM creation failures.
1094         #[cfg(target_arch = "aarch64")]
1095         if self.kvm.check_extension(Cap::ArmVmIPASize) {
1096             vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap();
1097         }
1098 
1099         self.create_vm_with_type(vm_type)
1100     }
1101 
1102     fn check_required_extensions(&self) -> hypervisor::Result<()> {
1103         check_required_kvm_extensions(&self.kvm)
1104             .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into()))
1105     }
1106 
1107     #[cfg(target_arch = "x86_64")]
1108     ///
1109     /// X86 specific call to get the system supported CPUID values.
1110     ///
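    /// # Examples
    ///
    /// A minimal sketch, assuming `/dev/kvm` is usable:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let cpuid = hypervisor.get_supported_cpuid().unwrap();
    /// assert!(!cpuid.is_empty());
    /// ```
    ///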
1111     fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
1112         let kvm_cpuid = self
1113             .kvm
1114             .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
1115             .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?;
1116 
1117         let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();
1118 
1119         Ok(v)
1120     }
1121 
1122     #[cfg(target_arch = "aarch64")]
1123     ///
1124     /// Retrieve AArch64 host maximum IPA size supported by KVM.
1125     ///
1126     fn get_host_ipa_limit(&self) -> i32 {
1127         self.kvm.get_host_ipa_limit()
1128     }
1129 
1130     ///
1131     /// Retrieve TDX capabilities
1132     ///
1133     #[cfg(feature = "tdx")]
1134     fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> {
1135         let data = TdxCapabilities {
1136             nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32,
1137             ..Default::default()
1138         };
1139 
1140         tdx_command(
1141             &self.kvm.as_raw_fd(),
1142             TdxCommand::Capabilities,
1143             0,
1144             &data as *const _ as u64,
1145         )
1146         .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?;
1147 
1148         Ok(data)
1149     }
1150 
1151     ///
1152     /// Get the number of supported hardware breakpoints
1153     ///
1154     fn get_guest_debug_hw_bps(&self) -> usize {
1155         #[cfg(target_arch = "x86_64")]
1156         {
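            // x86 provides 4 hardware breakpoints through the debug
            // registers DR0-DR3.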
1157             4
1158         }
1159         #[cfg(target_arch = "aarch64")]
1160         {
1161             self.kvm.get_guest_debug_hw_bps() as usize
1162         }
1163     }
1164 
1165     /// Get maximum number of vCPUs
1166     fn get_max_vcpus(&self) -> u32 {
1167         self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32
1168     }
1169 }
1170 
1171 /// Vcpu struct for KVM
1172 pub struct KvmVcpu {
1173     fd: VcpuFd,
1174     #[cfg(target_arch = "x86_64")]
1175     msrs: Vec<MsrEntry>,
1176     vm_ops: Option<Arc<dyn vm::VmOps>>,
1177     #[cfg(target_arch = "x86_64")]
1178     hyperv_synic: AtomicBool,
1179 }
1180 
1181 /// Implementation of Vcpu trait for KVM
1182 ///
1183 /// # Examples
1184 ///
1185 /// ```
1186 /// # use hypervisor::kvm::KvmHypervisor;
1187 /// # use std::sync::Arc;
1188 /// let kvm = KvmHypervisor::new().unwrap();
1189 /// let hypervisor = Arc::new(kvm);
1190 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
1191 /// let vcpu = vm.create_vcpu(0, None).unwrap();
1192 /// ```
1193 impl cpu::Vcpu for KvmVcpu {
1194     #[cfg(target_arch = "x86_64")]
1195     ///
1196     /// Returns the vCPU general purpose registers.
1197     ///
1198     fn get_regs(&self) -> cpu::Result<StandardRegisters> {
1199         Ok(self
1200             .fd
1201             .get_regs()
1202             .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
1203             .into())
1204     }
1205 
1206     ///
1207     /// Returns the vCPU general purpose registers.
1208     /// The `KVM_GET_REGS` ioctl is not available on AArch64, so `KVM_GET_ONE_REG`
1209     /// is used to get the registers one by one.
1210     ///
1211     #[cfg(target_arch = "aarch64")]
1212     fn get_regs(&self) -> cpu::Result<StandardRegisters> {
1213         let mut state: StandardRegisters = kvm_regs::default();
1214         let mut off = offset_of!(user_pt_regs, regs);
1215         // There are 31 user_pt_regs:
1216         // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
1217         // These actually are the general-purpose registers of the ARMv8-A
1218         // architecture (i.e. x0-x30 when used as 64-bit registers, or w0-w30 when used as 32-bit registers).
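        // `arm64_core_reg_id!` builds the KVM_{GET,SET}_ONE_REG id for a core
        // register from its size and its byte offset within `kvm_regs` (core
        // register ids are indexed in 32-bit units, i.e. offset / 4).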
1219         for i in 0..31 {
1220             let mut bytes = [0_u8; 8];
1221             self.fd
1222                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1223                 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1224             state.regs.regs[i] = u64::from_le_bytes(bytes);
1225             off += std::mem::size_of::<u64>();
1226         }
1227 
1228         // We are now entering the "Other register" section of the ARMv8-A architecture.
1229         // First one, stack pointer.
1230         let off = offset_of!(user_pt_regs, sp);
1231         let mut bytes = [0_u8; 8];
1232         self.fd
1233             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1234             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1235         state.regs.sp = u64::from_le_bytes(bytes);
1236 
1237         // Second one, the program counter.
1238         let off = offset_of!(user_pt_regs, pc);
1239         let mut bytes = [0_u8; 8];
1240         self.fd
1241             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1242             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1243         state.regs.pc = u64::from_le_bytes(bytes);
1244 
1245         // Next is the processor state.
1246         let off = offset_of!(user_pt_regs, pstate);
1247         let mut bytes = [0_u8; 8];
1248         self.fd
1249             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1250             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1251         state.regs.pstate = u64::from_le_bytes(bytes);
1252 
1253         // The stack pointer associated with EL1
1254         let off = offset_of!(kvm_regs, sp_el1);
1255         let mut bytes = [0_u8; 8];
1256         self.fd
1257             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1258             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1259         state.sp_el1 = u64::from_le_bytes(bytes);
1260 
1261         // Exception Link Register for EL1: when taking an exception to EL1, this register
1262         // holds the address to return to afterwards.
1263         let off = offset_of!(kvm_regs, elr_el1);
1264         let mut bytes = [0_u8; 8];
1265         self.fd
1266             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1267             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1268         state.elr_el1 = u64::from_le_bytes(bytes);
1269 
1270         // Saved Program Status Registers; there are 5 of them used in the kernel.
1271         let mut off = offset_of!(kvm_regs, spsr);
1272         for i in 0..KVM_NR_SPSR as usize {
1273             let mut bytes = [0_u8; 8];
1274             self.fd
1275                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
1276                 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1277             state.spsr[i] = u64::from_le_bytes(bytes);
1278             off += std::mem::size_of::<u64>();
1279         }
1280 
1281         // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel:
1282         // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
1283         let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
1284         for i in 0..32 {
1285             let mut bytes = [0_u8; 16];
1286             self.fd
1287                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes)
1288                 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1289             state.fp_regs.vregs[i] = u128::from_le_bytes(bytes);
1290             off += mem::size_of::<u128>();
1291         }
1292 
1293         // Floating-point Status Register
1294         let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
1295         let mut bytes = [0_u8; 4];
1296         self.fd
1297             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
1298             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1299         state.fp_regs.fpsr = u32::from_le_bytes(bytes);
1300 
1301         // Floating-point Control Register
1302         let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
1303         let mut bytes = [0_u8; 4];
1304         self.fd
1305             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
1306             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1307         state.fp_regs.fpcr = u32::from_le_bytes(bytes);
1308         Ok(state)
1309     }
1310 
1311     #[cfg(target_arch = "x86_64")]
1312     ///
1313     /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
1314     ///
1315     fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
1316         let regs = (*regs).into();
1317         self.fd
1318             .set_regs(&regs)
1319             .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
1320     }
1321 
1322     ///
1323     /// Sets the vCPU general purpose registers.
1324     /// The `KVM_SET_REGS` ioctl is not available on AArch64, so `KVM_SET_ONE_REG`
1325     /// is used to set the registers one by one.
1326     ///
1327     #[cfg(target_arch = "aarch64")]
1328     fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
1329         // The function follows exactly the same order as `state`. Look there
1330         // for additional info on the registers.
1331         let mut off = offset_of!(user_pt_regs, regs);
1332         for i in 0..31 {
1333             self.fd
1334                 .set_one_reg(
1335                     arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1336                     &state.regs.regs[i].to_le_bytes(),
1337                 )
1338                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1339             off += std::mem::size_of::<u64>();
1340         }
1341 
1342         let off = offset_of!(user_pt_regs, sp);
1343         self.fd
1344             .set_one_reg(
1345                 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1346                 &state.regs.sp.to_le_bytes(),
1347             )
1348             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1349 
1350         let off = offset_of!(user_pt_regs, pc);
1351         self.fd
1352             .set_one_reg(
1353                 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1354                 &state.regs.pc.to_le_bytes(),
1355             )
1356             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1357 
1358         let off = offset_of!(user_pt_regs, pstate);
1359         self.fd
1360             .set_one_reg(
1361                 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1362                 &state.regs.pstate.to_le_bytes(),
1363             )
1364             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1365 
1366         let off = offset_of!(kvm_regs, sp_el1);
1367         self.fd
1368             .set_one_reg(
1369                 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1370                 &state.sp_el1.to_le_bytes(),
1371             )
1372             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1373 
1374         let off = offset_of!(kvm_regs, elr_el1);
1375         self.fd
1376             .set_one_reg(
1377                 arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1378                 &state.elr_el1.to_le_bytes(),
1379             )
1380             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1381 
1382         let mut off = offset_of!(kvm_regs, spsr);
1383         for i in 0..KVM_NR_SPSR as usize {
1384             self.fd
1385                 .set_one_reg(
1386                     arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1387                     &state.spsr[i].to_le_bytes(),
1388                 )
1389                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1390             off += std::mem::size_of::<u64>();
1391         }
1392 
1393         let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
1394         for i in 0..32 {
1395             self.fd
1396                 .set_one_reg(
1397                     arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
1398                     &state.fp_regs.vregs[i].to_le_bytes(),
1399                 )
1400                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1401             off += mem::size_of::<u128>();
1402         }
1403 
1404         let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
1405         self.fd
1406             .set_one_reg(
1407                 arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
1408                 &state.fp_regs.fpsr.to_le_bytes(),
1409             )
1410             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1411 
1412         let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
1413         self.fd
1414             .set_one_reg(
1415                 arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
1416                 &state.fp_regs.fpcr.to_le_bytes(),
1417             )
1418             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1419         Ok(())
1420     }
1421 
1422     #[cfg(target_arch = "x86_64")]
1423     ///
1424     /// Returns the vCPU special registers.
1425     ///
1426     fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
1427         Ok(self
1428             .fd
1429             .get_sregs()
1430             .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
1431             .into())
1432     }
1433 
1434     #[cfg(target_arch = "x86_64")]
1435     ///
1436     /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
1437     ///
1438     fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
1439         let sregs = (*sregs).into();
1440         self.fd
1441             .set_sregs(&sregs)
1442             .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
1443     }
1444 
1445     #[cfg(target_arch = "x86_64")]
1446     ///
1447     /// Returns the floating point state (FPU) from the vCPU.
1448     ///
1449     fn get_fpu(&self) -> cpu::Result<FpuState> {
1450         Ok(self
1451             .fd
1452             .get_fpu()
1453             .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
1454             .into())
1455     }
1456 
1457     #[cfg(target_arch = "x86_64")]
1458     ///
1459     /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
1460     ///
1461     fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
1462         let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into();
1463         self.fd
1464             .set_fpu(&fpu)
1465             .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
1466     }
1467 
1468     #[cfg(target_arch = "x86_64")]
1469     ///
1470     /// X86 specific call to setup the CPUID registers.
1471     ///
1472     fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
1473         let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
1474             cpuid.iter().map(|e| (*e).into()).collect();
1475         let kvm_cpuid = <CpuId>::from_entries(&cpuid)
1476             .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;
1477 
1478         self.fd
1479             .set_cpuid2(&kvm_cpuid)
1480             .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
1481     }
1482 
1483     #[cfg(target_arch = "x86_64")]
1484     ///
1485     /// X86 specific call to enable HyperV SynIC
1486     ///
1487     fn enable_hyperv_synic(&self) -> cpu::Result<()> {
1488         // Update the information about Hyper-V SynIC being enabled and
1489         // emulated, as it will later influence which MSRs should be saved.
1490         self.hyperv_synic.store(true, Ordering::Release);
1491 
1492         let cap = kvm_enable_cap {
1493             cap: KVM_CAP_HYPERV_SYNIC,
1494             ..Default::default()
1495         };
1496         self.fd
1497             .enable_cap(&cap)
1498             .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into()))
1499     }
1500 
1501     ///
1502     /// X86 specific call to retrieve the CPUID registers.
1503     ///
1504     #[cfg(target_arch = "x86_64")]
1505     fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
1506         let kvm_cpuid = self
1507             .fd
1508             .get_cpuid2(num_entries)
1509             .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?;
1510 
1511         let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();
1512 
1513         Ok(v)
1514     }
1515 
1516     #[cfg(target_arch = "x86_64")]
1517     ///
1518     /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1519     ///
1520     fn get_lapic(&self) -> cpu::Result<LapicState> {
1521         Ok(self
1522             .fd
1523             .get_lapic()
1524             .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
1525             .into())
1526     }
1527 
1528     #[cfg(target_arch = "x86_64")]
1529     ///
1530     /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1531     ///
1532     fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
1533         let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into();
1534         self.fd
1535             .set_lapic(&klapic)
1536             .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
1537     }
1538 
1539     #[cfg(target_arch = "x86_64")]
1540     ///
1541     /// Returns the model-specific registers (MSR) for this vCPU.
1542     ///
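         /// # Example
         ///
         /// A minimal sketch (requires a KVM-capable host). As noted in
         /// `state()`, `get_msrs` needs a pre-populated entry list; the boot
         /// MSR list is reused here purely for illustration.
         ///
         /// ```rust
         /// # use hypervisor::kvm::KvmHypervisor;
         /// # use std::sync::Arc;
         /// let kvm = KvmHypervisor::new().unwrap();
         /// let hv = Arc::new(kvm);
         /// let vm = hv.create_vm().expect("new VM fd creation failed");
         /// vm.enable_split_irq().unwrap();
         /// let vcpu = vm.create_vcpu(0, None).unwrap();
         /// let mut msrs = vcpu.boot_msr_entries();
         /// let read = vcpu.get_msrs(&mut msrs).unwrap();
         /// assert!(read <= msrs.len());
         /// ```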
1543     fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
1544         let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
1545         let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
1546         let succ = self
1547             .fd
1548             .get_msrs(&mut kvm_msrs)
1549             .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;
1550 
1551         msrs[..succ].copy_from_slice(
1552             &kvm_msrs.as_slice()[..succ]
1553                 .iter()
1554                 .map(|e| (*e).into())
1555                 .collect::<Vec<MsrEntry>>(),
1556         );
1557 
1558         Ok(succ)
1559     }
1560 
1561     #[cfg(target_arch = "x86_64")]
1562     ///
1563     /// Set up the model-specific registers (MSR) for this vCPU.
1564     /// Returns the number of MSR entries actually written.
1565     ///
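         /// # Example
         ///
         /// A minimal sketch (requires a KVM-capable host): write the boot MSR
         /// list and check how many entries were accepted.
         ///
         /// ```rust
         /// # use hypervisor::kvm::KvmHypervisor;
         /// # use std::sync::Arc;
         /// let kvm = KvmHypervisor::new().unwrap();
         /// let hv = Arc::new(kvm);
         /// let vm = hv.create_vm().expect("new VM fd creation failed");
         /// vm.enable_split_irq().unwrap();
         /// let vcpu = vm.create_vcpu(0, None).unwrap();
         /// let msrs = vcpu.boot_msr_entries();
         /// let written = vcpu.set_msrs(&msrs).unwrap();
         /// assert!(written <= msrs.len());
         /// ```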
1566     fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
1567         let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
1568         let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
1569         self.fd
1570             .set_msrs(&kvm_msrs)
1571             .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
1572     }
1573 
1574     ///
1575     /// Returns the vcpu's current "multiprocessing state".
1576     ///
1577     fn get_mp_state(&self) -> cpu::Result<MpState> {
1578         Ok(self
1579             .fd
1580             .get_mp_state()
1581             .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))?
1582             .into())
1583     }
1584 
1585     ///
1586     /// Sets the vcpu's current "multiprocessing state".
1587     ///
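         /// # Example
         ///
         /// A minimal sketch (x86_64 shown, requires a KVM-capable host): a
         /// get/set round trip of the multiprocessing state.
         ///
         /// ```rust
         /// # use hypervisor::kvm::KvmHypervisor;
         /// # use std::sync::Arc;
         /// let kvm = KvmHypervisor::new().unwrap();
         /// let hv = Arc::new(kvm);
         /// let vm = hv.create_vm().expect("new VM fd creation failed");
         /// vm.enable_split_irq().unwrap();
         /// let vcpu = vm.create_vcpu(0, None).unwrap();
         /// let mp_state = vcpu.get_mp_state().unwrap();
         /// vcpu.set_mp_state(mp_state).unwrap();
         /// ```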
1588     fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
1589         self.fd
1590             .set_mp_state(mp_state.into())
1591             .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
1592     }
1593 
1594     #[cfg(target_arch = "x86_64")]
1595     ///
1596     /// Translates guest virtual address to guest physical address using the `KVM_TRANSLATE` ioctl.
1597     ///
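         /// # Example
         ///
         /// A minimal sketch (requires a KVM-capable host). The GVA used here
         /// is arbitrary; with no guest set up the translation may fail, so the
         /// result is inspected rather than unwrapped.
         ///
         /// ```rust
         /// # use hypervisor::kvm::KvmHypervisor;
         /// # use std::sync::Arc;
         /// let kvm = KvmHypervisor::new().unwrap();
         /// let hv = Arc::new(kvm);
         /// let vm = hv.create_vm().expect("new VM fd creation failed");
         /// vm.enable_split_irq().unwrap();
         /// let vcpu = vm.create_vcpu(0, None).unwrap();
         /// if let Ok((gpa, _)) = vcpu.translate_gva(0x10_0000, 0) {
         ///     println!("guest physical address: {:#x}", gpa);
         /// }
         /// ```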
1598     fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> {
1599         let tr = self
1600             .fd
1601             .translate_gva(gva)
1602             .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
1603         // tr.valid is set if the GVA is mapped to a valid GPA.
1604         match tr.valid {
1605             0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!(
1606                 "Invalid GVA: {:#x}",
1607                 gva
1608             ))),
1609             _ => Ok((tr.physical_address, 0)),
1610         }
1611     }
1612 
1613     ///
1614     /// Triggers the running of the current virtual CPU, returning an exit reason.
1615     ///
1616     fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
1617         match self.fd.run() {
1618             Ok(run) => match run {
1619                 #[cfg(target_arch = "x86_64")]
1620                 VcpuExit::IoIn(addr, data) => {
1621                     if let Some(vm_ops) = &self.vm_ops {
1622                         return vm_ops
1623                             .pio_read(addr.into(), data)
1624                             .map(|_| cpu::VmExit::Ignore)
1625                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1626                     }
1627 
1628                     Ok(cpu::VmExit::IoIn(addr, data))
1629                 }
1630                 #[cfg(target_arch = "x86_64")]
1631                 VcpuExit::IoOut(addr, data) => {
1632                     if let Some(vm_ops) = &self.vm_ops {
1633                         return vm_ops
1634                             .pio_write(addr.into(), data)
1635                             .map(|_| cpu::VmExit::Ignore)
1636                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1637                     }
1638 
1639                     Ok(cpu::VmExit::IoOut(addr, data))
1640                 }
1641                 #[cfg(target_arch = "x86_64")]
1642                 VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
1643                 #[cfg(target_arch = "x86_64")]
1644                 VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),
1645 
1646                 #[cfg(target_arch = "aarch64")]
1647                 VcpuExit::SystemEvent(event_type, flags) => {
1648                     use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
1649                     // On AArch64, when the VM is shut down, run() returns
1650                     // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN.
1651                     if event_type == KVM_SYSTEM_EVENT_RESET {
1652                         Ok(cpu::VmExit::Reset)
1653                     } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
1654                         Ok(cpu::VmExit::Shutdown)
1655                     } else {
1656                         Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1657                             "Unexpected system event with type 0x{:x}, flags 0x{:x?}",
1658                             event_type,
1659                             flags
1660                         )))
1661                     }
1662                 }
1663 
1664                 VcpuExit::MmioRead(addr, data) => {
1665                     if let Some(vm_ops) = &self.vm_ops {
1666                         return vm_ops
1667                             .mmio_read(addr, data)
1668                             .map(|_| cpu::VmExit::Ignore)
1669                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1670                     }
1671 
1672                     Ok(cpu::VmExit::MmioRead(addr, data))
1673                 }
1674                 VcpuExit::MmioWrite(addr, data) => {
1675                     if let Some(vm_ops) = &self.vm_ops {
1676                         return vm_ops
1677                             .mmio_write(addr, data)
1678                             .map(|_| cpu::VmExit::Ignore)
1679                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1680                     }
1681 
1682                     Ok(cpu::VmExit::MmioWrite(addr, data))
1683                 }
1684                 VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),
1685                 #[cfg(feature = "tdx")]
1686                 VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx),
1687                 VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug),
1688 
1689                 r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1690                     "Unexpected exit reason on vcpu run: {:?}",
1691                     r
1692                 ))),
1693             },
1694 
1695             Err(ref e) => match e.errno() {
1696                 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
1697                 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1698                     "VCPU error {:?}",
1699                     e
1700                 ))),
1701             },
1702         }
1703     }
1704 
1705     #[cfg(target_arch = "x86_64")]
1706     ///
1707     /// Let the guest know that it has been paused, which prevents
1708     /// potential soft lockups when it is resumed.
1709     ///
1710     fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
1711         if let Err(e) = self.fd.kvmclock_ctrl() {
1712             // The Linux kernel returns -EINVAL if the PV clock isn't yet initialised,
1713             // which could be because we're still in firmware or the guest doesn't
1714             // use KVM clock.
1715             if e.errno() != libc::EINVAL {
1716                 return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()));
1717             }
1718         }
1719 
1720         Ok(())
1721     }
1722 
1723     ///
1724     /// Sets debug registers to set hardware breakpoints and/or enable single step.
1725     ///
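         /// # Example
         ///
         /// A minimal sketch: install a single hardware breakpoint at an
         /// arbitrary guest address, without single-stepping. Hosts lacking
         /// guest-debug support return an error, so the result is not
         /// unwrapped here.
         ///
         /// ```rust
         /// # use hypervisor::kvm::KvmHypervisor;
         /// # use std::sync::Arc;
         /// # use vm_memory::GuestAddress;
         /// let kvm = KvmHypervisor::new().unwrap();
         /// let hv = Arc::new(kvm);
         /// let vm = hv.create_vm().expect("new VM fd creation failed");
         /// vm.enable_split_irq().unwrap();
         /// let vcpu = vm.create_vcpu(0, None).unwrap();
         /// let _ = vcpu.set_guest_debug(&[GuestAddress(0x10_0000)], false);
         /// ```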
1726     fn set_guest_debug(
1727         &self,
1728         addrs: &[vm_memory::GuestAddress],
1729         singlestep: bool,
1730     ) -> cpu::Result<()> {
1731         let mut dbg = kvm_guest_debug {
1732             #[cfg(target_arch = "x86_64")]
1733             control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
1734             #[cfg(target_arch = "aarch64")]
1735             control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW,
1736             ..Default::default()
1737         };
1738         if singlestep {
1739             dbg.control |= KVM_GUESTDBG_SINGLESTEP;
1740         }
1741 
1742         // Set the debug registers.
1743         // Here we assume that the number of addresses does not exceed what
1744         // `Hypervisor::get_guest_debug_hw_bps()` specifies.
1745         #[cfg(target_arch = "x86_64")]
1746         {
1747             // Set bits 9 and 10.
1748             // bit 9: GE (global exact breakpoint enable) flag.
1749             // bit 10: always 1.
1750             dbg.arch.debugreg[7] = 0x0600;
1751 
1752             for (i, addr) in addrs.iter().enumerate() {
1753                 dbg.arch.debugreg[i] = addr.0;
1754                 // Set global breakpoint enable flag
1755                 dbg.arch.debugreg[7] |= 2 << (i * 2);
1756             }
1757         }
1758         #[cfg(target_arch = "aarch64")]
1759         {
1760             for (i, addr) in addrs.iter().enumerate() {
1761                 // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2):
1762                 // bit 0: 1 (Enabled)
1763                 // bit 1~2: 0b11 (PMC = EL1/EL0)
1764                 // bit 5~8: 0b1111 (BAS = AArch64)
1765                 // others: 0
1766                 dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64;
1767                 // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3):
1768                 // bit 2~52: VA[2:52]
1769                 dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0;
1770             }
1771         }
1772         self.fd
1773             .set_guest_debug(&dbg)
1774             .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))
1775     }
1776 
1777     #[cfg(target_arch = "aarch64")]
1778     fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
1779         self.fd
1780             .vcpu_init(kvi)
1781             .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
1782     }
1783 
1784     ///
1785     /// Gets a list of the guest registers that are supported for the
1786     /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
1787     ///
1788     #[cfg(target_arch = "aarch64")]
1789     fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
1790         self.fd
1791             .get_reg_list(reg_list)
1792             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
1793     }
1794 
1795     ///
1796     /// Gets the value of a system register
1797     ///
1798     #[cfg(target_arch = "aarch64")]
1799     fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
1800         //
1801         // Arm Architecture Reference Manual defines the encoding of
1802         // AArch64 system registers, see
1803         // https://developer.arm.com/documentation/ddi0487 (chapter D12).
1804         // KVM defines its own ID for each AArch64 system register, which is
1805         // used when calling `KVM_GET/SET_ONE_REG` to access a system register
1806         // of a guest.
1807         // A mapping exists between the Arm standard encoding and the KVM ID.
1808         // This function takes the standard u32 ID as an input parameter,
1809         // converts it to the corresponding KVM ID, and calls `KVM_GET_ONE_REG`
1810         // to get the value of the system register.
1811         //
1812         let id: u64 = KVM_REG_ARM64
1813             | KVM_REG_SIZE_U64
1814             | KVM_REG_ARM64_SYSREG as u64
1815             | ((((sys_reg) >> 5)
1816                 & (KVM_REG_ARM64_SYSREG_OP0_MASK
1817                     | KVM_REG_ARM64_SYSREG_OP1_MASK
1818                     | KVM_REG_ARM64_SYSREG_CRN_MASK
1819                     | KVM_REG_ARM64_SYSREG_CRM_MASK
1820                     | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
1821         let mut bytes = [0_u8; 8];
1822         self.fd
1823             .get_one_reg(id, &mut bytes)
1824             .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
1825         Ok(u64::from_le_bytes(bytes))
1826     }
1827 
1828     ///
1829     /// Configure core registers for a given CPU.
1830     ///
1831     #[cfg(target_arch = "aarch64")]
1832     fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
1833         #[allow(non_upper_case_globals)]
1834         // PSR (Processor State Register) bits.
1835         // Taken from arch/arm64/include/uapi/asm/ptrace.h.
1836         const PSR_MODE_EL1h: u64 = 0x0000_0005;
1837         const PSR_F_BIT: u64 = 0x0000_0040;
1838         const PSR_I_BIT: u64 = 0x0000_0080;
1839         const PSR_A_BIT: u64 = 0x0000_0100;
1840         const PSR_D_BIT: u64 = 0x0000_0200;
1841         // Taken from arch/arm64/kvm/inject_fault.c.
1842         const PSTATE_FAULT_BITS_64: u64 =
1843             PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;
1844 
1845         let kreg_off = offset_of!(kvm_regs, regs);
1846 
1847         // Get the register index of the PSTATE (Processor State) register.
1848         let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
1849         self.fd
1850             .set_one_reg(
1851                 arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
1852                 &PSTATE_FAULT_BITS_64.to_le_bytes(),
1853             )
1854             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1855 
1856         // Other vCPUs are powered off initially awaiting PSCI wakeup.
1857         if cpu_id == 0 {
1858             // Setting the PC (Program Counter) to the current program address (kernel address).
1859             let pc = offset_of!(user_pt_regs, pc) + kreg_off;
1860             self.fd
1861                 .set_one_reg(
1862                     arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
1863                     &boot_ip.to_le_bytes(),
1864                 )
1865                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1866 
1867             // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
1868             // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
1869             // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
1870             // We are choosing to place it at the end of DRAM. See `get_fdt_addr`.
1871             let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
1872             self.fd
1873                 .set_one_reg(
1874                     arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
1875                     &fdt_start.to_le_bytes(),
1876                 )
1877                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1878         }
1879         Ok(())
1880     }
1881 
1882     #[cfg(target_arch = "x86_64")]
1883     ///
1884     /// Get the current CPU state
1885     ///
1886     /// Ordering requirements:
1887     ///
1888     /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
1889     /// vCPU/LAPIC state. As such, it must be done before almost everything
1890     /// else, otherwise we cannot restore everything and expect it to work.
1891     ///
1892     /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
1893     /// still running.
1894     ///
1895     /// KVM_GET_LAPIC may change state of LAPIC before returning it.
1896     ///
1897     /// GET_VCPU_EVENTS should probably be last to save. It looks as if it
1898     /// could be affected by internal state modifications of the other GET
1899     /// ioctls.
1900     ///
1901     /// SREGS saves/restores a pending interrupt, similar to what
1902     /// VCPU_EVENTS also does.
1903     ///
1904     /// GET_MSRS requires a pre-populated data structure to do something
1905     /// meaningful. For SET_MSRS it will then contain good data.
1906     ///
1907     /// # Example
1908     ///
1909     /// ```rust
1910     /// # use hypervisor::kvm::KvmHypervisor;
1911     /// # use std::sync::Arc;
1912     /// let kvm = KvmHypervisor::new().unwrap();
1913     /// let hv = Arc::new(kvm);
1914     /// let vm = hv.create_vm().expect("new VM fd creation failed");
1915     /// vm.enable_split_irq().unwrap();
1916     /// let vcpu = vm.create_vcpu(0, None).unwrap();
1917     /// let state = vcpu.state().unwrap();
1918     /// ```
1919     fn state(&self) -> cpu::Result<CpuState> {
1920         let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
1921         let mp_state = self.get_mp_state()?.into();
1922         let regs = self.get_regs()?;
1923         let sregs = self.get_sregs()?;
1924         let xsave = self.get_xsave()?;
1925         let xcrs = self.get_xcrs()?;
1926         let lapic_state = self.get_lapic()?;
1927         let fpu = self.get_fpu()?;
1928 
1929         // Try to get all MSRs based on the list previously retrieved from KVM.
1930         // If the number of MSRs obtained from GET_MSRS is different from the
1931         // expected amount, we fall back on a slower method, getting MSRs
1932         // by chunks. This is the only way to make sure we try to get as many
1933         // MSRs as possible, even if some MSRs are not supported.
1934         let mut msr_entries = self.msrs.clone();
1935 
1936         // Save extra MSRs if the Hyper-V synthetic interrupt controller is
1937         // emulated.
1938         if self.hyperv_synic.load(Ordering::Acquire) {
1939             let hyperv_synic_msrs = vec![
1940                 0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
1941                 0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
1942                 0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
1943                 0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
1944                 0x400000b5, 0x400000b6, 0x400000b7,
1945             ];
1946             for index in hyperv_synic_msrs {
1947                 let msr = kvm_msr_entry {
1948                     index,
1949                     ..Default::default()
1950                 };
1951                 msr_entries.push(msr.into());
1952             }
1953         }
1954 
1955         let expected_num_msrs = msr_entries.len();
1956         let num_msrs = self.get_msrs(&mut msr_entries)?;
1957         let msrs = if num_msrs != expected_num_msrs {
1958             let mut faulty_msr_index = num_msrs;
1959             let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();
1960 
1961             loop {
1962                 warn!(
1963                     "Detected faulty MSR 0x{:x} while getting MSRs",
1964                     msr_entries[faulty_msr_index].index
1965                 );
1966 
1967                 // Skip the first bad MSR
1968                 let start_pos = faulty_msr_index + 1;
1969 
1970                 let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
1971                 let num_msrs = self.get_msrs(&mut sub_msr_entries)?;
1972 
1973                 msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);
1974 
1975                 if num_msrs == sub_msr_entries.len() {
1976                     break;
1977                 }
1978 
1979                 faulty_msr_index = start_pos + num_msrs;
1980             }
1981 
1982             msr_entries_tmp
1983         } else {
1984             msr_entries
1985         };
1986 
1987         let vcpu_events = self.get_vcpu_events()?;
1988         let tsc_khz = self.tsc_khz()?;
1989 
1990         Ok(VcpuKvmState {
1991             cpuid,
1992             msrs,
1993             vcpu_events,
1994             regs: regs.into(),
1995             sregs: sregs.into(),
1996             fpu,
1997             lapic_state,
1998             xsave,
1999             xcrs,
2000             mp_state,
2001             tsc_khz,
2002         }
2003         .into())
2004     }
2005 
2006     ///
2007     /// Get the current AArch64 CPU state
2008     ///
2009     #[cfg(target_arch = "aarch64")]
2010     fn state(&self) -> cpu::Result<CpuState> {
2011         let mut state = VcpuKvmState {
2012             mp_state: self.get_mp_state()?.into(),
2013             ..Default::default()
2014         };
2015         // Get core registers
2016         state.core_regs = self.get_regs()?;
2017 
2018         // Get system registers.
2019         // Call KVM_GET_REG_LIST to get all registers available to the guest.
2020         // For ArmV8 there are around 500 registers.
2021         let mut sys_regs: Vec<Register> = Vec::new();
2022         let mut reg_list = RegList::new(500).unwrap();
2023         self.fd
2024             .get_reg_list(&mut reg_list)
2025             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;
2026 
2027         // At this point reg_list should contain: core registers and system
2028         // registers.
2029         // The register list contains the number of registers and their ids. We
2030         // will need to call KVM_GET_ONE_REG on each id in order to save all of
2031         // them. We carve out from the list the core registers, which are
2032         // represented in the kernel by the kvm_regs structure and for which we
2033         // can calculate the id based on the offset in the structure.
2034         reg_list.retain(|regid| is_system_register(*regid));
2035 
2036         // Now, for the rest of the registers left in the previously fetched
2037         // register list, we are simply calling KVM_GET_ONE_REG.
2038         let indices = reg_list.as_slice();
2039         for index in indices.iter() {
2040             let mut bytes = [0_u8; 8];
2041             self.fd
2042                 .get_one_reg(*index, &mut bytes)
2043                 .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
2044             sys_regs.push(kvm_bindings::kvm_one_reg {
2045                 id: *index,
2046                 addr: u64::from_le_bytes(bytes),
2047             });
2048         }
2049 
2050         state.sys_regs = sys_regs;
2051 
2052         Ok(state.into())
2053     }
2054 
2055     #[cfg(target_arch = "x86_64")]
2056     ///
2057     /// Restore the previously saved CPU state
2058     ///
2059     /// Ordering requirements:
2060     ///
2061     /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
2062     /// still running.
2063     ///
2064     /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
2065     /// if we ever change the BSP, we have to do that before restoring anything.
2066     /// The same seems to be true for CPUID stuff.
2067     ///
2068     /// SREGS saves/restores a pending interrupt, similar to what
2069     /// VCPU_EVENTS also does.
2070     ///
2071     /// SET_REGS clears pending exceptions unconditionally, thus, it must be
2072     /// done before SET_VCPU_EVENTS, which restores it.
2073     ///
2074     /// SET_LAPIC must come after SET_SREGS, because the latter restores
2075     /// the apic base msr.
2076     ///
2077     /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
2078     /// only restores successfully, when the LAPIC is correctly configured.
2079     ///
2080     /// Arguments: CpuState
2081     /// # Example
2082     ///
2083     /// ```rust
2084     /// # use hypervisor::kvm::KvmHypervisor;
2085     /// # use std::sync::Arc;
2086     /// let kvm = KvmHypervisor::new().unwrap();
2087     /// let hv = Arc::new(kvm);
2088     /// let vm = hv.create_vm().expect("new VM fd creation failed");
2089     /// vm.enable_split_irq().unwrap();
2090     /// let vcpu = vm.create_vcpu(0, None).unwrap();
2091     /// let state = vcpu.state().unwrap();
2092     /// vcpu.set_state(&state).unwrap();
2093     /// ```
2094     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
2095         let state: VcpuKvmState = state.clone().into();
2096         self.set_cpuid2(&state.cpuid)?;
2097         self.set_mp_state(state.mp_state.into())?;
2098         self.set_regs(&state.regs.into())?;
2099         self.set_sregs(&state.sregs.into())?;
2100         self.set_xsave(&state.xsave)?;
2101         self.set_xcrs(&state.xcrs)?;
2102         self.set_lapic(&state.lapic_state)?;
2103         self.set_fpu(&state.fpu)?;
2104 
2105         if let Some(freq) = state.tsc_khz {
2106             self.set_tsc_khz(freq)?;
2107         }
2108 
2109         // Try to set all MSRs previously stored.
2110         // If the number of MSRs set from SET_MSRS is different from the
2111         // expected amount, we fall back on a slower method, setting MSRs
2112         // by chunks. This is the only way to make sure we try to set as many
2113         // MSRs as possible, even if some MSRs are not supported.
2114         let expected_num_msrs = state.msrs.len();
2115         let num_msrs = self.set_msrs(&state.msrs)?;
2116         if num_msrs != expected_num_msrs {
2117             let mut faulty_msr_index = num_msrs;
2118 
2119             loop {
2120                 warn!(
2121                     "Detected faulty MSR 0x{:x} while setting MSRs",
2122                     state.msrs[faulty_msr_index].index
2123                 );
2124 
2125                 // Skip the first bad MSR
2126                 let start_pos = faulty_msr_index + 1;
2127 
2128                 let sub_msr_entries = state.msrs[start_pos..].to_vec();
2129 
2130                 let num_msrs = self.set_msrs(&sub_msr_entries)?;
2131 
2132                 if num_msrs == sub_msr_entries.len() {
2133                     break;
2134                 }
2135 
2136                 faulty_msr_index = start_pos + num_msrs;
2137             }
2138         }
2139 
2140         self.set_vcpu_events(&state.vcpu_events)?;
2141 
2142         Ok(())
2143     }
2144 
2145     ///
2146     /// Restore the previously saved AArch64 CPU state
2147     ///
2148     #[cfg(target_arch = "aarch64")]
2149     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
2150         let state: VcpuKvmState = state.clone().into();
2151         // Set core registers
2152         self.set_regs(&state.core_regs)?;
2153         // Set system registers
2154         for reg in &state.sys_regs {
2155             self.fd
2156                 .set_one_reg(reg.id, &reg.addr.to_le_bytes())
2157                 .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
2158         }
2159 
2160         self.set_mp_state(state.mp_state.into())?;
2161 
2162         Ok(())
2163     }
2164 
2165     ///
2166     /// Initialize TDX for this CPU
2167     ///
2168     #[cfg(feature = "tdx")]
2169     fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
2170         tdx_command(&self.fd.as_raw_fd(), TdxCommand::InitVcpu, 0, hob_address)
2171             .map_err(cpu::HypervisorCpuError::InitializeTdx)
2172     }
2173 
2174     ///
2175     /// Set the "immediate_exit" state
2176     ///
2177     fn set_immediate_exit(&self, exit: bool) {
2178         self.fd.set_kvm_immediate_exit(exit.into());
2179     }
2180 
2181     ///
2182     /// Returns the details about TDX exit reason
2183     /// Returns the details about the TDX exit reason
2184     #[cfg(feature = "tdx")]
2185     fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
2186         let kvm_run = self.fd.get_kvm_run();
2187         // SAFETY: accessing a union field in a valid structure
2188         let tdx_vmcall = unsafe {
2189             &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
2190                 as *mut KvmTdxExit))
2191                 .u
2192                 .vmcall
2193         };
2194 
2195         tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;
2196 
2197         if tdx_vmcall.type_ != 0 {
2198             return Err(cpu::HypervisorCpuError::UnknownTdxVmCall);
2199         }
2200 
2201         match tdx_vmcall.subfunction {
2202             TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
2203             TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
2204                 Ok(TdxExitDetails::SetupEventNotifyInterrupt)
2205             }
2206             _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
2207         }
2208     }
2209 
2210     ///
2211     /// Set the status code for TDX exit
2212     ///
2213     #[cfg(feature = "tdx")]
2214     fn set_tdx_status(&mut self, status: TdxExitStatus) {
2215         let kvm_run = self.fd.get_kvm_run();
2216         // SAFETY: accessing a union field in a valid structure
2217         let tdx_vmcall = unsafe {
2218             &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
2219                 as *mut KvmTdxExit))
2220                 .u
2221                 .vmcall
2222         };
2223 
2224         tdx_vmcall.status_code = match status {
2225             TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
2226             TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
2227         };
2228     }
2229 
2230     #[cfg(target_arch = "x86_64")]
2231     ///
2232     /// Return the list of initial MSR entries for a VCPU
2233     ///
2234     fn boot_msr_entries(&self) -> Vec<MsrEntry> {
2235         use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};
2236 
2237         [
2238             msr!(msr_index::MSR_IA32_SYSENTER_CS),
2239             msr!(msr_index::MSR_IA32_SYSENTER_ESP),
2240             msr!(msr_index::MSR_IA32_SYSENTER_EIP),
2241             msr!(msr_index::MSR_STAR),
2242             msr!(msr_index::MSR_CSTAR),
2243             msr!(msr_index::MSR_LSTAR),
2244             msr!(msr_index::MSR_KERNEL_GS_BASE),
2245             msr!(msr_index::MSR_SYSCALL_MASK),
2246             msr!(msr_index::MSR_IA32_TSC),
2247             msr_data!(
2248                 msr_index::MSR_IA32_MISC_ENABLE,
2249                 msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
2250             ),
2251             msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
2252         ]
2253         .to_vec()
2254     }
2255 
2256     #[cfg(target_arch = "aarch64")]
2257     fn has_pmu_support(&self) -> bool {
2258         let cpu_attr = kvm_bindings::kvm_device_attr {
2259             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
2260             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
2261             addr: 0x0,
2262             flags: 0,
2263         };
2264         self.fd.has_device_attr(&cpu_attr).is_ok()
2265     }
2266 
2267     #[cfg(target_arch = "aarch64")]
2268     fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
2269         let cpu_attr = kvm_bindings::kvm_device_attr {
2270             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
2271             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
2272             addr: 0x0,
2273             flags: 0,
2274         };
2275         let cpu_attr_irq = kvm_bindings::kvm_device_attr {
2276             group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
2277             attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
2278             addr: &irq as *const u32 as u64,
2279             flags: 0,
2280         };
2281         self.fd
2282             .set_device_attr(&cpu_attr_irq)
2283             .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
2284         self.fd
2285             .set_device_attr(&cpu_attr)
2286             .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
2287     }
2288 
2289     #[cfg(target_arch = "x86_64")]
2290     ///
2291     /// Get the frequency of the TSC if available
2292     ///
2293     fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
2294         match self.fd.get_tsc_khz() {
2295             Err(e) => {
2296                 if e.errno() == libc::EIO {
2297                     Ok(None)
2298                 } else {
2299                     Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
2300                 }
2301             }
2302             Ok(v) => Ok(Some(v)),
2303         }
2304     }
2305 
2306     #[cfg(target_arch = "x86_64")]
2307     ///
2308     /// Set the frequency of the TSC if available
2309     ///
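         /// # Example
         ///
         /// A minimal sketch mirroring what `set_state()` does: read the
         /// current TSC frequency and write it back only if the capability is
         /// available.
         ///
         /// ```rust
         /// # use hypervisor::kvm::KvmHypervisor;
         /// # use std::sync::Arc;
         /// let kvm = KvmHypervisor::new().unwrap();
         /// let hv = Arc::new(kvm);
         /// let vm = hv.create_vm().expect("new VM fd creation failed");
         /// vm.enable_split_irq().unwrap();
         /// let vcpu = vm.create_vcpu(0, None).unwrap();
         /// if let Some(freq) = vcpu.tsc_khz().unwrap() {
         ///     vcpu.set_tsc_khz(freq).unwrap();
         /// }
         /// ```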
2310     fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
2311         match self.fd.set_tsc_khz(freq) {
2312             Err(e) => {
2313                 if e.errno() == libc::EIO {
2314                     Ok(())
2315                 } else {
2316                     Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
2317                 }
2318             }
2319             Ok(_) => Ok(()),
2320         }
2321     }
2322 
2323     #[cfg(target_arch = "x86_64")]
2324     ///
2325     /// Trigger NMI interrupt
2326     ///
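         /// # Example
         ///
         /// A minimal sketch; whether `KVM_NMI` can be used depends on the host
         /// and irqchip configuration, so the result is not unwrapped here.
         ///
         /// ```rust
         /// # use hypervisor::kvm::KvmHypervisor;
         /// # use std::sync::Arc;
         /// let kvm = KvmHypervisor::new().unwrap();
         /// let hv = Arc::new(kvm);
         /// let vm = hv.create_vm().expect("new VM fd creation failed");
         /// vm.enable_split_irq().unwrap();
         /// let vcpu = vm.create_vcpu(0, None).unwrap();
         /// let _ = vcpu.nmi();
         /// ```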
2327     fn nmi(&self) -> cpu::Result<()> {
2328         match self.fd.nmi() {
2329             Err(e) => {
2330                 if e.errno() == libc::EIO {
2331                     Ok(())
2332                 } else {
2333                     Err(cpu::HypervisorCpuError::Nmi(e.into()))
2334                 }
2335             }
2336             Ok(_) => Ok(()),
2337         }
2338     }
2339 }
2340 
2341 impl KvmVcpu {
2342     #[cfg(target_arch = "x86_64")]
2343     ///
2344     /// X86 specific call that returns the vcpu's current "xsave struct".
2345     ///
2346     fn get_xsave(&self) -> cpu::Result<XsaveState> {
2347         Ok(self
2348             .fd
2349             .get_xsave()
2350             .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
2351             .into())
2352     }
2353 
2354     #[cfg(target_arch = "x86_64")]
2355     ///
2356     /// X86 specific call that sets the vcpu's current "xsave struct".
2357     ///
2358     fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
2359         let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
2360         self.fd
2361             .set_xsave(&xsave)
2362             .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
2363     }
2364 
2365     #[cfg(target_arch = "x86_64")]
2366     ///
2367     /// X86 specific call that returns the vcpu's current "xcrs".
2368     ///
2369     fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
2370         self.fd
2371             .get_xcrs()
2372             .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
2373     }
2374 
2375     #[cfg(target_arch = "x86_64")]
2376     ///
2377     /// X86 specific call that sets the vcpu's current "xcrs".
2378     ///
2379     fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
2380         self.fd
2381             .set_xcrs(xcrs)
2382             .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
2383     }
2384 
2385     #[cfg(target_arch = "x86_64")]
2386     ///
2387     /// Returns currently pending exceptions, interrupts, and NMIs as well as related
2388     /// states of the vcpu.
2389     ///
2390     fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
2391         self.fd
2392             .get_vcpu_events()
2393             .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
2394     }
2395 
2396     #[cfg(target_arch = "x86_64")]
2397     ///
2398     /// Sets pending exceptions, interrupts, and NMIs as well as related states
2399     /// of the vcpu.
2400     ///
2401     fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
2402         self.fd
2403             .set_vcpu_events(events)
2404             .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
2405     }
2406 }
2407