xref: /cloud-hypervisor/hypervisor/src/kvm/mod.rs (revision 7d7bfb2034001d4cb15df2ddc56d2d350c8da30f)
1 // Copyright © 2019 Intel Corporation
2 //
3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
4 //
5 // Copyright © 2020, Microsoft Corporation
6 //
7 // Copyright 2018-2019 CrowdStrike, Inc.
8 //
9 //
10 
11 #[cfg(target_arch = "aarch64")]
12 pub use crate::aarch64::{
13     check_required_kvm_extensions, is_system_register, VcpuInit, VcpuKvmState as CpuState,
14     MPIDR_EL1,
15 };
16 use crate::cpu;
17 use crate::device;
18 use crate::hypervisor;
19 use crate::vec_with_array_field;
20 use crate::vm::{self, VmmOps};
21 #[cfg(target_arch = "aarch64")]
22 use crate::{arm64_core_reg_id, offset__of};
23 use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
24 use serde_derive::{Deserialize, Serialize};
25 use std::collections::HashMap;
26 #[cfg(target_arch = "aarch64")]
27 use std::convert::TryInto;
28 #[cfg(target_arch = "x86_64")]
29 use std::fs::File;
30 use std::os::unix::io::{AsRawFd, RawFd};
31 use std::result;
32 #[cfg(target_arch = "x86_64")]
33 use std::sync::atomic::{AtomicBool, Ordering};
34 use std::sync::{Arc, RwLock};
35 use vmm_sys_util::eventfd::EventFd;
36 // x86_64 dependencies
37 #[cfg(target_arch = "x86_64")]
38 pub mod x86_64;
39 #[cfg(target_arch = "x86_64")]
40 use crate::arch::x86::NUM_IOAPIC_PINS;
41 #[cfg(target_arch = "aarch64")]
42 use aarch64::{RegList, Register, StandardRegisters};
43 #[cfg(target_arch = "x86_64")]
44 use kvm_bindings::{
45     kvm_enable_cap, kvm_guest_debug, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC,
46     KVM_CAP_SPLIT_IRQCHIP, KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_GUESTDBG_USE_HW_BP,
47 };
48 #[cfg(target_arch = "x86_64")]
49 use x86_64::{check_required_kvm_extensions, FpuState, SpecialRegisters, StandardRegisters};
50 #[cfg(target_arch = "x86_64")]
51 pub use x86_64::{
52     CpuId, CpuIdEntry, ExtendedControlRegisters, LapicState, MsrEntries, VcpuKvmState as CpuState,
53     Xsave, CPUID_FLAG_VALID_INDEX,
54 };
55 // aarch64 dependencies
56 #[cfg(target_arch = "aarch64")]
57 pub mod aarch64;
58 pub use kvm_bindings;
59 #[cfg(feature = "tdx")]
60 use kvm_bindings::KVMIO;
61 pub use kvm_bindings::{
62     kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_irq_routing, kvm_irq_routing_entry,
63     kvm_userspace_memory_region, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
64     KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
65 };
66 #[cfg(target_arch = "aarch64")]
67 use kvm_bindings::{
68     kvm_regs, user_fpsimd_state, user_pt_regs, KVM_NR_SPSR, KVM_REG_ARM64, KVM_REG_ARM_CORE,
69     KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
70 };
71 pub use kvm_ioctls;
72 pub use kvm_ioctls::{Cap, Kvm};
73 #[cfg(target_arch = "aarch64")]
74 use std::mem;
75 use thiserror::Error;
76 #[cfg(feature = "tdx")]
77 use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_expr, ioctl_ioc_nr, ioctl_iowr_nr};
78 ///
79 /// Export generically-named wrappers of kvm-bindings for Unix-based platforms
80 ///
81 pub use {
82     kvm_bindings::kvm_clock_data as ClockData, kvm_bindings::kvm_create_device as CreateDevice,
83     kvm_bindings::kvm_device_attr as DeviceAttr,
84     kvm_bindings::kvm_irq_routing_entry as IrqRoutingEntry, kvm_bindings::kvm_mp_state as MpState,
85     kvm_bindings::kvm_run, kvm_bindings::kvm_userspace_memory_region as MemoryRegion,
86     kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::DeviceFd, kvm_ioctls::IoEventAddress,
87     kvm_ioctls::VcpuExit,
88 };
89 
// Capability number for KVM_CAP_SGX_ATTRIBUTE, used with KVM_ENABLE_CAP in
// `enable_sgx_attribute` below. Defined locally; presumably not yet exported
// by the kvm-bindings crate in use — confirm when updating kvm-bindings.
#[cfg(target_arch = "x86_64")]
const KVM_CAP_SGX_ATTRIBUTE: u32 = 196;

// KVM exit reason number reported for TDX-specific VM exits.
#[cfg(feature = "tdx")]
const KVM_EXIT_TDX: u32 = 35;
// TDG.VP.VMCALL leaf sub-function identifiers (see `TdxExitDetails`).
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004;
// TDG.VP.VMCALL completion status codes (see `TdxExitStatus`).
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SUCCESS: u64 = 0;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000;

// Generate the ioctl number for KVM_MEMORY_ENCRYPT_OP (KVM ioctl space,
// command 0xba, read/write a c_ulong). This single ioctl is the entry point
// for every TDX command issued by `tdx_command`.
#[cfg(feature = "tdx")]
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);
106 
/// Sub-commands multiplexed through the KVM_MEMORY_ENCRYPT_OP ioctl to drive
/// the TDX VM life cycle (see `tdx_command`). The discriminants start at 0 and
/// are passed verbatim to the kernel, so they must match the kernel's TDX
/// command numbering — confirm against the TDX-enabled kernel headers.
#[cfg(feature = "tdx")]
#[repr(u32)]
enum TdxCommand {
    Capabilities = 0,
    InitVm,
    InitVcpu,
    InitMemRegion,
    Finalize,
}
116 
/// Reason a TDX guest exited with a TDG.VP.VMCALL, decoded from the
/// TDG_VP_VMCALL_* leaf constants above.
#[cfg(feature = "tdx")]
pub enum TdxExitDetails {
    /// TDG_VP_VMCALL_GET_QUOTE (0x10002)
    GetQuote,
    /// TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT (0x10004)
    SetupEventNotifyInterrupt,
}
122 
/// Status to report back to a TDX guest for a TDG.VP.VMCALL, corresponding to
/// the TDG_VP_VMCALL_SUCCESS / TDG_VP_VMCALL_INVALID_OPERAND constants above.
#[cfg(feature = "tdx")]
pub enum TdxExitStatus {
    Success,
    InvalidOperand,
}
128 
// Fixed capacity of the `cpuid_configs` array in `TdxCapabilities`.
#[cfg(feature = "tdx")]
const TDX_MAX_NR_CPUID_CONFIGS: usize = 6;
131 
/// One CPUID leaf/sub-leaf configuration entry reported by the TDX module as
/// part of `TdxCapabilities`. `#[repr(C)]` because the kernel fills this
/// structure directly through the Capabilities command.
#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCpuidConfig {
    pub leaf: u32,
    pub sub_leaf: u32,
    pub eax: u32,
    pub ebx: u32,
    pub ecx: u32,
    pub edx: u32,
}
143 
/// TDX capabilities returned by the kernel via the Capabilities command (see
/// `KvmHypervisor::tdx_capabilities`). `#[repr(C)]` so the kernel can write
/// into it directly; the caller sets `nr_cpuid_configs` to the capacity of
/// `cpuid_configs` before issuing the ioctl.
#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCapabilities {
    pub attrs_fixed0: u64,
    pub attrs_fixed1: u64,
    pub xfam_fixed0: u64,
    pub xfam_fixed1: u64,
    pub nr_cpuid_configs: u32,
    pub padding: u32,
    pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS],
}
156 
/// KVM-specific VM state snapshot. Currently empty: KVM has no VM-level state
/// to save/restore here, but the type keeps the `Vm::state`/`set_state`
/// interface uniform across hypervisors.
#[derive(Clone, Copy, Debug, PartialEq, Deserialize, Serialize)]
pub struct KvmVmState {}

// Generic alias used by hypervisor-agnostic code.
pub use KvmVmState as VmState;
161 
/// Bookkeeping entry for a memory slot that requested dirty-page logging.
/// `create_user_memory_region` records these so `start_dirty_log` /
/// `stop_dirty_log` can later re-issue KVM_SET_USER_MEMORY_REGION with the
/// KVM_MEM_LOG_DIRTY_PAGES flag toggled.
struct KvmDirtyLogSlot {
    // Fields mirror kvm_userspace_memory_region (minus flags).
    slot: u32,
    guest_phys_addr: u64,
    memory_size: u64,
    userspace_addr: u64,
}
168 
/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    // Shared VM file descriptor; also cloned into each vCPU at creation.
    fd: Arc<VmFd>,
    // Template list of MSR entries (indices only) cloned into each vCPU.
    #[cfg(target_arch = "x86_64")]
    msrs: MsrEntries,
    state: KvmVmState,
    // Slots that asked for dirty-page logging; keyed by slot number. See
    // `KvmDirtyLogSlot`.
    dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>,
}
177 
178 ///
179 /// Implementation of Vm trait for KVM
180 /// Example:
181 /// #[cfg(feature = "kvm")]
182 /// extern crate hypervisor
183 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
184 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
185 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
186 /// vm.set/get().unwrap()
187 ///
188 impl vm::Vm for KvmVm {
189     #[cfg(target_arch = "x86_64")]
190     ///
191     /// Sets the address of the one-page region in the VM's address space.
192     ///
193     fn set_identity_map_address(&self, address: u64) -> vm::Result<()> {
194         self.fd
195             .set_identity_map_address(address)
196             .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into()))
197     }
198     #[cfg(target_arch = "x86_64")]
199     ///
200     /// Sets the address of the three-page region in the VM's address space.
201     ///
202     fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
203         self.fd
204             .set_tss_address(offset)
205             .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
206     }
207     ///
208     /// Creates an in-kernel interrupt controller.
209     ///
210     fn create_irq_chip(&self) -> vm::Result<()> {
211         self.fd
212             .create_irq_chip()
213             .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
214     }
215     ///
216     /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
217     ///
218     fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
219         self.fd
220             .register_irqfd(fd, gsi)
221             .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
222     }
223     ///
224     /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
225     ///
226     fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
227         self.fd
228             .unregister_irqfd(fd, gsi)
229             .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
230     }
231     ///
232     /// Creates a VcpuFd object from a vcpu RawFd.
233     ///
234     fn create_vcpu(
235         &self,
236         id: u8,
237         vmmops: Option<Arc<dyn VmmOps>>,
238     ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
239         let vc = self
240             .fd
241             .create_vcpu(id as u64)
242             .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
243         let vcpu = KvmVcpu {
244             fd: vc,
245             #[cfg(target_arch = "x86_64")]
246             msrs: self.msrs.clone(),
247             vmmops,
248             #[cfg(target_arch = "x86_64")]
249             hyperv_synic: AtomicBool::new(false),
250         };
251         Ok(Arc::new(vcpu))
252     }
253     ///
254     /// Registers an event to be signaled whenever a certain address is written to.
255     ///
256     fn register_ioevent(
257         &self,
258         fd: &EventFd,
259         addr: &IoEventAddress,
260         datamatch: Option<vm::DataMatch>,
261     ) -> vm::Result<()> {
262         if let Some(dm) = datamatch {
263             match dm {
264                 vm::DataMatch::DataMatch32(kvm_dm32) => self
265                     .fd
266                     .register_ioevent(fd, addr, kvm_dm32)
267                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
268                 vm::DataMatch::DataMatch64(kvm_dm64) => self
269                     .fd
270                     .register_ioevent(fd, addr, kvm_dm64)
271                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
272             }
273         } else {
274             self.fd
275                 .register_ioevent(fd, addr, NoDatamatch)
276                 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
277         }
278     }
279     ///
280     /// Unregisters an event from a certain address it has been previously registered to.
281     ///
282     fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
283         self.fd
284             .unregister_ioevent(fd, addr, NoDatamatch)
285             .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
286     }
287     ///
288     /// Sets the GSI routing table entries, overwriting any previously set
289     /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
290     ///
291     fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
292         let mut irq_routing =
293             vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
294         irq_routing[0].nr = entries.len() as u32;
295         irq_routing[0].flags = 0;
296 
297         // SAFETY: irq_routing initialized with entries.len() and now it is being turned into
298         // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
299         // everything from entries.
300         unsafe {
301             let entries_slice: &mut [kvm_irq_routing_entry] =
302                 irq_routing[0].entries.as_mut_slice(entries.len());
303             entries_slice.copy_from_slice(entries);
304         }
305 
306         self.fd
307             .set_gsi_routing(&irq_routing[0])
308             .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
309     }
310     ///
311     /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
312     ///
313     fn make_user_memory_region(
314         &self,
315         slot: u32,
316         guest_phys_addr: u64,
317         memory_size: u64,
318         userspace_addr: u64,
319         readonly: bool,
320         log_dirty_pages: bool,
321     ) -> MemoryRegion {
322         MemoryRegion {
323             slot,
324             guest_phys_addr,
325             memory_size,
326             userspace_addr,
327             flags: if readonly { KVM_MEM_READONLY } else { 0 }
328                 | if log_dirty_pages {
329                     KVM_MEM_LOG_DIRTY_PAGES
330                 } else {
331                     0
332                 },
333         }
334     }
335     ///
336     /// Creates a guest physical memory region.
337     ///
338     fn create_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
339         let mut region = user_memory_region;
340 
341         if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 {
342             if (region.flags & KVM_MEM_READONLY) != 0 {
343                 return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!(
344                     "Error creating regions with both 'dirty-pages-log' and 'read-only'."
345                 )));
346             }
347 
348             // Keep track of the regions that need dirty pages log
349             self.dirty_log_slots.write().unwrap().insert(
350                 region.slot,
351                 KvmDirtyLogSlot {
352                     slot: region.slot,
353                     guest_phys_addr: region.guest_phys_addr,
354                     memory_size: region.memory_size,
355                     userspace_addr: region.userspace_addr,
356                 },
357             );
358 
359             // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`.
360             // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`.
361             region.flags = 0;
362         }
363 
364         // SAFETY: Safe because guest regions are guaranteed not to overlap.
365         unsafe {
366             self.fd
367                 .set_user_memory_region(region)
368                 .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
369         }
370     }
371     ///
372     /// Removes a guest physical memory region.
373     ///
374     fn remove_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
375         let mut region = user_memory_region;
376 
377         // Remove the corresponding entry from "self.dirty_log_slots" if needed
378         self.dirty_log_slots.write().unwrap().remove(&region.slot);
379 
380         // Setting the size to 0 means "remove"
381         region.memory_size = 0;
382         // SAFETY: Safe because guest regions are guaranteed not to overlap.
383         unsafe {
384             self.fd
385                 .set_user_memory_region(region)
386                 .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))
387         }
388     }
389     ///
390     /// Creates an emulated device in the kernel.
391     ///
392     /// See the documentation for `KVM_CREATE_DEVICE`.
393     fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> {
394         let fd = self
395             .fd
396             .create_device(device)
397             .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
398         let device = KvmDevice { fd };
399         Ok(Arc::new(device))
400     }
401     ///
402     /// Returns the preferred CPU target type which can be emulated by KVM on underlying host.
403     ///
404     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
405     fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
406         self.fd
407             .get_preferred_target(kvi)
408             .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))
409     }
410     #[cfg(target_arch = "x86_64")]
411     fn enable_split_irq(&self) -> vm::Result<()> {
412         // Create split irqchip
413         // Only the local APIC is emulated in kernel, both PICs and IOAPIC
414         // are not.
415         let mut cap = kvm_enable_cap {
416             cap: KVM_CAP_SPLIT_IRQCHIP,
417             ..Default::default()
418         };
419         cap.args[0] = NUM_IOAPIC_PINS as u64;
420         self.fd
421             .enable_cap(&cap)
422             .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
423         Ok(())
424     }
425     #[cfg(target_arch = "x86_64")]
426     fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> {
427         let mut cap = kvm_enable_cap {
428             cap: KVM_CAP_SGX_ATTRIBUTE,
429             ..Default::default()
430         };
431         cap.args[0] = file.as_raw_fd() as u64;
432         self.fd
433             .enable_cap(&cap)
434             .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?;
435         Ok(())
436     }
437     /// Retrieve guest clock.
438     #[cfg(target_arch = "x86_64")]
439     fn get_clock(&self) -> vm::Result<ClockData> {
440         self.fd
441             .get_clock()
442             .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))
443     }
444     /// Set guest clock.
445     #[cfg(target_arch = "x86_64")]
446     fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
447         self.fd
448             .set_clock(data)
449             .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
450     }
451     /// Checks if a particular `Cap` is available.
452     fn check_extension(&self, c: Cap) -> bool {
453         self.fd.check_extension(c)
454     }
455     /// Create a device that is used for passthrough
456     fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
457         let mut vfio_dev = kvm_create_device {
458             type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
459             fd: 0,
460             flags: 0,
461         };
462 
463         self.create_device(&mut vfio_dev)
464             .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
465     }
466     ///
467     /// Get the Vm state. Return VM specific data
468     ///
469     fn state(&self) -> vm::Result<VmState> {
470         Ok(self.state)
471     }
472     ///
473     /// Set the VM state
474     ///
475     fn set_state(&self, _state: VmState) -> vm::Result<()> {
476         Ok(())
477     }
478 
479     ///
480     /// Start logging dirty pages
481     ///
482     fn start_dirty_log(&self) -> vm::Result<()> {
483         let dirty_log_slots = self.dirty_log_slots.read().unwrap();
484         for (_, s) in dirty_log_slots.iter() {
485             let region = MemoryRegion {
486                 slot: s.slot,
487                 guest_phys_addr: s.guest_phys_addr,
488                 memory_size: s.memory_size,
489                 userspace_addr: s.userspace_addr,
490                 flags: KVM_MEM_LOG_DIRTY_PAGES,
491             };
492             // SAFETY: Safe because guest regions are guaranteed not to overlap.
493             unsafe {
494                 self.fd
495                     .set_user_memory_region(region)
496                     .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
497             }
498         }
499 
500         Ok(())
501     }
502 
503     ///
504     /// Stop logging dirty pages
505     ///
506     fn stop_dirty_log(&self) -> vm::Result<()> {
507         let dirty_log_slots = self.dirty_log_slots.read().unwrap();
508         for (_, s) in dirty_log_slots.iter() {
509             let region = MemoryRegion {
510                 slot: s.slot,
511                 guest_phys_addr: s.guest_phys_addr,
512                 memory_size: s.memory_size,
513                 userspace_addr: s.userspace_addr,
514                 flags: 0,
515             };
516             // SAFETY: Safe because guest regions are guaranteed not to overlap.
517             unsafe {
518                 self.fd
519                     .set_user_memory_region(region)
520                     .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
521             }
522         }
523 
524         Ok(())
525     }
526 
527     ///
528     /// Get dirty pages bitmap (one bit per page)
529     ///
530     fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
531         self.fd
532             .get_dirty_log(slot, memory_size as usize)
533             .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
534     }
535 
536     ///
537     /// Initialize TDX for this VM
538     ///
539     #[cfg(feature = "tdx")]
540     fn tdx_init(&self, cpuid: &CpuId, max_vcpus: u32) -> vm::Result<()> {
541         #[repr(C)]
542         struct TdxInitVm {
543             max_vcpus: u32,
544             tsc_khz: u32,
545             attributes: u64,
546             cpuid: u64,
547             mrconfigid: [u64; 6],
548             mrowner: [u64; 6],
549             mrownerconfig: [u64; 6],
550             reserved: [u64; 43],
551         }
552         let data = TdxInitVm {
553             max_vcpus,
554             tsc_khz: 0,
555             attributes: 0,
556             cpuid: cpuid.as_fam_struct_ptr() as u64,
557             mrconfigid: [0; 6],
558             mrowner: [0; 6],
559             mrownerconfig: [0; 6],
560             reserved: [0; 43],
561         };
562 
563         tdx_command(
564             &self.fd.as_raw_fd(),
565             TdxCommand::InitVm,
566             0,
567             &data as *const _ as u64,
568         )
569         .map_err(vm::HypervisorVmError::InitializeTdx)
570     }
571 
572     ///
573     /// Finalize the TDX setup for this VM
574     ///
575     #[cfg(feature = "tdx")]
576     fn tdx_finalize(&self) -> vm::Result<()> {
577         tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
578             .map_err(vm::HypervisorVmError::FinalizeTdx)
579     }
580 
581     ///
582     /// Initialize memory regions for the TDX VM
583     ///
584     #[cfg(feature = "tdx")]
585     fn tdx_init_memory_region(
586         &self,
587         host_address: u64,
588         guest_address: u64,
589         size: u64,
590         measure: bool,
591     ) -> vm::Result<()> {
592         #[repr(C)]
593         struct TdxInitMemRegion {
594             host_address: u64,
595             guest_address: u64,
596             pages: u64,
597         }
598         let data = TdxInitMemRegion {
599             host_address,
600             guest_address,
601             pages: size / 4096,
602         };
603 
604         tdx_command(
605             &self.fd.as_raw_fd(),
606             TdxCommand::InitMemRegion,
607             if measure { 1 } else { 0 },
608             &data as *const _ as u64,
609         )
610         .map_err(vm::HypervisorVmError::InitMemRegionTdx)
611     }
612 }
613 
#[cfg(feature = "tdx")]
/// Issue one TDX command on `fd` via the KVM_MEMORY_ENCRYPT_OP ioctl.
///
/// `metadata` and `data` are command-specific; `data` typically carries the
/// address of a `#[repr(C)]` payload structure. Returns the OS error when the
/// ioctl fails.
fn tdx_command(
    fd: &RawFd,
    command: TdxCommand,
    metadata: u32,
    data: u64,
) -> std::result::Result<(), std::io::Error> {
    // Request layout expected by the kernel for a TDX command.
    #[repr(C)]
    struct TdxIoctlCmd {
        command: TdxCommand,
        metadata: u32,
        data: u64,
    }
    let request = TdxIoctlCmd {
        command,
        metadata,
        data,
    };

    // SAFETY: FFI call. All input parameters are valid.
    let ret = unsafe {
        ioctl_with_val(
            fd,
            KVM_MEMORY_ENCRYPT_OP(),
            &request as *const TdxIoctlCmd as std::os::raw::c_ulong,
        )
    };

    match ret {
        r if r < 0 => Err(std::io::Error::last_os_error()),
        _ => Ok(()),
    }
}
646 
/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    // System-level KVM handle used for VM creation and capability queries.
    kvm: Kvm,
}
/// Enum for KVM related error
#[derive(Debug, Error)]
pub enum KvmError {
    /// A KVM capability required by this VMM is not exposed by the host.
    #[error("Capability missing: {0:?}")]
    CapabilityMissing(Cap),
}
/// Result type for KVM-specific operations.
pub type KvmResult<T> = result::Result<T, KvmError>;
658 impl KvmHypervisor {
659     /// Create a hypervisor based on Kvm
660     pub fn new() -> hypervisor::Result<KvmHypervisor> {
661         let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
662         let api_version = kvm_obj.get_api_version();
663 
664         if api_version != kvm_bindings::KVM_API_VERSION as i32 {
665             return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
666         }
667 
668         Ok(KvmHypervisor { kvm: kvm_obj })
669     }
670 }
/// Implementation of Hypervisor trait for KVM
/// Example:
/// #[cfg(feature = "kvm")]
/// extern crate hypervisor
/// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
///
impl hypervisor::Hypervisor for KvmHypervisor {
    /// Create a KVM vm object of a specific VM type and return the object as Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// use hypervisor::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type(KvmVmType::LegacyVm).unwrap()
    ///
    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm_with_type(vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            // Build a template MsrEntries holding every MSR index KVM
            // supports; each vCPU clones it for MSR save/restore.
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs = MsrEntries::new(num_msrs).unwrap();
            let indices = msr_list.as_slice();
            let msr_entries = msrs.as_mut_slice();
            for (pos, index) in indices.iter().enumerate() {
                msr_entries[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                state: VmState {},
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }

        #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                state: VmState {},
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }

    /// Create a KVM vm object and return the object as Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// use hypervisor::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap()
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        #[allow(unused_mut)]
        let mut vm_type: u64 = 0; // Create with default platform type

        // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA
        // size from the host and use that when creating the VM, which may
        // avoid unnecessary VM creation failures.
        #[cfg(target_arch = "aarch64")]
        if self.kvm.check_extension(Cap::ArmVmIPASize) {
            vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap();
        }

        self.create_vm_with_type(vm_type)
    }

    /// Verify that every KVM extension this VMM relies on is present on the
    /// host (arch-specific list; see `check_required_kvm_extensions`).
    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        check_required_kvm_extensions(&self.kvm)
            .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
        self.kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by KVM.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve AArch64 host maximum IPA size supported by KVM.
    ///
    fn get_host_ipa_limit(&self) -> i32 {
        self.kvm.get_host_ipa_limit()
    }

    ///
    /// Retrieve TDX capabilities
    ///
    #[cfg(feature = "tdx")]
    fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> {
        // The kernel fills `data` in place through the Capabilities command;
        // nr_cpuid_configs tells it how many cpuid_configs entries fit.
        let data = TdxCapabilities {
            nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32,
            ..Default::default()
        };

        tdx_command(
            &self.kvm.as_raw_fd(),
            TdxCommand::Capabilities,
            0,
            &data as *const _ as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?;

        Ok(data)
    }
}
/// Vcpu struct for KVM
pub struct KvmVcpu {
    // Per-vCPU KVM file descriptor.
    fd: VcpuFd,
    // Template of MSR entries (indices of all KVM-supported MSRs), cloned
    // from KvmVm at vCPU creation.
    #[cfg(target_arch = "x86_64")]
    msrs: MsrEntries,
    // Optional callbacks into the VMM (e.g. for MMIO/PIO handling).
    vmmops: Option<Arc<dyn vm::VmmOps>>,
    // Set to true by enable_hyperv_synic; influences which MSRs are saved.
    #[cfg(target_arch = "x86_64")]
    hyperv_synic: AtomicBool,
}
823 /// Implementation of Vcpu trait for KVM
824 /// Example:
825 /// #[cfg(feature = "kvm")]
826 /// extern crate hypervisor
827 /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
828 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
829 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
830 /// let vcpu = vm.create_vcpu(0, None).unwrap();
831 /// vcpu.get/set().unwrap()
832 ///
833 impl cpu::Vcpu for KvmVcpu {
834     #[cfg(target_arch = "x86_64")]
835     ///
836     /// Returns the vCPU general purpose registers.
837     ///
838     fn get_regs(&self) -> cpu::Result<StandardRegisters> {
839         self.fd
840             .get_regs()
841             .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
842     }
843     #[cfg(target_arch = "x86_64")]
844     ///
845     /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
846     ///
847     fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
848         self.fd
849             .set_regs(regs)
850             .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
851     }
852 
853     #[cfg(target_arch = "aarch64")]
854     ///
855     /// Set attribute for vcpu.
856     ///
857     fn set_vcpu_attr(&self, attr: &DeviceAttr) -> cpu::Result<()> {
858         self.fd
859             .set_device_attr(attr)
860             .map_err(|e| cpu::HypervisorCpuError::SetVcpuAttribute(e.into()))
861     }
862 
863     #[cfg(target_arch = "aarch64")]
864     ///
865     /// Check if vcpu has a certain attribute.
866     ///
867     fn has_vcpu_attr(&self, attr: &DeviceAttr) -> cpu::Result<()> {
868         self.fd
869             .has_device_attr(attr)
870             .map_err(|e| cpu::HypervisorCpuError::HasVcpuAttribute(e.into()))
871     }
872 
873     #[cfg(target_arch = "x86_64")]
874     ///
875     /// Returns the vCPU special registers.
876     ///
877     fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
878         self.fd
879             .get_sregs()
880             .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))
881     }
882     #[cfg(target_arch = "x86_64")]
883     ///
884     /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
885     ///
886     fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
887         self.fd
888             .set_sregs(sregs)
889             .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
890     }
891     #[cfg(target_arch = "x86_64")]
892     ///
893     /// Returns the floating point state (FPU) from the vCPU.
894     ///
895     fn get_fpu(&self) -> cpu::Result<FpuState> {
896         self.fd
897             .get_fpu()
898             .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))
899     }
900     #[cfg(target_arch = "x86_64")]
901     ///
902     /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioct.
903     ///
904     fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
905         self.fd
906             .set_fpu(fpu)
907             .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
908     }
909     #[cfg(target_arch = "x86_64")]
910     ///
911     /// X86 specific call to setup the CPUID registers.
912     ///
913     fn set_cpuid2(&self, cpuid: &CpuId) -> cpu::Result<()> {
914         self.fd
915             .set_cpuid2(cpuid)
916             .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
917     }
918     #[cfg(target_arch = "x86_64")]
919     ///
920     /// X86 specific call to enable HyperV SynIC
921     ///
922     fn enable_hyperv_synic(&self) -> cpu::Result<()> {
923         // Update the information about Hyper-V SynIC being enabled and
924         // emulated as it will influence later which MSRs should be saved.
925         self.hyperv_synic.store(true, Ordering::Release);
926 
927         let cap = kvm_enable_cap {
928             cap: KVM_CAP_HYPERV_SYNIC,
929             ..Default::default()
930         };
931         self.fd
932             .enable_cap(&cap)
933             .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into()))
934     }
935     ///
936     /// X86 specific call to retrieve the CPUID registers.
937     ///
938     #[cfg(target_arch = "x86_64")]
939     fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<CpuId> {
940         self.fd
941             .get_cpuid2(num_entries)
942             .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))
943     }
944     #[cfg(target_arch = "x86_64")]
945     ///
946     /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
947     ///
948     fn get_lapic(&self) -> cpu::Result<LapicState> {
949         self.fd
950             .get_lapic()
951             .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))
952     }
953     #[cfg(target_arch = "x86_64")]
954     ///
955     /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
956     ///
957     fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
958         self.fd
959             .set_lapic(klapic)
960             .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
961     }
962     #[cfg(target_arch = "x86_64")]
963     ///
964     /// Returns the model-specific registers (MSR) for this vCPU.
965     ///
966     fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> {
967         self.fd
968             .get_msrs(msrs)
969             .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))
970     }
971     #[cfg(target_arch = "x86_64")]
972     ///
973     /// Setup the model-specific registers (MSR) for this vCPU.
974     /// Returns the number of MSR entries actually written.
975     ///
976     fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> {
977         self.fd
978             .set_msrs(msrs)
979             .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
980     }
981     ///
982     /// Returns the vcpu's current "multiprocessing state".
983     ///
984     fn get_mp_state(&self) -> cpu::Result<MpState> {
985         self.fd
986             .get_mp_state()
987             .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))
988     }
989     ///
990     /// Sets the vcpu's current "multiprocessing state".
991     ///
992     fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
993         self.fd
994             .set_mp_state(mp_state)
995             .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
996     }
997     #[cfg(target_arch = "x86_64")]
998     ///
999     /// X86 specific call that returns the vcpu's current "xsave struct".
1000     ///
1001     fn get_xsave(&self) -> cpu::Result<Xsave> {
1002         self.fd
1003             .get_xsave()
1004             .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
1005     }
1006     #[cfg(target_arch = "x86_64")]
1007     ///
1008     /// X86 specific call that sets the vcpu's current "xsave struct".
1009     ///
1010     fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
1011         self.fd
1012             .set_xsave(xsave)
1013             .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
1014     }
1015     #[cfg(target_arch = "x86_64")]
1016     ///
1017     /// X86 specific call that returns the vcpu's current "xcrs".
1018     ///
1019     fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
1020         self.fd
1021             .get_xcrs()
1022             .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
1023     }
1024     #[cfg(target_arch = "x86_64")]
1025     ///
1026     /// X86 specific call that sets the vcpu's current "xcrs".
1027     ///
1028     fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
1029         self.fd
1030             .set_xcrs(xcrs)
1031             .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
1032     }
1033     #[cfg(target_arch = "x86_64")]
1034     ///
1035     /// Translates guest virtual address to guest physical address using the `KVM_TRANSLATE` ioctl.
1036     ///
1037     fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> {
1038         let tr = self
1039             .fd
1040             .translate_gva(gva)
1041             .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
1042         // tr.valid is set if the GVA is mapped to valid GPA.
1043         match tr.valid {
1044             0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!(
1045                 "Invalid GVA: {:#x}",
1046                 gva
1047             ))),
1048             _ => Ok((tr.physical_address, 0)),
1049         }
1050     }
1051     ///
1052     /// Triggers the running of the current virtual CPU returning an exit reason.
1053     ///
1054     fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
1055         match self.fd.run() {
1056             Ok(run) => match run {
1057                 #[cfg(target_arch = "x86_64")]
1058                 VcpuExit::IoIn(addr, data) => {
1059                     if let Some(vmmops) = &self.vmmops {
1060                         return vmmops
1061                             .pio_read(addr.into(), data)
1062                             .map(|_| cpu::VmExit::Ignore)
1063                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1064                     }
1065 
1066                     Ok(cpu::VmExit::IoIn(addr, data))
1067                 }
1068                 #[cfg(target_arch = "x86_64")]
1069                 VcpuExit::IoOut(addr, data) => {
1070                     if let Some(vmmops) = &self.vmmops {
1071                         return vmmops
1072                             .pio_write(addr.into(), data)
1073                             .map(|_| cpu::VmExit::Ignore)
1074                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1075                     }
1076 
1077                     Ok(cpu::VmExit::IoOut(addr, data))
1078                 }
1079                 #[cfg(target_arch = "x86_64")]
1080                 VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
1081                 #[cfg(target_arch = "x86_64")]
1082                 VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),
1083 
1084                 #[cfg(target_arch = "aarch64")]
1085                 VcpuExit::SystemEvent(event_type, flags) => {
1086                     use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
1087                     // On Aarch64, when the VM is shutdown, run() returns
1088                     // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN
1089                     if event_type == KVM_SYSTEM_EVENT_RESET {
1090                         Ok(cpu::VmExit::Reset)
1091                     } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
1092                         Ok(cpu::VmExit::Shutdown)
1093                     } else {
1094                         Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1095                             "Unexpected system event with type 0x{:x}, flags 0x{:x}",
1096                             event_type,
1097                             flags
1098                         )))
1099                     }
1100                 }
1101 
1102                 VcpuExit::MmioRead(addr, data) => {
1103                     if let Some(vmmops) = &self.vmmops {
1104                         return vmmops
1105                             .mmio_read(addr, data)
1106                             .map(|_| cpu::VmExit::Ignore)
1107                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1108                     }
1109 
1110                     Ok(cpu::VmExit::MmioRead(addr, data))
1111                 }
1112                 VcpuExit::MmioWrite(addr, data) => {
1113                     if let Some(vmmops) = &self.vmmops {
1114                         return vmmops
1115                             .mmio_write(addr, data)
1116                             .map(|_| cpu::VmExit::Ignore)
1117                             .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
1118                     }
1119 
1120                     Ok(cpu::VmExit::MmioWrite(addr, data))
1121                 }
1122                 VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),
1123                 #[cfg(feature = "tdx")]
1124                 VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx),
1125                 VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug),
1126 
1127                 r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1128                     "Unexpected exit reason on vcpu run: {:?}",
1129                     r
1130                 ))),
1131             },
1132 
1133             Err(ref e) => match e.errno() {
1134                 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
1135                 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1136                     "VCPU error {:?}",
1137                     e
1138                 ))),
1139             },
1140         }
1141     }
1142     #[cfg(target_arch = "x86_64")]
1143     ///
1144     /// Returns currently pending exceptions, interrupts, and NMIs as well as related
1145     /// states of the vcpu.
1146     ///
1147     fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
1148         self.fd
1149             .get_vcpu_events()
1150             .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
1151     }
1152     #[cfg(target_arch = "x86_64")]
1153     ///
1154     /// Sets pending exceptions, interrupts, and NMIs as well as related states
1155     /// of the vcpu.
1156     ///
1157     fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
1158         self.fd
1159             .set_vcpu_events(events)
1160             .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
1161     }
1162     #[cfg(target_arch = "x86_64")]
1163     ///
1164     /// Let the guest know that it has been paused, which prevents from
1165     /// potential soft lockups when being resumed.
1166     ///
1167     fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
1168         if let Err(e) = self.fd.kvmclock_ctrl() {
1169             // Linux kernel returns -EINVAL if the PV clock isn't yet initialised
1170             // which could be because we're still in firmware or the guest doesn't
1171             // use KVM clock.
1172             if e.errno() != libc::EINVAL {
1173                 return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()));
1174             }
1175         }
1176 
1177         Ok(())
1178     }
1179     #[cfg(target_arch = "x86_64")]
1180     ///
1181     /// Sets debug registers to set hardware breakpoints and/or enable single step.
1182     ///
1183     fn set_guest_debug(
1184         &self,
1185         addrs: &[vm_memory::GuestAddress],
1186         singlestep: bool,
1187     ) -> cpu::Result<()> {
1188         if addrs.len() > 4 {
1189             return Err(cpu::HypervisorCpuError::SetDebugRegs(anyhow!(
1190                 "Support 4 breakpoints at most but {} addresses are passed",
1191                 addrs.len()
1192             )));
1193         }
1194 
1195         let mut dbg = kvm_guest_debug {
1196             control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
1197             ..Default::default()
1198         };
1199         if singlestep {
1200             dbg.control |= KVM_GUESTDBG_SINGLESTEP;
1201         }
1202 
1203         // Set bits 9 and 10.
1204         // bit 9: GE (global exact breakpoint enable) flag.
1205         // bit 10: always 1.
1206         dbg.arch.debugreg[7] = 0x0600;
1207 
1208         for (i, addr) in addrs.iter().enumerate() {
1209             dbg.arch.debugreg[i] = addr.0;
1210             // Set global breakpoint enable flag
1211             dbg.arch.debugreg[7] |= 2 << (i * 2);
1212         }
1213 
1214         self.fd
1215             .set_guest_debug(&dbg)
1216             .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))
1217     }
1218     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1219     fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
1220         self.fd
1221             .vcpu_init(kvi)
1222             .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
1223     }
1224     ///
1225     /// Sets the value of one register for this vCPU.
1226     ///
1227     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1228     fn set_reg(&self, reg_id: u64, data: u64) -> cpu::Result<()> {
1229         self.fd
1230             .set_one_reg(reg_id, data)
1231             .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))
1232     }
1233     ///
1234     /// Gets the value of one register for this vCPU.
1235     ///
1236     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1237     fn get_reg(&self, reg_id: u64) -> cpu::Result<u64> {
1238         self.fd
1239             .get_one_reg(reg_id)
1240             .map_err(|e| cpu::HypervisorCpuError::GetRegister(e.into()))
1241     }
1242     ///
1243     /// Gets a list of the guest registers that are supported for the
1244     /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
1245     ///
1246     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1247     fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
1248         self.fd
1249             .get_reg_list(reg_list)
1250             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
1251     }
1252     ///
1253     /// Save the state of the core registers.
1254     ///
1255     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1256     fn core_registers(&self, state: &mut StandardRegisters) -> cpu::Result<()> {
1257         let mut off = offset__of!(user_pt_regs, regs);
1258         // There are 31 user_pt_regs:
1259         // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
1260         // These actually are the general-purpose registers of the Armv8-a
1261         // architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register).
1262         for i in 0..31 {
1263             state.regs.regs[i] = self
1264                 .fd
1265                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
1266                 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1267             off += std::mem::size_of::<u64>();
1268         }
1269 
1270         // We are now entering the "Other register" section of the ARMv8-a architecture.
1271         // First one, stack pointer.
1272         let off = offset__of!(user_pt_regs, sp);
1273         state.regs.sp = self
1274             .fd
1275             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
1276             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1277 
1278         // Second one, the program counter.
1279         let off = offset__of!(user_pt_regs, pc);
1280         state.regs.pc = self
1281             .fd
1282             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
1283             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1284 
1285         // Next is the processor state.
1286         let off = offset__of!(user_pt_regs, pstate);
1287         state.regs.pstate = self
1288             .fd
1289             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
1290             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1291 
1292         // The stack pointer associated with EL1
1293         let off = offset__of!(kvm_regs, sp_el1);
1294         state.sp_el1 = self
1295             .fd
1296             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
1297             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1298 
1299         // Exception Link Register for EL1, when taking an exception to EL1, this register
1300         // holds the address to which to return afterwards.
1301         let off = offset__of!(kvm_regs, elr_el1);
1302         state.elr_el1 = self
1303             .fd
1304             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
1305             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1306 
1307         // Saved Program Status Registers, there are 5 of them used in the kernel.
1308         let mut off = offset__of!(kvm_regs, spsr);
1309         for i in 0..KVM_NR_SPSR as usize {
1310             state.spsr[i] = self
1311                 .fd
1312                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off))
1313                 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
1314             off += std::mem::size_of::<u64>();
1315         }
1316 
1317         // Now moving on to floting point registers which are stored in the user_fpsimd_state in the kernel:
1318         // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
1319         let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
1320         for i in 0..32 {
1321             state.fp_regs.vregs[i] = self
1322                 .fd
1323                 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off))
1324                 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
1325                 .into();
1326             off += mem::size_of::<u128>();
1327         }
1328 
1329         // Floating-point Status Register
1330         let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
1331         state.fp_regs.fpsr = self
1332             .fd
1333             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
1334             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
1335             as u32;
1336 
1337         // Floating-point Control Register
1338         let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
1339         state.fp_regs.fpcr = self
1340             .fd
1341             .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off))
1342             .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?
1343             as u32;
1344         Ok(())
1345     }
1346     ///
1347     /// Restore the state of the core registers.
1348     ///
1349     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1350     fn set_core_registers(&self, state: &StandardRegisters) -> cpu::Result<()> {
1351         // The function follows the exact identical order from `state`. Look there
1352         // for some additional info on registers.
1353         let mut off = offset__of!(user_pt_regs, regs);
1354         for i in 0..31 {
1355             self.fd
1356                 .set_one_reg(
1357                     arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
1358                     state.regs.regs[i],
1359                 )
1360                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1361             off += std::mem::size_of::<u64>();
1362         }
1363 
1364         let off = offset__of!(user_pt_regs, sp);
1365         self.fd
1366             .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.sp)
1367             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1368 
1369         let off = offset__of!(user_pt_regs, pc);
1370         self.fd
1371             .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pc)
1372             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1373 
1374         let off = offset__of!(user_pt_regs, pstate);
1375         self.fd
1376             .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.regs.pstate)
1377             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1378 
1379         let off = offset__of!(kvm_regs, sp_el1);
1380         self.fd
1381             .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.sp_el1)
1382             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1383 
1384         let off = offset__of!(kvm_regs, elr_el1);
1385         self.fd
1386             .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.elr_el1)
1387             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1388 
1389         let mut off = offset__of!(kvm_regs, spsr);
1390         for i in 0..KVM_NR_SPSR as usize {
1391             self.fd
1392                 .set_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), state.spsr[i])
1393                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1394             off += std::mem::size_of::<u64>();
1395         }
1396 
1397         let mut off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, vregs);
1398         for i in 0..32 {
1399             self.fd
1400                 .set_one_reg(
1401                     arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
1402                     state.fp_regs.vregs[i] as u64,
1403                 )
1404                 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1405             off += mem::size_of::<u128>();
1406         }
1407 
1408         let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpsr);
1409         self.fd
1410             .set_one_reg(
1411                 arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
1412                 state.fp_regs.fpsr as u64,
1413             )
1414             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1415 
1416         let off = offset__of!(kvm_regs, fp_regs) + offset__of!(user_fpsimd_state, fpcr);
1417         self.fd
1418             .set_one_reg(
1419                 arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
1420                 state.fp_regs.fpcr as u64,
1421             )
1422             .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
1423         Ok(())
1424     }
1425     ///
1426     /// Save the state of the system registers.
1427     ///
1428     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1429     fn system_registers(&self, state: &mut Vec<Register>) -> cpu::Result<()> {
1430         // Call KVM_GET_REG_LIST to get all registers available to the guest. For ArmV8 there are
1431         // around 500 registers.
1432         let mut reg_list = RegList::new(500).unwrap();
1433         self.fd
1434             .get_reg_list(&mut reg_list)
1435             .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;
1436 
1437         // At this point reg_list should contain: core registers and system registers.
1438         // The register list contains the number of registers and their ids. We will be needing to
1439         // call KVM_GET_ONE_REG on each id in order to save all of them. We carve out from the list
1440         // the core registers which are represented in the kernel by kvm_regs structure and for which
1441         // we can calculate the id based on the offset in the structure.
1442         reg_list.retain(|regid| is_system_register(*regid));
1443 
1444         // Now, for the rest of the registers left in the previously fetched register list, we are
1445         // simply calling KVM_GET_ONE_REG.
1446         let indices = reg_list.as_slice();
1447         for index in indices.iter() {
1448             state.push(kvm_bindings::kvm_one_reg {
1449                 id: *index,
1450                 addr: self
1451                     .fd
1452                     .get_one_reg(*index)
1453                     .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?,
1454             });
1455         }
1456 
1457         Ok(())
1458     }
1459     ///
1460     /// Restore the state of the system registers.
1461     ///
1462     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1463     fn set_system_registers(&self, state: &[Register]) -> cpu::Result<()> {
1464         for reg in state {
1465             self.fd
1466                 .set_one_reg(reg.id, reg.addr)
1467                 .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
1468         }
1469         Ok(())
1470     }
1471     ///
1472     /// Read the MPIDR - Multiprocessor Affinity Register.
1473     ///
1474     #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
1475     fn read_mpidr(&self) -> cpu::Result<u64> {
1476         self.fd
1477             .get_one_reg(MPIDR_EL1)
1478             .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))
1479     }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before most everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change state of LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save. The code looks as
    /// it might as well be affected by internal state modifications of the
    /// GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a pre-populated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        // Respect the ordering constraints documented above: MP state first,
        // VCPU_EVENTS last.
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?;
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fallback onto a slower method by getting MSRs
        // by chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            // Hyper-V SynIC MSR indices (HV_X64_MSR_SCONTROL and friends).
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
                0x400000b5, 0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr).unwrap();
            }
        }

        let expected_num_msrs = msr_entries.as_fam_struct_ref().nmsrs as usize;
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            // GET_MSRS returns the count of MSRs successfully read before the
            // first failing one, so `num_msrs` indexes the faulty entry.
            let mut faulty_msr_index = num_msrs;
            // Keep everything read so far, then retry past each faulty MSR.
            let mut msr_entries_tmp =
                MsrEntries::from_entries(&msr_entries.as_slice()[..faulty_msr_index]).unwrap();

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries.as_slice()[faulty_msr_index].index
                );

                // Skip the faulty MSR and retry with the remainder of the list.
                let start_pos = faulty_msr_index + 1;
                let mut sub_msr_entries =
                    MsrEntries::from_entries(&msr_entries.as_slice()[start_pos..]).unwrap();
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                for i in 0..num_msrs {
                    msr_entries_tmp
                        .push(sub_msr_entries.as_slice()[i])
                        .map_err(|e| {
                            cpu::HypervisorCpuError::GetMsrEntries(anyhow!(
                                "Failed adding MSR entries: {:?}",
                                e
                            ))
                        })?;
                }

                // The whole remainder was read successfully: done.
                if num_msrs == expected_num_msrs {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;

        Ok(CpuState {
            cpuid,
            msrs,
            vcpu_events,
            regs,
            sregs,
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
        })
    }
1611     ///
1612     /// Get the current AArch64 CPU state
1613     ///
1614     #[cfg(target_arch = "aarch64")]
1615     fn state(&self) -> cpu::Result<CpuState> {
1616         let mut state = CpuState {
1617             mp_state: self.get_mp_state()?,
1618             mpidr: self.read_mpidr()?,
1619             ..Default::default()
1620         };
1621         self.core_registers(&mut state.core_regs)?;
1622         self.system_registers(&mut state.sys_regs)?;
1623 
1624         Ok(state)
1625     }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully, when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # extern crate hypervisor;
    /// # use hypervisor::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = hypervisor::kvm::KvmHypervisor::new().unwrap();
    /// let hv: Arc<dyn hypervisor::Hypervisor> = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        // The order of the calls below follows the ordering requirements
        // documented above — do not reorder.
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state)?;
        self.set_regs(&state.regs)?;
        self.set_sregs(&state.sregs)?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fallback onto a slower method by setting MSRs
        // by chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.as_fam_struct_ref().nmsrs as usize;
        // set_msrs() reports how many entries were accepted; a shortfall
        // means the entry right after the accepted ones was rejected.
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            // Index of the first entry KVM refused to set.
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs.as_slice()[faulty_msr_index].index
                );

                // Skip the faulty MSR and retry with the remaining tail.
                let start_pos = faulty_msr_index + 1;
                let sub_msr_entries =
                    MsrEntries::from_entries(&state.msrs.as_slice()[start_pos..]).unwrap();
                let expected_num_msrs = sub_msr_entries.as_fam_struct_ref().nmsrs as usize;
                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                // The whole tail was accepted: every remaining MSR is set.
                if num_msrs == expected_num_msrs {
                    break;
                }

                // Another faulty entry in the tail; translate its position
                // back into an index of the original slice and keep going.
                faulty_msr_index = start_pos + num_msrs;
            }
        }

        // Per the ordering notes above, vCPU events are restored last so
        // SET_REGS cannot clobber the pending exception they carry.
        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }
1710     ///
1711     /// Restore the previously saved AArch64 CPU state
1712     ///
1713     #[cfg(target_arch = "aarch64")]
1714     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
1715         self.set_core_registers(&state.core_regs)?;
1716         self.set_system_registers(&state.sys_regs)?;
1717         self.set_mp_state(state.mp_state)?;
1718 
1719         Ok(())
1720     }
1721 
1722     ///
1723     /// Initialize TDX for this CPU
1724     ///
1725     #[cfg(feature = "tdx")]
1726     fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
1727         tdx_command(&self.fd.as_raw_fd(), TdxCommand::InitVcpu, 0, hob_address)
1728             .map_err(cpu::HypervisorCpuError::InitializeTdx)
1729     }
1730 
1731     ///
1732     /// Set the "immediate_exit" state
1733     ///
1734     fn set_immediate_exit(&self, exit: bool) {
1735         self.fd.set_kvm_immediate_exit(exit.into());
1736     }
1737 
1738     ///
1739     /// Returns the details about TDX exit reason
1740     ///
1741     #[cfg(feature = "tdx")]
1742     fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
1743         let kvm_run = self.fd.get_kvm_run();
1744         let tdx_vmcall = unsafe { &mut kvm_run.__bindgen_anon_1.tdx.u.vmcall };
1745 
1746         tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;
1747 
1748         if tdx_vmcall.type_ != 0 {
1749             return Err(cpu::HypervisorCpuError::UnknownTdxVmCall);
1750         }
1751 
1752         match tdx_vmcall.subfunction {
1753             TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
1754             TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
1755                 Ok(TdxExitDetails::SetupEventNotifyInterrupt)
1756             }
1757             _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
1758         }
1759     }
1760 
1761     ///
1762     /// Set the status code for TDX exit
1763     ///
1764     #[cfg(feature = "tdx")]
1765     fn set_tdx_status(&mut self, status: TdxExitStatus) {
1766         let kvm_run = self.fd.get_kvm_run();
1767         let tdx_vmcall = unsafe { &mut kvm_run.__bindgen_anon_1.tdx.u.vmcall };
1768 
1769         tdx_vmcall.status_code = match status {
1770             TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
1771             TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
1772         };
1773     }
1774 }
1775 
/// Device struct for KVM
///
/// Wraps a kvm-ioctls `DeviceFd` so the in-kernel device can be driven
/// through the hypervisor-agnostic `device::Device` trait.
pub struct KvmDevice {
    // File descriptor of the KVM device; all attribute ioctls go through it.
    fd: DeviceFd,
}
1780 
1781 impl device::Device for KvmDevice {
1782     ///
1783     /// Set device attribute
1784     ///
1785     fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> {
1786         self.fd
1787             .set_device_attr(attr)
1788             .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into()))
1789     }
1790     ///
1791     /// Get device attribute
1792     ///
1793     fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> {
1794         self.fd
1795             .get_device_attr(attr)
1796             .map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into()))
1797     }
1798 }
1799 
1800 impl AsRawFd for KvmDevice {
1801     fn as_raw_fd(&self) -> RawFd {
1802         self.fd.as_raw_fd()
1803     }
1804 }
1805