// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//

use std::any::Any;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

use mshv_bindings::*;
use mshv_ioctls::{set_registers_64, InterruptRequest, Mshv, NoDatamatch, VcpuFd, VmFd, VmType};
use vfio_ioctls::VfioDeviceFd;
use vm::DataMatch;

use crate::arch::emulator::PlatformEmulator;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::emulator::Emulator;
use crate::mshv::emulator::MshvEmulatorContext;
use crate::vm::{self, InterruptSourceConfig, VmOps};
use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType};
#[cfg(feature = "sev_snp")]
mod snp_constants;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
#[cfg(target_arch = "x86_64")]
use std::fs::File;
use std::os::unix::io::AsRawFd;

#[cfg(feature = "sev_snp")]
use igvm_defs::IGVM_VHS_SNP_ID_BLOCK;
#[cfg(feature = "sev_snp")]
use snp_constants::*;
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;
#[cfg(target_arch = "x86_64")]
pub use x86_64::{emulator, VcpuMshvState};
///
/// Export generically-named wrappers of mshv-bindings for Unix-based platforms
///
pub use {
    mshv_bindings::mshv_create_device as CreateDevice,
    mshv_bindings::mshv_device_attr as DeviceAttr, mshv_ioctls, mshv_ioctls::DeviceFd,
};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{CpuIdEntry, FpuState, MsrEntry};
#[cfg(target_arch = "x86_64")]
use crate::ClockData;
use crate::{
    CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
    USER_MEMORY_REGION_ADJUSTABLE, USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ,
    USER_MEMORY_REGION_WRITE,
};

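/// Shift for converting between guest-physical addresses and page frame
/// numbers; guest memory is managed in 4 KiB (1 << 12) pages.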
pub const PAGE_SHIFT: usize = 12;

impl From<mshv_user_mem_region> for UserMemoryRegion {
    fn from(region: mshv_user_mem_region) -> Self {
        let mut flags: u32 = USER_MEMORY_REGION_READ | USER_MEMORY_REGION_ADJUSTABLE;
        if region.flags & (1 << MSHV_SET_MEM_BIT_WRITABLE) != 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & (1 << MSHV_SET_MEM_BIT_EXECUTABLE) != 0 {
            flags |= USER_MEMORY_REGION_EXECUTE;
        }

        UserMemoryRegion {
            guest_phys_addr: (region.guest_pfn << PAGE_SHIFT as u64)
                + (region.userspace_addr & ((1 << PAGE_SHIFT) - 1)),
            memory_size: region.size,
            userspace_addr: region.userspace_addr,
            flags,
            ..Default::default()
        }
    }
}
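
// A minimal, test-only sketch of the address math above: the guest-physical
// address is rebuilt from the page frame number plus the sub-page offset of
// the userspace mapping. The field values below are arbitrary illustrations.
#[cfg(test)]
mod user_memory_region_conversion_tests {
    use super::*;

    #[test]
    fn gpa_keeps_sub_page_offset_of_userspace_addr() {
        let region = mshv_user_mem_region {
            guest_pfn: 0x100,                 // guest page frame number
            size: 0x2000,                     // two 4 KiB pages
            userspace_addr: 0x7f00_0000_0123, // offset 0x123 within a page
            ..Default::default()
        };
        let converted: UserMemoryRegion = region.into();
        assert_eq!(converted.guest_phys_addr, (0x100u64 << PAGE_SHIFT) + 0x123);
        assert_eq!(converted.memory_size, 0x2000);
    }
}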

#[cfg(target_arch = "x86_64")]
impl From<MshvClockData> for ClockData {
    fn from(d: MshvClockData) -> Self {
        ClockData::Mshv(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for MshvClockData {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Mshv(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => unreachable!("MSHV clock data is not valid"),
        }
    }
}

impl From<UserMemoryRegion> for mshv_user_mem_region {
    fn from(region: UserMemoryRegion) -> Self {
        let mut flags: u8 = 0;
        if region.flags & USER_MEMORY_REGION_WRITE != 0 {
            flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
        }
        if region.flags & USER_MEMORY_REGION_EXECUTE != 0 {
            flags |= 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
        }

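        // Note: guest_phys_addr >> PAGE_SHIFT drops any sub-page offset; the
        // region is assumed to be page-aligned here.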
        mshv_user_mem_region {
            guest_pfn: region.guest_phys_addr >> PAGE_SHIFT,
            size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
            ..Default::default()
        }
    }
}

impl From<mshv_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: mshv_ioctls::IoEventAddress) -> Self {
        match a {
            mshv_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            mshv_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for mshv_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuMshvState> for CpuState {
    fn from(s: VcpuMshvState) -> Self {
        CpuState::Mshv(s)
    }
}

impl From<CpuState> for VcpuMshvState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Mshv(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

impl From<mshv_bindings::StandardRegisters> for crate::StandardRegisters {
    fn from(s: mshv_bindings::StandardRegisters) -> Self {
        crate::StandardRegisters::Mshv(s)
    }
}

impl From<crate::StandardRegisters> for mshv_bindings::StandardRegisters {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

impl From<mshv_user_irq_entry> for IrqRoutingEntry {
    fn from(s: mshv_user_irq_entry) -> Self {
        IrqRoutingEntry::Mshv(s)
    }
}

impl From<IrqRoutingEntry> for mshv_user_irq_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

/// Identifies a guest memory slot (base page frame number and size in bytes)
/// used when enabling or disabling dirty-page tracking.
struct MshvDirtyLogSlot {
    guest_pfn: u64,
    memory_size: u64,
}

/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by MSHV.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

impl MshvHypervisor {
    /// Create a hypervisor based on Mshv
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let mshv_obj =
            Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
        Ok(Arc::new(MshvHypervisor { mshv: mshv_obj }))
    }

    /// Check if the hypervisor is available.
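    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::mshv::MshvHypervisor;
    /// // A minimal sketch: probe for /dev/mshv so a caller can fall back to
    /// // another backend instead of failing hard.
    /// if MshvHypervisor::is_available().unwrap_or(false) {
    ///     let _mshv = MshvHypervisor::new().unwrap();
    /// }
    /// ```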
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/mshv") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}

/// Implementation of Hypervisor trait for Mshv
///
/// # Examples
///
/// ```
/// # use hypervisor::mshv::MshvHypervisor;
/// # use std::sync::Arc;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl hypervisor::Hypervisor for MshvHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Mshv
    }

    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn crate::Vm>> {
        let mshv_vm_type: VmType = match VmType::try_from(vm_type) {
            Ok(vm_type) => vm_type,
            Err(_) => return Err(hypervisor::HypervisorError::UnsupportedVmType()),
        };
        let fd: VmFd;
        loop {
            match self.mshv.create_vm_with_type(mshv_vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // The ioctl was interrupted (EINTR). This is not a
                        // regular error, so retry the call.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        // Set additional partition properties for a SEV-SNP partition.
        #[cfg(target_arch = "x86_64")]
        if mshv_vm_type == VmType::Snp {
            let snp_policy = snp::get_default_snp_guest_policy();
            let vmgexit_offloads = snp::get_default_vmgexit_offload_features();
            // SAFETY: access union fields
            unsafe {
                debug!(
                    "Setting the partition isolation policy as: 0x{:x}",
                    snp_policy.as_uint64
                );
                fd.set_partition_property(
                    hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_POLICY,
                    snp_policy.as_uint64,
                )
                .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
                debug!(
                    "Setting the partition property to enable VMGEXIT offloads: 0x{:x}",
                    vmgexit_offloads.as_uint64
                );
                fd.set_partition_property(
                    hv_partition_property_code_HV_PARTITION_PROPERTY_SEV_VMGEXIT_OFFLOADS,
                    vmgexit_offloads.as_uint64,
                )
                .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
            }
        }

        // The Microsoft Hypervisor's default behavior for an unimplemented
        // MSR is to inject a fault into the guest when the MSR is accessed.
        // It is possible to override this behavior with a more suitable
        // option, i.e., ignore writes from the guest and return zero on
        // attempts to read an unimplemented MSR.
        #[cfg(target_arch = "x86_64")]
        fd.set_partition_property(
            hv_partition_property_code_HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION,
            hv_unimplemented_msr_action_HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;

        // Always create a frozen partition
        fd.set_partition_property(
            hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
            1u64,
        )
        .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs: Vec<MsrEntry> = vec![
                MsrEntry {
                    ..Default::default()
                };
                num_msrs
            ];
            let indices = msr_list.as_slice();
            for (pos, index) in indices.iter().enumerate() {
                msrs[pos].index = *index;
            }

            Ok(Arc::new(MshvVm {
                fd: vm_fd,
                msrs,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
                #[cfg(feature = "sev_snp")]
                sev_snp_enabled: mshv_vm_type == VmType::Snp,
            }))
        }

        #[cfg(target_arch = "aarch64")]
        {
            Ok(Arc::new(MshvVm {
                fd: vm_fd,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }

    /// Create a mshv vm object and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::mshv::MshvHypervisor;
    /// let hypervisor = MshvHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// ```
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let vm_type = 0;
        self.create_vm_with_type(vm_type)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the supported CPUID
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        let mut cpuid = Vec::new();
        let functions: [u32; 2] = [0x1, 0xb];

        for function in functions {
            cpuid.push(CpuIdEntry {
                function,
                ..Default::default()
            });
        }
        Ok(cpuid)
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        // TODO: Using HV_MAXIMUM_PROCESSORS would be better
        // but the ioctl API is limited to u8
        256
    }

    fn get_guest_debug_hw_bps(&self) -> usize {
        0
    }
}

/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    /// File descriptor used for vCPU ioctls.
    fd: VcpuFd,
    /// Index of this virtual processor within the partition.
    vp_index: u8,
    /// CPUID entries configured for this vCPU.
    #[cfg(target_arch = "x86_64")]
    cpuid: Vec<CpuIdEntry>,
    /// MSR entries configured for this vCPU.
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    /// Callbacks into the VMM for PIO/MMIO handling, if provided.
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    /// Handle to the owning partition (VM) fd.
    vm_fd: Arc<VmFd>,
}

/// Implementation of Vcpu trait for Microsoft Hypervisor
///
/// # Examples
///
/// ```
/// # use hypervisor::mshv::MshvHypervisor;
/// # use std::sync::Arc;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// ```
impl cpu::Vcpu for MshvVcpu {
    ///
    /// Returns StandardRegisters with default value set
    ///
    #[cfg(target_arch = "x86_64")]
    fn create_standard_regs(&self) -> crate::StandardRegisters {
        mshv_bindings::StandardRegisters::default().into()
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<crate::StandardRegisters> {
        Ok(self
            .fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
    fn set_regs(&self, regs: &crate::StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<crate::arch::x86::SpecialRegisters> {
        Ok(self
            .fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &crate::arch::x86::SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: mshv_bindings::FloatingPointUnit = (*fpu).clone().into();
        self.fd
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        let succ = self
            .fd
            .get_msrs(&mut mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

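        // `succ` is the number of entries the hypervisor actually fetched;
        // copy only that prefix back into the caller's buffer.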
        msrs[..succ].copy_from_slice(
            &mshv_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set up the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        self.fd
            .set_msrs(&mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }

    #[allow(non_upper_case_globals)]
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.run() {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    // SAFETY: access_info is valid, otherwise we won't be here
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    /*
                     * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
                     *
                     * Cloud Hypervisor doesn't support fw_cfg at the moment. It does support 0x402
                     * under the "fwdebug" feature flag, but that feature is not enabled by default
                     * and is considered legacy.
                     *
                     * OVMF unconditionally pokes these IO ports with string IO.
                     *
                     * Instead of implementing string IO support now, which would not buy us much
                     * yet, skip these ports explicitly to avoid panicking.
                     *
                     * Proper string IO support can be added once we gain the ability to translate
                     * guest virtual addresses to guest physical addresses on MSHV.
                     */
                    match port {
                        0x402 | 0x510 | 0x511 | 0x514 => {
                            let insn_len = info.header.instruction_length() as u64;

                            /* Advance RIP and update RAX */
                            let arr_reg_name_value = [
                                (
                                    hv_register_name_HV_X64_REGISTER_RIP,
                                    info.header.rip + insn_len,
                                ),
                                (hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
                            ];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                            return Ok(cpu::VmExit::Ignore);
                        }
                        _ => {}
                    }

                    assert!(
                        // SAFETY: access_info is valid, otherwise we won't be here
                        (unsafe { access_info.__bindgen_anon_1.string_op() } != 1),
                        "String IN/OUT not supported"
                    );
                    assert!(
                        // SAFETY: access_info is valid, otherwise we won't be here
                        (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1),
                        "Rep IN/OUT not supported"
                    );

                    if is_write {
                        let data = (info.rax as u32).to_le_bytes();
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_write(port.into(), &data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }
                    } else {
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_read(port.into(), &mut data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }

                        let v = u32::from_le_bytes(data);
                        /* Preserve high bits in EAX but clear out high bits in RAX */
                        let mask = 0xffffffff >> (32 - len * 8);
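                        // e.g. len == 1 gives mask == 0xff: the low byte comes
                        // from the port read, EAX bits 31:8 are preserved.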
                        let eax = (info.rax as u32 & !mask) | (v & mask);
                        ret_rax = eax as u64;
                    }

                    let insn_len = info.header.instruction_length() as u64;

                    /* Advance RIP and update RAX */
                    let arr_reg_name_value = [
                        (
                            hv_register_name_HV_X64_REGISTER_RIP,
                            info.header.rip + insn_len,
                        ),
                        (hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
                    ];
                    set_registers_64!(self.fd, arr_reg_name_value)
                        .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                msg_type @ (hv_message_type_HVMSG_UNMAPPED_GPA
                | hv_message_type_HVMSG_GPA_INTERCEPT) => {
                    let info = x.to_memory_info().unwrap();
                    let insn_len = info.instruction_byte_count as usize;
                    let gva = info.guest_virtual_address;
                    let gpa = info.guest_physical_address;

                    debug!("Exit ({:?}) GVA {:x} GPA {:x}", msg_type, gva, gpa);

                    let mut context = MshvEmulatorContext {
                        vcpu: self,
                        map: (gva, gpa),
                    };

                    // Create a new emulator.
                    let mut emul = Emulator::new(&mut context);

                    // Emulate the trapped instruction, and only the first one.
                    let new_state = emul
                        .emulate_first_insn(
                            self.vp_index as usize,
                            &info.instruction_bytes[..insn_len],
                        )
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Set CPU state back.
                    context
                        .set_cpu_state(self.vp_index as usize, new_state)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(feature = "sev_snp")]
                hv_message_type_HVMSG_GPA_ATTRIBUTE_INTERCEPT => {
                    let info = x.to_gpa_attribute_info().unwrap();
                    let host_vis = info.__bindgen_anon_1.host_visibility();
                    if host_vis >= HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE {
                        warn!("Ignored attribute intercept with full host visibility");
                        return Ok(cpu::VmExit::Ignore);
                    }

                    let num_ranges = info.__bindgen_anon_1.range_count();
                    assert!(num_ranges >= 1);
                    if num_ranges > 1 {
                        return Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unhandled VCPU exit (GPA_ATTRIBUTE_INTERCEPT): expected num_ranges to be 1 but found {:?}",
                            num_ranges
                        )));
                    }

                    // TODO: we could also deny the request with HvCallCompleteIntercept
                    let mut gpas = Vec::new();
                    let ranges = info.ranges;
                    let (gfn_start, gfn_count) = snp::parse_gpa_range(ranges[0]).unwrap();
                    debug!(
                        "Releasing pages: gfn_start: {:x?}, gfn_count: {:?}",
                        gfn_start, gfn_count
                    );
                    let gpa_start = gfn_start * HV_PAGE_SIZE as u64;
                    for i in 0..gfn_count {
                        gpas.push(gpa_start + i * HV_PAGE_SIZE as u64);
                    }

                    let mut gpa_list =
                        vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len());
                    gpa_list[0].page_count = gpas.len() as u64;
                    gpa_list[0].flags = 0;
                    if host_vis & HV_MAP_GPA_READABLE != 0 {
                        gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_READABLE;
                    }
                    if host_vis & HV_MAP_GPA_WRITABLE != 0 {
                        gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_WRITABLE;
                    }

                    // SAFETY: gpa_list was allocated with room for gpas.len() trailing
                    // elements, so the slice produced here is guaranteed to be large
                    // enough to hold everything from gpas.
                    unsafe {
                        let gpas_slice: &mut [u64] =
                            gpa_list[0].guest_pfns.as_mut_slice(gpas.len());
                        gpas_slice.copy_from_slice(gpas.as_slice());
                    }

                    self.vm_fd
                        .modify_gpa_host_access(&gpa_list[0])
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unhandled VCPU exit: attribute intercept - couldn't modify host access {}", e
                        )))?;
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_UNACCEPTED_GPA => {
                    let info = x.to_memory_info().unwrap();
                    let gva = info.guest_virtual_address;
                    let gpa = info.guest_physical_address;

                    Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                        "Unhandled VCPU exit: Unaccepted GPA({:x}) found at GVA({:x})",
                        gpa,
                        gva,
                    )))
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
                    let info = x.to_cpuid_info().unwrap();
                    debug!("cpuid eax: {:x}", { info.rax });
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
                    let info = x.to_msr_info().unwrap();
                    if info.header.intercept_access_type == 0 {
                        debug!("msr read: {:x}", { info.msr_number });
                    } else {
                        debug!("msr write: {:x}", { info.msr_number });
                    }
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
                    //TODO: Handler for VMCALL here.
                    let info = x.to_exception_info().unwrap();
                    debug!("Exception Info {:?}", { info.exception_vector });
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_APIC_EOI => {
                    let info = x.to_apic_eoi_info().unwrap();
                    // The kernel should dispatch the EOI to the correct thread.
                    // Check the VP index is the same as the one we have.
                    assert!(info.vp_index == self.vp_index as u32);
                    // The interrupt vector in info is u32, but x86 only supports 256
                    // vectors. There is no good way to recover if the hypervisor hands
                    // us an out-of-range value, so just unwrap.
                    Ok(cpu::VmExit::IoapicEoi(
                        info.interrupt_vector.try_into().unwrap(),
                    ))
                }
                #[cfg(feature = "sev_snp")]
                hv_message_type_HVMSG_X64_SEV_VMGEXIT_INTERCEPT => {
                    let info = x.to_vmg_intercept_info().unwrap();
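                    // GHCB MSR protocol: the low GHCB_INFO_BIT_WIDTH bits of the
                    // MSR carry the requested operation (GHCBInfo); the remaining
                    // high bits carry the operation's data (GHCBData).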
                    let ghcb_data = info.ghcb_msr >> GHCB_INFO_BIT_WIDTH;
                    let ghcb_msr = svm_ghcb_msr {
                        as_uint64: info.ghcb_msr,
                    };
                    // SAFETY: Accessing a union element from bindgen generated bindings.
                    let ghcb_op = unsafe { ghcb_msr.__bindgen_anon_2.ghcb_info() as u32 };
                    // Sanity check on the header fields before handling other operations.
                    assert!(info.header.intercept_access_type == HV_INTERCEPT_ACCESS_EXECUTE as u8);

                    match ghcb_op {
                        GHCB_INFO_HYP_FEATURE_REQUEST => {
                            // Pre-condition: GHCB data must be zero
                            assert!(ghcb_data == 0);
                            let mut ghcb_response = GHCB_INFO_HYP_FEATURE_RESPONSE as u64;
                            // Indicate support for basic SEV-SNP features
                            ghcb_response |=
                                (GHCB_HYP_FEATURE_SEV_SNP << GHCB_INFO_BIT_WIDTH) as u64;
                            // Indicate support for SEV-SNP AP creation
                            ghcb_response |= (GHCB_HYP_FEATURE_SEV_SNP_AP_CREATION
                                << GHCB_INFO_BIT_WIDTH)
                                as u64;
                            debug!(
                                "GHCB_INFO_HYP_FEATURE_REQUEST: Supported features: {:0x}",
                                ghcb_response
                            );
                            let arr_reg_name_value =
                                [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                        }
                        GHCB_INFO_REGISTER_REQUEST => {
                            let mut ghcb_gpa = hv_x64_register_sev_ghcb::default();

                            // Disable the previously used GHCB page.
                            self.disable_prev_ghcb_page()?;

                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            unsafe {
                                ghcb_gpa.__bindgen_anon_1.set_enabled(1);
                                ghcb_gpa
                                    .__bindgen_anon_1
                                    .set_page_number(ghcb_msr.__bindgen_anon_2.gpa_page_number());
                            }
                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            let reg_name_value = unsafe {
                                [(
                                    hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
                                    ghcb_gpa.as_uint64,
                                )]
                            };

                            set_registers_64!(self.fd, reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;

                            let mut resp_ghcb_msr = svm_ghcb_msr::default();
                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            unsafe {
                                resp_ghcb_msr
                                    .__bindgen_anon_2
                                    .set_ghcb_info(GHCB_INFO_REGISTER_RESPONSE as u64);
                                resp_ghcb_msr.__bindgen_anon_2.set_gpa_page_number(
                                    ghcb_msr.__bindgen_anon_2.gpa_page_number(),
                                );
                                debug!("GHCB GPA is {:x}", ghcb_gpa.as_uint64);
                            }
                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            let reg_name_value = unsafe {
                                [(
                                    hv_register_name_HV_X64_REGISTER_GHCB,
                                    resp_ghcb_msr.as_uint64,
                                )]
                            };

                            set_registers_64!(self.fd, reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                        }
                        GHCB_INFO_SEV_INFO_REQUEST => {
                            let sev_cpuid_function = 0x8000_001F;
                            let cpu_leaf = self
                                .fd
                                .get_cpuid_values(sev_cpuid_function, 0, 0, 0)
                                .unwrap();
                            let ebx = cpu_leaf[1];
                            // Bits 5:0 of EBX hold the page table encryption bit number
                            let pbit_encryption = (ebx & 0x3f) as u8;
                            let mut ghcb_response = GHCB_INFO_SEV_INFO_RESPONSE as u64;

                            // GHCBData[63:48] specifies the maximum GHCB protocol version supported
                            ghcb_response |= (GHCB_PROTOCOL_VERSION_MAX as u64) << 48;
                            // GHCBData[47:32] specifies the minimum GHCB protocol version supported
                            ghcb_response |= (GHCB_PROTOCOL_VERSION_MIN as u64) << 32;
                            // GHCBData[31:24] specifies the SEV page table encryption bit number.
                            ghcb_response |= (pbit_encryption as u64) << 24;

                            let arr_reg_name_value =
                                [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                        }
                        GHCB_INFO_NORMAL => {
                            let exit_code =
                                info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_code as u32;
                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            let pfn = unsafe { ghcb_msr.__bindgen_anon_2.gpa_page_number() };
                            let ghcb_gpa = pfn << GHCB_INFO_BIT_WIDTH;
                            match exit_code {
                                SVM_EXITCODE_HV_DOORBELL_PAGE => {
                                    let exit_info1 =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32;
                                    match exit_info1 {
                                        SVM_NAE_HV_DOORBELL_PAGE_GET_PREFERRED => {
                                            // Hypervisor does not have any preference for doorbell GPA.
                                            let preferred_doorbell_gpa: u64 = 0xFFFFFFFFFFFFFFFF;
                                            self.gpa_write(
                                                ghcb_gpa + GHCB_SW_EXITINFO2_OFFSET,
                                                &preferred_doorbell_gpa.to_le_bytes(),
                                            )?;
                                        }
                                        SVM_NAE_HV_DOORBELL_PAGE_SET => {
                                            let exit_info2 = info
                                                .__bindgen_anon_2
                                                .__bindgen_anon_1
                                                .sw_exit_info2;
                                            let mut ghcb_doorbell_gpa =
                                                hv_x64_register_sev_hv_doorbell::default();
                                            // SAFETY: Accessing a union element from bindgen generated bindings.
                                            unsafe {
                                                ghcb_doorbell_gpa.__bindgen_anon_1.set_enabled(1);
                                                ghcb_doorbell_gpa
                                                    .__bindgen_anon_1
                                                    .set_page_number(exit_info2 >> PAGE_SHIFT);
                                            }
                                            // SAFETY: Accessing a union element from bindgen generated bindings.
                                            let reg_names = unsafe {
                                                [(
                                                    hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA,
                                                    ghcb_doorbell_gpa.as_uint64,
                                                )]
                                            };
                                            set_registers_64!(self.fd, reg_names).map_err(|e| {
                                                cpu::HypervisorCpuError::SetRegister(e.into())
                                            })?;

                                            self.gpa_write(
                                                ghcb_gpa + GHCB_SW_EXITINFO2_OFFSET,
                                                &exit_info2.to_le_bytes(),
                                            )?;

                                            // Clear the SW_EXIT_INFO1 register to indicate no error
                                            self.clear_swexit_info1(ghcb_gpa)?;
                                        }
                                        SVM_NAE_HV_DOORBELL_PAGE_QUERY => {
                                            let mut reg_assocs = [hv_register_assoc {
                                                name: hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA,
                                                ..Default::default()
                                            }];
                                            self.fd.get_reg(&mut reg_assocs).unwrap();
                                            // SAFETY: Accessing a union element from bindgen generated bindings.
                                            let doorbell_gpa = unsafe { reg_assocs[0].value.reg64 };

                                            self.gpa_write(
                                                ghcb_gpa + GHCB_SW_EXITINFO2_OFFSET,
                                                &doorbell_gpa.to_le_bytes(),
                                            )?;

                                            // Clear the SW_EXIT_INFO1 register to indicate no error
                                            self.clear_swexit_info1(ghcb_gpa)?;
                                        }
                                        SVM_NAE_HV_DOORBELL_PAGE_CLEAR => {
                                            self.gpa_write(
                                                ghcb_gpa + GHCB_SW_EXITINFO2_OFFSET,
                                                &[0; 8],
                                            )?;
                                        }
                                        _ => {
                                            panic!(
                                                "SVM_EXITCODE_HV_DOORBELL_PAGE: Unhandled exit code: {:0x}",
                                                exit_info1
                                            );
                                        }
                                    }
                                }
                                SVM_EXITCODE_IOIO_PROT => {
                                    let exit_info1 =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32;
                                    let port_info = hv_sev_vmgexit_port_info {
                                        as_uint32: exit_info1,
                                    };

                                    let port =
                                        // SAFETY: Accessing a union element from bindgen generated bindings.
                                        unsafe { port_info.__bindgen_anon_1.intercepted_port() };
                                    let mut len = 4;
                                    // SAFETY: Accessing a union element from bindgen generated bindings.
                                    unsafe {
                                        if port_info.__bindgen_anon_1.operand_size_16bit() == 1 {
                                            len = 2;
                                        } else if port_info.__bindgen_anon_1.operand_size_8bit()
                                            == 1
                                        {
                                            len = 1;
                                        }
                                    }
                                    let is_write =
                                        // SAFETY: Accessing a union element from bindgen generated bindings.
                                        unsafe { port_info.__bindgen_anon_1.access_type() == 0 };

                                    let mut data = [0; 8];
                                    self.gpa_read(ghcb_gpa + GHCB_RAX_OFFSET, &mut data)?;

                                    if is_write {
                                        if let Some(vm_ops) = &self.vm_ops {
                                            vm_ops.pio_write(port.into(), &data[..len]).map_err(
                                                |e| cpu::HypervisorCpuError::RunVcpu(e.into()),
                                            )?;
                                        }
                                    } else {
                                        if let Some(vm_ops) = &self.vm_ops {
                                            vm_ops
                                                .pio_read(port.into(), &mut data[..len])
                                                .map_err(|e| {
                                                    cpu::HypervisorCpuError::RunVcpu(e.into())
                                                })?;
                                        }

                                        self.gpa_write(ghcb_gpa + GHCB_RAX_OFFSET, &data)?;
                                    }

                                    // Clear the SW_EXIT_INFO1 register to indicate no error
                                    self.clear_swexit_info1(ghcb_gpa)?;
                                }
                                SVM_EXITCODE_MMIO_READ => {
                                    let src_gpa =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
                                    let dst_gpa = info.__bindgen_anon_2.__bindgen_anon_1.sw_scratch;
                                    let data_len =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2
                                            as usize;
                                    // Sanity check to make sure data len is within supported range.
                                    assert!(data_len <= 0x8);

                                    let mut data: Vec<u8> = vec![0; data_len];
                                    if let Some(vm_ops) = &self.vm_ops {
                                        vm_ops.mmio_read(src_gpa, &mut data).map_err(|e| {
                                            cpu::HypervisorCpuError::RunVcpu(e.into())
                                        })?;
                                    }

                                    self.gpa_write(dst_gpa, &data)?;

                                    // Clear the SW_EXIT_INFO1 register to indicate no error
                                    self.clear_swexit_info1(ghcb_gpa)?;
                                }
                                SVM_EXITCODE_MMIO_WRITE => {
                                    let dst_gpa =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
                                    let src_gpa = info.__bindgen_anon_2.__bindgen_anon_1.sw_scratch;
                                    let data_len =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2
                                            as usize;
                                    // Sanity check to make sure data len is within supported range.
                                    assert!(data_len <= 0x8);

                                    let mut data = vec![0; data_len];
                                    self.gpa_read(src_gpa, &mut data)?;

                                    if let Some(vm_ops) = &self.vm_ops {
                                        vm_ops.mmio_write(dst_gpa, &data).map_err(|e| {
                                            cpu::HypervisorCpuError::RunVcpu(e.into())
                                        })?;
                                    }

                                    // Clear the SW_EXIT_INFO1 register to indicate no error
                                    self.clear_swexit_info1(ghcb_gpa)?;
                                }
                                SVM_EXITCODE_SNP_GUEST_REQUEST
                                | SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST => {
                                    if exit_code == SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST {
                                        info!("Fetching extended guest request is not supported");
                                        // We don't support extended guest request, so we just write empty data.
                                        // This matches the behavior of KVM in Linux 6.11.

                                        // Read RAX & RBX from the GHCB.
                                        let mut data = [0; 8];
                                        self.gpa_read(ghcb_gpa + GHCB_RAX_OFFSET, &mut data)?;
                                        let data_gpa = u64::from_le_bytes(data);
                                        self.gpa_read(ghcb_gpa + GHCB_RBX_OFFSET, &mut data)?;
                                        let data_npages = u64::from_le_bytes(data);

                                        if data_npages > 0 {
                                            // The certificates are terminated by 24 zero bytes.
                                            self.gpa_write(data_gpa, &[0; 24])?;
                                        }
                                    }

                                    let req_gpa =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
                                    let rsp_gpa =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2;

                                    let mshv_psp_req =
                                        mshv_issue_psp_guest_request { req_gpa, rsp_gpa };
                                    self.vm_fd
                                        .psp_issue_guest_request(&mshv_psp_req)
                                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                                    debug!(
                                        "SNP guest request: req_gpa {:0x} rsp_gpa {:0x}",
                                        req_gpa, rsp_gpa
                                    );

                                    self.gpa_write(ghcb_gpa + GHCB_SW_EXITINFO2_OFFSET, &[0; 8])?;
                                }
                                SVM_EXITCODE_SNP_AP_CREATION => {
                                    let vmsa_gpa =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2;
                                    let apic_id =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 >> 32;
                                    debug!(
                                        "SNP AP CREATE REQUEST with VMSA GPA {:0x}, and APIC ID {:?}",
                                        vmsa_gpa, apic_id
                                    );

                                    let mshv_ap_create_req = mshv_sev_snp_ap_create {
                                        vp_id: apic_id,
                                        vmsa_gpa,
                                    };
                                    self.vm_fd
                                        .sev_snp_ap_create(&mshv_ap_create_req)
                                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                                    // Clear the SW_EXIT_INFO1 register to indicate no error
                                    self.clear_swexit_info1(ghcb_gpa)?;
                                }
                                _ => panic!(
                                    "GHCB_INFO_NORMAL: Unhandled exit code: {:0x}",
                                    exit_code
                                ),
                            }
                        }
                        _ => panic!("Unsupported VMGEXIT operation: {:0x}", ghcb_op),
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unhandled VCPU exit {:?}",
                    exit
                ))),
            },

            Err(e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
1148 
1149     #[cfg(target_arch = "aarch64")]
1150     fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
1151         unimplemented!()
1152     }
1153 
1154     #[cfg(target_arch = "aarch64")]
1155     fn has_pmu_support(&self) -> bool {
1156         unimplemented!()
1157     }
1158 
1159     #[cfg(target_arch = "aarch64")]
1160     fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
1161         unimplemented!()
1162     }
1163 
1164     #[cfg(target_arch = "aarch64")]
1165     fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
1166         unimplemented!()
1167     }
1168 
1169     #[cfg(target_arch = "aarch64")]
1170     fn get_reg_list(&self, _reg_list: &mut RegList) -> cpu::Result<()> {
1171         unimplemented!()
1172     }
1173 
1174     #[cfg(target_arch = "aarch64")]
1175     fn vcpu_init(&self, _kvi: &VcpuInit) -> cpu::Result<()> {
1176         unimplemented!()
1177     }
1178 
1179     #[cfg(target_arch = "aarch64")]
1180     fn set_regs(&self, _regs: &StandardRegisters) -> cpu::Result<()> {
1181         unimplemented!()
1182     }
1183 
1184     #[cfg(target_arch = "aarch64")]
1185     fn get_regs(&self) -> cpu::Result<StandardRegisters> {
1186         unimplemented!()
1187     }
1188 
1189     #[cfg(target_arch = "x86_64")]
1190     ///
1191     /// X86 specific call to set up the CPUID registers.
1192     ///
1193     fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
1194         let cpuid: Vec<mshv_bindings::hv_cpuid_entry> = cpuid.iter().map(|e| (*e).into()).collect();
1195         let mshv_cpuid = <CpuId>::from_entries(&cpuid)
1196             .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;
1197 
1198         self.fd
1199             .register_intercept_result_cpuid(&mshv_cpuid)
1200             .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
1201     }
1202 
1203     #[cfg(target_arch = "x86_64")]
1204     ///
1205     /// X86 specific call to retrieve the CPUID registers.
1206     ///
1207     fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
1208         Ok(self.cpuid.clone())
1209     }
1210 
1211     #[cfg(target_arch = "x86_64")]
1212     ///
1213     /// X86 specific call to retrieve the values of a CPUID leaf.
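    ///
    /// A minimal usage sketch (the leaf and xfem/xss values are illustrative):
    ///
    /// ```ignore
    /// // Query CPUID leaf 1 with XFEM and XSS of 0.
    /// let [eax, ebx, ecx, edx] = vcpu.get_cpuid_values(0x1, 0, 0, 0)?;
    /// ```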
1214     ///
1215     fn get_cpuid_values(
1216         &self,
1217         function: u32,
1218         index: u32,
1219         xfem: u64,
1220         xss: u64,
1221     ) -> cpu::Result<[u32; 4]> {
1222         self.fd
1223             .get_cpuid_values(function, index, xfem, xss)
1224             .map_err(|e| cpu::HypervisorCpuError::GetCpuidVales(e.into()))
1225     }
1226 
1227     #[cfg(target_arch = "x86_64")]
1228     ///
1229     /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1230     ///
1231     fn get_lapic(&self) -> cpu::Result<crate::arch::x86::LapicState> {
1232         Ok(self
1233             .fd
1234             .get_lapic()
1235             .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
1236             .into())
1237     }
1238 
1239     #[cfg(target_arch = "x86_64")]
1240     ///
1241     /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1242     ///
1243     fn set_lapic(&self, lapic: &crate::arch::x86::LapicState) -> cpu::Result<()> {
1244         let lapic: mshv_bindings::LapicState = (*lapic).clone().into();
1245         self.fd
1246             .set_lapic(&lapic)
1247             .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
1248     }
1249 
1250     ///
1251     /// Returns the vcpu's current "multiprocessing state".
1252     ///
1253     fn get_mp_state(&self) -> cpu::Result<MpState> {
1254         Ok(MpState::Mshv)
1255     }
1256 
1257     ///
1258     /// Sets the vcpu's current "multiprocessing state".
1259     ///
1260     fn set_mp_state(&self, _mp_state: MpState) -> cpu::Result<()> {
1261         Ok(())
1262     }
1263 
1264     #[cfg(target_arch = "x86_64")]
1265     ///
1266     /// Set CPU state for x86_64 guest.
1267     ///
1268     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
1269         let mut state: VcpuMshvState = state.clone().into();
1270         self.set_msrs(&state.msrs)?;
1271         self.set_vcpu_events(&state.vcpu_events)?;
1272         self.set_regs(&state.regs.into())?;
1273         self.set_sregs(&state.sregs.into())?;
1274         self.set_fpu(&state.fpu)?;
1275         self.set_xcrs(&state.xcrs)?;
1276         // These registers are global and need to be set only for the first vCPU,
1277         // as the Microsoft Hypervisor allows setting them for only one vCPU.
1278         if self.vp_index == 0 {
1279             self.fd
1280                 .set_misc_regs(&state.misc)
1281                 .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))?
1282         }
1283         self.fd
1284             .set_debug_regs(&state.dbg)
1285             .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
1286         self.fd
1287             .set_all_vp_state_components(&mut state.vp_states)
1288             .map_err(|e| cpu::HypervisorCpuError::SetAllVpStateComponents(e.into()))?;
1289         Ok(())
1290     }
1291 
1292     #[cfg(target_arch = "aarch64")]
1293     ///
1294     /// Set CPU state for aarch64 guest.
1295     ///
1296     fn set_state(&self, _state: &CpuState) -> cpu::Result<()> {
1297         unimplemented!()
1298     }
1299 
1300     #[cfg(target_arch = "x86_64")]
1301     ///
1302     /// Get CPU State for x86_64 guest
1303     ///
1304     fn state(&self) -> cpu::Result<CpuState> {
1305         let regs = self.get_regs()?;
1306         let sregs = self.get_sregs()?;
1307         let xcrs = self.get_xcrs()?;
1308         let fpu = self.get_fpu()?;
1309         let vcpu_events = self.get_vcpu_events()?;
1310         let mut msrs = self.msrs.clone();
1311         self.get_msrs(&mut msrs)?;
1312         let misc = self
1313             .fd
1314             .get_misc_regs()
1315             .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?;
1316         let dbg = self
1317             .fd
1318             .get_debug_regs()
1319             .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;
1320         let vp_states = self
1321             .fd
1322             .get_all_vp_state_components()
1323             .map_err(|e| cpu::HypervisorCpuError::GetAllVpStateComponents(e.into()))?;
1324 
1325         Ok(VcpuMshvState {
1326             msrs,
1327             vcpu_events,
1328             regs: regs.into(),
1329             sregs: sregs.into(),
1330             fpu,
1331             xcrs,
1332             dbg,
1333             misc,
1334             vp_states,
1335         }
1336         .into())
1337     }
1338 
1339     #[cfg(target_arch = "aarch64")]
1340     ///
1341     /// Get CPU state for aarch64 guest.
1342     ///
1343     fn state(&self) -> cpu::Result<CpuState> {
1344         unimplemented!()
1345     }
1346 
1347     #[cfg(target_arch = "x86_64")]
1348     ///
1349     /// Translate guest virtual address to guest physical address
1350     ///
1351     fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> {
1352         let r = self
1353             .fd
1354             .translate_gva(gva, flags)
1355             .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
1356 
1357         let gpa = r.0;
1358         // SAFETY: r is valid; otherwise this function would have returned early.
1359         let result_code = unsafe { r.1.__bindgen_anon_1.result_code };
1360 
1361         Ok((gpa, result_code))
1362     }
1363 
1364     #[cfg(target_arch = "x86_64")]
1365     ///
1366     /// Return the list of initial MSR entries for a VCPU
1367     ///
1368     fn boot_msr_entries(&self) -> Vec<MsrEntry> {
1369         use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};
1370 
1371         [
1372             msr!(msr_index::MSR_IA32_SYSENTER_CS),
1373             msr!(msr_index::MSR_IA32_SYSENTER_ESP),
1374             msr!(msr_index::MSR_IA32_SYSENTER_EIP),
1375             msr!(msr_index::MSR_STAR),
1376             msr!(msr_index::MSR_CSTAR),
1377             msr!(msr_index::MSR_LSTAR),
1378             msr!(msr_index::MSR_KERNEL_GS_BASE),
1379             msr!(msr_index::MSR_SYSCALL_MASK),
1380             msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
1381         ]
1382         .to_vec()
1383     }
1384 
1385     ///
1386     /// Sets the AMD specific vcpu's sev control register.
1387     ///
1388     #[cfg(feature = "sev_snp")]
1389     fn set_sev_control_register(&self, vmsa_pfn: u64) -> cpu::Result<()> {
1390         let sev_control_reg = snp::get_sev_control_register(vmsa_pfn);
1391 
1392         self.fd
1393             .set_sev_control_register(sev_control_reg)
1394             .map_err(|e| cpu::HypervisorCpuError::SetSevControlRegister(e.into()))
1395     }

1396     #[cfg(target_arch = "x86_64")]
1397     ///
1398     /// Trigger NMI interrupt
1399     ///
1400     fn nmi(&self) -> cpu::Result<()> {
1401         let cfg = InterruptRequest {
1402             interrupt_type: hv_interrupt_type_HV_X64_INTERRUPT_TYPE_NMI,
1403             apic_id: self.vp_index as u64,
1404             level_triggered: false,
1405             vector: 0,
1406             logical_destination_mode: false,
1407             long_mode: false,
1408         };
1409         self.vm_fd
1410             .request_virtual_interrupt(&cfg)
1411             .map_err(|e| cpu::HypervisorCpuError::Nmi(e.into()))
1412     }
1413 }
1414 
1415 impl MshvVcpu {
1416     ///
1417     /// Deactivate previously used GHCB page.
1418     ///
1419     #[cfg(feature = "sev_snp")]
1420     fn disable_prev_ghcb_page(&self) -> cpu::Result<()> {
1421         let mut reg_assocs = [hv_register_assoc {
1422             name: hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
1423             ..Default::default()
1424         }];
1425         self.fd.get_reg(&mut reg_assocs).unwrap();
1426         // SAFETY: Accessing a union element from bindgen generated bindings.
1427         let prev_ghcb_gpa = unsafe { reg_assocs[0].value.reg64 };
1428 
1429         debug!("Prev GHCB GPA is {:x}", prev_ghcb_gpa);
1430 
1431         let mut ghcb_gpa = hv_x64_register_sev_ghcb::default();
1432 
1433         // SAFETY: Accessing a union element from bindgen generated bindings.
1434         unsafe {
1435             ghcb_gpa.__bindgen_anon_1.set_enabled(0);
1436             ghcb_gpa.__bindgen_anon_1.set_page_number(prev_ghcb_gpa);
1437         }
1438 
1439         // SAFETY: Accessing a union element from bindgen generated bindings.
1440         let reg_name_value = unsafe {
1441             [(
1442                 hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
1443                 ghcb_gpa.as_uint64,
1444             )]
1445         };
1446 
1447         set_registers_64!(self.fd, reg_name_value)
1448             .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
1449 
1450         Ok(())
1451     }

1452     #[cfg(target_arch = "x86_64")]
1453     ///
1454     /// X86 specific call that returns the vcpu's current "xcrs".
1455     ///
1456     fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
1457         self.fd
1458             .get_xcrs()
1459             .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
1460     }
1461 
1462     #[cfg(target_arch = "x86_64")]
1463     ///
1464     /// X86 specific call that sets the vcpu's current "xcrs".
1465     ///
1466     fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
1467         self.fd
1468             .set_xcrs(xcrs)
1469             .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
1470     }
1471 
1472     #[cfg(target_arch = "x86_64")]
1473     ///
1474     /// Returns currently pending exceptions, interrupts, and NMIs as well as related
1475     /// states of the vcpu.
1476     ///
1477     fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
1478         self.fd
1479             .get_vcpu_events()
1480             .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
1481     }
1482 
1483     #[cfg(target_arch = "x86_64")]
1484     ///
1485     /// Sets pending exceptions, interrupts, and NMIs as well as related states
1486     /// of the vcpu.
1487     ///
1488     fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
1489         self.fd
1490             .set_vcpu_events(events)
1491             .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
1492     }
1493 
1494     ///
1495     /// Clear SW_EXIT_INFO1 register for SEV-SNP guests.
1496     ///
1497     #[cfg(feature = "sev_snp")]
1498     fn clear_swexit_info1(&self, ghcb_gpa: u64) -> cpu::Result<cpu::VmExit> {
1502         // Clear the SW_EXIT_INFO1 register to indicate no error
1503         self.gpa_write(ghcb_gpa + GHCB_SW_EXITINFO1_OFFSET, &[0; 4])?;
1504 
1505         Ok(cpu::VmExit::Ignore)
1506     }
1507 
1508     #[cfg(feature = "sev_snp")]
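    /// Reads guest memory at `gpa` into `data`, splitting the transfer into
    /// chunks of at most `HV_READ_WRITE_GPA_MAX_SIZE` bytes per ioctl.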
1509     fn gpa_read(&self, gpa: u64, data: &mut [u8]) -> cpu::Result<()> {
1510         for (gpa, chunk) in (gpa..)
1511             .step_by(HV_READ_WRITE_GPA_MAX_SIZE as usize)
1512             .zip(data.chunks_mut(HV_READ_WRITE_GPA_MAX_SIZE as usize))
1513         {
1514             let mut rw_gpa_arg = mshv_bindings::mshv_read_write_gpa {
1515                 base_gpa: gpa,
1516                 byte_count: chunk.len() as u32,
1517                 ..Default::default()
1518             };
1519             self.fd
1520                 .gpa_read(&mut rw_gpa_arg)
1521                 .map_err(|e| cpu::HypervisorCpuError::GpaRead(e.into()))?;
1522 
1523             chunk.copy_from_slice(&rw_gpa_arg.data[..chunk.len()]);
1524         }
1525 
1526         Ok(())
1527     }
1528 
1529     #[cfg(feature = "sev_snp")]
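    /// Writes `data` to guest memory at `gpa`, splitting the transfer into
    /// chunks of at most `HV_READ_WRITE_GPA_MAX_SIZE` bytes per ioctl.
    ///
    /// A minimal usage sketch, mirroring how the VMGEXIT handlers above
    /// clear GHCB fields:
    ///
    /// ```ignore
    /// // Zero the 8-byte SW_EXIT_INFO2 field of the GHCB page.
    /// self.gpa_write(ghcb_gpa + GHCB_SW_EXITINFO2_OFFSET, &[0u8; 8])?;
    /// ```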
1530     fn gpa_write(&self, gpa: u64, data: &[u8]) -> cpu::Result<()> {
1531         for (gpa, chunk) in (gpa..)
1532             .step_by(HV_READ_WRITE_GPA_MAX_SIZE as usize)
1533             .zip(data.chunks(HV_READ_WRITE_GPA_MAX_SIZE as usize))
1534         {
1535             let mut data = [0; HV_READ_WRITE_GPA_MAX_SIZE as usize];
1536             data[..chunk.len()].copy_from_slice(chunk);
1537 
1538             let mut rw_gpa_arg = mshv_bindings::mshv_read_write_gpa {
1539                 base_gpa: gpa,
1540                 byte_count: chunk.len() as u32,
1541                 data,
1542                 ..Default::default()
1543             };
1544             self.fd
1545                 .gpa_write(&mut rw_gpa_arg)
1546                 .map_err(|e| cpu::HypervisorCpuError::GpaWrite(e.into()))?;
1547         }
1548 
1549         Ok(())
1550     }
1551 }
1552 
1553 /// Wrapper over Mshv VM ioctls.
1554 pub struct MshvVm {
1555     fd: Arc<VmFd>,
1556     #[cfg(target_arch = "x86_64")]
1557     msrs: Vec<MsrEntry>,
1558     dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
1559     #[cfg(feature = "sev_snp")]
1560     sev_snp_enabled: bool,
1561 }
1562 
1563 impl MshvVm {
1564     ///
1565     /// Creates an in-kernel device.
1566     ///
1567     /// See the documentation for `MSHV_CREATE_DEVICE`.
1568     fn create_device(&self, device: &mut CreateDevice) -> vm::Result<VfioDeviceFd> {
1569         let device_fd = self
1570             .fd
1571             .create_device(device)
1572             .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
1573         Ok(VfioDeviceFd::new_from_mshv(device_fd))
1574     }
1575 }
1576 
1577 ///
1578 /// Implementation of Vm trait for Mshv
1579 ///
1580 /// # Examples
1581 ///
1582 /// ```
1583 /// # extern crate hypervisor;
1584 /// # use hypervisor::mshv::MshvHypervisor;
1585 /// # use std::sync::Arc;
1586 /// let mshv = MshvHypervisor::new().unwrap();
1587 /// let hypervisor = Arc::new(mshv);
1588 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
1589 /// ```
1590 impl vm::Vm for MshvVm {
1591     #[cfg(target_arch = "x86_64")]
1592     ///
1593     /// Sets the address of the one-page region in the VM's address space.
1594     ///
1595     fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> {
1596         Ok(())
1597     }
1598 
1599     #[cfg(target_arch = "x86_64")]
1600     ///
1601     /// Sets the address of the three-page region in the VM's address space.
1602     ///
1603     fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
1604         Ok(())
1605     }
1606 
1607     ///
1608     /// Creates an in-kernel interrupt controller. This is a no-op on MSHV.
1609     ///
1610     fn create_irq_chip(&self) -> vm::Result<()> {
1611         Ok(())
1612     }
1613 
1614     ///
1615     /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
1616     ///
1617     fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
1618         debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);
1619 
1620         self.fd
1621             .register_irqfd(fd, gsi)
1622             .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;
1623 
1624         Ok(())
1625     }
1626 
1627     ///
1628     /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
1629     ///
1630     fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
1631         debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);
1632 
1633         self.fd
1634             .unregister_irqfd(fd, gsi)
1635             .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;
1636 
1637         Ok(())
1638     }
1639 
1640     ///
1641     /// Creates a vCPU for the given `id` and returns it as a `cpu::Vcpu` trait object.
1642     ///
1643     fn create_vcpu(
1644         &self,
1645         id: u8,
1646         vm_ops: Option<Arc<dyn VmOps>>,
1647     ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
1648         let vcpu_fd = self
1649             .fd
1650             .create_vcpu(id)
1651             .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
1652         let vcpu = MshvVcpu {
1653             fd: vcpu_fd,
1654             vp_index: id,
1655             #[cfg(target_arch = "x86_64")]
1656             cpuid: Vec::new(),
1657             #[cfg(target_arch = "x86_64")]
1658             msrs: self.msrs.clone(),
1659             vm_ops,
1660             vm_fd: self.fd.clone(),
1661         };
1662         Ok(Arc::new(vcpu))
1663     }
1664 
1665     #[cfg(target_arch = "x86_64")]
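    /// No-op on MSHV; kept to satisfy the `vm::Vm` trait.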
1666     fn enable_split_irq(&self) -> vm::Result<()> {
1667         Ok(())
1668     }
1669 
1670     #[cfg(target_arch = "x86_64")]
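    /// No-op on MSHV; the SGX attribute file is currently ignored.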
1671     fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
1672         Ok(())
1673     }
1674 
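    /// Registers `fd` to be signaled when the guest accesses `addr`,
    /// optionally only when the written value matches `datamatch`.
    /// This is a no-op for SEV-SNP guests.
    ///
    /// A minimal usage sketch (the address and datamatch value are illustrative):
    ///
    /// ```ignore
    /// // Signal `efd` when the guest writes the 32-bit value 1 to this MMIO address.
    /// vm.register_ioevent(
    ///     &efd,
    ///     &IoEventAddress::Mmio(0xd000_0000),
    ///     Some(DataMatch::DataMatch32(1)),
    /// )?;
    /// ```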
1675     fn register_ioevent(
1676         &self,
1677         fd: &EventFd,
1678         addr: &IoEventAddress,
1679         datamatch: Option<DataMatch>,
1680     ) -> vm::Result<()> {
1681         #[cfg(feature = "sev_snp")]
1682         if self.sev_snp_enabled {
1683             return Ok(());
1684         }
1685 
1686         let addr = &mshv_ioctls::IoEventAddress::from(*addr);
1687         debug!(
1688             "register_ioevent fd {} addr {:x?} datamatch {:?}",
1689             fd.as_raw_fd(),
1690             addr,
1691             datamatch
1692         );
1693         if let Some(dm) = datamatch {
1694             match dm {
1695                 vm::DataMatch::DataMatch32(mshv_dm32) => self
1696                     .fd
1697                     .register_ioevent(fd, addr, mshv_dm32)
1698                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
1699                 vm::DataMatch::DataMatch64(mshv_dm64) => self
1700                     .fd
1701                     .register_ioevent(fd, addr, mshv_dm64)
1702                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
1703             }
1704         } else {
1705             self.fd
1706                 .register_ioevent(fd, addr, NoDatamatch)
1707                 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
1708         }
1709     }
1710 
1711     /// Unregisters an event from the address it has previously been registered to.
1712     fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
1713         #[cfg(feature = "sev_snp")]
1714         if self.sev_snp_enabled {
1715             return Ok(());
1716         }
1717 
1718         let addr = &mshv_ioctls::IoEventAddress::from(*addr);
1719         debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);
1720 
1721         self.fd
1722             .unregister_ioevent(fd, addr, NoDatamatch)
1723             .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
1724     }
1725 
1726     /// Creates a guest physical memory region.
1727     fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
1728         let user_memory_region: mshv_user_mem_region = user_memory_region.into();
1729         // We keep track of the slots whether they are read-only or not.
1730         // For read-only slots the hypervisor can still enable the dirty bits,
1731         // but a VM exit happens before the dirty bits are set.
1732         self.dirty_log_slots.write().unwrap().insert(
1733             user_memory_region.guest_pfn,
1734             MshvDirtyLogSlot {
1735                 guest_pfn: user_memory_region.guest_pfn,
1736                 memory_size: user_memory_region.size,
1737             },
1738         );
1739 
1740         self.fd
1741             .map_user_memory(user_memory_region)
1742             .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
1743         Ok(())
1744     }
1745 
1746     /// Removes a guest physical memory region.
1747     fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
1748         let user_memory_region: mshv_user_mem_region = user_memory_region.into();
1749         // Remove the corresponding entry from "self.dirty_log_slots" if needed
1750         self.dirty_log_slots
1751             .write()
1752             .unwrap()
1753             .remove(&user_memory_region.guest_pfn);
1754 
1755         self.fd
1756             .unmap_user_memory(user_memory_region)
1757             .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
1758         Ok(())
1759     }
1760 
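    /// Builds a `UserMemoryRegion` for MSHV. Regions are always marked
    /// executable, writability depends on `readonly`, and the slot and
    /// dirty-page-logging arguments are unused by this hypervisor.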
1761     fn make_user_memory_region(
1762         &self,
1763         _slot: u32,
1764         guest_phys_addr: u64,
1765         memory_size: u64,
1766         userspace_addr: u64,
1767         readonly: bool,
1768         _log_dirty_pages: bool,
1769     ) -> UserMemoryRegion {
1770         let mut flags = 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
1771         if !readonly {
1772             flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
1773         }
1774 
1775         mshv_user_mem_region {
1776             flags,
1777             guest_pfn: guest_phys_addr >> PAGE_SHIFT,
1778             size: memory_size,
1779             userspace_addr,
1780             ..Default::default()
1781         }
1782         .into()
1783     }
1784 
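    /// Creates an in-kernel VFIO device, used as the passthrough device.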
1785     fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
1786         let mut vfio_dev = mshv_create_device {
1787             type_: mshv_device_type_MSHV_DEV_TYPE_VFIO,
1788             fd: 0,
1789             flags: 0,
1790         };
1791 
1792         self.create_device(&mut vfio_dev)
1793             .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
1794     }
1795 
1796     ///
1797     /// Constructs a routing entry
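    ///
    /// A minimal usage sketch (`msi_cfg` is a hypothetical `MsiIrqSourceConfig`):
    ///
    /// ```ignore
    /// let entry = vm.make_routing_entry(5, &InterruptSourceConfig::MsiIrq(msi_cfg));
    /// vm.set_gsi_routing(&[entry])?;
    /// ```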
1798     ///
1799     fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
1800         match config {
1801             InterruptSourceConfig::MsiIrq(cfg) => mshv_user_irq_entry {
1802                 gsi,
1803                 address_lo: cfg.low_addr,
1804                 address_hi: cfg.high_addr,
1805                 data: cfg.data,
1806             }
1807             .into(),
1808             _ => {
1809                 unreachable!()
1810             }
1811         }
1812     }
1813 
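    /// Replaces the partition's MSI routing table with `entries`. The table
    /// ends in a flexible array member, so it is allocated via
    /// `vec_with_array_field` and filled in behind an unsafe slice.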
1814     fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
1815         let mut msi_routing =
1816             vec_with_array_field::<mshv_user_irq_table, mshv_user_irq_entry>(entries.len());
1817         msi_routing[0].nr = entries.len() as u32;
1818 
1819         let entries: Vec<mshv_user_irq_entry> = entries
1820             .iter()
1821             .map(|entry| match entry {
1822                 IrqRoutingEntry::Mshv(e) => *e,
1823                 #[allow(unreachable_patterns)]
1824                 _ => panic!("IrqRoutingEntry type is wrong"),
1825             })
1826             .collect();
1827 
1828         // SAFETY: msi_routing initialized with entries.len() and now it is being turned into
1829         // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
1830         // everything from entries.
1831         unsafe {
1832             let entries_slice: &mut [mshv_user_irq_entry] =
1833                 msi_routing[0].entries.as_mut_slice(entries.len());
1834             entries_slice.copy_from_slice(&entries);
1835         }
1836 
1837         self.fd
1838             .set_msi_routing(&msi_routing[0])
1839             .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
1840     }
1841 
1842     ///
1843     /// Start logging dirty pages
1844     ///
1845     fn start_dirty_log(&self) -> vm::Result<()> {
1846         self.fd
1847             .enable_dirty_page_tracking()
1848             .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
1849     }
1850 
1851     ///
1852     /// Stop logging dirty pages
1853     ///
1854     fn stop_dirty_log(&self) -> vm::Result<()> {
1855         let dirty_log_slots = self.dirty_log_slots.read().unwrap();
1856         // Before disabling dirty page tracking we need to set the dirty
1857         // bits in the hypervisor; this is a requirement of the Microsoft
1858         // Hypervisor.
1859         for (_, s) in dirty_log_slots.iter() {
1860             self.fd
1861                 .get_dirty_log(
1862                     s.guest_pfn,
1863                     s.memory_size as usize,
1864                     MSHV_GPAP_ACCESS_OP_SET as u8,
1865                 )
1866                 .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
1867         }
1868         self.fd
1869             .disable_dirty_page_tracking()
1870             .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
1871         Ok(())
1872     }
1873 
1874     ///
1875     /// Get dirty pages bitmap (one bit per page)
1876     ///
1877     fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
1878         self.fd
1879             .get_dirty_log(
1880                 base_gpa >> PAGE_SHIFT,
1881                 memory_size as usize,
1882                 MSHV_GPAP_ACCESS_OP_CLEAR as u8,
1883             )
1884             .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
1885     }
1886 
1887     /// Retrieve guest clock.
1888     #[cfg(target_arch = "x86_64")]
1889     fn get_clock(&self) -> vm::Result<ClockData> {
1890         let val = self
1891             .fd
1892             .get_partition_property(hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME)
1893             .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?;
1894         Ok(MshvClockData { ref_time: val }.into())
1895     }
1896 
1897     /// Set guest clock.
1898     #[cfg(target_arch = "x86_64")]
1899     fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
1900         let data: MshvClockData = (*data).into();
1901         self.fd
1902             .set_partition_property(
1903                 hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME,
1904                 data.ref_time,
1905             )
1906             .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
1907     }
1908 
1909     /// Downcast to the underlying MshvVm type
1910     fn as_any(&self) -> &dyn Any {
1911         self
1912     }
1913 
1914     /// Initialize the SEV-SNP VM
1915     #[cfg(feature = "sev_snp")]
1916     fn sev_snp_init(&self) -> vm::Result<()> {
1917         self.fd
1918             .set_partition_property(
1919                 hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_STATE,
1920                 hv_partition_isolation_state_HV_PARTITION_ISOLATION_SECURE as u64,
1921             )
1922             .map_err(|e| vm::HypervisorVmError::InitializeSevSnp(e.into()))
1923     }
1924 
1925     ///
1926     /// Imports isolated pages; these pages will be used
1927     /// for the PSP (Platform Security Processor) measurement.
    ///
1928     #[cfg(feature = "sev_snp")]
1929     fn import_isolated_pages(
1930         &self,
1931         page_type: u32,
1932         page_size: u32,
1933         pages: &[u64],
1934     ) -> vm::Result<()> {
1935         debug_assert!(page_size == hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB);
1936         if pages.is_empty() {
1937             return Ok(());
1938         }
1939 
1940         let mut isolated_pages =
1941             vec_with_array_field::<mshv_import_isolated_pages, u64>(pages.len());
1942         isolated_pages[0].page_type = page_type as u8;
1943         isolated_pages[0].page_count = pages.len() as u64;
1944         // SAFETY: isolated_pages initialized with pages.len() and now it is being turned into
1945         // pages_slice with pages.len() again. It is guaranteed to be large enough to hold
1946         // everything from pages.
1947         unsafe {
1948             let pages_slice: &mut [u64] = isolated_pages[0].guest_pfns.as_mut_slice(pages.len());
1949             pages_slice.copy_from_slice(pages);
1950         }
1951         self.fd
1952             .import_isolated_pages(&isolated_pages[0])
1953             .map_err(|e| vm::HypervisorVmError::ImportIsolatedPages(e.into()))
1954     }
1955 
1956     ///
1957     /// Completes the isolated import, telling the hypervisor that
1958     /// importing the pages into guest memory is finished.
1959     ///
1960     #[cfg(feature = "sev_snp")]
1961     fn complete_isolated_import(
1962         &self,
1963         snp_id_block: IGVM_VHS_SNP_ID_BLOCK,
1964         host_data: [u8; 32],
1965         id_block_enabled: u8,
1966     ) -> vm::Result<()> {
1967         let mut auth_info = hv_snp_id_auth_info {
1968             id_key_algorithm: snp_id_block.id_key_algorithm,
1969             auth_key_algorithm: snp_id_block.author_key_algorithm,
1970             ..Default::default()
1971         };
1972         // Each of r/s component is 576 bits long
1973         // Each of the r and s signature components is 576 bits (72 bytes) long.
1974             .copy_from_slice(snp_id_block.id_key_signature.r_comp.as_ref());
1975         auth_info.id_block_signature
1976             [SIG_R_COMPONENT_SIZE_IN_BYTES..SIG_R_AND_S_COMPONENT_SIZE_IN_BYTES]
1977             .copy_from_slice(snp_id_block.id_key_signature.s_comp.as_ref());
1978         auth_info.id_key[..ECDSA_CURVE_ID_SIZE_IN_BYTES]
1979             .copy_from_slice(snp_id_block.id_public_key.curve.to_le_bytes().as_ref());
1980         auth_info.id_key[ECDSA_SIG_X_COMPONENT_START..ECDSA_SIG_X_COMPONENT_END]
1981             .copy_from_slice(snp_id_block.id_public_key.qx.as_ref());
1982         auth_info.id_key[ECDSA_SIG_Y_COMPONENT_START..ECDSA_SIG_Y_COMPONENT_END]
1983             .copy_from_slice(snp_id_block.id_public_key.qy.as_ref());
1984 
1985         let data = mshv_complete_isolated_import {
1986             import_data: hv_partition_complete_isolated_import_data {
1987                 psp_parameters: hv_psp_launch_finish_data {
1988                     id_block: hv_snp_id_block {
1989                         launch_digest: snp_id_block.ld,
1990                         family_id: snp_id_block.family_id,
1991                         image_id: snp_id_block.image_id,
1992                         version: snp_id_block.version,
1993                         guest_svn: snp_id_block.guest_svn,
1994                         policy: get_default_snp_guest_policy(),
1995                     },
1996                     id_auth_info: auth_info,
1997                     host_data,
1998                     id_block_enabled,
1999                     author_key_enabled: 0,
2000                 },
2001             },
2002         };
2003         self.fd
2004             .complete_isolated_import(&data)
2005             .map_err(|e| vm::HypervisorVmError::CompleteIsolatedImport(e.into()))
2006     }
2007 
2008     #[cfg(target_arch = "aarch64")]
2009     fn create_vgic(&self, _config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
2010         unimplemented!()
2011     }
2012 
2013     #[cfg(target_arch = "aarch64")]
2014     fn get_preferred_target(&self, _kvi: &mut VcpuInit) -> vm::Result<()> {
2015         unimplemented!()
2016     }
2017 
2018     /// Pause the VM
2019     fn pause(&self) -> vm::Result<()> {
2020         // Freeze the partition
2021         self.fd
2022             .set_partition_property(
2023                 hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
2024                 1u64,
2025             )
2026             .map_err(|e| {
2027                 vm::HypervisorVmError::SetVmProperty(anyhow!(
2028                     "Failed to set partition property: {}",
2029                     e
2030                 ))
2031             })
2032     }
2033 
2034     /// Resume the VM
2035     fn resume(&self) -> vm::Result<()> {
2036         // Unfreeze the partition by clearing the TIME_FREEZE property
2037         self.fd
2038             .set_partition_property(
2039                 hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
2040                 0u64,
2041             )
2042             .map_err(|e| {
2043                 vm::HypervisorVmError::SetVmProperty(anyhow!(
2044                     "Failed to set partition property: {}",
2045                     e
2046                 ))
2047             })
2048     }
2049 
2050     #[cfg(feature = "sev_snp")]
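    /// Gives the host read/write access to the guest physical pages covering
    /// `gpa..gpa + size`. Only meaningful for SEV-SNP guests; a no-op otherwise.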
2051     fn gain_page_access(&self, gpa: u64, size: u32) -> vm::Result<()> {
2052         use mshv_ioctls::set_bits;
2053 
2054         if !self.sev_snp_enabled {
2055             return Ok(());
2056         }
2057 
2058         let start_gpfn: u64 = gpa >> PAGE_SHIFT;
2059         let end_gpfn: u64 = (gpa + size as u64 - 1) >> PAGE_SHIFT;
2060 
2061         let gpas: Vec<u64> = (start_gpfn..=end_gpfn).map(|x| x << PAGE_SHIFT).collect();
2062 
2063         if !gpas.is_empty() {
2064             let mut gpa_list = vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len());
2065             gpa_list[0].page_count = gpas.len() as u64;
2066             gpa_list[0].flags = set_bits!(
2067                 u8,
2068                 MSHV_GPA_HOST_ACCESS_BIT_ACQUIRE,
2069                 MSHV_GPA_HOST_ACCESS_BIT_READABLE,
2070                 MSHV_GPA_HOST_ACCESS_BIT_WRITABLE
2071             );
2072 
2073             // SAFETY: gpa_list initialized with gpas.len() and now it is being turned into
2074             // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold
2075             // everything from gpas.
2076             unsafe {
2077                 let gpas_slice: &mut [u64] = gpa_list[0].guest_pfns.as_mut_slice(gpas.len());
2078                 gpas_slice.copy_from_slice(gpas.as_slice());
2079             }
2080 
2081             self.fd
2082                 .modify_gpa_host_access(&gpa_list[0])
2083                 .map_err(|e| vm::HypervisorVmError::ModifyGpaHostAccess(e.into()))?;
2084         }
2085 
2086         Ok(())
2087     }
2088 }
2089