// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//

use std::any::Any;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

use mshv_bindings::*;
use mshv_ioctls::{set_registers_64, InterruptRequest, Mshv, NoDatamatch, VcpuFd, VmFd, VmType};
use vfio_ioctls::VfioDeviceFd;
use vm::DataMatch;

use crate::arch::emulator::PlatformEmulator;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::emulator::Emulator;
use crate::cpu;
use crate::hypervisor;
use crate::mshv::emulator::MshvEmulatorContext;
use crate::vec_with_array_field;
use crate::vm::{self, InterruptSourceConfig, VmOps};
use crate::HypervisorType;
#[cfg(feature = "sev_snp")]
mod snp_constants;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
#[cfg(target_arch = "x86_64")]
use std::fs::File;
use std::os::unix::io::AsRawFd;

#[cfg(feature = "sev_snp")]
use igvm_defs::IGVM_VHS_SNP_ID_BLOCK;
#[cfg(feature = "sev_snp")]
use snp_constants::*;
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;
#[cfg(target_arch = "x86_64")]
pub use x86_64::{emulator, VcpuMshvState};
///
/// Export generically-named wrappers of mshv-bindings for Unix-based platforms
///
pub use {
    mshv_bindings::mshv_create_device as CreateDevice,
    mshv_bindings::mshv_device_attr as DeviceAttr, mshv_ioctls, mshv_ioctls::DeviceFd,
};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{CpuIdEntry, FpuState, MsrEntry};
#[cfg(target_arch = "x86_64")]
use crate::ClockData;
use crate::{
    CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
    USER_MEMORY_REGION_ADJUSTABLE, USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ,
    USER_MEMORY_REGION_WRITE,
};

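/// Guest pages are 4 KiB (1 << 12), so guest-physical addresses and page
/// frame numbers (PFNs) convert via shifts by this amount.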
pub const PAGE_SHIFT: usize = 12;

impl From<mshv_user_mem_region> for UserMemoryRegion {
    fn from(region: mshv_user_mem_region) -> Self {
        let mut flags: u32 = USER_MEMORY_REGION_READ | USER_MEMORY_REGION_ADJUSTABLE;
        if region.flags & (1 << MSHV_SET_MEM_BIT_WRITABLE) != 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & (1 << MSHV_SET_MEM_BIT_EXECUTABLE) != 0 {
            flags |= USER_MEMORY_REGION_EXECUTE;
        }

        UserMemoryRegion {
            guest_phys_addr: (region.guest_pfn << PAGE_SHIFT as u64)
                + (region.userspace_addr & ((1 << PAGE_SHIFT) - 1)),
            memory_size: region.size,
            userspace_addr: region.userspace_addr,
            flags,
            ..Default::default()
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<MshvClockData> for ClockData {
    fn from(d: MshvClockData) -> Self {
        ClockData::Mshv(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for MshvClockData {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Mshv(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => unreachable!("MSHV clock data is not valid"),
        }
    }
}

impl From<UserMemoryRegion> for mshv_user_mem_region {
    fn from(region: UserMemoryRegion) -> Self {
        let mut flags: u8 = 0;
        if region.flags & USER_MEMORY_REGION_WRITE != 0 {
            flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
        }
        if region.flags & USER_MEMORY_REGION_EXECUTE != 0 {
            flags |= 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
        }

        mshv_user_mem_region {
            guest_pfn: region.guest_phys_addr >> PAGE_SHIFT,
            size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
            ..Default::default()
        }
    }
}
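
// A minimal round-trip sketch of the two region conversions above, written
// as a unit test. Illustrative only: it assumes a page-aligned region, since
// converting through `guest_pfn` drops sub-page bits of `guest_phys_addr`.
#[cfg(test)]
mod user_memory_region_tests {
    use super::*;

    #[test]
    fn user_memory_region_round_trip() {
        let region = UserMemoryRegion {
            guest_phys_addr: 0x10_0000, // page-aligned
            memory_size: 0x2000,
            userspace_addr: 0x7f00_0000_0000,
            flags: USER_MEMORY_REGION_READ
                | USER_MEMORY_REGION_WRITE
                | USER_MEMORY_REGION_ADJUSTABLE,
            ..Default::default()
        };

        let mshv_region: mshv_user_mem_region = region.into();
        assert_eq!(mshv_region.guest_pfn, 0x100);
        assert_eq!(mshv_region.size, 0x2000);

        let back: UserMemoryRegion = mshv_region.into();
        assert_eq!(back.guest_phys_addr, 0x10_0000);
        assert!(back.flags & USER_MEMORY_REGION_WRITE != 0);
        assert!(back.flags & USER_MEMORY_REGION_EXECUTE == 0);
    }
}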

impl From<mshv_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: mshv_ioctls::IoEventAddress) -> Self {
        match a {
            mshv_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            mshv_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for mshv_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuMshvState> for CpuState {
    fn from(s: VcpuMshvState) -> Self {
        CpuState::Mshv(s)
    }
}

impl From<CpuState> for VcpuMshvState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Mshv(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

impl From<mshv_bindings::StandardRegisters> for crate::StandardRegisters {
    fn from(s: mshv_bindings::StandardRegisters) -> Self {
        crate::StandardRegisters::Mshv(s)
    }
}

impl From<crate::StandardRegisters> for mshv_bindings::StandardRegisters {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

impl From<mshv_user_irq_entry> for IrqRoutingEntry {
    fn from(s: mshv_user_irq_entry) -> Self {
        IrqRoutingEntry::Mshv(s)
    }
}

impl From<IrqRoutingEntry> for mshv_user_irq_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

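/// Bookkeeping for a memory slot whose dirty-page log is tracked: the base
/// guest PFN and the slot size in bytes.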
struct MshvDirtyLogSlot {
    guest_pfn: u64,
    memory_size: u64,
}

/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by MSHV.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

impl MshvHypervisor {
    /// Create a hypervisor based on Mshv
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let mshv_obj =
            Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
        Ok(Arc::new(MshvHypervisor { mshv: mshv_obj }))
    }
    /// Check if the hypervisor is available
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/mshv") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}
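
// Hedged usage sketch: callers would typically probe for /dev/mshv before
// constructing the hypervisor object, e.g.:
//
//     if MshvHypervisor::is_available().unwrap_or(false) {
//         let hv = MshvHypervisor::new().unwrap();
//     }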

/// Implementation of Hypervisor trait for Mshv
///
/// # Examples
///
/// ```
/// # use hypervisor::mshv::MshvHypervisor;
/// # use std::sync::Arc;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl hypervisor::Hypervisor for MshvHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Mshv
    }

    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn crate::Vm>> {
        let mshv_vm_type: VmType = match VmType::try_from(vm_type) {
            Ok(vm_type) => vm_type,
            Err(_) => return Err(hypervisor::HypervisorError::UnsupportedVmType()),
        };
        let fd: VmFd;
        loop {
            match self.mshv.create_vm_with_type(mshv_vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // The ioctl was interrupted (EINTR); retry, since an
                        // interrupted ioctl cannot be treated as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        // Set additional partition property for SEV-SNP partition.
        #[cfg(target_arch = "x86_64")]
        if mshv_vm_type == VmType::Snp {
            let snp_policy = snp::get_default_snp_guest_policy();
            let vmgexit_offloads = snp::get_default_vmgexit_offload_features();
            // SAFETY: access union fields
            unsafe {
                debug!(
                    "Setting the partition isolation policy as: 0x{:x}",
                    snp_policy.as_uint64
                );
                fd.set_partition_property(
                    hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_POLICY,
                    snp_policy.as_uint64,
                )
                .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
                debug!(
                    "Setting the partition property to enable VMGEXIT offloads as: 0x{:x}",
                    vmgexit_offloads.as_uint64
                );
                fd.set_partition_property(
                    hv_partition_property_code_HV_PARTITION_PROPERTY_SEV_VMGEXIT_OFFLOADS,
                    vmgexit_offloads.as_uint64,
                )
                .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
            }
        }

        // The default Microsoft Hypervisor behavior for an unimplemented MSR
        // is to inject a fault into the guest when the MSR is accessed. We
        // override this with a more suitable option: ignore writes from the
        // guest and return zero on attempts to read an unimplemented MSR.
        #[cfg(target_arch = "x86_64")]
        fd.set_partition_property(
            hv_partition_property_code_HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION,
            hv_unimplemented_msr_action_HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;

        // Always create a frozen partition
        fd.set_partition_property(
            hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
            1u64,
        )
        .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs: Vec<MsrEntry> = vec![
                MsrEntry {
                    ..Default::default()
                };
                num_msrs
            ];
            let indices = msr_list.as_slice();
            for (pos, index) in indices.iter().enumerate() {
                msrs[pos].index = *index;
            }

            Ok(Arc::new(MshvVm {
                fd: vm_fd,
                msrs,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
                #[cfg(feature = "sev_snp")]
                sev_snp_enabled: mshv_vm_type == VmType::Snp,
            }))
        }

        #[cfg(target_arch = "aarch64")]
        {
            Ok(Arc::new(MshvVm {
                fd: vm_fd,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }

    /// Create an MSHV VM object and return it as a Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # extern crate hypervisor;
    /// # use hypervisor::mshv::MshvHypervisor;
    /// use hypervisor::mshv::MshvVm;
    /// let hypervisor = MshvHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// ```
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let vm_type = 0;
        self.create_vm_with_type(vm_type)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the supported CpuID
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        let mut cpuid = Vec::new();
        let functions: [u32; 2] = [0x1, 0xb];

        for function in functions {
            cpuid.push(CpuIdEntry {
                function,
                ..Default::default()
            });
        }
        Ok(cpuid)
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        // TODO: Using HV_MAXIMUM_PROCESSORS would be better
        // but the ioctl API is limited to u8
        256
    }

    fn get_guest_debug_hw_bps(&self) -> usize {
        0
    }
}

/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    fd: VcpuFd,
    vp_index: u8,
    #[cfg(target_arch = "x86_64")]
    cpuid: Vec<CpuIdEntry>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    vm_fd: Arc<VmFd>,
}

/// Implementation of Vcpu trait for Microsoft Hypervisor
///
/// # Examples
///
/// ```
/// # use hypervisor::mshv::MshvHypervisor;
/// # use std::sync::Arc;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// ```
impl cpu::Vcpu for MshvVcpu {
    ///
    /// Returns StandardRegisters with default values set
    ///
    #[cfg(target_arch = "x86_64")]
    fn create_standard_regs(&self) -> crate::StandardRegisters {
        mshv_bindings::StandardRegisters::default().into()
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<crate::StandardRegisters> {
        Ok(self
            .fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
    fn set_regs(&self, regs: &crate::StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<crate::arch::x86::SpecialRegisters> {
        Ok(self
            .fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &crate::arch::x86::SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: mshv_bindings::FloatingPointUnit = (*fpu).clone().into();
        self.fd
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        let succ = self
            .fd
            .get_msrs(&mut mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &mshv_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set up the model-specific registers (MSRs) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        self.fd
            .set_msrs(&mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }

    #[allow(non_upper_case_globals)]
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.run() {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    // SAFETY: access_info is valid, otherwise we won't be here
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    /*
                     * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
                     *
                     * Cloud Hypervisor doesn't support fw_cfg at the moment. It does support
                     * 0x402 under the "fwdebug" feature flag, but that feature is not enabled
                     * by default and is considered legacy.
                     *
                     * OVMF unconditionally pokes these IO ports with string IO.
                     *
                     * Instead of implementing string IO support now, which would not
                     * accomplish much anyway, skip these ports explicitly to avoid panicking.
                     *
                     * Proper string IO support can be added once we gain the ability to
                     * translate guest virtual addresses to guest physical addresses on MSHV.
                     */
                    match port {
                        0x402 | 0x510 | 0x511 | 0x514 => {
                            let insn_len = info.header.instruction_length() as u64;

                            /* Advance RIP and update RAX */
                            let arr_reg_name_value = [
                                (
                                    hv_register_name_HV_X64_REGISTER_RIP,
                                    info.header.rip + insn_len,
                                ),
                                (hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
                            ];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                            return Ok(cpu::VmExit::Ignore);
                        }
                        _ => {}
                    }

                    assert!(
                        // SAFETY: access_info is valid, otherwise we won't be here
                        (unsafe { access_info.__bindgen_anon_1.string_op() } != 1),
                        "String IN/OUT not supported"
                    );
                    assert!(
                        // SAFETY: access_info is valid, otherwise we won't be here
                        (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1),
                        "Rep IN/OUT not supported"
                    );

                    if is_write {
                        let data = (info.rax as u32).to_le_bytes();
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_write(port.into(), &data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }
                    } else {
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_read(port.into(), &mut data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }

                        let v = u32::from_le_bytes(data);
                        /* Preserve high bits in EAX but clear out high bits in RAX */
                        let mask = 0xffffffff >> (32 - len * 8);
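                        // e.g. len == 1 gives mask == 0xff: only AL comes
                        // from the read, the rest of EAX is preserved.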
                        let eax = (info.rax as u32 & !mask) | (v & mask);
                        ret_rax = eax as u64;
                    }

                    let insn_len = info.header.instruction_length() as u64;

                    /* Advance RIP and update RAX */
                    let arr_reg_name_value = [
                        (
                            hv_register_name_HV_X64_REGISTER_RIP,
                            info.header.rip + insn_len,
                        ),
                        (hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
                    ];
                    set_registers_64!(self.fd, arr_reg_name_value)
                        .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                msg_type @ (hv_message_type_HVMSG_UNMAPPED_GPA
                | hv_message_type_HVMSG_GPA_INTERCEPT) => {
                    let info = x.to_memory_info().unwrap();
                    let insn_len = info.instruction_byte_count as usize;
                    let gva = info.guest_virtual_address;
                    let gpa = info.guest_physical_address;

                    debug!("Exit ({:?}) GVA {:x} GPA {:x}", msg_type, gva, gpa);

                    let mut context = MshvEmulatorContext {
                        vcpu: self,
                        map: (gva, gpa),
                    };

                    // Create a new emulator.
                    let mut emul = Emulator::new(&mut context);

                    // Emulate the trapped instruction, and only the first one.
                    let new_state = emul
                        .emulate_first_insn(
                            self.vp_index as usize,
                            &info.instruction_bytes[..insn_len],
                        )
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Set CPU state back.
                    context
                        .set_cpu_state(self.vp_index as usize, new_state)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(feature = "sev_snp")]
                hv_message_type_HVMSG_GPA_ATTRIBUTE_INTERCEPT => {
                    let info = x.to_gpa_attribute_info().unwrap();
                    let host_vis = info.__bindgen_anon_1.host_visibility();
                    if host_vis >= HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE {
                        warn!("Ignored attribute intercept with full host visibility");
                        return Ok(cpu::VmExit::Ignore);
                    }

                    let num_ranges = info.__bindgen_anon_1.range_count();
                    assert!(num_ranges >= 1);
                    if num_ranges > 1 {
                        return Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unhandled VCPU exit(GPA_ATTRIBUTE_INTERCEPT): Expected num_ranges to be 1 but found num_ranges {:?}",
                            num_ranges
                        )));
                    }

                    // TODO: we could also deny the request with HvCallCompleteIntercept
                    let mut gpas = Vec::new();
                    let ranges = info.ranges;
                    let (gfn_start, gfn_count) = snp::parse_gpa_range(ranges[0]).unwrap();
                    debug!(
                        "Releasing pages: gfn_start: {:x?}, gfn_count: {:?}",
                        gfn_start, gfn_count
                    );
                    let gpa_start = gfn_start * HV_PAGE_SIZE as u64;
                    for i in 0..gfn_count {
                        gpas.push(gpa_start + i * HV_PAGE_SIZE as u64);
                    }

                    let mut gpa_list =
                        vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len());
                    gpa_list[0].page_count = gpas.len() as u64;
                    gpa_list[0].flags = 0;
                    if host_vis & HV_MAP_GPA_READABLE != 0 {
                        gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_READABLE;
                    }
                    if host_vis & HV_MAP_GPA_WRITABLE != 0 {
                        gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_WRITABLE;
                    }

                    // SAFETY: gpa_list initialized with gpas.len() and now it is being turned into
                    // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold
                    // everything from gpas.
                    unsafe {
                        let gpas_slice: &mut [u64] =
                            gpa_list[0].guest_pfns.as_mut_slice(gpas.len());
                        gpas_slice.copy_from_slice(gpas.as_slice());
                    }

                    self.vm_fd
                        .modify_gpa_host_access(&gpa_list[0])
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unhandled VCPU exit: attribute intercept - couldn't modify host access {}", e
                        )))?;
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_UNACCEPTED_GPA => {
                    let info = x.to_memory_info().unwrap();
                    let gva = info.guest_virtual_address;
                    let gpa = info.guest_physical_address;

                    Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                        "Unhandled VCPU exit: Unaccepted GPA({:x}) found at GVA({:x})",
                        gpa,
                        gva,
                    )))
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
                    let info = x.to_cpuid_info().unwrap();
                    debug!("cpuid eax: {:x}", { info.rax });
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
                    let info = x.to_msr_info().unwrap();
                    if info.header.intercept_access_type == 0 {
                        debug!("msr read: {:x}", { info.msr_number });
                    } else {
                        debug!("msr write: {:x}", { info.msr_number });
                    }
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
                    //TODO: Handler for VMCALL here.
                    let info = x.to_exception_info().unwrap();
                    debug!("Exception Info {:?}", { info.exception_vector });
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_APIC_EOI => {
                    let info = x.to_apic_eoi_info().unwrap();
                    // The kernel should dispatch the EOI to the correct thread.
                    // Check the VP index is the same as the one we have.
                    assert!(info.vp_index == self.vp_index as u32);
                    // The interrupt vector in info is u32, but x86 only supports 256 vectors.
                    // There is no good way to recover if the hypervisor
                    // reports a vector that does not fit, so just unwrap.
                    Ok(cpu::VmExit::IoapicEoi(
                        info.interrupt_vector.try_into().unwrap(),
                    ))
                }
                #[cfg(feature = "sev_snp")]
                hv_message_type_HVMSG_X64_SEV_VMGEXIT_INTERCEPT => {
                    let info = x.to_vmg_intercept_info().unwrap();
                    let ghcb_data = info.ghcb_msr >> GHCB_INFO_BIT_WIDTH;
                    let ghcb_msr = svm_ghcb_msr {
                        as_uint64: info.ghcb_msr,
                    };
                    // SAFETY: Accessing a union element from bindgen generated bindings.
                    let ghcb_op = unsafe { ghcb_msr.__bindgen_anon_2.ghcb_info() as u32 };
                    // Sanity check on the header fields before handling other operations.
                    assert!(info.header.intercept_access_type == HV_INTERCEPT_ACCESS_EXECUTE as u8);

                    match ghcb_op {
                        GHCB_INFO_HYP_FEATURE_REQUEST => {
                            // Pre-condition: GHCB data must be zero
                            assert!(ghcb_data == 0);
                            let mut ghcb_response = GHCB_INFO_HYP_FEATURE_RESPONSE as u64;
                            // Indicate support for basic SEV-SNP features
                            ghcb_response |=
                                (GHCB_HYP_FEATURE_SEV_SNP << GHCB_INFO_BIT_WIDTH) as u64;
                            // Indicate support for SEV-SNP AP creation
                            ghcb_response |= (GHCB_HYP_FEATURE_SEV_SNP_AP_CREATION
                                << GHCB_INFO_BIT_WIDTH)
                                as u64;
                            debug!(
                                "GHCB_INFO_HYP_FEATURE_REQUEST: Supported features: {:0x}",
                                ghcb_response
                            );
                            let arr_reg_name_value =
                                [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                        }
                        GHCB_INFO_REGISTER_REQUEST => {
                            let mut ghcb_gpa = hv_x64_register_sev_ghcb::default();

                            // Disable the previously used GHCB page.
                            self.disable_prev_ghcb_page()?;

                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            unsafe {
                                ghcb_gpa.__bindgen_anon_1.set_enabled(1);
                                ghcb_gpa
                                    .__bindgen_anon_1
                                    .set_page_number(ghcb_msr.__bindgen_anon_2.gpa_page_number());
                            }
                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            let reg_name_value = unsafe {
                                [(
                                    hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
                                    ghcb_gpa.as_uint64,
                                )]
                            };

                            set_registers_64!(self.fd, reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;

                            let mut resp_ghcb_msr = svm_ghcb_msr::default();
                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            unsafe {
                                resp_ghcb_msr
                                    .__bindgen_anon_2
                                    .set_ghcb_info(GHCB_INFO_REGISTER_RESPONSE as u64);
                                resp_ghcb_msr.__bindgen_anon_2.set_gpa_page_number(
                                    ghcb_msr.__bindgen_anon_2.gpa_page_number(),
                                );
                                debug!("GHCB GPA is {:x}", ghcb_gpa.as_uint64);
                            }
                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            let reg_name_value = unsafe {
                                [(
                                    hv_register_name_HV_X64_REGISTER_GHCB,
                                    resp_ghcb_msr.as_uint64,
                                )]
                            };

                            set_registers_64!(self.fd, reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                        }
                        GHCB_INFO_SEV_INFO_REQUEST => {
                            let sev_cpuid_function = 0x8000_001F;
                            let cpu_leaf = self
                                .fd
                                .get_cpuid_values(sev_cpuid_function, 0, 0, 0)
                                .unwrap();
                            let ebx = cpu_leaf[1];
                            // The lowest 6 bits of EBX hold the page table encryption bit position
                            let pbit_encryption = (ebx & 0x3f) as u8;
                            let mut ghcb_response = GHCB_INFO_SEV_INFO_RESPONSE as u64;

                            // GHCBData[63:48] specifies the maximum GHCB protocol version supported
                            ghcb_response |= (GHCB_PROTOCOL_VERSION_MAX as u64) << 48;
                            // GHCBData[47:32] specifies the minimum GHCB protocol version supported
                            ghcb_response |= (GHCB_PROTOCOL_VERSION_MIN as u64) << 32;
                            // GHCBData[31:24] specifies the SEV page table encryption bit number.
                            ghcb_response |= (pbit_encryption as u64) << 24;
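                            // Worked example (hypothetical values): with
                            // GHCB_PROTOCOL_VERSION_MAX = 2, _MIN = 1 and
                            // encryption bit 51, the bits above contribute
                            // (2 << 48) | (1 << 32) | (51 << 24)
                            //   = 0x0002_0001_3300_0000.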
884 
885                             let arr_reg_name_value =
886                                 [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)];
887                             set_registers_64!(self.fd, arr_reg_name_value)
888                                 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
889                         }
890                         GHCB_INFO_NORMAL => {
891                             let exit_code =
892                                 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_code as u32;
893                             // SAFETY: Accessing a union element from bindgen generated bindings.
894                             let pfn = unsafe { ghcb_msr.__bindgen_anon_2.gpa_page_number() };
895                             let ghcb_gpa = pfn << GHCB_INFO_BIT_WIDTH;
896                             match exit_code {
897                                 SVM_EXITCODE_HV_DOORBELL_PAGE => {
898                                     let exit_info1 =
899                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32;
900                                     match exit_info1 {
901                                         SVM_NAE_HV_DOORBELL_PAGE_GET_PREFERRED => {
902                                             // Hypervisor does not have any preference for doorbell GPA.
903                                             let preferred_doorbell_gpa: u64 = 0xFFFFFFFFFFFFFFFF;
904                                             self.gpa_write(
905                                                 ghcb_gpa + GHCB_SW_EXITINFO2_OFFSET,
906                                                 &preferred_doorbell_gpa.to_le_bytes(),
907                                             )?;
908                                         }
909                                         SVM_NAE_HV_DOORBELL_PAGE_SET => {
910                                             let exit_info2 = info
911                                                 .__bindgen_anon_2
912                                                 .__bindgen_anon_1
913                                                 .sw_exit_info2;
914                                             let mut ghcb_doorbell_gpa =
915                                                 hv_x64_register_sev_hv_doorbell::default();
916                                             // SAFETY: Accessing a union element from bindgen generated bindings.
917                                             unsafe {
918                                                 ghcb_doorbell_gpa.__bindgen_anon_1.set_enabled(1);
919                                                 ghcb_doorbell_gpa
920                                                     .__bindgen_anon_1
921                                                     .set_page_number(exit_info2 >> PAGE_SHIFT);
922                                             }
923                                             // SAFETY: Accessing a union element from bindgen generated bindings.
924                                             let reg_names = unsafe {
925                                                 [(
926                                                     hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA,
927                                                     ghcb_doorbell_gpa.as_uint64,
928                                                 )]
929                                             };
930                                             set_registers_64!(self.fd, reg_names).map_err(|e| {
931                                                 cpu::HypervisorCpuError::SetRegister(e.into())
932                                             })?;
933 
934                                             self.gpa_write(
935                                                 ghcb_gpa + GHCB_SW_EXITINFO2_OFFSET,
936                                                 &exit_info2.to_le_bytes(),
937                                             )?;
938 
939                                             // Clear the SW_EXIT_INFO1 register to indicate no error
940                                             self.clear_swexit_info1(ghcb_gpa)?;
941                                         }
942                                         SVM_NAE_HV_DOORBELL_PAGE_QUERY => {
943                                             let mut reg_assocs = [ hv_register_assoc {
944                                                 name: hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA,
945                                                 ..Default::default()
946                                             } ];
947                                             self.fd.get_reg(&mut reg_assocs).unwrap();
948                                             // SAFETY: Accessing a union element from bindgen generated bindings.
949                                             let doorbell_gpa = unsafe { reg_assocs[0].value.reg64 };
950 
951                                             self.gpa_write(
952                                                 ghcb_gpa + GHCB_SW_EXITINFO2_OFFSET,
953                                                 &doorbell_gpa.to_le_bytes(),
954                                             )?;
955 
956                                             // Clear the SW_EXIT_INFO1 register to indicate no error
957                                             self.clear_swexit_info1(ghcb_gpa)?;
958                                         }
959                                         SVM_NAE_HV_DOORBELL_PAGE_CLEAR => {
960                                             self.gpa_write(
961                                                 ghcb_gpa + GHCB_SW_EXITINFO2_OFFSET,
962                                                 &[0; 8],
963                                             )?;
964                                         }
965                                         _ => {
966                                             panic!(
967                                                 "SVM_EXITCODE_HV_DOORBELL_PAGE: Unhandled exit code: {:0x}",
968                                                 exit_info1
969                                             );
970                                         }
971                                     }
972                                 }
973                                 SVM_EXITCODE_IOIO_PROT => {
974                                     let exit_info1 =
975                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32;
976                                     let port_info = hv_sev_vmgexit_port_info {
977                                         as_uint32: exit_info1,
978                                     };
979 
980                                     let port =
981                                         // SAFETY: Accessing a union element from bindgen generated bindings.
982                                         unsafe { port_info.__bindgen_anon_1.intercepted_port() };
983                                     let mut len = 4;
984                                     // SAFETY: Accessing a union element from bindgen generated bindings.
985                                     unsafe {
986                                         if port_info.__bindgen_anon_1.operand_size_16bit() == 1 {
987                                             len = 2;
988                                         } else if port_info.__bindgen_anon_1.operand_size_8bit()
989                                             == 1
990                                         {
991                                             len = 1;
992                                         }
993                                     }
994                                     let is_write =
995                                         // SAFETY: Accessing a union element from bindgen generated bindings.
996                                         unsafe { port_info.__bindgen_anon_1.access_type() == 0 };
997 
998                                     let mut data = [0; 8];
999                                     self.gpa_read(ghcb_gpa + GHCB_RAX_OFFSET, &mut data)?;
1000 
1001                                     if is_write {
1002                                         if let Some(vm_ops) = &self.vm_ops {
1003                                             vm_ops.pio_write(port.into(), &data[..len]).map_err(
1004                                                 |e| cpu::HypervisorCpuError::RunVcpu(e.into()),
1005                                             )?;
1006                                         }
1007                                     } else {
1008                                         if let Some(vm_ops) = &self.vm_ops {
1009                                             vm_ops
1010                                                 .pio_read(port.into(), &mut data[..len])
1011                                                 .map_err(|e| {
1012                                                     cpu::HypervisorCpuError::RunVcpu(e.into())
1013                                                 })?;
1014                                         }
1015 
1016                                         self.gpa_write(ghcb_gpa + GHCB_RAX_OFFSET, &data)?;
1017                                     }
1018 
1019                                     // Clear the SW_EXIT_INFO1 register to indicate no error
1020                                     self.clear_swexit_info1(ghcb_gpa)?;
1021                                 }
1022                                 SVM_EXITCODE_MMIO_READ => {
1023                                     let src_gpa =
1024                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
1025                                     let dst_gpa = info.__bindgen_anon_2.__bindgen_anon_1.sw_scratch;
1026                                     let data_len =
1027                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2
1028                                             as usize;
1029                                     // Sanity check to make sure data len is within supported range.
1030                                     assert!(data_len <= 0x8);
1031 
1032                                     let mut data: Vec<u8> = vec![0; data_len];
1033                                     if let Some(vm_ops) = &self.vm_ops {
1034                                         vm_ops.mmio_read(src_gpa, &mut data).map_err(|e| {
1035                                             cpu::HypervisorCpuError::RunVcpu(e.into())
1036                                         })?;
1037                                     }
1038 
1039                                     self.gpa_write(dst_gpa, &data)?;
1040 
1041                                     // Clear the SW_EXIT_INFO1 register to indicate no error
1042                                     self.clear_swexit_info1(ghcb_gpa)?;
1043                                 }
1044                                 SVM_EXITCODE_MMIO_WRITE => {
1045                                     let dst_gpa =
1046                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
1047                                     let src_gpa = info.__bindgen_anon_2.__bindgen_anon_1.sw_scratch;
1048                                     let data_len =
1049                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2
1050                                             as usize;
1051                                     // Sanity check to make sure data len is within supported range.
1052                                     assert!(data_len <= 0x8);
1053 
1054                                     let mut data = vec![0; data_len];
1055                                     self.gpa_read(src_gpa, &mut data)?;
1056 
1057                                     if let Some(vm_ops) = &self.vm_ops {
1058                                         vm_ops.mmio_write(dst_gpa, &data).map_err(|e| {
1059                                             cpu::HypervisorCpuError::RunVcpu(e.into())
1060                                         })?;
1061                                     }
1062 
1063                                     // Clear the SW_EXIT_INFO1 register to indicate no error
1064                                     self.clear_swexit_info1(ghcb_gpa)?;
1065                                 }
1066                                 SVM_EXITCODE_SNP_GUEST_REQUEST
1067                                 | SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST => {
1068                                     if exit_code == SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST {
1069                                         info!("Fetching extended guest request is not supported");
1070                                         // We don't support extended guest request, so we just write empty data.
1071                                         // This matches the behavior of KVM in Linux 6.11.
1072 
1073                                         // Read RAX & RBX from the GHCB.
1074                                         let mut data = [0; 8];
1075                                         self.gpa_read(ghcb_gpa + GHCB_RAX_OFFSET, &mut data)?;
1076                                         let data_gpa = u64::from_le_bytes(data);
1077                                         self.gpa_read(ghcb_gpa + GHCB_RBX_OFFSET, &mut data)?;
1078                                         let data_npages = u64::from_le_bytes(data);
1079 
1080                                         if data_npages > 0 {
1081                                             // The certificates are terminated by 24 zero bytes.
1082                                             self.gpa_write(data_gpa, &[0; 24])?;
1083                                         }
1084                                     }
1085 
1086                                     let req_gpa =
1087                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
1088                                     let rsp_gpa =
1089                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2;
1090 
1091                                     let mshv_psp_req =
1092                                         mshv_issue_psp_guest_request { req_gpa, rsp_gpa };
1093                                     self.vm_fd
1094                                         .psp_issue_guest_request(&mshv_psp_req)
1095                                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
1096 
1097                                     debug!(
1098                                         "SNP guest request: req_gpa {:0x} rsp_gpa {:0x}",
1099                                         req_gpa, rsp_gpa
1100                                     );
1101 
1102                                     self.gpa_write(ghcb_gpa + GHCB_SW_EXITINFO2_OFFSET, &[0; 8])?;
1103                                 }
1104                                 SVM_EXITCODE_SNP_AP_CREATION => {
1105                                     let vmsa_gpa =
1106                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2;
1107                                     let apic_id =
1108                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 >> 32;
1109                                     debug!(
1110                                         "SNP AP CREATE REQUEST with VMSA GPA {:0x}, and APIC ID {:?}",
1111                                         vmsa_gpa, apic_id
1112                                     );
1113 
1114                                     let mshv_ap_create_req = mshv_sev_snp_ap_create {
1115                                         vp_id: apic_id,
1116                                         vmsa_gpa,
1117                                     };
1118                                     self.vm_fd
1119                                         .sev_snp_ap_create(&mshv_ap_create_req)
1120                                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
1121 
1122                                     // Clear the SW_EXIT_INFO1 register to indicate no error
1123                                     self.clear_swexit_info1(ghcb_gpa)?;
1124                                 }
1125                                 _ => panic!(
1126                                     "GHCB_INFO_NORMAL: Unhandled exit code: {:0x}",
1127                                     exit_code
1128                                 ),
1129                             }
1130                         }
1131                         _ => panic!("Unsupported VMGEXIT operation: {:0x}", ghcb_op),
1132                     }
1133 
1134                     Ok(cpu::VmExit::Ignore)
1135                 }
1136                 exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1137                     "Unhandled VCPU exit {:?}",
1138                     exit
1139                 ))),
1140             },
1141 
1142             Err(e) => match e.errno() {
1143                 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
1144                 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1145                     "VCPU error {:?}",
1146                     e
1147                 ))),
1148             },
1149         }
1150     }
1151 
1152     #[cfg(target_arch = "aarch64")]
1153     fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
1154         unimplemented!()
1155     }
1156 
1157     #[cfg(target_arch = "aarch64")]
1158     fn has_pmu_support(&self) -> bool {
1159         unimplemented!()
1160     }
1161 
1162     #[cfg(target_arch = "aarch64")]
1163     fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
1164         unimplemented!()
1165     }
1166 
1167     #[cfg(target_arch = "aarch64")]
1168     fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
1169         unimplemented!()
1170     }
1171 
1172     #[cfg(target_arch = "aarch64")]
1173     fn get_reg_list(&self, _reg_list: &mut RegList) -> cpu::Result<()> {
1174         unimplemented!()
1175     }
1176 
1177     #[cfg(target_arch = "aarch64")]
1178     fn vcpu_init(&self, _kvi: &VcpuInit) -> cpu::Result<()> {
1179         unimplemented!()
1180     }
1181 
1182     #[cfg(target_arch = "aarch64")]
1183     fn set_regs(&self, _regs: &StandardRegisters) -> cpu::Result<()> {
1184         unimplemented!()
1185     }
1186 
1187     #[cfg(target_arch = "aarch64")]
1188     fn get_regs(&self) -> cpu::Result<StandardRegisters> {
1189         unimplemented!()
1190     }
1191 
1192     #[cfg(target_arch = "x86_64")]
1193     ///
1194     /// X86-specific call to set up the CPUID registers.
1195     ///
1196     fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
1197         let cpuid: Vec<mshv_bindings::hv_cpuid_entry> = cpuid.iter().map(|e| (*e).into()).collect();
1198         let mshv_cpuid = <CpuId>::from_entries(&cpuid)
1199             .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;
1200 
1201         self.fd
1202             .register_intercept_result_cpuid(&mshv_cpuid)
1203             .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
1204     }
1205 
1206     #[cfg(target_arch = "x86_64")]
1207     ///
1208     /// X86-specific call to retrieve the CPUID registers.
1209     ///
1210     fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
1211         Ok(self.cpuid.clone())
1212     }
1213 
1214     #[cfg(target_arch = "x86_64")]
1215     ///
1216     /// X86-specific call to retrieve the values of a CPUID leaf.
1217     ///
1218     fn get_cpuid_values(
1219         &self,
1220         function: u32,
1221         index: u32,
1222         xfem: u64,
1223         xss: u64,
1224     ) -> cpu::Result<[u32; 4]> {
1225         self.fd
1226             .get_cpuid_values(function, index, xfem, xss)
1227             .map_err(|e| cpu::HypervisorCpuError::GetCpuidVales(e.into()))
1228     }
1229 
1230     #[cfg(target_arch = "x86_64")]
1231     ///
1232     /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1233     ///
1234     fn get_lapic(&self) -> cpu::Result<crate::arch::x86::LapicState> {
1235         Ok(self
1236             .fd
1237             .get_lapic()
1238             .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
1239             .into())
1240     }
1241 
1242     #[cfg(target_arch = "x86_64")]
1243     ///
1244     /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1245     ///
1246     fn set_lapic(&self, lapic: &crate::arch::x86::LapicState) -> cpu::Result<()> {
1247         let lapic: mshv_bindings::LapicState = (*lapic).clone().into();
1248         self.fd
1249             .set_lapic(&lapic)
1250             .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
1251     }
1252 
1253     ///
1254     /// Returns the vcpu's current "multiprocessing state".
1255     ///
1256     fn get_mp_state(&self) -> cpu::Result<MpState> {
1257         Ok(MpState::Mshv)
1258     }
1259 
1260     ///
1261     /// Sets the vcpu's current "multiprocessing state".
1262     ///
1263     fn set_mp_state(&self, _mp_state: MpState) -> cpu::Result<()> {
1264         Ok(())
1265     }
1266 
1267     #[cfg(target_arch = "x86_64")]
1268     ///
1269     /// Set CPU state for x86_64 guest.
1270     ///
1271     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
1272         let mut state: VcpuMshvState = state.clone().into();
1273         self.set_msrs(&state.msrs)?;
1274         self.set_vcpu_events(&state.vcpu_events)?;
1275         self.set_regs(&state.regs.into())?;
1276         self.set_sregs(&state.sregs.into())?;
1277         self.set_fpu(&state.fpu)?;
1278         self.set_xcrs(&state.xcrs)?;
1279         // These registers are global and need to be set only for the first vCPU,
1280         // as the Microsoft Hypervisor allows setting them for only one vCPU.
1281         if self.vp_index == 0 {
1282             self.fd
1283                 .set_misc_regs(&state.misc)
1284                 .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))?
1285         }
1286         self.fd
1287             .set_debug_regs(&state.dbg)
1288             .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
1289         self.fd
1290             .set_all_vp_state_components(&mut state.vp_states)
1291             .map_err(|e| cpu::HypervisorCpuError::SetAllVpStateComponents(e.into()))?;
1292         Ok(())
1293     }
1294 
1295     #[cfg(target_arch = "aarch64")]
1296     ///
1297     /// Set CPU state for aarch64 guest.
1298     ///
1299     fn set_state(&self, _state: &CpuState) -> cpu::Result<()> {
1300         unimplemented!()
1301     }
1302 
1303     #[cfg(target_arch = "x86_64")]
1304     ///
1305     /// Get CPU state for x86_64 guest.
1306     ///
1307     fn state(&self) -> cpu::Result<CpuState> {
1308         let regs = self.get_regs()?;
1309         let sregs = self.get_sregs()?;
1310         let xcrs = self.get_xcrs()?;
1311         let fpu = self.get_fpu()?;
1312         let vcpu_events = self.get_vcpu_events()?;
1313         let mut msrs = self.msrs.clone();
1314         self.get_msrs(&mut msrs)?;
1315         let misc = self
1316             .fd
1317             .get_misc_regs()
1318             .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?;
1319         let dbg = self
1320             .fd
1321             .get_debug_regs()
1322             .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;
1323         let vp_states = self
1324             .fd
1325             .get_all_vp_state_components()
1326             .map_err(|e| cpu::HypervisorCpuError::GetAllVpStateComponents(e.into()))?;
1327 
1328         Ok(VcpuMshvState {
1329             msrs,
1330             vcpu_events,
1331             regs: regs.into(),
1332             sregs: sregs.into(),
1333             fpu,
1334             xcrs,
1335             dbg,
1336             misc,
1337             vp_states,
1338         }
1339         .into())
1340     }
1341 
1342     #[cfg(target_arch = "aarch64")]
1343     ///
1344     /// Get CPU state for aarch64 guest.
1345     ///
1346     fn state(&self) -> cpu::Result<CpuState> {
1347         unimplemented!()
1348     }
1349 
1350     #[cfg(target_arch = "x86_64")]
1351     ///
1352     /// Translate a guest virtual address to a guest physical address.
1353     ///
1354     fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> {
1355         let r = self
1356             .fd
1357             .translate_gva(gva, flags)
1358             .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
1359 
1360         let gpa = r.0;
1361         // SAFETY: r is valid; otherwise this function would already have returned an error.
1362         let result_code = unsafe { r.1.__bindgen_anon_1.result_code };
1363 
1364         Ok((gpa, result_code))
1365     }
1366 
1367     #[cfg(target_arch = "x86_64")]
1368     ///
1369     /// Return the list of initial MSR entries for a VCPU
1370     ///
1371     fn boot_msr_entries(&self) -> Vec<MsrEntry> {
1372         use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};
1373 
1374         [
1375             msr!(msr_index::MSR_IA32_SYSENTER_CS),
1376             msr!(msr_index::MSR_IA32_SYSENTER_ESP),
1377             msr!(msr_index::MSR_IA32_SYSENTER_EIP),
1378             msr!(msr_index::MSR_STAR),
1379             msr!(msr_index::MSR_CSTAR),
1380             msr!(msr_index::MSR_LSTAR),
1381             msr!(msr_index::MSR_KERNEL_GS_BASE),
1382             msr!(msr_index::MSR_SYSCALL_MASK),
1383             msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
1384         ]
1385         .to_vec()
1386     }
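    // Hedged note on the last entry above: programming MSR_MTRRdefType with
    // MTRR_ENABLE | MTRR_MEM_TYPE_WB enables the MTRRs with a write-back
    // default memory type, so all guest RAM is cacheable until the guest
    // reprograms the MTRRs itself.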
1387 
1388     ///
1389     /// Sets the AMD-specific SEV control register for the vCPU.
1390     ///
1391     #[cfg(feature = "sev_snp")]
1392     fn set_sev_control_register(&self, vmsa_pfn: u64) -> cpu::Result<()> {
1393         let sev_control_reg = snp::get_sev_control_register(vmsa_pfn);
1394 
1395         self.fd
1396             .set_sev_control_register(sev_control_reg)
1397             .map_err(|e| cpu::HypervisorCpuError::SetSevControlRegister(e.into()))
1398     }

1399     #[cfg(target_arch = "x86_64")]
1400     ///
1401     /// Trigger NMI interrupt
1402     ///
1403     fn nmi(&self) -> cpu::Result<()> {
1404         let cfg = InterruptRequest {
1405             interrupt_type: hv_interrupt_type_HV_X64_INTERRUPT_TYPE_NMI,
1406             apic_id: self.vp_index as u64,
1407             level_triggered: false,
1408             vector: 0,
1409             logical_destination_mode: false,
1410             long_mode: false,
1411         };
1412         self.vm_fd
1413             .request_virtual_interrupt(&cfg)
1414             .map_err(|e| cpu::HypervisorCpuError::Nmi(e.into()))
1415     }
1416 }
1417 
1418 impl MshvVcpu {
1419     ///
1420     /// Deactivate the previously used GHCB page.
1421     ///
1422     #[cfg(feature = "sev_snp")]
1423     fn disable_prev_ghcb_page(&self) -> cpu::Result<()> {
1424         let mut reg_assocs = [hv_register_assoc {
1425             name: hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
1426             ..Default::default()
1427         }];
1428         self.fd.get_reg(&mut reg_assocs).unwrap();
1429         // SAFETY: Accessing a union element from bindgen generated bindings.
1430         let prev_ghcb_gpa = unsafe { reg_assocs[0].value.reg64 };
1431 
1432         debug!("Prev GHCB GPA is {:x}", prev_ghcb_gpa);
1433 
1434         let mut ghcb_gpa = hv_x64_register_sev_ghcb::default();
1435 
1436         // SAFETY: Accessing a union element from bindgen generated bindings.
1437         unsafe {
1438             ghcb_gpa.__bindgen_anon_1.set_enabled(0);
1439             ghcb_gpa.__bindgen_anon_1.set_page_number(prev_ghcb_gpa);
1440         }
1441 
1442         // SAFETY: Accessing a union element from bindgen generated bindings.
1443         let reg_name_value = unsafe {
1444             [(
1445                 hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
1446                 ghcb_gpa.as_uint64,
1447             )]
1448         };
1449 
1450         set_registers_64!(self.fd, reg_name_value)
1451             .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
1452 
1453         Ok(())
1454     }

1455     #[cfg(target_arch = "x86_64")]
1456     ///
1457     /// X86 specific call that returns the vcpu's current "xcrs".
1458     ///
1459     fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
1460         self.fd
1461             .get_xcrs()
1462             .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
1463     }
1464 
1465     #[cfg(target_arch = "x86_64")]
1466     ///
1467     /// X86 specific call that sets the vcpu's current "xcrs".
1468     ///
1469     fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
1470         self.fd
1471             .set_xcrs(xcrs)
1472             .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
1473     }
1474 
1475     #[cfg(target_arch = "x86_64")]
1476     ///
1477     /// Returns currently pending exceptions, interrupts, and NMIs as well as related
1478     /// states of the vcpu.
1479     ///
1480     fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
1481         self.fd
1482             .get_vcpu_events()
1483             .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
1484     }
1485 
1486     #[cfg(target_arch = "x86_64")]
1487     ///
1488     /// Sets pending exceptions, interrupts, and NMIs as well as related states
1489     /// of the vcpu.
1490     ///
1491     fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
1492         self.fd
1493             .set_vcpu_events(events)
1494             .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
1495     }
1496 
1497     ///
1498     /// Clear SW_EXIT_INFO1 register for SEV-SNP guests.
1499     ///
1500     #[cfg(feature = "sev_snp")]
1501     fn clear_swexit_info1(
1502         &self,
1503         ghcb_gpa: u64,
1504     ) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
1505         // Clear the SW_EXIT_INFO1 register to indicate no error
1506         self.gpa_write(ghcb_gpa + GHCB_SW_EXITINFO1_OFFSET, &[0; 4])?;
1507 
1508         Ok(cpu::VmExit::Ignore)
1509     }
1510 
1511     #[cfg(feature = "sev_snp")]
1512     fn gpa_read(&self, gpa: u64, data: &mut [u8]) -> cpu::Result<()> {
1513         for (gpa, chunk) in (gpa..)
1514             .step_by(HV_READ_WRITE_GPA_MAX_SIZE as usize)
1515             .zip(data.chunks_mut(HV_READ_WRITE_GPA_MAX_SIZE as usize))
1516         {
1517             let mut rw_gpa_arg = mshv_bindings::mshv_read_write_gpa {
1518                 base_gpa: gpa,
1519                 byte_count: chunk.len() as u32,
1520                 ..Default::default()
1521             };
1522             self.fd
1523                 .gpa_read(&mut rw_gpa_arg)
1524                 .map_err(|e| cpu::HypervisorCpuError::GpaRead(e.into()))?;
1525 
1526             chunk.copy_from_slice(&rw_gpa_arg.data[..chunk.len()]);
1527         }
1528 
1529         Ok(())
1530     }
1531 
1532     #[cfg(feature = "sev_snp")]
1533     fn gpa_write(&self, gpa: u64, data: &[u8]) -> cpu::Result<()> {
1534         for (gpa, chunk) in (gpa..)
1535             .step_by(HV_READ_WRITE_GPA_MAX_SIZE as usize)
1536             .zip(data.chunks(HV_READ_WRITE_GPA_MAX_SIZE as usize))
1537         {
1538             let mut data = [0; HV_READ_WRITE_GPA_MAX_SIZE as usize];
1539             data[..chunk.len()].copy_from_slice(chunk);
1540 
1541             let mut rw_gpa_arg = mshv_bindings::mshv_read_write_gpa {
1542                 base_gpa: gpa,
1543                 byte_count: chunk.len() as u32,
1544                 data,
1545                 ..Default::default()
1546             };
1547             self.fd
1548                 .gpa_write(&mut rw_gpa_arg)
1549                 .map_err(|e| cpu::HypervisorCpuError::GpaWrite(e.into()))?;
1550         }
1551 
1552         Ok(())
1553     }
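    // Illustrative sketch (not part of the original source): how the two
    // helpers above chunk a transfer. Assuming, purely for illustration, that
    // HV_READ_WRITE_GPA_MAX_SIZE is 16, a 40-byte `gpa_write(0x1000, data)`
    // issues three ioctls:
    //
    //     base_gpa 0x1000, byte_count 16  (data[0..16])
    //     base_gpa 0x1010, byte_count 16  (data[16..32])
    //     base_gpa 0x1020, byte_count 8   (data[32..40])
    //
    // Zipping `(gpa..).step_by(..)` with the data chunks keeps the GPA cursor
    // and the buffer offsets advancing in lockstep.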
1554 }
1555 
1556 /// Wrapper over Mshv VM ioctls.
1557 pub struct MshvVm {
1558     fd: Arc<VmFd>,
1559     #[cfg(target_arch = "x86_64")]
1560     msrs: Vec<MsrEntry>,
1561     dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
1562     #[cfg(feature = "sev_snp")]
1563     sev_snp_enabled: bool,
1564 }
1565 
1566 impl MshvVm {
1567     ///
1568     /// Creates an in-kernel device.
1569     ///
1570     /// See the documentation for `MSHV_CREATE_DEVICE`.
1571     fn create_device(&self, device: &mut CreateDevice) -> vm::Result<VfioDeviceFd> {
1572         let device_fd = self
1573             .fd
1574             .create_device(device)
1575             .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
1576         Ok(VfioDeviceFd::new_from_mshv(device_fd))
1577     }
1578 }
1579 
1580 ///
1581 /// Implementation of Vm trait for Mshv
1582 ///
1583 /// # Examples
1584 ///
1585 /// ```
1586 /// # extern crate hypervisor;
1587 /// # use hypervisor::mshv::MshvHypervisor;
1588 /// # use std::sync::Arc;
1589 /// let mshv = MshvHypervisor::new().unwrap();
1590 /// let hypervisor = Arc::new(mshv);
1591 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
1592 /// ```
1593 impl vm::Vm for MshvVm {
1594     #[cfg(target_arch = "x86_64")]
1595     ///
1596     /// Sets the address of the one-page region in the VM's address space.
1597     ///
1598     fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> {
1599         Ok(())
1600     }
1601 
1602     #[cfg(target_arch = "x86_64")]
1603     ///
1604     /// Sets the address of the three-page region in the VM's address space.
1605     ///
1606     fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
1607         Ok(())
1608     }
1609 
1610     ///
1611     /// Creates an in-kernel interrupt controller.
1612     ///
1613     fn create_irq_chip(&self) -> vm::Result<()> {
1614         Ok(())
1615     }
1616 
1617     ///
1618     /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
1619     ///
1620     fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
1621         debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);
1622 
1623         self.fd
1624             .register_irqfd(fd, gsi)
1625             .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;
1626 
1627         Ok(())
1628     }
1629 
1630     ///
1631     /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
1632     ///
1633     fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
1634         debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);
1635 
1636         self.fd
1637             .unregister_irqfd(fd, gsi)
1638             .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;
1639 
1640         Ok(())
1641     }
1642 
1643     ///
1644     /// Creates a vCPU, backed by a newly created VcpuFd, for the given id.
1645     ///
1646     fn create_vcpu(
1647         &self,
1648         id: u8,
1649         vm_ops: Option<Arc<dyn VmOps>>,
1650     ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
1651         let vcpu_fd = self
1652             .fd
1653             .create_vcpu(id)
1654             .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
1655         let vcpu = MshvVcpu {
1656             fd: vcpu_fd,
1657             vp_index: id,
1658             #[cfg(target_arch = "x86_64")]
1659             cpuid: Vec::new(),
1660             #[cfg(target_arch = "x86_64")]
1661             msrs: self.msrs.clone(),
1662             vm_ops,
1663             vm_fd: self.fd.clone(),
1664         };
1665         Ok(Arc::new(vcpu))
1666     }
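    // Illustrative usage (a hedged sketch building on the doc example above):
    //
    //     let vcpu = vm.create_vcpu(0, None).expect("vcpu creation failed");
    //
    // Passing `None` for `vm_ops` is acceptable when no userspace MMIO/PIO
    // handling is wired up; a full VMM would pass its `Arc<dyn VmOps>` here.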
1667 
1668     #[cfg(target_arch = "x86_64")]
1669     fn enable_split_irq(&self) -> vm::Result<()> {
1670         Ok(())
1671     }
1672 
1673     #[cfg(target_arch = "x86_64")]
1674     fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
1675         Ok(())
1676     }
1677 
1678     fn register_ioevent(
1679         &self,
1680         fd: &EventFd,
1681         addr: &IoEventAddress,
1682         datamatch: Option<DataMatch>,
1683     ) -> vm::Result<()> {
1684         #[cfg(feature = "sev_snp")]
1685         if self.sev_snp_enabled {
1686             return Ok(());
1687         }
1688 
1689         let addr = &mshv_ioctls::IoEventAddress::from(*addr);
1690         debug!(
1691             "register_ioevent fd {} addr {:x?} datamatch {:?}",
1692             fd.as_raw_fd(),
1693             addr,
1694             datamatch
1695         );
1696         if let Some(dm) = datamatch {
1697             match dm {
1698                 vm::DataMatch::DataMatch32(mshv_dm32) => self
1699                     .fd
1700                     .register_ioevent(fd, addr, mshv_dm32)
1701                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
1702                 vm::DataMatch::DataMatch64(mshv_dm64) => self
1703                     .fd
1704                     .register_ioevent(fd, addr, mshv_dm64)
1705                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
1706             }
1707         } else {
1708             self.fd
1709                 .register_ioevent(fd, addr, NoDatamatch)
1710                 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
1711         }
1712     }
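    // Illustrative usage (hedged sketch; the MMIO address and datamatch value
    // are made up for the example):
    //
    //     let evt = EventFd::new(libc::EFD_NONBLOCK).unwrap();
    //     vm.register_ioevent(&evt, &IoEventAddress::Mmio(0xd000_0000), None)?;
    //
    // Passing `Some(DataMatch::DataMatch32(v))` or `Some(DataMatch::DataMatch64(v))`
    // instead of `None` restricts the event to writes of that exact value.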
1713 
1714     /// Unregisters an event from the address it has previously been registered to.
1715     fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
1716         #[cfg(feature = "sev_snp")]
1717         if self.sev_snp_enabled {
1718             return Ok(());
1719         }
1720 
1721         let addr = &mshv_ioctls::IoEventAddress::from(*addr);
1722         debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);
1723 
1724         self.fd
1725             .unregister_ioevent(fd, addr, NoDatamatch)
1726             .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
1727     }
1728 
1729     /// Creates a guest physical memory region.
1730     fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
1731         let user_memory_region: mshv_user_mem_region = user_memory_region.into();
1732         // We keep track of the slots regardless of whether they are read-only.
1733         // For a read-only slot the hypervisor can enable the dirty bits,
1734         // but a VM exit happens before the dirty bits are set.
1735         self.dirty_log_slots.write().unwrap().insert(
1736             user_memory_region.guest_pfn,
1737             MshvDirtyLogSlot {
1738                 guest_pfn: user_memory_region.guest_pfn,
1739                 memory_size: user_memory_region.size,
1740             },
1741         );
1742 
1743         self.fd
1744             .map_user_memory(user_memory_region)
1745             .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
1746         Ok(())
1747     }
1748 
1749     /// Removes a guest physical memory region.
1750     fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
1751         let user_memory_region: mshv_user_mem_region = user_memory_region.into();
1752         // Remove the corresponding entry from "self.dirty_log_slots" if needed
1753         self.dirty_log_slots
1754             .write()
1755             .unwrap()
1756             .remove(&user_memory_region.guest_pfn);
1757 
1758         self.fd
1759             .unmap_user_memory(user_memory_region)
1760             .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
1761         Ok(())
1762     }
1763 
1764     fn make_user_memory_region(
1765         &self,
1766         _slot: u32,
1767         guest_phys_addr: u64,
1768         memory_size: u64,
1769         userspace_addr: u64,
1770         readonly: bool,
1771         _log_dirty_pages: bool,
1772     ) -> UserMemoryRegion {
1773         let mut flags = 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
1774         if !readonly {
1775             flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
1776         }
1777 
1778         mshv_user_mem_region {
1779             flags,
1780             guest_pfn: guest_phys_addr >> PAGE_SHIFT,
1781             size: memory_size,
1782             userspace_addr,
1783             ..Default::default()
1784         }
1785         .into()
1786     }
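    // Worked example (illustrative): `make_user_memory_region(0, 0x1_0000_0000,
    // 0x2000, host_va, false, false)` yields a region with guest_pfn 0x100000
    // (0x1_0000_0000 >> PAGE_SHIFT) and both the EXECUTABLE and WRITABLE flag
    // bits set; with `readonly = true` only the EXECUTABLE bit remains.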
1787 
1788     fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
1789         let mut vfio_dev = mshv_create_device {
1790             type_: mshv_device_type_MSHV_DEV_TYPE_VFIO,
1791             fd: 0,
1792             flags: 0,
1793         };
1794 
1795         self.create_device(&mut vfio_dev)
1796             .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
1797     }
1798 
1799     ///
1800     /// Constructs a routing entry
1801     ///
1802     fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
1803         match config {
1804             InterruptSourceConfig::MsiIrq(cfg) => mshv_user_irq_entry {
1805                 gsi,
1806                 address_lo: cfg.low_addr,
1807                 address_hi: cfg.high_addr,
1808                 data: cfg.data,
1809             }
1810             .into(),
1811             _ => {
1812                 unreachable!()
1813             }
1814         }
1815     }
1816 
1817     fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
1818         let mut msi_routing =
1819             vec_with_array_field::<mshv_user_irq_table, mshv_user_irq_entry>(entries.len());
1820         msi_routing[0].nr = entries.len() as u32;
1821 
1822         let entries: Vec<mshv_user_irq_entry> = entries
1823             .iter()
1824             .map(|entry| match entry {
1825                 IrqRoutingEntry::Mshv(e) => *e,
1826                 #[allow(unreachable_patterns)]
1827                 _ => panic!("IrqRoutingEntry type is wrong"),
1828             })
1829             .collect();
1830 
1831         // SAFETY: msi_routing initialized with entries.len() and now it is being turned into
1832         // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
1833         // everything from entries.
1834         unsafe {
1835             let entries_slice: &mut [mshv_user_irq_entry] =
1836                 msi_routing[0].entries.as_mut_slice(entries.len());
1837             entries_slice.copy_from_slice(&entries);
1838         }
1839 
1840         self.fd
1841             .set_msi_routing(&msi_routing[0])
1842             .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
1843     }
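    // Illustrative usage (hedged sketch; `msi_cfg_a`/`msi_cfg_b` stand in for
    // InterruptSourceConfig::MsiIrq values produced by the interrupt manager):
    //
    //     let entries = vec![
    //         vm.make_routing_entry(4, &msi_cfg_a),
    //         vm.make_routing_entry(5, &msi_cfg_b),
    //     ];
    //     vm.set_gsi_routing(&entries)?;
    //
    // The whole table is committed in one ioctl; `mshv_user_irq_table` ends in
    // a flexible array member, which is why `vec_with_array_field` is used to
    // allocate it with trailing space for the entries.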
1844 
1845     ///
1846     /// Start logging dirty pages
1847     ///
1848     fn start_dirty_log(&self) -> vm::Result<()> {
1849         self.fd
1850             .enable_dirty_page_tracking()
1851             .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
1852     }
1853 
1854     ///
1855     /// Stop logging dirty pages
1856     ///
1857     fn stop_dirty_log(&self) -> vm::Result<()> {
1858         let dirty_log_slots = self.dirty_log_slots.read().unwrap();
1859         // Before disabling dirty page tracking we need to set
1860         // the dirty bits in the hypervisor; this is a requirement
1861         // of the Microsoft Hypervisor.
1862         for (_, s) in dirty_log_slots.iter() {
1863             self.fd
1864                 .get_dirty_log(
1865                     s.guest_pfn,
1866                     s.memory_size as usize,
1867                     MSHV_GPAP_ACCESS_OP_SET as u8,
1868                 )
1869                 .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
1870         }
1871         self.fd
1872             .disable_dirty_page_tracking()
1873             .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
1874         Ok(())
1875     }
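    // Illustrative lifecycle (hedged sketch): dirty-page tracking typically
    // brackets a live-migration pass:
    //
    //     vm.start_dirty_log()?;
    //     // ... copy guest RAM while the guest keeps running ...
    //     let bitmap = vm.get_dirty_log(0, base_gpa, memory_size)?;
    //     vm.stop_dirty_log()?;
    //
    // stop_dirty_log() first marks every tracked slot dirty (the
    // MSHV_GPAP_ACCESS_OP_SET pass above), as the Microsoft Hypervisor
    // requires, before tracking is disabled.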
1876 
1877     ///
1878     /// Get dirty pages bitmap (one bit per page)
1879     ///
1880     fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
1881         self.fd
1882             .get_dirty_log(
1883                 base_gpa >> PAGE_SHIFT,
1884                 memory_size as usize,
1885                 MSHV_GPAP_ACCESS_OP_CLEAR as u8,
1886             )
1887             .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
1888     }
1889 
1890     /// Retrieve guest clock.
1891     #[cfg(target_arch = "x86_64")]
1892     fn get_clock(&self) -> vm::Result<ClockData> {
1893         let val = self
1894             .fd
1895             .get_partition_property(hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME)
1896             .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?;
1897         Ok(MshvClockData { ref_time: val }.into())
1898     }
1899 
1900     /// Set guest clock.
1901     #[cfg(target_arch = "x86_64")]
1902     fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
1903         let data: MshvClockData = (*data).into();
1904         self.fd
1905             .set_partition_property(
1906                 hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME,
1907                 data.ref_time,
1908             )
1909             .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
1910     }
1911 
1912     /// Downcast to the underlying MshvVm type
1913     fn as_any(&self) -> &dyn Any {
1914         self
1915     }
1916 
1917     /// Initialize the SEV-SNP VM
1918     #[cfg(feature = "sev_snp")]
1919     fn sev_snp_init(&self) -> vm::Result<()> {
1920         self.fd
1921             .set_partition_property(
1922                 hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_STATE,
1923                 hv_partition_isolation_state_HV_PARTITION_ISOLATION_SECURE as u64,
1924             )
1925             .map_err(|e| vm::HypervisorVmError::InitializeSevSnp(e.into()))
1926     }
1927 
1928     ///
1929     /// Import isolated pages; these pages will be used
1930     /// for the PSP (Platform Security Processor) measurement.
1931     #[cfg(feature = "sev_snp")]
1932     fn import_isolated_pages(
1933         &self,
1934         page_type: u32,
1935         page_size: u32,
1936         pages: &[u64],
1937     ) -> vm::Result<()> {
1938         debug_assert!(page_size == hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB);
1939         if pages.is_empty() {
1940             return Ok(());
1941         }
1942 
1943         let mut isolated_pages =
1944             vec_with_array_field::<mshv_import_isolated_pages, u64>(pages.len());
1945         isolated_pages[0].page_type = page_type as u8;
1946         isolated_pages[0].page_count = pages.len() as u64;
1947         // SAFETY: isolated_pages initialized with pages.len() and now it is being turned into
1948         // pages_slice with pages.len() again. It is guaranteed to be large enough to hold
1949         // everything from pages.
1950         unsafe {
1951             let pages_slice: &mut [u64] = isolated_pages[0].guest_pfns.as_mut_slice(pages.len());
1952             pages_slice.copy_from_slice(pages);
1953         }
1954         self.fd
1955             .import_isolated_pages(&isolated_pages[0])
1956             .map_err(|e| vm::HypervisorVmError::ImportIsolatedPages(e.into()))
1957     }
1958 
1959     ///
1960     /// Complete isolated import, telling the hypervisor that
1961     /// importing the pages to guest memory is complete.
1962     ///
1963     #[cfg(feature = "sev_snp")]
1964     fn complete_isolated_import(
1965         &self,
1966         snp_id_block: IGVM_VHS_SNP_ID_BLOCK,
1967         host_data: [u8; 32],
1968         id_block_enabled: u8,
1969     ) -> vm::Result<()> {
1970         let mut auth_info = hv_snp_id_auth_info {
1971             id_key_algorithm: snp_id_block.id_key_algorithm,
1972             auth_key_algorithm: snp_id_block.author_key_algorithm,
1973             ..Default::default()
1974         };
1975         // Each of the r and s signature components is 576 bits (72 bytes) long
1976         auth_info.id_block_signature[..SIG_R_COMPONENT_SIZE_IN_BYTES]
1977             .copy_from_slice(snp_id_block.id_key_signature.r_comp.as_ref());
1978         auth_info.id_block_signature
1979             [SIG_R_COMPONENT_SIZE_IN_BYTES..SIG_R_AND_S_COMPONENT_SIZE_IN_BYTES]
1980             .copy_from_slice(snp_id_block.id_key_signature.s_comp.as_ref());
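        // Resulting layout of id_block_signature (hedged note, following the
        // component size above): bytes [0..72) hold the r component and bytes
        // [72..144) hold the s component.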
1981         auth_info.id_key[..ECDSA_CURVE_ID_SIZE_IN_BYTES]
1982             .copy_from_slice(snp_id_block.id_public_key.curve.to_le_bytes().as_ref());
1983         auth_info.id_key[ECDSA_SIG_X_COMPONENT_START..ECDSA_SIG_X_COMPONENT_END]
1984             .copy_from_slice(snp_id_block.id_public_key.qx.as_ref());
1985         auth_info.id_key[ECDSA_SIG_Y_COMPONENT_START..ECDSA_SIG_Y_COMPONENT_END]
1986             .copy_from_slice(snp_id_block.id_public_key.qy.as_ref());
1987 
1988         let data = mshv_complete_isolated_import {
1989             import_data: hv_partition_complete_isolated_import_data {
1990                 psp_parameters: hv_psp_launch_finish_data {
1991                     id_block: hv_snp_id_block {
1992                         launch_digest: snp_id_block.ld,
1993                         family_id: snp_id_block.family_id,
1994                         image_id: snp_id_block.image_id,
1995                         version: snp_id_block.version,
1996                         guest_svn: snp_id_block.guest_svn,
1997                         policy: get_default_snp_guest_policy(),
1998                     },
1999                     id_auth_info: auth_info,
2000                     host_data,
2001                     id_block_enabled,
2002                     author_key_enabled: 0,
2003                 },
2004             },
2005         };
2006         self.fd
2007             .complete_isolated_import(&data)
2008             .map_err(|e| vm::HypervisorVmError::CompleteIsolatedImport(e.into()))
2009     }
2010 
2011     #[cfg(target_arch = "aarch64")]
2012     fn create_vgic(&self, _config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
2013         unimplemented!()
2014     }
2015 
2016     #[cfg(target_arch = "aarch64")]
2017     fn get_preferred_target(&self, _kvi: &mut VcpuInit) -> vm::Result<()> {
2018         unimplemented!()
2019     }
2020 
2021     /// Pause the VM
2022     fn pause(&self) -> vm::Result<()> {
2023         // Freeze the partition
2024         self.fd
2025             .set_partition_property(
2026                 hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
2027                 1u64,
2028             )
2029             .map_err(|e| {
2030                 vm::HypervisorVmError::SetVmProperty(anyhow!(
2031                     "Failed to set partition property: {}",
2032                     e
2033                 ))
2034             })
2035     }
2036 
2037     /// Resume the VM
2038     fn resume(&self) -> vm::Result<()> {
2039         // Unfreeze the partition by clearing the TIME_FREEZE property
2040         self.fd
2041             .set_partition_property(
2042                 hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
2043                 0u64,
2044             )
2045             .map_err(|e| {
2046                 vm::HypervisorVmError::SetVmProperty(anyhow!(
2047                     "Failed to set partition property: {}",
2048                     e
2049                 ))
2050             })
2051     }
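    // Hedged note on the pair above: TIME_FREEZE acts as a simple gate, with
    // the two methods writing opposite values to the same partition property:
    //
    //     vm.pause()?;   // TIME_FREEZE = 1, partition frozen
    //     vm.resume()?;  // TIME_FREEZE = 0, partition running again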
2052 
2053     #[cfg(feature = "sev_snp")]
2054     fn gain_page_access(&self, gpa: u64, size: u32) -> vm::Result<()> {
2055         use mshv_ioctls::set_bits;
2056 
2057         if !self.sev_snp_enabled {
2058             return Ok(());
2059         }
2060 
2061         let start_gpfn: u64 = gpa >> PAGE_SHIFT;
2062         let end_gpfn: u64 = (gpa + size as u64 - 1) >> PAGE_SHIFT;
2063 
2064         let gpas: Vec<u64> = (start_gpfn..=end_gpfn).map(|x| x << PAGE_SHIFT).collect();
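        // Worked example (illustrative): gpa = 0x1800 and size = 0x2000 give
        // start_gpfn = 1 and end_gpfn = 3 ((0x1800 + 0x2000 - 1) >> PAGE_SHIFT),
        // so gpas becomes [0x1000, 0x2000, 0x3000] -- every page the range touches.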
2065 
2066         if !gpas.is_empty() {
2067             let mut gpa_list = vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len());
2068             gpa_list[0].page_count = gpas.len() as u64;
2069             gpa_list[0].flags = set_bits!(
2070                 u8,
2071                 MSHV_GPA_HOST_ACCESS_BIT_ACQUIRE,
2072                 MSHV_GPA_HOST_ACCESS_BIT_READABLE,
2073                 MSHV_GPA_HOST_ACCESS_BIT_WRITABLE
2074             );
2075 
2076             // SAFETY: gpa_list initialized with gpas.len() and now it is being turned into
2077             // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold
2078             // everything from gpas.
2079             unsafe {
2080                 let gpas_slice: &mut [u64] = gpa_list[0].guest_pfns.as_mut_slice(gpas.len());
2081                 gpas_slice.copy_from_slice(gpas.as_slice());
2082             }
2083 
2084             self.fd
2085                 .modify_gpa_host_access(&gpa_list[0])
2086                 .map_err(|e| vm::HypervisorVmError::ModifyGpaHostAccess(e.into()))?;
2087         }
2088 
2089         Ok(())
2090     }
2091 }
2092