// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//

use std::any::Any;
use std::collections::HashMap;
#[cfg(feature = "sev_snp")]
use std::num::NonZeroUsize;
use std::sync::{Arc, RwLock};

#[cfg(feature = "sev_snp")]
use arc_swap::ArcSwap;
use mshv_bindings::*;
#[cfg(target_arch = "x86_64")]
use mshv_ioctls::{set_registers_64, InterruptRequest};
use mshv_ioctls::{Mshv, NoDatamatch, VcpuFd, VmFd, VmType};
use vfio_ioctls::VfioDeviceFd;
use vm::DataMatch;
#[cfg(feature = "sev_snp")]
use vm_memory::bitmap::AtomicBitmap;

#[cfg(target_arch = "x86_64")]
use crate::arch::emulator::PlatformEmulator;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::emulator::Emulator;
#[cfg(target_arch = "x86_64")]
use crate::mshv::emulator::MshvEmulatorContext;
use crate::vm::{self, InterruptSourceConfig, VmOps};
use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType};
#[cfg(feature = "sev_snp")]
mod snp_constants;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
#[cfg(target_arch = "x86_64")]
use std::fs::File;
use std::os::unix::io::AsRawFd;
#[cfg(target_arch = "aarch64")]
use std::sync::Mutex;

#[cfg(target_arch = "aarch64")]
pub use aarch64::VcpuMshvState;
#[cfg(feature = "sev_snp")]
use igvm_defs::IGVM_VHS_SNP_ID_BLOCK;
#[cfg(feature = "sev_snp")]
use snp_constants::*;
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;
#[cfg(target_arch = "x86_64")]
pub use x86_64::{emulator, VcpuMshvState};
///
/// Export generically-named wrappers of mshv-bindings for Unix-based platforms
///
pub use {
    mshv_bindings::mshv_create_device as CreateDevice,
    mshv_bindings::mshv_device_attr as DeviceAttr, mshv_ioctls, mshv_ioctls::DeviceFd,
};

#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::gic::{Vgic, VgicConfig};
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{CpuIdEntry, FpuState, MsrEntry};
#[cfg(target_arch = "x86_64")]
use crate::ClockData;
use crate::{
    CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
    USER_MEMORY_REGION_ADJUSTABLE, USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ,
    USER_MEMORY_REGION_WRITE,
};

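/// Number of bits to shift a guest page frame number to obtain its guest
/// physical address (MSHV uses 4 KiB pages, so gpa = pfn << PAGE_SHIFT).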
pub const PAGE_SHIFT: usize = 12;

impl From<mshv_user_mem_region> for UserMemoryRegion {
    fn from(region: mshv_user_mem_region) -> Self {
        let mut flags: u32 = USER_MEMORY_REGION_READ | USER_MEMORY_REGION_ADJUSTABLE;
        if region.flags & (1 << MSHV_SET_MEM_BIT_WRITABLE) != 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & (1 << MSHV_SET_MEM_BIT_EXECUTABLE) != 0 {
            flags |= USER_MEMORY_REGION_EXECUTE;
        }

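        // guest_pfn holds only the page frame number; the sub-page offset of
        // the mapping is recovered from the low bits of the userspace address.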
        UserMemoryRegion {
            guest_phys_addr: (region.guest_pfn << PAGE_SHIFT as u64)
                + (region.userspace_addr & ((1 << PAGE_SHIFT) - 1)),
            memory_size: region.size,
            userspace_addr: region.userspace_addr,
            flags,
            ..Default::default()
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<MshvClockData> for ClockData {
    fn from(d: MshvClockData) -> Self {
        ClockData::Mshv(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for MshvClockData {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Mshv(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => unreachable!("MSHV clock data is not valid"),
        }
    }
}

impl From<UserMemoryRegion> for mshv_user_mem_region {
    fn from(region: UserMemoryRegion) -> Self {
        let mut flags: u8 = 0;
        if region.flags & USER_MEMORY_REGION_WRITE != 0 {
            flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
        }
        if region.flags & USER_MEMORY_REGION_EXECUTE != 0 {
            flags |= 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
        }

        mshv_user_mem_region {
            guest_pfn: region.guest_phys_addr >> PAGE_SHIFT,
            size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
            ..Default::default()
        }
    }
}
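
// Example (an illustrative sketch, not compiled): a writable 4 KiB region at
// GPA 0x100000 maps to guest_pfn 0x100, and converting back restores the
// address and flag encoding above:
//
//     let mshv_region = mshv_user_mem_region {
//         guest_pfn: 0x100,
//         size: 0x1000,
//         userspace_addr: 0x7f00_0000_0000,
//         flags: 1 << MSHV_SET_MEM_BIT_WRITABLE,
//         ..Default::default()
//     };
//     let region: UserMemoryRegion = mshv_region.into();
//     assert_eq!(region.guest_phys_addr, 0x100 << PAGE_SHIFT);
//     assert_ne!(region.flags & USER_MEMORY_REGION_WRITE, 0);
//     assert_eq!(mshv_user_mem_region::from(region).guest_pfn, 0x100);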

impl From<mshv_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: mshv_ioctls::IoEventAddress) -> Self {
        match a {
            mshv_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            mshv_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for mshv_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuMshvState> for CpuState {
    fn from(s: VcpuMshvState) -> Self {
        CpuState::Mshv(s)
    }
}

impl From<CpuState> for VcpuMshvState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Mshv(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

impl From<mshv_bindings::StandardRegisters> for crate::StandardRegisters {
    fn from(s: mshv_bindings::StandardRegisters) -> Self {
        crate::StandardRegisters::Mshv(s)
    }
}

impl From<crate::StandardRegisters> for mshv_bindings::StandardRegisters {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

impl From<mshv_user_irq_entry> for IrqRoutingEntry {
    fn from(s: mshv_user_irq_entry) -> Self {
        IrqRoutingEntry::Mshv(s)
    }
}

impl From<IrqRoutingEntry> for mshv_user_irq_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

#[cfg(target_arch = "aarch64")]
impl From<mshv_bindings::MshvRegList> for crate::RegList {
    fn from(s: mshv_bindings::MshvRegList) -> Self {
        crate::RegList::Mshv(s)
    }
}

#[cfg(target_arch = "aarch64")]
impl From<crate::RegList> for mshv_bindings::MshvRegList {
    fn from(e: crate::RegList) -> Self {
        match e {
            crate::RegList::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("RegList is not valid"),
        }
    }
}

#[cfg(target_arch = "aarch64")]
impl From<mshv_bindings::MshvVcpuInit> for crate::VcpuInit {
    fn from(s: mshv_bindings::MshvVcpuInit) -> Self {
        crate::VcpuInit::Mshv(s)
    }
}

#[cfg(target_arch = "aarch64")]
impl From<crate::VcpuInit> for mshv_bindings::MshvVcpuInit {
    fn from(e: crate::VcpuInit) -> Self {
        match e {
            crate::VcpuInit::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("VcpuInit is not valid"),
        }
    }
}

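/// Bookkeeping for a memory slot registered for dirty-page tracking.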
struct MshvDirtyLogSlot {
    guest_pfn: u64,
    memory_size: u64,
}

/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by MSHV.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }

    fn create_vm_with_type_and_memory_int(
        &self,
        vm_type: u64,
        #[cfg(feature = "sev_snp")] _mem_size: Option<u64>,
    ) -> hypervisor::Result<Arc<dyn crate::Vm>> {
        let mshv_vm_type: VmType = match VmType::try_from(vm_type) {
            Ok(vm_type) => vm_type,
            Err(_) => return Err(hypervisor::HypervisorError::UnsupportedVmType()),
        };
        let fd: VmFd;
        loop {
            match self.mshv.create_vm_with_type(mshv_vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        // Set additional partition properties for a SEV-SNP partition.
        #[cfg(target_arch = "x86_64")]
        if mshv_vm_type == VmType::Snp {
            let snp_policy = snp::get_default_snp_guest_policy();
            let vmgexit_offloads = snp::get_default_vmgexit_offload_features();
            // SAFETY: access union fields
            unsafe {
                debug!(
                    "Setting the partition isolation policy as: 0x{:x}",
                    snp_policy.as_uint64
                );
                fd.set_partition_property(
                    hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_POLICY,
                    snp_policy.as_uint64,
                )
                .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
                debug!(
                    "Setting the partition property to enable VMGEXIT offloads as: 0x{:x}",
                    vmgexit_offloads.as_uint64
                );
                fd.set_partition_property(
                    hv_partition_property_code_HV_PARTITION_PROPERTY_SEV_VMGEXIT_OFFLOADS,
                    vmgexit_offloads.as_uint64,
                )
                .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
            }
        }

        // The default Microsoft Hypervisor behavior for an unimplemented MSR is to
        // send a fault to the guest if it tries to access it. It is possible
        // to override this behavior with a more suitable option, i.e., ignore
        // writes from the guest and return zero when it attempts to read an
        // unimplemented MSR.
        #[cfg(target_arch = "x86_64")]
        fd.set_partition_property(
            hv_partition_property_code_HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION,
            hv_unimplemented_msr_action_HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;

        // Always create a frozen partition
        fd.set_partition_property(
            hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
            1u64,
        )
        .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs: Vec<MsrEntry> = vec![
                MsrEntry {
                    ..Default::default()
                };
                num_msrs
            ];
            let indices = msr_list.as_slice();
            for (pos, index) in indices.iter().enumerate() {
                msrs[pos].index = *index;
            }

            Ok(Arc::new(MshvVm {
                fd: vm_fd,
                msrs,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
                #[cfg(feature = "sev_snp")]
                sev_snp_enabled: mshv_vm_type == VmType::Snp,
                #[cfg(feature = "sev_snp")]
                host_access_pages: ArcSwap::new(
                    AtomicBitmap::new(
                        _mem_size.unwrap_or_default() as usize,
                        NonZeroUsize::new(HV_PAGE_SIZE).unwrap(),
                    )
                    .into(),
                ),
            }))
        }

        #[cfg(target_arch = "aarch64")]
        {
            Ok(Arc::new(MshvVm {
                fd: vm_fd,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }
}

impl MshvHypervisor {
    /// Create a hypervisor based on Mshv
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let mshv_obj =
            Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
        Ok(Arc::new(MshvHypervisor { mshv: mshv_obj }))
    }
    /// Check if the hypervisor is available
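    ///
    /// # Examples
    ///
    /// ```
    /// use hypervisor::mshv::MshvHypervisor;
    /// // A sketch: only instantiate the hypervisor when /dev/mshv is present.
    /// if MshvHypervisor::is_available().unwrap() {
    ///     let mshv = MshvHypervisor::new().unwrap();
    /// }
    /// ```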
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/mshv") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}

/// Implementation of Hypervisor trait for Mshv
///
/// # Examples
///
/// ```
/// use hypervisor::mshv::MshvHypervisor;
/// use std::sync::Arc;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl hypervisor::Hypervisor for MshvHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Mshv
    }

    ///
    /// Create a Vm of a specific type using the underlying hypervisor, passing the
    /// guest memory size along when the "sev_snp" feature is enabled.
    /// Returns a hypervisor-agnostic Vm trait object.
    ///
    /// # Examples
    ///
    /// ```
    /// use hypervisor::mshv::MshvHypervisor;
    /// use std::sync::Arc;
    /// let mshv = MshvHypervisor::new().unwrap();
    /// let hypervisor = Arc::new(mshv);
    /// #[cfg(feature = "sev_snp")]
    /// let vm = hypervisor
    ///     .create_vm_with_type_and_memory(0, 512 * 1024 * 1024)
    ///     .unwrap();
    /// #[cfg(not(feature = "sev_snp"))]
    /// let vm = hypervisor.create_vm_with_type_and_memory(0).unwrap();
    /// ```
    fn create_vm_with_type_and_memory(
        &self,
        vm_type: u64,
        #[cfg(feature = "sev_snp")] _mem_size: u64,
    ) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        self.create_vm_with_type_and_memory_int(
            vm_type,
            #[cfg(feature = "sev_snp")]
            Some(_mem_size),
        )
    }

    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn crate::Vm>> {
        self.create_vm_with_type_and_memory_int(
            vm_type,
            #[cfg(feature = "sev_snp")]
            None,
        )
    }

    /// Create an MSHV VM object and return it as a Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # extern crate hypervisor;
    /// use hypervisor::mshv::MshvHypervisor;
    /// use hypervisor::mshv::MshvVm;
    /// let hypervisor = MshvHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// ```
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let vm_type = 0;
        self.create_vm_with_type(vm_type)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the supported CpuID
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        let mut cpuid = Vec::new();
        let functions: [u32; 2] = [0x1, 0xb];

        for function in functions {
            cpuid.push(CpuIdEntry {
                function,
                ..Default::default()
            });
        }
        Ok(cpuid)
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        // TODO: Using HV_MAXIMUM_PROCESSORS would be better
        // but the ioctl API is limited to u8
        256
    }

    fn get_guest_debug_hw_bps(&self) -> usize {
        0
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve AArch64 host maximum IPA size supported by MSHV.
    ///
    fn get_host_ipa_limit(&self) -> i32 {
        let host_ipa = self.mshv.get_host_partition_property(
            hv_partition_property_code_HV_PARTITION_PROPERTY_PHYSICAL_ADDRESS_WIDTH as u64,
        );

        match host_ipa {
            Ok(ipa) => ipa,
            Err(e) => {
                panic!("Failed to get host IPA limit: {:?}", e);
            }
        }
    }
}

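/// Thin wrapper around the raw pointer to the guest's GHCB page as mapped
/// into the VMM's address space.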
#[cfg(feature = "sev_snp")]
struct Ghcb(*mut svm_ghcb_base);

#[cfg(feature = "sev_snp")]
// SAFETY: struct is based on GHCB page in the hypervisor,
// safe to Send across threads
unsafe impl Send for Ghcb {}

#[cfg(feature = "sev_snp")]
// SAFETY: struct is based on GHCB page in the hypervisor,
// safe to Sync across threads; Sync is only required by the Vcpu trait
// and is not functionally used anyway
unsafe impl Sync for Ghcb {}

/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    fd: VcpuFd,
    vp_index: u8,
    #[cfg(target_arch = "x86_64")]
    cpuid: Vec<CpuIdEntry>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    vm_fd: Arc<VmFd>,
    #[cfg(feature = "sev_snp")]
    ghcb: Option<Ghcb>,
    #[cfg(feature = "sev_snp")]
    host_access_pages: ArcSwap<AtomicBitmap>,
}

/// Implementation of Vcpu trait for Microsoft Hypervisor
///
/// # Examples
///
/// ```
/// use hypervisor::mshv::MshvHypervisor;
/// use std::sync::Arc;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// ```
impl cpu::Vcpu for MshvVcpu {
    ///
    /// Returns StandardRegisters with default value set
    ///
    #[cfg(target_arch = "x86_64")]
    fn create_standard_regs(&self) -> crate::StandardRegisters {
        mshv_bindings::StandardRegisters::default().into()
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<crate::StandardRegisters> {
        Ok(self
            .fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
    fn set_regs(&self, regs: &crate::StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<crate::arch::x86::SpecialRegisters> {
        Ok(self
            .fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &crate::arch::x86::SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: mshv_bindings::FloatingPointUnit = (*fpu).clone().into();
        self.fd
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        let succ = self
            .fd
            .get_msrs(&mut mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &mshv_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        self.fd
            .set_msrs(&mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }

    #[allow(non_upper_case_globals)]
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.run() {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    // SAFETY: access_info is valid, otherwise we won't be here
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    /*
                     * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
                     *
                     * Cloud Hypervisor doesn't support fw_cfg at the moment. It does support 0x402
                     * under the "fwdebug" feature flag. But that feature is not enabled by default
                     * and is considered legacy.
                     *
                     * OVMF unconditionally pokes these IO ports with string IO.
                     *
                     * Instead of trying to implement string IO support, which would not
                     * accomplish much right now, skip those ports explicitly to avoid panicking.
                     *
                     * Proper string IO support can be added once we gain the ability to translate
                     * guest virtual addresses to guest physical addresses on MSHV.
                     */
                    match port {
                        0x402 | 0x510 | 0x511 | 0x514 => {
                            let insn_len = info.header.instruction_length() as u64;

                            /* Advance RIP and update RAX */
                            let arr_reg_name_value = [
                                (
                                    hv_register_name_HV_X64_REGISTER_RIP,
                                    info.header.rip + insn_len,
                                ),
                                (hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
                            ];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                            return Ok(cpu::VmExit::Ignore);
                        }
                        _ => {}
                    }

                    assert!(
                        // SAFETY: access_info is valid, otherwise we won't be here
                        (unsafe { access_info.__bindgen_anon_1.string_op() } != 1),
                        "String IN/OUT not supported"
                    );
                    assert!(
                        // SAFETY: access_info is valid, otherwise we won't be here
                        (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1),
                        "Rep IN/OUT not supported"
                    );

                    if is_write {
                        let data = (info.rax as u32).to_le_bytes();
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_write(port.into(), &data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }
                    } else {
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_read(port.into(), &mut data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }

                        let v = u32::from_le_bytes(data);
                        /* Preserve high bits in EAX but clear out high bits in RAX */
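                        /*
                         * E.g. (illustrative): a 1-byte IN that returns v = 0x9a
                         * while rax = 0x1234_5678 yields mask = 0xff and
                         * eax = (0x1234_5678 & !0xff) | 0x9a = 0x1234_569a, with
                         * the upper 32 bits of RAX cleared by the u64 widening below.
                         */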
                        let mask = 0xffffffff >> (32 - len * 8);
                        let eax = (info.rax as u32 & !mask) | (v & mask);
                        ret_rax = eax as u64;
                    }

                    let insn_len = info.header.instruction_length() as u64;

                    /* Advance RIP and update RAX */
                    let arr_reg_name_value = [
                        (
                            hv_register_name_HV_X64_REGISTER_RIP,
                            info.header.rip + insn_len,
                        ),
                        (hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
                    ];
                    set_registers_64!(self.fd, arr_reg_name_value)
                        .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                msg_type @ (hv_message_type_HVMSG_UNMAPPED_GPA
                | hv_message_type_HVMSG_GPA_INTERCEPT) => {
                    let info = x.to_memory_info().unwrap();
                    let insn_len = info.instruction_byte_count as usize;
                    let gva = info.guest_virtual_address;
                    let gpa = info.guest_physical_address;

                    debug!("Exit ({:?}) GVA {:x} GPA {:x}", msg_type, gva, gpa);

                    let mut context = MshvEmulatorContext {
                        vcpu: self,
                        map: (gva, gpa),
                    };

                    // Create a new emulator.
                    let mut emul = Emulator::new(&mut context);

                    // Emulate the trapped instruction, and only the first one.
                    let new_state = emul
                        .emulate_first_insn(
                            self.vp_index as usize,
                            &info.instruction_bytes[..insn_len],
                        )
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Set CPU state back.
                    context
                        .set_cpu_state(self.vp_index as usize, new_state)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(feature = "sev_snp")]
                hv_message_type_HVMSG_GPA_ATTRIBUTE_INTERCEPT => {
                    let info = x.to_gpa_attribute_info().unwrap();
                    let host_vis = info.__bindgen_anon_1.host_visibility();
                    if host_vis >= HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE {
                        warn!("Ignored attribute intercept with full host visibility");
                        return Ok(cpu::VmExit::Ignore);
                    }

                    let num_ranges = info.__bindgen_anon_1.range_count();
                    assert!(num_ranges >= 1);
                    if num_ranges > 1 {
                        return Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unhandled VCPU exit(GPA_ATTRIBUTE_INTERCEPT): Expected num_ranges to be 1 but found num_ranges {:?}",
                            num_ranges
                        )));
                    }

                    // TODO: we could also deny the request with HvCallCompleteIntercept
                    let mut gpas = Vec::new();
                    let ranges = info.ranges;
                    let (gfn_start, gfn_count) = snp::parse_gpa_range(ranges[0]).unwrap();
                    debug!(
                        "Releasing pages: gfn_start: {:x?}, gfn_count: {:?}",
                        gfn_start, gfn_count
                    );
                    let gpa_start = gfn_start * HV_PAGE_SIZE as u64;
                    for i in 0..gfn_count {
                        gpas.push(gpa_start + i * HV_PAGE_SIZE as u64);
                    }

                    let mut gpa_list =
                        vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len());
                    gpa_list[0].page_count = gpas.len() as u64;
                    gpa_list[0].flags = 0;
                    if host_vis & HV_MAP_GPA_READABLE != 0 {
                        gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_READABLE;
                    }
                    if host_vis & HV_MAP_GPA_WRITABLE != 0 {
                        gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_WRITABLE;
                    }

                    // SAFETY: gpa_list initialized with gpas.len() and now it is being turned into
                    // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold
                    // everything from gpas.
                    unsafe {
                        let gpas_slice: &mut [u64] =
                            gpa_list[0].guest_pfns.as_mut_slice(gpas.len());
                        gpas_slice.copy_from_slice(gpas.as_slice());
                    }

                    self.vm_fd
                        .modify_gpa_host_access(&gpa_list[0])
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unhandled VCPU exit: attribute intercept - couldn't modify host access {}", e
                        )))?;
                    // Guest is revoking the shared access, so we need to update the bitmap
                    self.host_access_pages.rcu(|_bitmap| {
                        let bm = self.host_access_pages.load().as_ref().clone();
                        bm.reset_addr_range(gpa_start as usize, gfn_count as usize);
                        bm
                    });
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_UNACCEPTED_GPA => {
                    let info = x.to_memory_info().unwrap();
                    let gva = info.guest_virtual_address;
                    let gpa = info.guest_physical_address;

                    Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                        "Unhandled VCPU exit: Unaccepted GPA({:x}) found at GVA({:x})",
                        gpa,
                        gva,
                    )))
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
                    let info = x.to_cpuid_info().unwrap();
                    debug!("cpuid eax: {:x}", { info.rax });
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
                    let info = x.to_msr_info().unwrap();
                    if info.header.intercept_access_type == 0 {
                        debug!("msr read: {:x}", { info.msr_number });
                    } else {
                        debug!("msr write: {:x}", { info.msr_number });
                    }
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
                    //TODO: Handler for VMCALL here.
                    let info = x.to_exception_info().unwrap();
                    debug!("Exception Info {:?}", { info.exception_vector });
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_APIC_EOI => {
                    let info = x.to_apic_eoi_info().unwrap();
                    // The kernel should dispatch the EOI to the correct thread.
                    // Check the VP index is the same as the one we have.
                    assert!(info.vp_index == self.vp_index as u32);
                    // The interrupt vector in info is u32, but x86 only supports 256 vectors.
                    // There is no good way to recover from this if the hypervisor messes around.
                    // Just unwrap.
                    Ok(cpu::VmExit::IoapicEoi(
                        info.interrupt_vector.try_into().unwrap(),
                    ))
                }
                #[cfg(feature = "sev_snp")]
                hv_message_type_HVMSG_X64_SEV_VMGEXIT_INTERCEPT => {
                    let info = x.to_vmg_intercept_info().unwrap();
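                    // GHCB MSR protocol: the low GHCB_INFO_BIT_WIDTH bits of the
                    // MSR encode the requested operation; the remaining high bits
                    // carry the operation's data.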
                    let ghcb_data = info.ghcb_msr >> GHCB_INFO_BIT_WIDTH;
                    let ghcb_msr = svm_ghcb_msr {
                        as_uint64: info.ghcb_msr,
                    };
                    // Safe to unwrap: for a SEV-SNP guest the GHCB pointer is always
                    // present in the Option, otherwise this code path is never reached.
                    let ghcb = self.ghcb.as_ref().unwrap().0;

                    // SAFETY: Accessing a union element from bindgen generated bindings.
                    let ghcb_op = unsafe { ghcb_msr.__bindgen_anon_2.ghcb_info() as u32 };
                    // Sanity check on the header fields before handling other operations.
                    assert!(info.header.intercept_access_type == HV_INTERCEPT_ACCESS_EXECUTE as u8);

                    match ghcb_op {
                        GHCB_INFO_HYP_FEATURE_REQUEST => {
                            // Pre-condition: GHCB data must be zero
                            assert!(ghcb_data == 0);
                            let mut ghcb_response = GHCB_INFO_HYP_FEATURE_RESPONSE as u64;
                            // Indicate support for basic SEV-SNP features
                            ghcb_response |=
                                (GHCB_HYP_FEATURE_SEV_SNP << GHCB_INFO_BIT_WIDTH) as u64;
                            // Indicate support for SEV-SNP AP creation
                            ghcb_response |= (GHCB_HYP_FEATURE_SEV_SNP_AP_CREATION
                                << GHCB_INFO_BIT_WIDTH)
                                as u64;
                            debug!(
                                "GHCB_INFO_HYP_FEATURE_REQUEST: Supported features: {:0x}",
                                ghcb_response
                            );
                            let arr_reg_name_value =
                                [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                        }
                        GHCB_INFO_REGISTER_REQUEST => {
                            let mut ghcb_gpa = hv_x64_register_sev_ghcb::default();

                            // Disable the previously used GHCB page.
                            self.disable_prev_ghcb_page()?;

                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            unsafe {
                                ghcb_gpa.__bindgen_anon_1.set_enabled(1);
                                ghcb_gpa
                                    .__bindgen_anon_1
                                    .set_page_number(ghcb_msr.__bindgen_anon_2.gpa_page_number());
                            }
                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            let reg_name_value = unsafe {
                                [(
                                    hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
                                    ghcb_gpa.as_uint64,
                                )]
                            };

                            set_registers_64!(self.fd, reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;

                            let mut resp_ghcb_msr = svm_ghcb_msr::default();
                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            unsafe {
                                resp_ghcb_msr
                                    .__bindgen_anon_2
                                    .set_ghcb_info(GHCB_INFO_REGISTER_RESPONSE as u64);
                                resp_ghcb_msr.__bindgen_anon_2.set_gpa_page_number(
                                    ghcb_msr.__bindgen_anon_2.gpa_page_number(),
                                );
                                debug!("GHCB GPA is {:x}", ghcb_gpa.as_uint64);
                            }
                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            let reg_name_value = unsafe {
                                [(
                                    hv_register_name_HV_X64_REGISTER_GHCB,
                                    resp_ghcb_msr.as_uint64,
                                )]
                            };

                            set_registers_64!(self.fd, reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                        }
                        GHCB_INFO_SEV_INFO_REQUEST => {
                            let sev_cpuid_function = 0x8000_001F;
                            let cpu_leaf = self
                                .fd
                                .get_cpuid_values(sev_cpuid_function, 0, 0, 0)
                                .unwrap();
                            let ebx = cpu_leaf[1];
                            // The low 6 bits of EBX hold the page table encryption bit number
                            let pbit_encryption = (ebx & 0x3f) as u8;
                            let mut ghcb_response = GHCB_INFO_SEV_INFO_RESPONSE as u64;

                            // GHCBData[63:48] specifies the maximum GHCB protocol version supported
                            ghcb_response |= (GHCB_PROTOCOL_VERSION_MAX as u64) << 48;
                            // GHCBData[47:32] specifies the minimum GHCB protocol version supported
                            ghcb_response |= (GHCB_PROTOCOL_VERSION_MIN as u64) << 32;
                            // GHCBData[31:24] specifies the SEV page table encryption bit number.
                            ghcb_response |= (pbit_encryption as u64) << 24;
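                            // E.g. (illustrative values): max version 2, min version 1
                            // and C-bit position 51 (0x33) yield ghcb_response =
                            // 0x0002_0001_3300_0000 | GHCB_INFO_SEV_INFO_RESPONSE.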

                            let arr_reg_name_value =
                                [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                        }
                        GHCB_INFO_NORMAL => {
                            let exit_code =
                                info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_code as u32;

                            match exit_code {
                                SVM_EXITCODE_HV_DOORBELL_PAGE => {
                                    let exit_info1 =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32;
                                    match exit_info1 {
                                        SVM_NAE_HV_DOORBELL_PAGE_GET_PREFERRED => {
                                            // Hypervisor does not have any preference for doorbell GPA.
                                            let preferred_doorbell_gpa: u64 = 0xFFFFFFFFFFFFFFFF;
                                            set_svm_field_u64_ptr!(
                                                ghcb,
                                                exit_info2,
                                                preferred_doorbell_gpa
                                            );
                                        }
                                        SVM_NAE_HV_DOORBELL_PAGE_SET => {
                                            let exit_info2 = info
                                                .__bindgen_anon_2
                                                .__bindgen_anon_1
                                                .sw_exit_info2;
                                            let mut ghcb_doorbell_gpa =
                                                hv_x64_register_sev_hv_doorbell::default();
                                            // SAFETY: Accessing a union element from bindgen generated bindings.
                                            unsafe {
                                                ghcb_doorbell_gpa.__bindgen_anon_1.set_enabled(1);
                                                ghcb_doorbell_gpa
                                                    .__bindgen_anon_1
                                                    .set_page_number(exit_info2 >> PAGE_SHIFT);
                                            }
                                            // SAFETY: Accessing a union element from bindgen generated bindings.
                                            let reg_names = unsafe {
                                                [(
                                                    hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA,
                                                    ghcb_doorbell_gpa.as_uint64,
                                                )]
                                            };
                                            set_registers_64!(self.fd, reg_names).map_err(|e| {
                                                cpu::HypervisorCpuError::SetRegister(e.into())
                                            })?;

                                            set_svm_field_u64_ptr!(ghcb, exit_info2, exit_info2);

                                            // Clear the SW_EXIT_INFO1 register to indicate no error
                                            self.clear_swexit_info1()?;
                                        }
                                        SVM_NAE_HV_DOORBELL_PAGE_QUERY => {
                                            let mut reg_assocs = [hv_register_assoc {
                                                name: hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA,
                                                ..Default::default()
                                            }];
                                            self.fd.get_reg(&mut reg_assocs).unwrap();
                                            // SAFETY: Accessing a union element from bindgen generated bindings.
                                            let doorbell_gpa = unsafe { reg_assocs[0].value.reg64 };

                                            set_svm_field_u64_ptr!(ghcb, exit_info2, doorbell_gpa);

                                            // Clear the SW_EXIT_INFO1 register to indicate no error
                                            self.clear_swexit_info1()?;
                                        }
                                        SVM_NAE_HV_DOORBELL_PAGE_CLEAR => {
                                            set_svm_field_u64_ptr!(ghcb, exit_info2, 0);
                                        }
                                        _ => {
                                            panic!(
                                                "SVM_EXITCODE_HV_DOORBELL_PAGE: Unhandled exit code: {:0x}",
                                                exit_info1
                                            );
                                        }
                                    }
                                }
                                SVM_EXITCODE_IOIO_PROT => {
                                    let exit_info1 =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32;
                                    let port_info = hv_sev_vmgexit_port_info {
                                        as_uint32: exit_info1,
                                    };

                                    let port =
                                        // SAFETY: Accessing a union element from bindgen generated bindings.
                                        unsafe { port_info.__bindgen_anon_1.intercepted_port() };
                                    let mut len = 4;
                                    // SAFETY: Accessing a union element from bindgen generated bindings.
                                    unsafe {
                                        if port_info.__bindgen_anon_1.operand_size_16bit() == 1 {
                                            len = 2;
                                        } else if port_info.__bindgen_anon_1.operand_size_8bit()
                                            == 1
                                        {
                                            len = 1;
                                        }
                                    }
                                    let is_write =
                                        // SAFETY: Accessing a union element from bindgen generated bindings.
                                        unsafe { port_info.__bindgen_anon_1.access_type() == 0 };
                                    // SAFETY: Accessing the field from a mapped address
                                    let mut data = unsafe { (*ghcb).rax.to_le_bytes() };

                                    if is_write {
                                        if let Some(vm_ops) = &self.vm_ops {
                                            vm_ops.pio_write(port.into(), &data[..len]).map_err(
                                                |e| cpu::HypervisorCpuError::RunVcpu(e.into()),
                                            )?;
                                        }
                                    } else {
                                        if let Some(vm_ops) = &self.vm_ops {
                                            vm_ops
                                                .pio_read(port.into(), &mut data[..len])
                                                .map_err(|e| {
                                                    cpu::HypervisorCpuError::RunVcpu(e.into())
                                                })?;
                                        }
                                        set_svm_field_u64_ptr!(ghcb, rax, u64::from_le_bytes(data));
                                    }

                                    // Clear the SW_EXIT_INFO1 register to indicate no error
                                    self.clear_swexit_info1()?;
                                }
                                SVM_EXITCODE_MMIO_READ => {
                                    let src_gpa =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
                                    let data_len =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2
                                            as usize;
                                    // Sanity check to make sure data len is within supported range.
                                    assert!(data_len <= 0x8);

                                    let mut data: Vec<u8> = vec![0; data_len];
                                    if let Some(vm_ops) = &self.vm_ops {
                                        vm_ops.mmio_read(src_gpa, &mut data).map_err(|e| {
                                            cpu::HypervisorCpuError::RunVcpu(e.into())
                                        })?;
                                    }
                                    // Copy the data to the shared buffer of the GHCB page
                                    let mut buffer_data = [0; 8];
                                    buffer_data[..data_len].copy_from_slice(&data[..data_len]);
                                    // SAFETY: Updating the value of mapped area
                                    unsafe { (*ghcb).shared[0] = u64::from_le_bytes(buffer_data) };

                                    // Clear the SW_EXIT_INFO1 register to indicate no error
                                    self.clear_swexit_info1()?;
                                }
                                SVM_EXITCODE_MMIO_WRITE => {
                                    let dst_gpa =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
                                    let data_len =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2
                                            as usize;
                                    // Sanity check to make sure data len is within supported range.
                                    assert!(data_len <= 0x8);

                                    let mut data = vec![0; data_len];
                                    // SAFETY: Accessing data from a mapped address
                                    let bytes_shared_ghcb =
                                        unsafe { (*ghcb).shared[0].to_le_bytes() };
                                    data.copy_from_slice(&bytes_shared_ghcb[..data_len]);

                                    if let Some(vm_ops) = &self.vm_ops {
                                        vm_ops.mmio_write(dst_gpa, &data).map_err(|e| {
                                            cpu::HypervisorCpuError::RunVcpu(e.into())
                                        })?;
                                    }

                                    // Clear the SW_EXIT_INFO1 register to indicate no error
                                    self.clear_swexit_info1()?;
                                }
                                SVM_EXITCODE_SNP_GUEST_REQUEST
                                | SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST => {
                                    if exit_code == SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST {
                                        info!("Fetching extended guest request is not supported");
                                        // We don't support extended guest requests, so we just write empty data.
                                        // This matches the behavior of KVM in Linux 6.11.
1207 
1208                                         // Read the data GPA from RAX and the page count from RBX in the GHCB.
1209                                         // SAFETY: Accessing data from a mapped address
1210                                         let data_gpa = unsafe { (*ghcb).rax };
1211                                         // SAFETY: Accessing data from a mapped address
1212                                         let data_npages = unsafe { (*ghcb).rbx };
1213 
1214                                         if data_npages > 0 {
1215                                             // The certificates are terminated by 24 zero bytes.
1216                                             // TODO: Check whether data_gpa is the address of the shared buffer in the GHCB page;
1217                                             // in that case we should clear the shared buffer (24 bytes).
1218                                             self.gpa_write(data_gpa, &[0; 24])?;
1219                                         }
1220                                     }
1221 
1222                                     let req_gpa =
1223                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
1224                                     let rsp_gpa =
1225                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2;
1226 
1227                                     let mshv_psp_req =
1228                                         mshv_issue_psp_guest_request { req_gpa, rsp_gpa };
1229                                     self.vm_fd
1230                                         .psp_issue_guest_request(&mshv_psp_req)
1231                                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
1232 
1233                                     debug!(
1234                                         "SNP guest request: req_gpa {:0x} rsp_gpa {:0x}",
1235                                         req_gpa, rsp_gpa
1236                                     );
1237 
1238                                     set_svm_field_u64_ptr!(ghcb, exit_info2, 0);
1239                                 }
1240                                 SVM_EXITCODE_SNP_AP_CREATION => {
1241                                     let vmsa_gpa =
1242                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2;
1243                                     let apic_id =
1244                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 >> 32;
1245                                     debug!(
1246                                         "SNP AP CREATE REQUEST with VMSA GPA {:0x} and APIC ID {:?}",
1247                                         vmsa_gpa, apic_id
1248                                     );
1249 
1250                                     let mshv_ap_create_req = mshv_sev_snp_ap_create {
1251                                         vp_id: apic_id,
1252                                         vmsa_gpa,
1253                                     };
1254                                     self.vm_fd
1255                                         .sev_snp_ap_create(&mshv_ap_create_req)
1256                                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
1257 
1258                                     // Clear the SW_EXIT_INFO1 register to indicate no error
1259                                     self.clear_swexit_info1()?;
1260                                 }
1261                                 _ => panic!(
1262                                     "GHCB_INFO_NORMAL: Unhandled exit code: {:0x}",
1263                                     exit_code
1264                                 ),
1265                             }
1266                         }
1267                         _ => panic!("Unsupported VMGEXIT operation: {:0x}", ghcb_op),
1268                     }
1269 
1270                     Ok(cpu::VmExit::Ignore)
1271                 }
1272                 exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1273                     "Unhandled VCPU exit {:?}",
1274                     exit
1275                 ))),
1276             },
1277 
1278             Err(e) => match e.errno() {
1279                 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
1280                 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1281                     "VCPU error {:?}",
1282                     e
1283                 ))),
1284             },
1285         }
1286     }
1287 
1288     #[cfg(target_arch = "aarch64")]
1289     fn init_pmu(&self, _irq: u32) -> cpu::Result<()> {
1290         unimplemented!()
1291     }
1292 
1293     #[cfg(target_arch = "aarch64")]
1294     fn has_pmu_support(&self) -> bool {
1295         unimplemented!()
1296     }
1297 
1298     #[cfg(target_arch = "aarch64")]
1299     fn setup_regs(&self, _cpu_id: u8, _boot_ip: u64, _fdt_start: u64) -> cpu::Result<()> {
1300         unimplemented!()
1301     }
1302 
1303     #[cfg(target_arch = "aarch64")]
1304     fn get_sys_reg(&self, _sys_reg: u32) -> cpu::Result<u64> {
1305         unimplemented!()
1306     }
1307 
1308     #[cfg(target_arch = "aarch64")]
1309     fn get_reg_list(&self, _reg_list: &mut crate::RegList) -> cpu::Result<()> {
1310         unimplemented!()
1311     }
1312 
1313     #[cfg(target_arch = "aarch64")]
1314     fn vcpu_init(&self, _kvi: &crate::VcpuInit) -> cpu::Result<()> {
1315         unimplemented!()
1316     }
1317 
1318     #[cfg(target_arch = "aarch64")]
1319     fn set_regs(&self, _regs: &crate::StandardRegisters) -> cpu::Result<()> {
1320         unimplemented!()
1321     }
1322 
1323     #[cfg(target_arch = "aarch64")]
1324     fn get_regs(&self) -> cpu::Result<crate::StandardRegisters> {
1325         unimplemented!()
1326     }
1327 
1328     #[cfg(target_arch = "aarch64")]
1329     fn vcpu_finalize(&self, _feature: i32) -> cpu::Result<()> {
1330         unimplemented!()
1331     }
1332 
1333     #[cfg(target_arch = "aarch64")]
1334     fn vcpu_get_finalized_features(&self) -> i32 {
1335         unimplemented!()
1336     }
1337 
1338     #[cfg(target_arch = "aarch64")]
1339     fn vcpu_set_processor_features(
1340         &self,
1341         _vm: &Arc<dyn crate::Vm>,
1342         _kvi: &mut crate::VcpuInit,
1343         _id: u8,
1344     ) -> cpu::Result<()> {
1345         unimplemented!()
1346     }
1347 
1348     #[cfg(target_arch = "aarch64")]
1349     fn create_vcpu_init(&self) -> crate::VcpuInit {
1350         unimplemented!()
1351     }
1352 
1353     #[cfg(target_arch = "x86_64")]
1354     ///
1355     /// X86 specific call to set up the CPUID registers.
1356     ///
1357     fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
1358         let cpuid: Vec<mshv_bindings::hv_cpuid_entry> = cpuid.iter().map(|e| (*e).into()).collect();
1359         let mshv_cpuid = <CpuId>::from_entries(&cpuid)
1360             .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;
1361 
1362         self.fd
1363             .register_intercept_result_cpuid(&mshv_cpuid)
1364             .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
1365     }
1366 
1367     #[cfg(target_arch = "x86_64")]
1368     ///
1369     /// X86 specific call to retrieve the cached CPUID entries.
1370     ///
1371     fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
1372         Ok(self.cpuid.clone())
1373     }
1374 
1375     #[cfg(target_arch = "x86_64")]
1376     ///
1377     /// X86 specific call to retrieve the values of a CPUID leaf
1378     ///
1379     fn get_cpuid_values(
1380         &self,
1381         function: u32,
1382         index: u32,
1383         xfem: u64,
1384         xss: u64,
1385     ) -> cpu::Result<[u32; 4]> {
1386         self.fd
1387             .get_cpuid_values(function, index, xfem, xss)
1388             .map_err(|e| cpu::HypervisorCpuError::GetCpuidVales(e.into()))
1389     }
1390 
1391     #[cfg(target_arch = "x86_64")]
1392     ///
1393     /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1394     ///
1395     fn get_lapic(&self) -> cpu::Result<crate::arch::x86::LapicState> {
1396         Ok(self
1397             .fd
1398             .get_lapic()
1399             .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
1400             .into())
1401     }
1402 
1403     #[cfg(target_arch = "x86_64")]
1404     ///
1405     /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1406     ///
1407     fn set_lapic(&self, lapic: &crate::arch::x86::LapicState) -> cpu::Result<()> {
1408         let lapic: mshv_bindings::LapicState = (*lapic).clone().into();
1409         self.fd
1410             .set_lapic(&lapic)
1411             .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
1412     }
1413 
1414     ///
1415     /// Returns the vcpu's current "multiprocessing state".
1416     ///
1417     fn get_mp_state(&self) -> cpu::Result<MpState> {
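        // MSHV does not expose a per-vCPU multiprocessing state, so report the
        // MSHV placeholder variant; set_mp_state() below is likewise a no-op.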
1418         Ok(MpState::Mshv)
1419     }
1420 
1421     ///
1422     /// Sets the vcpu's current "multiprocessing state".
1423     ///
1424     fn set_mp_state(&self, _mp_state: MpState) -> cpu::Result<()> {
1425         Ok(())
1426     }
1427 
1428     #[cfg(target_arch = "x86_64")]
1429     ///
1430     /// Set CPU state for x86_64 guest.
1431     ///
1432     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
1433         let mut state: VcpuMshvState = state.clone().into();
1434         self.set_msrs(&state.msrs)?;
1435         self.set_vcpu_events(&state.vcpu_events)?;
1436         self.set_regs(&state.regs.into())?;
1437         self.set_sregs(&state.sregs.into())?;
1438         self.set_fpu(&state.fpu)?;
1439         self.set_xcrs(&state.xcrs)?;
1440         // These registers are global and need to be set only for the first VCPU,
1441         // as the Microsoft Hypervisor allows setting them for only one VCPU.
1442         if self.vp_index == 0 {
1443             self.fd
1444                 .set_misc_regs(&state.misc)
1445                 .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))?
1446         }
1447         self.fd
1448             .set_debug_regs(&state.dbg)
1449             .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
1450         self.fd
1451             .set_all_vp_state_components(&mut state.vp_states)
1452             .map_err(|e| cpu::HypervisorCpuError::SetAllVpStateComponents(e.into()))?;
1453         Ok(())
1454     }
1455 
1456     #[cfg(target_arch = "aarch64")]
1457     ///
1458     /// Set CPU state for aarch64 guest.
1459     ///
1460     fn set_state(&self, _state: &CpuState) -> cpu::Result<()> {
1461         unimplemented!()
1462     }
1463 
1464     #[cfg(target_arch = "x86_64")]
1465     ///
1466     /// Get CPU State for x86_64 guest
1467     ///
1468     fn state(&self) -> cpu::Result<CpuState> {
1469         let regs = self.get_regs()?;
1470         let sregs = self.get_sregs()?;
1471         let xcrs = self.get_xcrs()?;
1472         let fpu = self.get_fpu()?;
1473         let vcpu_events = self.get_vcpu_events()?;
1474         let mut msrs = self.msrs.clone();
1475         self.get_msrs(&mut msrs)?;
1476         let misc = self
1477             .fd
1478             .get_misc_regs()
1479             .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?;
1480         let dbg = self
1481             .fd
1482             .get_debug_regs()
1483             .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;
1484         let vp_states = self
1485             .fd
1486             .get_all_vp_state_components()
1487             .map_err(|e| cpu::HypervisorCpuError::GetAllVpStateComponents(e.into()))?;
1488 
1489         Ok(VcpuMshvState {
1490             msrs,
1491             vcpu_events,
1492             regs: regs.into(),
1493             sregs: sregs.into(),
1494             fpu,
1495             xcrs,
1496             dbg,
1497             misc,
1498             vp_states,
1499         }
1500         .into())
1501     }
1502 
1503     #[cfg(target_arch = "aarch64")]
1504     ///
1505     /// Get CPU state for aarch64 guest.
1506     ///
1507     fn state(&self) -> cpu::Result<CpuState> {
1508         unimplemented!()
1509     }
1510 
1511     #[cfg(target_arch = "x86_64")]
1512     ///
1513     /// Translate guest virtual address to guest physical address
1514     ///
1515     fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> {
1516         let r = self
1517             .fd
1518             .translate_gva(gva, flags)
1519             .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
1520 
1521         let gpa = r.0;
1522         // SAFETY: r is valid; otherwise this function would already have returned
1523         let result_code = unsafe { r.1.__bindgen_anon_1.result_code };
1524 
1525         Ok((gpa, result_code))
1526     }
1527 
1528     #[cfg(target_arch = "x86_64")]
1529     ///
1530     /// Return the list of initial MSR entries for a VCPU
1531     ///
1532     fn boot_msr_entries(&self) -> Vec<MsrEntry> {
1533         use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};
1534 
1535         [
1536             msr!(msr_index::MSR_IA32_SYSENTER_CS),
1537             msr!(msr_index::MSR_IA32_SYSENTER_ESP),
1538             msr!(msr_index::MSR_IA32_SYSENTER_EIP),
1539             msr!(msr_index::MSR_STAR),
1540             msr!(msr_index::MSR_CSTAR),
1541             msr!(msr_index::MSR_LSTAR),
1542             msr!(msr_index::MSR_KERNEL_GS_BASE),
1543             msr!(msr_index::MSR_SYSCALL_MASK),
1544             msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
1545         ]
1546         .to_vec()
1547     }
1548 
1549     ///
1550     /// Sets the AMD specific vcpu's sev control register.
1551     ///
1552     #[cfg(feature = "sev_snp")]
1553     fn set_sev_control_register(&self, vmsa_pfn: u64) -> cpu::Result<()> {
1554         let sev_control_reg = snp::get_sev_control_register(vmsa_pfn);
1555 
1556         self.fd
1557             .set_sev_control_register(sev_control_reg)
1558             .map_err(|e| cpu::HypervisorCpuError::SetSevControlRegister(e.into()))
1559     }
1560     #[cfg(target_arch = "x86_64")]
1561     ///
1562     /// Trigger NMI interrupt
1563     ///
1564     fn nmi(&self) -> cpu::Result<()> {
1565         let cfg = InterruptRequest {
1566             interrupt_type: hv_interrupt_type_HV_X64_INTERRUPT_TYPE_NMI,
1567             apic_id: self.vp_index as u64,
1568             level_triggered: false,
1569             vector: 0,
1570             logical_destination_mode: false,
1571             long_mode: false,
1572         };
1573         self.vm_fd
1574             .request_virtual_interrupt(&cfg)
1575             .map_err(|e| cpu::HypervisorCpuError::Nmi(e.into()))
1576     }
1577 }
1578 
1579 impl MshvVcpu {
1580     ///
1581     /// Deactivate previously used GHCB page.
1582     ///
1583     #[cfg(feature = "sev_snp")]
1584     fn disable_prev_ghcb_page(&self) -> cpu::Result<()> {
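        // Fetch the currently registered GHCB GPA, then write the register back
        // with the enable bit cleared to deactivate the page.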
1585         let mut reg_assocs = [hv_register_assoc {
1586             name: hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
1587             ..Default::default()
1588         }];
1589         self.fd.get_reg(&mut reg_assocs).unwrap();
1590         // SAFETY: Accessing a union element from bindgen generated bindings.
1591         let prev_ghcb_gpa = unsafe { reg_assocs[0].value.reg64 };
1592 
1593         debug!("Prev GHCB GPA is {:x}", prev_ghcb_gpa);
1594 
1595         let mut ghcb_gpa = hv_x64_register_sev_ghcb::default();
1596 
1597         // SAFETY: Accessing a union element from bindgen generated bindings.
1598         unsafe {
1599             ghcb_gpa.__bindgen_anon_1.set_enabled(0);
1600             ghcb_gpa.__bindgen_anon_1.set_page_number(prev_ghcb_gpa);
1601         }
1602 
1603         // SAFETY: Accessing a union element from bindgen generated bindings.
1604         let reg_name_value = unsafe {
1605             [(
1606                 hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
1607                 ghcb_gpa.as_uint64,
1608             )]
1609         };
1610 
1611         set_registers_64!(self.fd, reg_name_value)
1612             .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
1613 
1614         Ok(())
1615     }
1616     #[cfg(target_arch = "x86_64")]
1617     ///
1618     /// X86 specific call that returns the vcpu's current "xcrs".
1619     ///
1620     fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
1621         self.fd
1622             .get_xcrs()
1623             .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
1624     }
1625 
1626     #[cfg(target_arch = "x86_64")]
1627     ///
1628     /// X86 specific call that sets the vcpu's current "xcrs".
1629     ///
1630     fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
1631         self.fd
1632             .set_xcrs(xcrs)
1633             .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
1634     }
1635 
1636     #[cfg(target_arch = "x86_64")]
1637     ///
1638     /// Returns currently pending exceptions, interrupts, and NMIs as well as related
1639     /// states of the vcpu.
1640     ///
1641     fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
1642         self.fd
1643             .get_vcpu_events()
1644             .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
1645     }
1646 
1647     #[cfg(target_arch = "x86_64")]
1648     ///
1649     /// Sets pending exceptions, interrupts, and NMIs as well as related states
1650     /// of the vcpu.
1651     ///
1652     fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
1653         self.fd
1654             .set_vcpu_events(events)
1655             .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
1656     }
1657 
1658     ///
1659     /// Clear SW_EXIT_INFO1 register for SEV-SNP guests.
1660     ///
1661     #[cfg(feature = "sev_snp")]
1662     fn clear_swexit_info1(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
1663         // Clear the SW_EXIT_INFO1 register to indicate no error.
1664         // Safe to unwrap: for a SEV-SNP guest the GHCB pointer is always
1665         // present in the Option, otherwise this code is never reached.
1666         let ghcb = self.ghcb.as_ref().unwrap().0;
1667         set_svm_field_u64_ptr!(ghcb, exit_info1, 0);
1668 
1669         Ok(cpu::VmExit::Ignore)
1670     }
1671 
1672     #[cfg(feature = "sev_snp")]
1673     fn gpa_write(&self, gpa: u64, data: &[u8]) -> cpu::Result<()> {
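        // Split the write into chunks of at most HV_READ_WRITE_GPA_MAX_SIZE
        // bytes, the per-call limit of the gpa_write ioctl.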
1674         for (gpa, chunk) in (gpa..)
1675             .step_by(HV_READ_WRITE_GPA_MAX_SIZE as usize)
1676             .zip(data.chunks(HV_READ_WRITE_GPA_MAX_SIZE as usize))
1677         {
1678             let mut data = [0; HV_READ_WRITE_GPA_MAX_SIZE as usize];
1679             data[..chunk.len()].copy_from_slice(chunk);
1680 
1681             let mut rw_gpa_arg = mshv_bindings::mshv_read_write_gpa {
1682                 base_gpa: gpa,
1683                 byte_count: chunk.len() as u32,
1684                 data,
1685                 ..Default::default()
1686             };
1687             self.fd
1688                 .gpa_write(&mut rw_gpa_arg)
1689                 .map_err(|e| cpu::HypervisorCpuError::GpaWrite(e.into()))?;
1690         }
1691 
1692         Ok(())
1693     }
1694 }
1695 
1696 /// Wrapper over Mshv VM ioctls.
1697 pub struct MshvVm {
1698     fd: Arc<VmFd>,
1699     #[cfg(target_arch = "x86_64")]
1700     msrs: Vec<MsrEntry>,
1701     dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
1702     #[cfg(feature = "sev_snp")]
1703     sev_snp_enabled: bool,
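    // Tracks, one bit per guest page frame, which pages the host has been
    // granted access to.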
1704     #[cfg(feature = "sev_snp")]
1705     host_access_pages: ArcSwap<AtomicBitmap>,
1706 }
1707 
1708 impl MshvVm {
1709     ///
1710     /// Creates an in-kernel device.
1711     ///
1712     /// See the documentation for `MSHV_CREATE_DEVICE`.
1713     fn create_device(&self, device: &mut CreateDevice) -> vm::Result<VfioDeviceFd> {
1714         let device_fd = self
1715             .fd
1716             .create_device(device)
1717             .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
1718         Ok(VfioDeviceFd::new_from_mshv(device_fd))
1719     }
1720 }
1721 
1722 ///
1723 /// Implementation of Vm trait for Mshv
1724 ///
1725 /// # Examples
1726 ///
1727 /// ```
1728 /// extern crate hypervisor;
1729 /// use hypervisor::mshv::MshvHypervisor;
1730 /// use std::sync::Arc;
1731 /// let mshv = MshvHypervisor::new().unwrap();
1732 /// let hypervisor = Arc::new(mshv);
1733 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
1734 /// ```
1735 impl vm::Vm for MshvVm {
1736     #[cfg(target_arch = "x86_64")]
1737     ///
1738     /// Sets the address of the one-page region in the VM's address space.
1739     ///
1740     fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> {
1741         Ok(())
1742     }
1743 
1744     #[cfg(target_arch = "x86_64")]
1745     ///
1746     /// Sets the address of the three-page region in the VM's address space.
1747     ///
1748     fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
1749         Ok(())
1750     }
1751 
1752     ///
1753     /// Creates an in-kernel interrupt controller.
1754     ///
1755     fn create_irq_chip(&self) -> vm::Result<()> {
1756         Ok(())
1757     }
1758 
1759     ///
1760     /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
1761     ///
1762     fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
1763         debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);
1764 
1765         self.fd
1766             .register_irqfd(fd, gsi)
1767             .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;
1768 
1769         Ok(())
1770     }
1771 
1772     ///
1773     /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
1774     ///
1775     fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
1776         debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);
1777 
1778         self.fd
1779             .unregister_irqfd(fd, gsi)
1780             .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;
1781 
1782         Ok(())
1783     }
1784 
1785     ///
1786     /// Creates a VcpuFd object from a vcpu RawFd.
1787     ///
1788     fn create_vcpu(
1789         &self,
1790         id: u8,
1791         vm_ops: Option<Arc<dyn VmOps>>,
1792     ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
1793         let vcpu_fd = self
1794             .fd
1795             .create_vcpu(id)
1796             .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
1797 
1798         /* Map the GHCB page into the VMM (root) address space.
1799          * The mapping is available after vCPU creation. This address is mapped
1800          * to the overlay GHCB page of the Microsoft Hypervisor, so we don't
1801          * have to worry about the scenario where a guest changes the GHCB mapping.
1802          */
1803         #[cfg(feature = "sev_snp")]
1804         let ghcb = if self.sev_snp_enabled {
1805             // SAFETY: Safe to call as VCPU has this map already available upon creation
1806             let addr = unsafe {
1807                 libc::mmap(
1808                     std::ptr::null_mut(),
1809                     HV_PAGE_SIZE,
1810                     libc::PROT_READ | libc::PROT_WRITE,
1811                     libc::MAP_SHARED,
1812                     vcpu_fd.as_raw_fd(),
1813                     MSHV_VP_MMAP_OFFSET_GHCB as i64 * libc::sysconf(libc::_SC_PAGE_SIZE),
1814                 )
1815             };
1816             if addr == libc::MAP_FAILED {
1817                 // No point in continuing: without this mmap, VMGEXIT will fail anyway,
1818                 // so return an error.
1819                 return Err(vm::HypervisorVmError::MmapToRoot);
1820             }
1821             Some(Ghcb(addr as *mut svm_ghcb_base))
1822         } else {
1823             None
1824         };
1825         let vcpu = MshvVcpu {
1826             fd: vcpu_fd,
1827             vp_index: id,
1828             #[cfg(target_arch = "x86_64")]
1829             cpuid: Vec::new(),
1830             #[cfg(target_arch = "x86_64")]
1831             msrs: self.msrs.clone(),
1832             vm_ops,
1833             vm_fd: self.fd.clone(),
1834             #[cfg(feature = "sev_snp")]
1835             ghcb,
1836             #[cfg(feature = "sev_snp")]
1837             host_access_pages: ArcSwap::new(self.host_access_pages.load().clone()),
1838         };
1839         Ok(Arc::new(vcpu))
1840     }
1841 
1842     #[cfg(target_arch = "x86_64")]
1843     fn enable_split_irq(&self) -> vm::Result<()> {
1844         Ok(())
1845     }
1846 
1847     #[cfg(target_arch = "x86_64")]
1848     fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
1849         Ok(())
1850     }
1851 
1852     fn register_ioevent(
1853         &self,
1854         fd: &EventFd,
1855         addr: &IoEventAddress,
1856         datamatch: Option<DataMatch>,
1857     ) -> vm::Result<()> {
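        // Ioevents are skipped for SEV-SNP guests; their I/O exits arrive
        // through the GHCB protocol (see the vCPU run loop) rather than
        // through eventfds, so registration is a no-op.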
1858         #[cfg(feature = "sev_snp")]
1859         if self.sev_snp_enabled {
1860             return Ok(());
1861         }
1862 
1863         let addr = &mshv_ioctls::IoEventAddress::from(*addr);
1864         debug!(
1865             "register_ioevent fd {} addr {:x?} datamatch {:?}",
1866             fd.as_raw_fd(),
1867             addr,
1868             datamatch
1869         );
1870         if let Some(dm) = datamatch {
1871             match dm {
1872                 vm::DataMatch::DataMatch32(mshv_dm32) => self
1873                     .fd
1874                     .register_ioevent(fd, addr, mshv_dm32)
1875                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
1876                 vm::DataMatch::DataMatch64(mshv_dm64) => self
1877                     .fd
1878                     .register_ioevent(fd, addr, mshv_dm64)
1879                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
1880             }
1881         } else {
1882             self.fd
1883                 .register_ioevent(fd, addr, NoDatamatch)
1884                 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
1885         }
1886     }
1887 
1888     /// Unregister an event from a certain address it has been previously registered to.
1889     fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
1890         #[cfg(feature = "sev_snp")]
1891         if self.sev_snp_enabled {
1892             return Ok(());
1893         }
1894 
1895         let addr = &mshv_ioctls::IoEventAddress::from(*addr);
1896         debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);
1897 
1898         self.fd
1899             .unregister_ioevent(fd, addr, NoDatamatch)
1900             .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
1901     }
1902 
1903     /// Creates a guest physical memory region.
1904     fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
1905         let user_memory_region: mshv_user_mem_region = user_memory_region.into();
1906         // We keep track of the slots whether they are read-only or not.
1907         // For read-only slots the hypervisor can enable the dirty bits,
1908         // but a VM exit happens before the dirty bits are set.
1909         self.dirty_log_slots.write().unwrap().insert(
1910             user_memory_region.guest_pfn,
1911             MshvDirtyLogSlot {
1912                 guest_pfn: user_memory_region.guest_pfn,
1913                 memory_size: user_memory_region.size,
1914             },
1915         );
1916 
1917         self.fd
1918             .map_user_memory(user_memory_region)
1919             .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
1920         Ok(())
1921     }
1922 
1923     /// Removes a guest physical memory region.
1924     fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
1925         let user_memory_region: mshv_user_mem_region = user_memory_region.into();
1926         // Remove the corresponding entry from "self.dirty_log_slots" if needed
1927         self.dirty_log_slots
1928             .write()
1929             .unwrap()
1930             .remove(&user_memory_region.guest_pfn);
1931 
1932         self.fd
1933             .unmap_user_memory(user_memory_region)
1934             .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
1935         Ok(())
1936     }
1937 
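    /// Builds a `UserMemoryRegion` from raw parameters: the guest physical
    /// address is converted into a guest page frame number, and regions are
    /// always executable, with the WRITABLE flag added only when not read-only.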
1938     fn make_user_memory_region(
1939         &self,
1940         _slot: u32,
1941         guest_phys_addr: u64,
1942         memory_size: u64,
1943         userspace_addr: u64,
1944         readonly: bool,
1945         _log_dirty_pages: bool,
1946     ) -> UserMemoryRegion {
1947         let mut flags = 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
1948         if !readonly {
1949             flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
1950         }
1951 
1952         mshv_user_mem_region {
1953             flags,
1954             guest_pfn: guest_phys_addr >> PAGE_SHIFT,
1955             size: memory_size,
1956             userspace_addr,
1957             ..Default::default()
1958         }
1959         .into()
1960     }
1961 
1962     fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
1963         let mut vfio_dev = mshv_create_device {
1964             type_: MSHV_DEV_TYPE_VFIO,
1965             fd: 0,
1966             flags: 0,
1967         };
1968 
1969         self.create_device(&mut vfio_dev)
1970             .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
1971     }
1972 
1973     ///
1974     /// Constructs a routing entry
1975     ///
1976     fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
1977         match config {
1978             InterruptSourceConfig::MsiIrq(cfg) => mshv_user_irq_entry {
1979                 gsi,
1980                 address_lo: cfg.low_addr,
1981                 address_hi: cfg.high_addr,
1982                 data: cfg.data,
1983             }
1984             .into(),
1985             _ => {
1986                 unreachable!()
1987             }
1988         }
1989     }
1990 
1991     fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
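        // mshv_user_irq_table ends in a flexible array member, so allocate it
        // with vec_with_array_field() to reserve room for the header plus
        // entries.len() trailing entries.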
1992         let mut msi_routing =
1993             vec_with_array_field::<mshv_user_irq_table, mshv_user_irq_entry>(entries.len());
1994         msi_routing[0].nr = entries.len() as u32;
1995 
1996         let entries: Vec<mshv_user_irq_entry> = entries
1997             .iter()
1998             .map(|entry| match entry {
1999                 IrqRoutingEntry::Mshv(e) => *e,
2000                 #[allow(unreachable_patterns)]
2001                 _ => panic!("IrqRoutingEntry type is wrong"),
2002             })
2003             .collect();
2004 
2005         // SAFETY: msi_routing initialized with entries.len() and now it is being turned into
2006         // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
2007         // everything from entries.
2008         unsafe {
2009             let entries_slice: &mut [mshv_user_irq_entry] =
2010                 msi_routing[0].entries.as_mut_slice(entries.len());
2011             entries_slice.copy_from_slice(&entries);
2012         }
2013 
2014         self.fd
2015             .set_msi_routing(&msi_routing[0])
2016             .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
2017     }
2018 
2019     ///
2020     /// Start logging dirty pages
2021     ///
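    /// A minimal sketch of a full tracking round (the `track_round` helper and
    /// the slot/address values below are illustrative only, not part of this
    /// crate):
    ///
    /// ```no_run
    /// # use std::sync::Arc;
    /// # use hypervisor::vm::{Result, Vm};
    /// # fn track_round(vm: Arc<dyn Vm>) -> Result<Vec<u64>> {
    /// vm.start_dirty_log()?;
    /// // ... let the guest run for a while ...
    /// let bitmap = vm.get_dirty_log(0, 0x0, 0x10_0000)?; // slot, base GPA, size
    /// vm.stop_dirty_log()?;
    /// # Ok(bitmap)
    /// # }
    /// ```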
2022     fn start_dirty_log(&self) -> vm::Result<()> {
2023         self.fd
2024             .enable_dirty_page_tracking()
2025             .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
2026     }
2027 
2028     ///
2029     /// Stop logging dirty pages
2030     ///
2031     fn stop_dirty_log(&self) -> vm::Result<()> {
2032         let dirty_log_slots = self.dirty_log_slots.read().unwrap();
2033         // Before disabling dirty page tracking we need
2034         // to set the dirty bits in the hypervisor.
2035         // This is a requirement of the Microsoft Hypervisor.
2036         for (_, s) in dirty_log_slots.iter() {
2037             self.fd
2038                 .get_dirty_log(
2039                     s.guest_pfn,
2040                     s.memory_size as usize,
2041                     MSHV_GPAP_ACCESS_OP_SET as u8,
2042                 )
2043                 .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
2044         }
2045         self.fd
2046             .disable_dirty_page_tracking()
2047             .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
2048         Ok(())
2049     }
2050 
2051     ///
2052     /// Get dirty pages bitmap (one bit per page)
2053     ///
2054     fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
2055         self.fd
2056             .get_dirty_log(
2057                 base_gpa >> PAGE_SHIFT,
2058                 memory_size as usize,
2059                 MSHV_GPAP_ACCESS_OP_CLEAR as u8,
2060             )
2061             .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
2062     }
2063 
2064     /// Retrieve guest clock.
2065     #[cfg(target_arch = "x86_64")]
2066     fn get_clock(&self) -> vm::Result<ClockData> {
2067         let val = self
2068             .fd
2069             .get_partition_property(hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME)
2070             .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?;
2071         Ok(MshvClockData { ref_time: val }.into())
2072     }
2073 
2074     /// Set guest clock.
2075     #[cfg(target_arch = "x86_64")]
2076     fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
2077         let data: MshvClockData = (*data).into();
2078         self.fd
2079             .set_partition_property(
2080                 hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME,
2081                 data.ref_time,
2082             )
2083             .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
2084     }
2085 
2086     /// Downcast to the underlying MshvVm type
2087     fn as_any(&self) -> &dyn Any {
2088         self
2089     }
2090 
2091     /// Initialize the SEV-SNP VM
2092     #[cfg(feature = "sev_snp")]
2093     fn sev_snp_init(&self) -> vm::Result<()> {
2094         self.fd
2095             .set_partition_property(
2096                 hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_STATE,
2097                 hv_partition_isolation_state_HV_PARTITION_ISOLATION_SECURE as u64,
2098             )
2099             .map_err(|e| vm::HypervisorVmError::InitializeSevSnp(e.into()))
2100     }
2101 
2102     ///
2103     /// Import isolated pages. These pages will be used
2104     /// for the PSP (Platform Security Processor) measurement.
2105     #[cfg(feature = "sev_snp")]
2106     fn import_isolated_pages(
2107         &self,
2108         page_type: u32,
2109         page_size: u32,
2110         pages: &[u64],
2111     ) -> vm::Result<()> {
2112         debug_assert!(page_size == hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB);
2113         if pages.is_empty() {
2114             return Ok(());
2115         }
2116 
2117         let mut isolated_pages =
2118             vec_with_array_field::<mshv_import_isolated_pages, u64>(pages.len());
2119         isolated_pages[0].page_type = page_type as u8;
2120         isolated_pages[0].page_count = pages.len() as u64;
2121         // SAFETY: isolated_pages initialized with pages.len() and now it is being turned into
2122         // pages_slice with pages.len() again. It is guaranteed to be large enough to hold
2123         // everything from pages.
2124         unsafe {
2125             let pages_slice: &mut [u64] = isolated_pages[0].guest_pfns.as_mut_slice(pages.len());
2126             pages_slice.copy_from_slice(pages);
2127         }
2128         self.fd
2129             .import_isolated_pages(&isolated_pages[0])
2130             .map_err(|e| vm::HypervisorVmError::ImportIsolatedPages(e.into()))
2131     }
2132 
2133     ///
2134     /// Complete isolated import, telling the hypervisor that
2135     /// importing the pages to guest memory is complete.
2136     ///
2137     #[cfg(feature = "sev_snp")]
2138     fn complete_isolated_import(
2139         &self,
2140         snp_id_block: IGVM_VHS_SNP_ID_BLOCK,
2141         host_data: [u8; 32],
2142         id_block_enabled: u8,
2143     ) -> vm::Result<()> {
2144         let mut auth_info = hv_snp_id_auth_info {
2145             id_key_algorithm: snp_id_block.id_key_algorithm,
2146             auth_key_algorithm: snp_id_block.author_key_algorithm,
2147             ..Default::default()
2148         };
2149         // Each of the r and s signature components is 576 bits (72 bytes) long.
2150         auth_info.id_block_signature[..SIG_R_COMPONENT_SIZE_IN_BYTES]
2151             .copy_from_slice(snp_id_block.id_key_signature.r_comp.as_ref());
2152         auth_info.id_block_signature
2153             [SIG_R_COMPONENT_SIZE_IN_BYTES..SIG_R_AND_S_COMPONENT_SIZE_IN_BYTES]
2154             .copy_from_slice(snp_id_block.id_key_signature.s_comp.as_ref());
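        // The id_key buffer is laid out as the ECDSA curve ID followed by the
        // public key's qx and qy components.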
2155         auth_info.id_key[..ECDSA_CURVE_ID_SIZE_IN_BYTES]
2156             .copy_from_slice(snp_id_block.id_public_key.curve.to_le_bytes().as_ref());
2157         auth_info.id_key[ECDSA_SIG_X_COMPONENT_START..ECDSA_SIG_X_COMPONENT_END]
2158             .copy_from_slice(snp_id_block.id_public_key.qx.as_ref());
2159         auth_info.id_key[ECDSA_SIG_Y_COMPONENT_START..ECDSA_SIG_Y_COMPONENT_END]
2160             .copy_from_slice(snp_id_block.id_public_key.qy.as_ref());
2161 
2162         let data = mshv_complete_isolated_import {
2163             import_data: hv_partition_complete_isolated_import_data {
2164                 psp_parameters: hv_psp_launch_finish_data {
2165                     id_block: hv_snp_id_block {
2166                         launch_digest: snp_id_block.ld,
2167                         family_id: snp_id_block.family_id,
2168                         image_id: snp_id_block.image_id,
2169                         version: snp_id_block.version,
2170                         guest_svn: snp_id_block.guest_svn,
2171                         policy: get_default_snp_guest_policy(),
2172                     },
2173                     id_auth_info: auth_info,
2174                     host_data,
2175                     id_block_enabled,
2176                     author_key_enabled: 0,
2177                 },
2178             },
2179         };
2180         self.fd
2181             .complete_isolated_import(&data)
2182             .map_err(|e| vm::HypervisorVmError::CompleteIsolatedImport(e.into()))
2183     }
2184 
2185     #[cfg(target_arch = "aarch64")]
2186     fn create_vgic(&self, _config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
2187         unimplemented!()
2188     }
2189 
2190     #[cfg(target_arch = "aarch64")]
2191     fn get_preferred_target(&self, _kvi: &mut crate::VcpuInit) -> vm::Result<()> {
2192         unimplemented!()
2193     }
2194 
2195     /// Pause the VM
2196     fn pause(&self) -> vm::Result<()> {
2197         // Freeze the partition
2198         self.fd
2199             .set_partition_property(
2200                 hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
2201                 1u64,
2202             )
2203             .map_err(|e| {
2204                 vm::HypervisorVmError::SetVmProperty(anyhow!(
2205                     "Failed to set partition property: {}",
2206                     e
2207                 ))
2208             })
2209     }
2210 
2211     /// Resume the VM
2212     fn resume(&self) -> vm::Result<()> {
2213         // Unfreeze the partition by clearing the TIME_FREEZE property
2214         self.fd
2215             .set_partition_property(
2216                 hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
2217                 0u64,
2218             )
2219             .map_err(|e| {
2220                 vm::HypervisorVmError::SetVmProperty(anyhow!(
2221                     "Failed to set partition property: {}",
2222                     e
2223                 ))
2224             })
2225     }
2226 
2227     #[cfg(feature = "sev_snp")]
2228     fn gain_page_access(&self, gpa: u64, size: u32) -> vm::Result<()> {
2229         use mshv_ioctls::set_bits;
2230         const ONE_GB: usize = 1024 * 1024 * 1024;
2231 
2232         if !self.sev_snp_enabled {
2233             return Ok(());
2234         }
2235 
2236         let start_gpfn: u64 = gpa >> PAGE_SHIFT;
2237         let end_gpfn: u64 = (gpa + size as u64 - 1) >> PAGE_SHIFT;
2238 
2239         // Enlarge the bitmap if the end PFN lies beyond the current bitmap length
2240         if end_gpfn >= self.host_access_pages.load().as_ref().len() as u64 {
2241             self.host_access_pages.rcu(|bitmap| {
2242                 let mut bm = bitmap.as_ref().clone();
2243                 bm.enlarge(ONE_GB);
2244                 bm
2245             });
2246         }
2247 
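        // Collect only the GPAs whose pages the host has not already been
        // granted access to, according to the tracking bitmap.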
2248         let gpas: Vec<u64> = (start_gpfn..=end_gpfn)
2249             .filter(|x| {
2250                 !self
2251                     .host_access_pages
2252                     .load()
2253                     .as_ref()
2254                     .is_bit_set(*x as usize)
2255             })
2256             .map(|x| x << PAGE_SHIFT)
2257             .collect();
2258 
2259         if !gpas.is_empty() {
2260             let mut gpa_list = vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len());
2261             gpa_list[0].page_count = gpas.len() as u64;
2262             gpa_list[0].flags = set_bits!(
2263                 u8,
2264                 MSHV_GPA_HOST_ACCESS_BIT_ACQUIRE,
2265                 MSHV_GPA_HOST_ACCESS_BIT_READABLE,
2266                 MSHV_GPA_HOST_ACCESS_BIT_WRITABLE
2267             );
2268 
2269             // SAFETY: gpa_list initialized with gpas.len() and now it is being turned into
2270             // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold
2271             // everything from gpas.
2272             unsafe {
2273                 let gpas_slice: &mut [u64] = gpa_list[0].guest_pfns.as_mut_slice(gpas.len());
2274                 gpas_slice.copy_from_slice(gpas.as_slice());
2275             }
2276 
2277             self.fd
2278                 .modify_gpa_host_access(&gpa_list[0])
2279                 .map_err(|e| vm::HypervisorVmError::ModifyGpaHostAccess(e.into()))?;
2280 
2281             for acquired_gpa in gpas {
2282                 self.host_access_pages.rcu(|bitmap| {
2283                     let bm = bitmap.clone();
2284                     bm.set_bit((acquired_gpa >> PAGE_SHIFT) as usize);
2285                     bm
2286                 });
2287             }
2288         }
2289 
2290         Ok(())
2291     }
2292 }
2293