xref: /cloud-hypervisor/hypervisor/src/mshv/mod.rs (revision 6768a13d9544b9960231a33bfd422d4dc2398b79)
1 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
2 //
3 // Copyright © 2020, Microsoft Corporation
4 //
5 
6 use std::any::Any;
7 use std::collections::HashMap;
8 #[cfg(feature = "sev_snp")]
9 use std::num::NonZeroUsize;
10 use std::sync::{Arc, RwLock};
11 
12 #[cfg(feature = "sev_snp")]
13 use arc_swap::ArcSwap;
14 use mshv_bindings::*;
15 use mshv_ioctls::{set_registers_64, InterruptRequest, Mshv, NoDatamatch, VcpuFd, VmFd, VmType};
16 use vfio_ioctls::VfioDeviceFd;
17 use vm::DataMatch;
18 #[cfg(feature = "sev_snp")]
19 use vm_memory::bitmap::AtomicBitmap;
20 
21 use crate::arch::emulator::PlatformEmulator;
22 #[cfg(target_arch = "x86_64")]
23 use crate::arch::x86::emulator::Emulator;
24 use crate::mshv::emulator::MshvEmulatorContext;
25 use crate::vm::{self, InterruptSourceConfig, VmOps};
26 use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType};
27 #[cfg(feature = "sev_snp")]
28 mod snp_constants;
29 // x86_64 dependencies
30 #[cfg(target_arch = "x86_64")]
31 pub mod x86_64;
32 // aarch64 dependencies
33 #[cfg(target_arch = "aarch64")]
34 pub mod aarch64;
35 #[cfg(target_arch = "x86_64")]
36 use std::fs::File;
37 use std::os::unix::io::AsRawFd;
38 #[cfg(target_arch = "aarch64")]
39 use std::sync::Mutex;
40 
41 #[cfg(target_arch = "aarch64")]
42 pub use aarch64::VcpuMshvState;
43 #[cfg(feature = "sev_snp")]
44 use igvm_defs::IGVM_VHS_SNP_ID_BLOCK;
45 #[cfg(feature = "sev_snp")]
46 use snp_constants::*;
47 use vmm_sys_util::eventfd::EventFd;
48 #[cfg(target_arch = "x86_64")]
49 pub use x86_64::*;
50 #[cfg(target_arch = "x86_64")]
51 pub use x86_64::{emulator, VcpuMshvState};
52 ///
53 /// Export generically-named wrappers of mshv-bindings for Unix-based platforms
54 ///
55 pub use {
56     mshv_bindings::mshv_create_device as CreateDevice,
57     mshv_bindings::mshv_device_attr as DeviceAttr, mshv_ioctls, mshv_ioctls::DeviceFd,
58 };
59 
60 #[cfg(target_arch = "x86_64")]
61 use crate::arch::x86::{CpuIdEntry, FpuState, MsrEntry};
62 #[cfg(target_arch = "x86_64")]
63 use crate::ClockData;
64 use crate::{
65     CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
66     USER_MEMORY_REGION_ADJUSTABLE, USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ,
67     USER_MEMORY_REGION_WRITE,
68 };
69 
70 pub const PAGE_SHIFT: usize = 12;
71 
72 impl From<mshv_user_mem_region> for UserMemoryRegion {
73     fn from(region: mshv_user_mem_region) -> Self {
74         let mut flags: u32 = USER_MEMORY_REGION_READ | USER_MEMORY_REGION_ADJUSTABLE;
75         if region.flags & (1 << MSHV_SET_MEM_BIT_WRITABLE) != 0 {
76             flags |= USER_MEMORY_REGION_WRITE;
77         }
78         if region.flags & (1 << MSHV_SET_MEM_BIT_EXECUTABLE) != 0 {
79             flags |= USER_MEMORY_REGION_EXECUTE;
80         }
81 
82         UserMemoryRegion {
83             guest_phys_addr: (region.guest_pfn << PAGE_SHIFT as u64)
84                 + (region.userspace_addr & ((1 << PAGE_SHIFT) - 1)),
85             memory_size: region.size,
86             userspace_addr: region.userspace_addr,
87             flags,
88             ..Default::default()
89         }
90     }
91 }
92 
93 #[cfg(target_arch = "x86_64")]
94 impl From<MshvClockData> for ClockData {
95     fn from(d: MshvClockData) -> Self {
96         ClockData::Mshv(d)
97     }
98 }
99 
100 #[cfg(target_arch = "x86_64")]
101 impl From<ClockData> for MshvClockData {
102     fn from(ms: ClockData) -> Self {
103         match ms {
104             ClockData::Mshv(s) => s,
105             /* Needed in case other hypervisors are enabled */
106             #[allow(unreachable_patterns)]
107             _ => unreachable!("MSHV clock data is not valid"),
108         }
109     }
110 }
111 
112 impl From<UserMemoryRegion> for mshv_user_mem_region {
113     fn from(region: UserMemoryRegion) -> Self {
114         let mut flags: u8 = 0;
115         if region.flags & USER_MEMORY_REGION_WRITE != 0 {
116             flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
117         }
118         if region.flags & USER_MEMORY_REGION_EXECUTE != 0 {
119             flags |= 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
120         }
121 
122         mshv_user_mem_region {
123             guest_pfn: region.guest_phys_addr >> PAGE_SHIFT,
124             size: region.memory_size,
125             userspace_addr: region.userspace_addr,
126             flags,
127             ..Default::default()
128         }
129     }
130 }
131 
132 impl From<mshv_ioctls::IoEventAddress> for IoEventAddress {
133     fn from(a: mshv_ioctls::IoEventAddress) -> Self {
134         match a {
135             mshv_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
136             mshv_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
137         }
138     }
139 }
140 
141 impl From<IoEventAddress> for mshv_ioctls::IoEventAddress {
142     fn from(a: IoEventAddress) -> Self {
143         match a {
144             IoEventAddress::Pio(x) => Self::Pio(x),
145             IoEventAddress::Mmio(x) => Self::Mmio(x),
146         }
147     }
148 }
149 
150 impl From<VcpuMshvState> for CpuState {
151     fn from(s: VcpuMshvState) -> Self {
152         CpuState::Mshv(s)
153     }
154 }
155 
156 impl From<CpuState> for VcpuMshvState {
157     fn from(s: CpuState) -> Self {
158         match s {
159             CpuState::Mshv(s) => s,
160             /* Needed in case other hypervisors are enabled */
161             #[allow(unreachable_patterns)]
162             _ => panic!("CpuState is not valid"),
163         }
164     }
165 }
166 
167 impl From<mshv_bindings::StandardRegisters> for crate::StandardRegisters {
168     fn from(s: mshv_bindings::StandardRegisters) -> Self {
169         crate::StandardRegisters::Mshv(s)
170     }
171 }
172 
173 impl From<crate::StandardRegisters> for mshv_bindings::StandardRegisters {
174     fn from(e: crate::StandardRegisters) -> Self {
175         match e {
176             crate::StandardRegisters::Mshv(e) => e,
177             /* Needed in case other hypervisors are enabled */
178             #[allow(unreachable_patterns)]
179             _ => panic!("StandardRegisters are not valid"),
180         }
181     }
182 }
183 
184 impl From<mshv_user_irq_entry> for IrqRoutingEntry {
185     fn from(s: mshv_user_irq_entry) -> Self {
186         IrqRoutingEntry::Mshv(s)
187     }
188 }
189 
190 impl From<IrqRoutingEntry> for mshv_user_irq_entry {
191     fn from(e: IrqRoutingEntry) -> Self {
192         match e {
193             IrqRoutingEntry::Mshv(e) => e,
194             /* Needed in case other hypervisors are enabled */
195             #[allow(unreachable_patterns)]
196             _ => panic!("IrqRoutingEntry is not valid"),
197         }
198     }
199 }
200 
201 #[cfg(target_arch = "aarch64")]
202 impl From<mshv_bindings::MshvRegList> for crate::RegList {
203     fn from(s: mshv_bindings::MshvRegList) -> Self {
204         crate::RegList::Mshv(s)
205     }
206 }
207 
208 #[cfg(target_arch = "aarch64")]
209 impl From<crate::RegList> for mshv_bindings::MshvRegList {
210     fn from(e: crate::RegList) -> Self {
211         match e {
212             crate::RegList::Mshv(e) => e,
213             /* Needed in case other hypervisors are enabled */
214             #[allow(unreachable_patterns)]
215             _ => panic!("RegList is not valid"),
216         }
217     }
218 }
219 
220 #[cfg(target_arch = "aarch64")]
221 impl From<mshv_bindings::MshvVcpuInit> for crate::VcpuInit {
222     fn from(s: mshv_bindings::MshvVcpuInit) -> Self {
223         crate::VcpuInit::Mshv(s)
224     }
225 }
226 
227 #[cfg(target_arch = "aarch64")]
228 impl From<crate::VcpuInit> for mshv_bindings::MshvVcpuInit {
229     fn from(e: crate::VcpuInit) -> Self {
230         match e {
231             crate::VcpuInit::Mshv(e) => e,
232             /* Needed in case other hypervisors are enabled */
233             #[allow(unreachable_patterns)]
234             _ => panic!("VcpuInit is not valid"),
235         }
236     }
237 }
238 
239 struct MshvDirtyLogSlot {
240     guest_pfn: u64,
241     memory_size: u64,
242 }
243 
244 /// Wrapper over mshv system ioctls.
245 pub struct MshvHypervisor {
246     mshv: Mshv,
247 }
248 
249 impl MshvHypervisor {
250     #[cfg(target_arch = "x86_64")]
251     ///
252     /// Retrieve the list of MSRs supported by MSHV.
253     ///
254     fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
255         self.mshv
256             .get_msr_index_list()
257             .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
258     }
259 
260     fn create_vm_with_type_and_memory_int(
261         &self,
262         vm_type: u64,
263         #[cfg(feature = "sev_snp")] _mem_size: Option<u64>,
264     ) -> hypervisor::Result<Arc<dyn crate::Vm>> {
265         let mshv_vm_type: VmType = match VmType::try_from(vm_type) {
266             Ok(vm_type) => vm_type,
267             Err(_) => return Err(hypervisor::HypervisorError::UnsupportedVmType()),
268         };
269         let fd: VmFd;
270         loop {
271             match self.mshv.create_vm_with_type(mshv_vm_type) {
272                 Ok(res) => fd = res,
273                 Err(e) => {
274                     if e.errno() == libc::EINTR {
275                         // If the error returned is EINTR, which means the
276                         // ioctl has been interrupted, we have to retry as
277                         // this can't be considered as a regular error.
278                         continue;
279                     } else {
280                         return Err(hypervisor::HypervisorError::VmCreate(e.into()));
281                     }
282                 }
283             }
284             break;
285         }
286 
287         // Set additional partition property for SEV-SNP partition.
288         #[cfg(target_arch = "x86_64")]
289         if mshv_vm_type == VmType::Snp {
290             let snp_policy = snp::get_default_snp_guest_policy();
291             let vmgexit_offloads = snp::get_default_vmgexit_offload_features();
292             // SAFETY: access union fields
293             unsafe {
294                 debug!(
295                     "Setting the partition isolation policy as: 0x{:x}",
296                     snp_policy.as_uint64
297                 );
298                 fd.set_partition_property(
299                     hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_POLICY,
300                     snp_policy.as_uint64,
301                 )
302                 .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
303                 debug!(
304                     "Setting the partition property to enable VMGEXIT offloads as : 0x{:x}",
305                     vmgexit_offloads.as_uint64
306                 );
307                 fd.set_partition_property(
308                     hv_partition_property_code_HV_PARTITION_PROPERTY_SEV_VMGEXIT_OFFLOADS,
309                     vmgexit_offloads.as_uint64,
310                 )
311                 .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
312             }
313         }
314 
315         // Default Microsoft Hypervisor behavior for unimplemented MSR is to
316         // send a fault to the guest if it tries to access it. It is possible
317         // to override this behavior with a more suitable option i.e., ignore
318         // writes from the guest and return zero in attempt to read unimplemented
319         // MSR.
320         #[cfg(target_arch = "x86_64")]
321         fd.set_partition_property(
322             hv_partition_property_code_HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION,
323             hv_unimplemented_msr_action_HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO as u64,
324         )
325         .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
326 
327         // Always create a frozen partition
328         fd.set_partition_property(
329             hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
330             1u64,
331         )
332         .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
333 
334         let vm_fd = Arc::new(fd);
335 
336         #[cfg(target_arch = "x86_64")]
337         {
338             let msr_list = self.get_msr_list()?;
339             let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
340             let mut msrs: Vec<MsrEntry> = vec![
341                 MsrEntry {
342                     ..Default::default()
343                 };
344                 num_msrs
345             ];
346             let indices = msr_list.as_slice();
347             for (pos, index) in indices.iter().enumerate() {
348                 msrs[pos].index = *index;
349             }
350 
351             Ok(Arc::new(MshvVm {
352                 fd: vm_fd,
353                 msrs,
354                 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
355                 #[cfg(feature = "sev_snp")]
356                 sev_snp_enabled: mshv_vm_type == VmType::Snp,
357                 #[cfg(feature = "sev_snp")]
358                 host_access_pages: ArcSwap::new(
359                     AtomicBitmap::new(
360                         _mem_size.unwrap_or_default() as usize,
361                         NonZeroUsize::new(HV_PAGE_SIZE).unwrap(),
362                     )
363                     .into(),
364                 ),
365             }))
366         }
367 
368         #[cfg(target_arch = "aarch64")]
369         {
370             Ok(Arc::new(MshvVm {
371                 fd: vm_fd,
372                 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
373             }))
374         }
375     }
376 }
377 
378 impl MshvHypervisor {
379     /// Create a hypervisor based on Mshv
380     #[allow(clippy::new_ret_no_self)]
381     pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
382         let mshv_obj =
383             Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
384         Ok(Arc::new(MshvHypervisor { mshv: mshv_obj }))
385     }
386     /// Check if the hypervisor is available
387     pub fn is_available() -> hypervisor::Result<bool> {
388         match std::fs::metadata("/dev/mshv") {
389             Ok(_) => Ok(true),
390             Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
391             Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
392                 err.into(),
393             )),
394         }
395     }
396 }
397 
398 /// Implementation of Hypervisor trait for Mshv
399 ///
400 /// # Examples
401 ///
402 /// ```
403 /// use hypervisor::mshv::MshvHypervisor;
404 /// use std::sync::Arc;
405 /// let mshv = MshvHypervisor::new().unwrap();
406 /// let hypervisor = Arc::new(mshv);
407 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
408 /// ```
409 impl hypervisor::Hypervisor for MshvHypervisor {
410     ///
411     /// Returns the type of the hypervisor
412     ///
413     fn hypervisor_type(&self) -> HypervisorType {
414         HypervisorType::Mshv
415     }
416 
417     ///
418     /// Create a Vm of a specific type using the underlying hypervisor, passing memory size
419     /// Return a hypervisor-agnostic Vm trait object
420     ///
421     /// # Examples
422     ///
423     /// ```
424     /// use hypervisor::kvm::KvmHypervisor;
425     /// use hypervisor::kvm::KvmVm;
426     /// let hypervisor = KvmHypervisor::new().unwrap();
427     /// let vm = hypervisor.create_vm_with_type(0, 512*1024*1024).unwrap();
428     /// ```
429     fn create_vm_with_type_and_memory(
430         &self,
431         vm_type: u64,
432         #[cfg(feature = "sev_snp")] _mem_size: u64,
433     ) -> hypervisor::Result<Arc<dyn vm::Vm>> {
434         self.create_vm_with_type_and_memory_int(
435             vm_type,
436             #[cfg(feature = "sev_snp")]
437             Some(_mem_size),
438         )
439     }
440 
441     fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn crate::Vm>> {
442         self.create_vm_with_type_and_memory_int(
443             vm_type,
444             #[cfg(feature = "sev_snp")]
445             None,
446         )
447     }
448 
449     /// Create a mshv vm object and return the object as Vm trait object
450     ///
451     /// # Examples
452     ///
453     /// ```
454     /// # extern crate hypervisor;
455     /// use hypervisor::mshv::MshvHypervisor;
456     /// use hypervisor::mshv::MshvVm;
457     /// let hypervisor = MshvHypervisor::new().unwrap();
458     /// let vm = hypervisor.create_vm().unwrap();
459     /// ```
460     fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
461         let vm_type = 0;
462         self.create_vm_with_type(vm_type)
463     }
464     #[cfg(target_arch = "x86_64")]
465     ///
466     /// Get the supported CpuID
467     ///
468     fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
469         let mut cpuid = Vec::new();
470         let functions: [u32; 2] = [0x1, 0xb];
471 
472         for function in functions {
473             cpuid.push(CpuIdEntry {
474                 function,
475                 ..Default::default()
476             });
477         }
478         Ok(cpuid)
479     }
480 
481     /// Get maximum number of vCPUs
482     fn get_max_vcpus(&self) -> u32 {
483         // TODO: Using HV_MAXIMUM_PROCESSORS would be better
484         // but the ioctl API is limited to u8
485         256
486     }
487 
488     fn get_guest_debug_hw_bps(&self) -> usize {
489         0
490     }
491 
492     #[cfg(target_arch = "aarch64")]
493     ///
494     /// Retrieve AArch64 host maximum IPA size supported by MSHV.
495     ///
496     fn get_host_ipa_limit(&self) -> i32 {
497         let host_ipa = self.mshv.get_host_partition_property(
498             hv_partition_property_code_HV_PARTITION_PROPERTY_PHYSICAL_ADDRESS_WIDTH as u64,
499         );
500 
501         match host_ipa {
502             Ok(ipa) => ipa,
503             Err(e) => {
504                 panic!("Failed to get host IPA limit: {:?}", e);
505             }
506         }
507     }
508 }
509 
510 #[cfg(feature = "sev_snp")]
511 struct Ghcb(*mut svm_ghcb_base);
512 
513 #[cfg(feature = "sev_snp")]
514 // SAFETY: struct is based on GHCB page in the hypervisor,
515 // safe to Send across threads
516 unsafe impl Send for Ghcb {}
517 
518 #[cfg(feature = "sev_snp")]
519 // SAFETY: struct is based on GHCB page in the hypervisor,
520 // safe to Sync across threads as this is only required for Vcpu trait
521 // functionally not used anyway
522 unsafe impl Sync for Ghcb {}
523 
524 /// Vcpu struct for Microsoft Hypervisor
525 pub struct MshvVcpu {
526     fd: VcpuFd,
527     vp_index: u8,
528     #[cfg(target_arch = "x86_64")]
529     cpuid: Vec<CpuIdEntry>,
530     #[cfg(target_arch = "x86_64")]
531     msrs: Vec<MsrEntry>,
532     vm_ops: Option<Arc<dyn vm::VmOps>>,
533     vm_fd: Arc<VmFd>,
534     #[cfg(feature = "sev_snp")]
535     ghcb: Option<Ghcb>,
536     #[cfg(feature = "sev_snp")]
537     host_access_pages: ArcSwap<AtomicBitmap>,
538 }
539 
540 /// Implementation of Vcpu trait for Microsoft Hypervisor
541 ///
542 /// # Examples
543 ///
544 /// ```
545 /// use hypervisor::mshv::MshvHypervisor;
546 /// use std::sync::Arc;
547 /// let mshv = MshvHypervisor::new().unwrap();
548 /// let hypervisor = Arc::new(mshv);
549 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
550 /// let vcpu = vm.create_vcpu(0, None).unwrap();
551 /// ```
552 impl cpu::Vcpu for MshvVcpu {
553     ///
554     /// Returns StandardRegisters with default value set
555     ///
556     #[cfg(target_arch = "x86_64")]
557     fn create_standard_regs(&self) -> crate::StandardRegisters {
558         mshv_bindings::StandardRegisters::default().into()
559     }
560     #[cfg(target_arch = "x86_64")]
561     ///
562     /// Returns the vCPU general purpose registers.
563     ///
564     fn get_regs(&self) -> cpu::Result<crate::StandardRegisters> {
565         Ok(self
566             .fd
567             .get_regs()
568             .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
569             .into())
570     }
571 
572     #[cfg(target_arch = "x86_64")]
573     ///
574     /// Sets the vCPU general purpose registers.
575     ///
576     fn set_regs(&self, regs: &crate::StandardRegisters) -> cpu::Result<()> {
577         let regs = (*regs).into();
578         self.fd
579             .set_regs(&regs)
580             .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
581     }
582 
583     #[cfg(target_arch = "x86_64")]
584     ///
585     /// Returns the vCPU special registers.
586     ///
587     fn get_sregs(&self) -> cpu::Result<crate::arch::x86::SpecialRegisters> {
588         Ok(self
589             .fd
590             .get_sregs()
591             .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
592             .into())
593     }
594 
595     #[cfg(target_arch = "x86_64")]
596     ///
597     /// Sets the vCPU special registers.
598     ///
599     fn set_sregs(&self, sregs: &crate::arch::x86::SpecialRegisters) -> cpu::Result<()> {
600         let sregs = (*sregs).into();
601         self.fd
602             .set_sregs(&sregs)
603             .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
604     }
605 
606     #[cfg(target_arch = "x86_64")]
607     ///
608     /// Returns the floating point state (FPU) from the vCPU.
609     ///
610     fn get_fpu(&self) -> cpu::Result<FpuState> {
611         Ok(self
612             .fd
613             .get_fpu()
614             .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
615             .into())
616     }
617 
618     #[cfg(target_arch = "x86_64")]
619     ///
620     /// Set the floating point state (FPU) of a vCPU.
621     ///
622     fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
623         let fpu: mshv_bindings::FloatingPointUnit = (*fpu).clone().into();
624         self.fd
625             .set_fpu(&fpu)
626             .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
627     }
628 
629     #[cfg(target_arch = "x86_64")]
630     ///
631     /// Returns the model-specific registers (MSR) for this vCPU.
632     ///
633     fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
634         let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
635         let mut mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
636         let succ = self
637             .fd
638             .get_msrs(&mut mshv_msrs)
639             .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;
640 
641         msrs[..succ].copy_from_slice(
642             &mshv_msrs.as_slice()[..succ]
643                 .iter()
644                 .map(|e| (*e).into())
645                 .collect::<Vec<MsrEntry>>(),
646         );
647 
648         Ok(succ)
649     }
650 
651     #[cfg(target_arch = "x86_64")]
652     ///
653     /// Setup the model-specific registers (MSR) for this vCPU.
654     /// Returns the number of MSR entries actually written.
655     ///
656     fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
657         let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
658         let mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
659         self.fd
660             .set_msrs(&mshv_msrs)
661             .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
662     }
663 
664     #[cfg(target_arch = "x86_64")]
665     ///
666     /// X86 specific call to enable HyperV SynIC
667     ///
668     fn enable_hyperv_synic(&self) -> cpu::Result<()> {
669         /* We always have SynIC enabled on MSHV */
670         Ok(())
671     }
672 
673     #[allow(non_upper_case_globals)]
674     fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
675         match self.fd.run() {
676             Ok(x) => match x.header.message_type {
677                 hv_message_type_HVMSG_X64_HALT => {
678                     debug!("HALT");
679                     Ok(cpu::VmExit::Reset)
680                 }
681                 hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
682                     warn!("TRIPLE FAULT");
683                     Ok(cpu::VmExit::Shutdown)
684                 }
685                 #[cfg(target_arch = "x86_64")]
686                 hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
687                     let info = x.to_ioport_info().unwrap();
688                     let access_info = info.access_info;
689                     // SAFETY: access_info is valid, otherwise we won't be here
690                     let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
691                     let is_write = info.header.intercept_access_type == 1;
692                     let port = info.port_number;
693                     let mut data: [u8; 4] = [0; 4];
694                     let mut ret_rax = info.rax;
695 
696                     /*
697                      * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
698                      *
699                      * Cloud Hypervisor doesn't support fw_cfg at the moment. It does support 0x402
700                      * under the "fwdebug" feature flag. But that feature is not enabled by default
701                      * and is considered legacy.
702                      *
703                      * OVMF unconditionally pokes these IO ports with string IO.
704                      *
705                      * Instead of trying to implement string IO support now which does not do much
706                      * now, skip those ports explicitly to avoid panicking.
707                      *
708                      * Proper string IO support can be added once we gain the ability to translate
709                      * guest virtual addresses to guest physical addresses on MSHV.
710                      */
711                     match port {
712                         0x402 | 0x510 | 0x511 | 0x514 => {
713                             let insn_len = info.header.instruction_length() as u64;
714 
715                             /* Advance RIP and update RAX */
716                             let arr_reg_name_value = [
717                                 (
718                                     hv_register_name_HV_X64_REGISTER_RIP,
719                                     info.header.rip + insn_len,
720                                 ),
721                                 (hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
722                             ];
723                             set_registers_64!(self.fd, arr_reg_name_value)
724                                 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
725                             return Ok(cpu::VmExit::Ignore);
726                         }
727                         _ => {}
728                     }
729 
730                     assert!(
731                         // SAFETY: access_info is valid, otherwise we won't be here
732                         (unsafe { access_info.__bindgen_anon_1.string_op() } != 1),
733                         "String IN/OUT not supported"
734                     );
735                     assert!(
736                         // SAFETY: access_info is valid, otherwise we won't be here
737                         (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1),
738                         "Rep IN/OUT not supported"
739                     );
740 
741                     if is_write {
742                         let data = (info.rax as u32).to_le_bytes();
743                         if let Some(vm_ops) = &self.vm_ops {
744                             vm_ops
745                                 .pio_write(port.into(), &data[0..len])
746                                 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
747                         }
748                     } else {
749                         if let Some(vm_ops) = &self.vm_ops {
750                             vm_ops
751                                 .pio_read(port.into(), &mut data[0..len])
752                                 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
753                         }
754 
755                         let v = u32::from_le_bytes(data);
756                         /* Preserve high bits in EAX but clear out high bits in RAX */
757                         let mask = 0xffffffff >> (32 - len * 8);
758                         let eax = (info.rax as u32 & !mask) | (v & mask);
759                         ret_rax = eax as u64;
760                     }
761 
762                     let insn_len = info.header.instruction_length() as u64;
763 
764                     /* Advance RIP and update RAX */
765                     let arr_reg_name_value = [
766                         (
767                             hv_register_name_HV_X64_REGISTER_RIP,
768                             info.header.rip + insn_len,
769                         ),
770                         (hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
771                     ];
772                     set_registers_64!(self.fd, arr_reg_name_value)
773                         .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
774                     Ok(cpu::VmExit::Ignore)
775                 }
776                 #[cfg(target_arch = "x86_64")]
777                 msg_type @ (hv_message_type_HVMSG_UNMAPPED_GPA
778                 | hv_message_type_HVMSG_GPA_INTERCEPT) => {
779                     let info = x.to_memory_info().unwrap();
780                     let insn_len = info.instruction_byte_count as usize;
781                     let gva = info.guest_virtual_address;
782                     let gpa = info.guest_physical_address;
783 
784                     debug!("Exit ({:?}) GVA {:x} GPA {:x}", msg_type, gva, gpa);
785 
786                     let mut context = MshvEmulatorContext {
787                         vcpu: self,
788                         map: (gva, gpa),
789                     };
790 
791                     // Create a new emulator.
792                     let mut emul = Emulator::new(&mut context);
793 
794                     // Emulate the trapped instruction, and only the first one.
795                     let new_state = emul
796                         .emulate_first_insn(
797                             self.vp_index as usize,
798                             &info.instruction_bytes[..insn_len],
799                         )
800                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
801 
802                     // Set CPU state back.
803                     context
804                         .set_cpu_state(self.vp_index as usize, new_state)
805                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
806 
807                     Ok(cpu::VmExit::Ignore)
808                 }
809                 #[cfg(feature = "sev_snp")]
810                 hv_message_type_HVMSG_GPA_ATTRIBUTE_INTERCEPT => {
811                     let info = x.to_gpa_attribute_info().unwrap();
812                     let host_vis = info.__bindgen_anon_1.host_visibility();
813                     if host_vis >= HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE {
814                         warn!("Ignored attribute intercept with full host visibility");
815                         return Ok(cpu::VmExit::Ignore);
816                     }
817 
818                     let num_ranges = info.__bindgen_anon_1.range_count();
819                     assert!(num_ranges >= 1);
820                     if num_ranges > 1 {
821                         return Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
822                             "Unhandled VCPU exit(GPA_ATTRIBUTE_INTERCEPT): Expected num_ranges to be 1 but found num_ranges {:?}",
823                             num_ranges
824                         )));
825                     }
826 
827                     // TODO: we could also deny the request with HvCallCompleteIntercept
828                     let mut gpas = Vec::new();
829                     let ranges = info.ranges;
830                     let (gfn_start, gfn_count) = snp::parse_gpa_range(ranges[0]).unwrap();
831                     debug!(
832                         "Releasing pages: gfn_start: {:x?}, gfn_count: {:?}",
833                         gfn_start, gfn_count
834                     );
835                     let gpa_start = gfn_start * HV_PAGE_SIZE as u64;
836                     for i in 0..gfn_count {
837                         gpas.push(gpa_start + i * HV_PAGE_SIZE as u64);
838                     }
839 
840                     let mut gpa_list =
841                         vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len());
842                     gpa_list[0].page_count = gpas.len() as u64;
843                     gpa_list[0].flags = 0;
844                     if host_vis & HV_MAP_GPA_READABLE != 0 {
845                         gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_READABLE;
846                     }
847                     if host_vis & HV_MAP_GPA_WRITABLE != 0 {
848                         gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_WRITABLE;
849                     }
850 
851                     // SAFETY: gpa_list initialized with gpas.len() and now it is being turned into
852                     // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold
853                     // everything from gpas.
854                     unsafe {
855                         let gpas_slice: &mut [u64] =
856                             gpa_list[0].guest_pfns.as_mut_slice(gpas.len());
857                         gpas_slice.copy_from_slice(gpas.as_slice());
858                     }
859 
860                     self.vm_fd
861                         .modify_gpa_host_access(&gpa_list[0])
862                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(anyhow!(
863                             "Unhandled VCPU exit: attribute intercept - couldn't modify host access {}", e
864                         )))?;
865                     // Guest is revoking the shared access, so we need to update the bitmap
866                     self.host_access_pages.rcu(|_bitmap| {
867                         let bm = self.host_access_pages.load().as_ref().clone();
868                         bm.reset_addr_range(gpa_start as usize, gfn_count as usize);
869                         bm
870                     });
871                     Ok(cpu::VmExit::Ignore)
872                 }
873                 #[cfg(target_arch = "x86_64")]
874                 hv_message_type_HVMSG_UNACCEPTED_GPA => {
875                     let info = x.to_memory_info().unwrap();
876                     let gva = info.guest_virtual_address;
877                     let gpa = info.guest_physical_address;
878 
879                     Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
880                         "Unhandled VCPU exit: Unaccepted GPA({:x}) found at GVA({:x})",
881                         gpa,
882                         gva,
883                     )))
884                 }
885                 #[cfg(target_arch = "x86_64")]
886                 hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
887                     let info = x.to_cpuid_info().unwrap();
888                     debug!("cpuid eax: {:x}", { info.rax });
889                     Ok(cpu::VmExit::Ignore)
890                 }
891                 #[cfg(target_arch = "x86_64")]
892                 hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
893                     let info = x.to_msr_info().unwrap();
894                     if info.header.intercept_access_type == 0 {
895                         debug!("msr read: {:x}", { info.msr_number });
896                     } else {
897                         debug!("msr write: {:x}", { info.msr_number });
898                     }
899                     Ok(cpu::VmExit::Ignore)
900                 }
901                 #[cfg(target_arch = "x86_64")]
902                 hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
903                     //TODO: Handler for VMCALL here.
904                     let info = x.to_exception_info().unwrap();
905                     debug!("Exception Info {:?}", { info.exception_vector });
906                     Ok(cpu::VmExit::Ignore)
907                 }
908                 #[cfg(target_arch = "x86_64")]
909                 hv_message_type_HVMSG_X64_APIC_EOI => {
910                     let info = x.to_apic_eoi_info().unwrap();
911                     // The kernel should dispatch the EOI to the correct thread.
912                     // Check the VP index is the same as the one we have.
913                     assert!(info.vp_index == self.vp_index as u32);
914                     // The interrupt vector in info is u32, but x86 only supports 256 vectors.
915                     // There is no good way to recover from this if the hypervisor messes around.
916                     // Just unwrap.
917                     Ok(cpu::VmExit::IoapicEoi(
918                         info.interrupt_vector.try_into().unwrap(),
919                     ))
920                 }
921                 #[cfg(feature = "sev_snp")]
922                 hv_message_type_HVMSG_X64_SEV_VMGEXIT_INTERCEPT => {
923                     let info = x.to_vmg_intercept_info().unwrap();
924                     let ghcb_data = info.ghcb_msr >> GHCB_INFO_BIT_WIDTH;
925                     let ghcb_msr = svm_ghcb_msr {
926                         as_uint64: info.ghcb_msr,
927                     };
928                     // Safe to use unwrap, for sev_snp guest we already have the
929                     // GHCB pointer wrapped in the option, otherwise this place is not reached.
930                     let ghcb = self.ghcb.as_ref().unwrap().0;
931 
932                     // SAFETY: Accessing a union element from bindgen generated bindings.
933                     let ghcb_op = unsafe { ghcb_msr.__bindgen_anon_2.ghcb_info() as u32 };
934                     // Sanity check on the header fields before handling other operations.
935                     assert!(info.header.intercept_access_type == HV_INTERCEPT_ACCESS_EXECUTE as u8);
936 
937                     match ghcb_op {
938                         GHCB_INFO_HYP_FEATURE_REQUEST => {
939                             // Pre-condition: GHCB data must be zero
940                             assert!(ghcb_data == 0);
941                             let mut ghcb_response = GHCB_INFO_HYP_FEATURE_RESPONSE as u64;
942                             // Indicate support for basic SEV-SNP features
943                             ghcb_response |=
944                                 (GHCB_HYP_FEATURE_SEV_SNP << GHCB_INFO_BIT_WIDTH) as u64;
945                             // Indicate support for SEV-SNP AP creation
946                             ghcb_response |= (GHCB_HYP_FEATURE_SEV_SNP_AP_CREATION
947                                 << GHCB_INFO_BIT_WIDTH)
948                                 as u64;
949                             debug!(
950                                 "GHCB_INFO_HYP_FEATURE_REQUEST: Supported features: {:0x}",
951                                 ghcb_response
952                             );
953                             let arr_reg_name_value =
954                                 [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)];
955                             set_registers_64!(self.fd, arr_reg_name_value)
956                                 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
957                         }
958                         GHCB_INFO_REGISTER_REQUEST => {
959                             let mut ghcb_gpa = hv_x64_register_sev_ghcb::default();
960 
961                             // Disable the previously used GHCB page.
962                             self.disable_prev_ghcb_page()?;
963 
964                             // SAFETY: Accessing a union element from bindgen generated bindings.
965                             unsafe {
966                                 ghcb_gpa.__bindgen_anon_1.set_enabled(1);
967                                 ghcb_gpa
968                                     .__bindgen_anon_1
969                                     .set_page_number(ghcb_msr.__bindgen_anon_2.gpa_page_number());
970                             }
971                             // SAFETY: Accessing a union element from bindgen generated bindings.
972                             let reg_name_value = unsafe {
973                                 [(
974                                     hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
975                                     ghcb_gpa.as_uint64,
976                                 )]
977                             };
978 
979                             set_registers_64!(self.fd, reg_name_value)
980                                 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
981 
982                             let mut resp_ghcb_msr = svm_ghcb_msr::default();
983                             // SAFETY: Accessing a union element from bindgen generated bindings.
984                             unsafe {
985                                 resp_ghcb_msr
986                                     .__bindgen_anon_2
987                                     .set_ghcb_info(GHCB_INFO_REGISTER_RESPONSE as u64);
988                                 resp_ghcb_msr.__bindgen_anon_2.set_gpa_page_number(
989                                     ghcb_msr.__bindgen_anon_2.gpa_page_number(),
990                                 );
991                                 debug!("GHCB GPA is {:x}", ghcb_gpa.as_uint64);
992                             }
993                             // SAFETY: Accessing a union element from bindgen generated bindings.
994                             let reg_name_value = unsafe {
995                                 [(
996                                     hv_register_name_HV_X64_REGISTER_GHCB,
997                                     resp_ghcb_msr.as_uint64,
998                                 )]
999                             };
1000 
1001                             set_registers_64!(self.fd, reg_name_value)
1002                                 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
1003                         }
1004                         GHCB_INFO_SEV_INFO_REQUEST => {
1005                             let sev_cpuid_function = 0x8000_001F;
1006                             let cpu_leaf = self
1007                                 .fd
1008                                 .get_cpuid_values(sev_cpuid_function, 0, 0, 0)
1009                                 .unwrap();
1010                             let ebx = cpu_leaf[1];
1011                             // First 6-byte of EBX represents page table encryption bit number
1012                             let pbit_encryption = (ebx & 0x3f) as u8;
1013                             let mut ghcb_response = GHCB_INFO_SEV_INFO_RESPONSE as u64;
1014 
1015                             // GHCBData[63:48] specifies the maximum GHCB protocol version supported
1016                             ghcb_response |= (GHCB_PROTOCOL_VERSION_MAX as u64) << 48;
1017                             // GHCBData[47:32] specifies the minimum GHCB protocol version supported
1018                             ghcb_response |= (GHCB_PROTOCOL_VERSION_MIN as u64) << 32;
1019                             // GHCBData[31:24] specifies the SEV page table encryption bit number.
1020                             ghcb_response |= (pbit_encryption as u64) << 24;
1021 
1022                             let arr_reg_name_value =
1023                                 [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)];
1024                             set_registers_64!(self.fd, arr_reg_name_value)
1025                                 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
1026                         }
1027                         GHCB_INFO_NORMAL => {
1028                             let exit_code =
1029                                 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_code as u32;
1030 
1031                             match exit_code {
1032                                 SVM_EXITCODE_HV_DOORBELL_PAGE => {
1033                                     let exit_info1 =
1034                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32;
1035                                     match exit_info1 {
1036                                         SVM_NAE_HV_DOORBELL_PAGE_GET_PREFERRED => {
1037                                             // Hypervisor does not have any preference for doorbell GPA.
1038                                             let preferred_doorbell_gpa: u64 = 0xFFFFFFFFFFFFFFFF;
1039                                             set_svm_field_u64_ptr!(
1040                                                 ghcb,
1041                                                 exit_info2,
1042                                                 preferred_doorbell_gpa
1043                                             );
1044                                         }
1045                                         SVM_NAE_HV_DOORBELL_PAGE_SET => {
1046                                             let exit_info2 = info
1047                                                 .__bindgen_anon_2
1048                                                 .__bindgen_anon_1
1049                                                 .sw_exit_info2;
1050                                             let mut ghcb_doorbell_gpa =
1051                                                 hv_x64_register_sev_hv_doorbell::default();
1052                                             // SAFETY: Accessing a union element from bindgen generated bindings.
1053                                             unsafe {
1054                                                 ghcb_doorbell_gpa.__bindgen_anon_1.set_enabled(1);
1055                                                 ghcb_doorbell_gpa
1056                                                     .__bindgen_anon_1
1057                                                     .set_page_number(exit_info2 >> PAGE_SHIFT);
1058                                             }
1059                                             // SAFETY: Accessing a union element from bindgen generated bindings.
1060                                             let reg_names = unsafe {
1061                                                 [(
1062                                                     hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA,
1063                                                     ghcb_doorbell_gpa.as_uint64,
1064                                                 )]
1065                                             };
1066                                             set_registers_64!(self.fd, reg_names).map_err(|e| {
1067                                                 cpu::HypervisorCpuError::SetRegister(e.into())
1068                                             })?;
1069 
1070                                             set_svm_field_u64_ptr!(ghcb, exit_info2, exit_info2);
1071 
1072                                             // Clear the SW_EXIT_INFO1 register to indicate no error
1073                                             self.clear_swexit_info1()?;
1074                                         }
1075                                         SVM_NAE_HV_DOORBELL_PAGE_QUERY => {
1076                                             let mut reg_assocs = [ hv_register_assoc {
1077                                                 name: hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA,
1078                                                 ..Default::default()
1079                                             } ];
1080                                             self.fd.get_reg(&mut reg_assocs).unwrap();
1081                                             // SAFETY: Accessing a union element from bindgen generated bindings.
1082                                             let doorbell_gpa = unsafe { reg_assocs[0].value.reg64 };
1083 
1084                                             set_svm_field_u64_ptr!(ghcb, exit_info2, doorbell_gpa);
1085 
1086                                             // Clear the SW_EXIT_INFO1 register to indicate no error
1087                                             self.clear_swexit_info1()?;
1088                                         }
1089                                         SVM_NAE_HV_DOORBELL_PAGE_CLEAR => {
1090                                             set_svm_field_u64_ptr!(ghcb, exit_info2, 0);
1091                                         }
1092                                         _ => {
1093                                             panic!(
1094                                                 "SVM_EXITCODE_HV_DOORBELL_PAGE: Unhandled exit code: {:0x}",
1095                                                 exit_info1
1096                                             );
1097                                         }
1098                                     }
1099                                 }
1100                                 SVM_EXITCODE_IOIO_PROT => {
1101                                     let exit_info1 =
1102                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32;
1103                                     let port_info = hv_sev_vmgexit_port_info {
1104                                         as_uint32: exit_info1,
1105                                     };
1106 
1107                                     let port =
1108                                         // SAFETY: Accessing a union element from bindgen generated bindings.
1109                                         unsafe { port_info.__bindgen_anon_1.intercepted_port() };
1110                                     let mut len = 4;
1111                                     // SAFETY: Accessing a union element from bindgen generated bindings.
1112                                     unsafe {
1113                                         if port_info.__bindgen_anon_1.operand_size_16bit() == 1 {
1114                                             len = 2;
1115                                         } else if port_info.__bindgen_anon_1.operand_size_8bit()
1116                                             == 1
1117                                         {
1118                                             len = 1;
1119                                         }
1120                                     }
1121                                     let is_write =
1122                                         // SAFETY: Accessing a union element from bindgen generated bindings.
1123                                         unsafe { port_info.__bindgen_anon_1.access_type() == 0 };
1124                                     // SAFETY: Accessing the field from a mapped address
1125                                     let mut data = unsafe { (*ghcb).rax.to_le_bytes() };
1126 
1127                                     if is_write {
1128                                         if let Some(vm_ops) = &self.vm_ops {
1129                                             vm_ops.pio_write(port.into(), &data[..len]).map_err(
1130                                                 |e| cpu::HypervisorCpuError::RunVcpu(e.into()),
1131                                             )?;
1132                                         }
1133                                     } else {
1134                                         if let Some(vm_ops) = &self.vm_ops {
1135                                             vm_ops
1136                                                 .pio_read(port.into(), &mut data[..len])
1137                                                 .map_err(|e| {
1138                                                     cpu::HypervisorCpuError::RunVcpu(e.into())
1139                                                 })?;
1140                                         }
1141                                         set_svm_field_u64_ptr!(ghcb, rax, u64::from_le_bytes(data));
1142                                     }
1143 
1144                                     // Clear the SW_EXIT_INFO1 register to indicate no error
1145                                     self.clear_swexit_info1()?;
1146                                 }
1147                                 SVM_EXITCODE_MMIO_READ => {
1148                                     let src_gpa =
1149                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
1150                                     let data_len =
1151                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2
1152                                             as usize;
1153                                     // Sanity check to make sure data len is within supported range.
1154                                     assert!(data_len <= 0x8);
1155 
1156                                     let mut data: Vec<u8> = vec![0; data_len];
1157                                     if let Some(vm_ops) = &self.vm_ops {
1158                                         vm_ops.mmio_read(src_gpa, &mut data).map_err(|e| {
1159                                             cpu::HypervisorCpuError::RunVcpu(e.into())
1160                                         })?;
1161                                     }
1162                                     // Copy the data to the shared buffer of the GHCB page
1163                                     let mut buffer_data = [0; 8];
1164                                     buffer_data[..data_len].copy_from_slice(&data[..data_len]);
1165                                     // SAFETY: Updating the value of mapped area
1166                                     unsafe { (*ghcb).shared[0] = u64::from_le_bytes(buffer_data) };
1167 
1168                                     // Clear the SW_EXIT_INFO1 register to indicate no error
1169                                     self.clear_swexit_info1()?;
1170                                 }
1171                                 SVM_EXITCODE_MMIO_WRITE => {
1172                                     let dst_gpa =
1173                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
1174                                     let data_len =
1175                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2
1176                                             as usize;
1177                                     // Sanity check to make sure data len is within supported range.
1178                                     assert!(data_len <= 0x8);
1179 
1180                                     let mut data = vec![0; data_len];
1181                                     // SAFETY: Accessing data from a mapped address
1182                                     let bytes_shared_ghcb =
1183                                         unsafe { (*ghcb).shared[0].to_le_bytes() };
1184                                     data.copy_from_slice(&bytes_shared_ghcb[..data_len]);
1185 
1186                                     if let Some(vm_ops) = &self.vm_ops {
1187                                         vm_ops.mmio_write(dst_gpa, &data).map_err(|e| {
1188                                             cpu::HypervisorCpuError::RunVcpu(e.into())
1189                                         })?;
1190                                     }
1191 
1192                                     // Clear the SW_EXIT_INFO1 register to indicate no error
1193                                     self.clear_swexit_info1()?;
1194                                 }
1195                                 SVM_EXITCODE_SNP_GUEST_REQUEST
1196                                 | SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST => {
1197                                     if exit_code == SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST {
1198                                         info!("Fetching extended guest request is not supported");
1199                                         // We don't support extended guest request, so we just write empty data.
1200                                         // This matches the behavior of KVM in Linux 6.11.
1201 
1202                                         // Read RBX from the GHCB.
1203                                         // SAFETY: Accessing data from a mapped address
1204                                         let data_gpa = unsafe { (*ghcb).rax };
1205                                         // SAFETY: Accessing data from a mapped address
1206                                         let data_npages = unsafe { (*ghcb).rbx };
1207 
1208                                         if data_npages > 0 {
1209                                             // The certificates are terminated by 24 zero bytes.
1210                                             // TODO: Need to check if data_gpa is the address of the shared buffer in the GHCB page
1211                                             // in that case we should clear the shared buffer(24 bytes)
1212                                             self.gpa_write(data_gpa, &[0; 24])?;
1213                                         }
1214                                     }
1215 
1216                                     let req_gpa =
1217                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
1218                                     let rsp_gpa =
1219                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2;
1220 
1221                                     let mshv_psp_req =
1222                                         mshv_issue_psp_guest_request { req_gpa, rsp_gpa };
1223                                     self.vm_fd
1224                                         .psp_issue_guest_request(&mshv_psp_req)
1225                                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
1226 
1227                                     debug!(
1228                                         "SNP guest request: req_gpa {:0x} rsp_gpa {:0x}",
1229                                         req_gpa, rsp_gpa
1230                                     );
1231 
1232                                     set_svm_field_u64_ptr!(ghcb, exit_info2, 0);
1233                                 }
1234                                 SVM_EXITCODE_SNP_AP_CREATION => {
1235                                     let vmsa_gpa =
1236                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2;
1237                                     let apic_id =
1238                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 >> 32;
1239                                     debug!(
1240                                         "SNP AP CREATE REQUEST with VMSA GPA {:0x}, and APIC ID {:?}",
1241                                         vmsa_gpa, apic_id
1242                                     );
1243 
1244                                     let mshv_ap_create_req = mshv_sev_snp_ap_create {
1245                                         vp_id: apic_id,
1246                                         vmsa_gpa,
1247                                     };
1248                                     self.vm_fd
1249                                         .sev_snp_ap_create(&mshv_ap_create_req)
1250                                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
1251 
1252                                     // Clear the SW_EXIT_INFO1 register to indicate no error
1253                                     self.clear_swexit_info1()?;
1254                                 }
1255                                 _ => panic!(
1256                                     "GHCB_INFO_NORMAL: Unhandled exit code: {:0x}",
1257                                     exit_code
1258                                 ),
1259                             }
1260                         }
1261                         _ => panic!("Unsupported VMGEXIT operation: {:0x}", ghcb_op),
1262                     }
1263 
1264                     Ok(cpu::VmExit::Ignore)
1265                 }
1266                 exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1267                     "Unhandled VCPU exit {:?}",
1268                     exit
1269                 ))),
1270             },
1271 
1272             Err(e) => match e.errno() {
1273                 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
1274                 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1275                     "VCPU error {:?}",
1276                     e
1277                 ))),
1278             },
1279         }
1280     }
1281 
1282     #[cfg(target_arch = "aarch64")]
1283     fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
1284         unimplemented!()
1285     }
1286 
1287     #[cfg(target_arch = "aarch64")]
1288     fn has_pmu_support(&self) -> bool {
1289         unimplemented!()
1290     }
1291 
1292     #[cfg(target_arch = "aarch64")]
1293     fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
1294         unimplemented!()
1295     }
1296 
1297     #[cfg(target_arch = "aarch64")]
1298     fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
1299         unimplemented!()
1300     }
1301 
1302     #[cfg(target_arch = "aarch64")]
1303     fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
1304         unimplemented!()
1305     }
1306 
1307     #[cfg(target_arch = "aarch64")]
1308     fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
1309         unimplemented!()
1310     }
1311 
1312     #[cfg(target_arch = "aarch64")]
1313     fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
1314         unimplemented!()
1315     }
1316 
1317     #[cfg(target_arch = "aarch64")]
1318     fn get_regs(&self) -> cpu::Result<StandardRegisters> {
1319         unimplemented!()
1320     }
1321 
1322     #[cfg(target_arch = "aarch64")]
1323     fn vcpu_finalize(&self, _feature: i32) -> cpu::Result<()> {
1324         unimplemented!()
1325     }
1326 
1327     #[cfg(target_arch = "aarch64")]
1328     fn vcpu_get_finalized_features(&self) -> i32 {
1329         unimplemented!()
1330     }
1331 
1332     #[cfg(target_arch = "aarch64")]
1333     fn vcpu_set_processor_features(
1334         &self,
1335         _vm: &Arc<dyn crate::Vm>,
1336         _kvi: &mut crate::VcpuInit,
1337         _id: u8,
1338     ) -> cpu::Result<()> {
1339         unimplemented!()
1340     }
1341 
1342     #[cfg(target_arch = "aarch64")]
1343     fn create_vcpu_init(&self) -> crate::VcpuInit {
1344         unimplemented!();
1345     }
1346 
1347     #[cfg(target_arch = "x86_64")]
1348     ///
1349     /// X86 specific call to setup the CPUID registers.
1350     ///
1351     fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
1352         let cpuid: Vec<mshv_bindings::hv_cpuid_entry> = cpuid.iter().map(|e| (*e).into()).collect();
1353         let mshv_cpuid = <CpuId>::from_entries(&cpuid)
1354             .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;
1355 
1356         self.fd
1357             .register_intercept_result_cpuid(&mshv_cpuid)
1358             .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
1359     }
1360 
1361     #[cfg(target_arch = "x86_64")]
1362     ///
1363     /// X86 specific call to retrieve the CPUID registers.
1364     ///
1365     fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
1366         Ok(self.cpuid.clone())
1367     }
1368 
1369     #[cfg(target_arch = "x86_64")]
1370     ///
1371     /// X86 specific call to retrieve cpuid leaf
1372     ///
1373     fn get_cpuid_values(
1374         &self,
1375         function: u32,
1376         index: u32,
1377         xfem: u64,
1378         xss: u64,
1379     ) -> cpu::Result<[u32; 4]> {
1380         self.fd
1381             .get_cpuid_values(function, index, xfem, xss)
1382             .map_err(|e| cpu::HypervisorCpuError::GetCpuidVales(e.into()))
1383     }
1384 
1385     #[cfg(target_arch = "x86_64")]
1386     ///
1387     /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1388     ///
1389     fn get_lapic(&self) -> cpu::Result<crate::arch::x86::LapicState> {
1390         Ok(self
1391             .fd
1392             .get_lapic()
1393             .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
1394             .into())
1395     }
1396 
1397     #[cfg(target_arch = "x86_64")]
1398     ///
1399     /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1400     ///
1401     fn set_lapic(&self, lapic: &crate::arch::x86::LapicState) -> cpu::Result<()> {
1402         let lapic: mshv_bindings::LapicState = (*lapic).clone().into();
1403         self.fd
1404             .set_lapic(&lapic)
1405             .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
1406     }
1407 
1408     ///
1409     /// Returns the vcpu's current "multiprocessing state".
1410     ///
1411     fn get_mp_state(&self) -> cpu::Result<MpState> {
1412         Ok(MpState::Mshv)
1413     }
1414 
1415     ///
1416     /// Sets the vcpu's current "multiprocessing state".
1417     ///
1418     fn set_mp_state(&self, _mp_state: MpState) -> cpu::Result<()> {
1419         Ok(())
1420     }
1421 
1422     #[cfg(target_arch = "x86_64")]
1423     ///
1424     /// Set CPU state for x86_64 guest.
1425     ///
1426     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
1427         let mut state: VcpuMshvState = state.clone().into();
1428         self.set_msrs(&state.msrs)?;
1429         self.set_vcpu_events(&state.vcpu_events)?;
1430         self.set_regs(&state.regs.into())?;
1431         self.set_sregs(&state.sregs.into())?;
1432         self.set_fpu(&state.fpu)?;
1433         self.set_xcrs(&state.xcrs)?;
1434         // These registers are global and needed to be set only for first VCPU
1435         // as Microsoft Hypervisor allows setting this register for only one VCPU
1436         if self.vp_index == 0 {
1437             self.fd
1438                 .set_misc_regs(&state.misc)
1439                 .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))?
1440         }
1441         self.fd
1442             .set_debug_regs(&state.dbg)
1443             .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
1444         self.fd
1445             .set_all_vp_state_components(&mut state.vp_states)
1446             .map_err(|e| cpu::HypervisorCpuError::SetAllVpStateComponents(e.into()))?;
1447         Ok(())
1448     }
1449 
1450     #[cfg(target_arch = "aarch64")]
1451     ///
1452     /// Set CPU state for aarch64 guest.
1453     ///
1454     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
1455         unimplemented!()
1456     }
1457 
1458     #[cfg(target_arch = "x86_64")]
1459     ///
1460     /// Get CPU State for x86_64 guest
1461     ///
1462     fn state(&self) -> cpu::Result<CpuState> {
1463         let regs = self.get_regs()?;
1464         let sregs = self.get_sregs()?;
1465         let xcrs = self.get_xcrs()?;
1466         let fpu = self.get_fpu()?;
1467         let vcpu_events = self.get_vcpu_events()?;
1468         let mut msrs = self.msrs.clone();
1469         self.get_msrs(&mut msrs)?;
1470         let misc = self
1471             .fd
1472             .get_misc_regs()
1473             .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?;
1474         let dbg = self
1475             .fd
1476             .get_debug_regs()
1477             .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;
1478         let vp_states = self
1479             .fd
1480             .get_all_vp_state_components()
1481             .map_err(|e| cpu::HypervisorCpuError::GetAllVpStateComponents(e.into()))?;
1482 
1483         Ok(VcpuMshvState {
1484             msrs,
1485             vcpu_events,
1486             regs: regs.into(),
1487             sregs: sregs.into(),
1488             fpu,
1489             xcrs,
1490             dbg,
1491             misc,
1492             vp_states,
1493         }
1494         .into())
1495     }
1496 
1497     #[cfg(target_arch = "aarch64")]
1498     ///
1499     /// Get CPU state for aarch64 guest.
1500     ///
1501     fn state(&self) -> cpu::Result<CpuState> {
1502         unimplemented!()
1503     }
1504 
1505     #[cfg(target_arch = "x86_64")]
1506     ///
1507     /// Translate guest virtual address to guest physical address
1508     ///
1509     fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> {
1510         let r = self
1511             .fd
1512             .translate_gva(gva, flags)
1513             .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
1514 
1515         let gpa = r.0;
1516         // SAFETY: r is valid, otherwise this function will have returned
1517         let result_code = unsafe { r.1.__bindgen_anon_1.result_code };
1518 
1519         Ok((gpa, result_code))
1520     }
1521 
1522     #[cfg(target_arch = "x86_64")]
1523     ///
1524     /// Return the list of initial MSR entries for a VCPU
1525     ///
1526     fn boot_msr_entries(&self) -> Vec<MsrEntry> {
1527         use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};
1528 
1529         [
1530             msr!(msr_index::MSR_IA32_SYSENTER_CS),
1531             msr!(msr_index::MSR_IA32_SYSENTER_ESP),
1532             msr!(msr_index::MSR_IA32_SYSENTER_EIP),
1533             msr!(msr_index::MSR_STAR),
1534             msr!(msr_index::MSR_CSTAR),
1535             msr!(msr_index::MSR_LSTAR),
1536             msr!(msr_index::MSR_KERNEL_GS_BASE),
1537             msr!(msr_index::MSR_SYSCALL_MASK),
1538             msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
1539         ]
1540         .to_vec()
1541     }
1542 
1543     ///
1544     /// Sets the AMD specific vcpu's sev control register.
1545     ///
1546     #[cfg(feature = "sev_snp")]
1547     fn set_sev_control_register(&self, vmsa_pfn: u64) -> cpu::Result<()> {
1548         let sev_control_reg = snp::get_sev_control_register(vmsa_pfn);
1549 
1550         self.fd
1551             .set_sev_control_register(sev_control_reg)
1552             .map_err(|e| cpu::HypervisorCpuError::SetSevControlRegister(e.into()))
1553     }
1554     #[cfg(target_arch = "x86_64")]
1555     ///
1556     /// Trigger NMI interrupt
1557     ///
1558     fn nmi(&self) -> cpu::Result<()> {
1559         let cfg = InterruptRequest {
1560             interrupt_type: hv_interrupt_type_HV_X64_INTERRUPT_TYPE_NMI,
1561             apic_id: self.vp_index as u64,
1562             level_triggered: false,
1563             vector: 0,
1564             logical_destination_mode: false,
1565             long_mode: false,
1566         };
1567         self.vm_fd
1568             .request_virtual_interrupt(&cfg)
1569             .map_err(|e| cpu::HypervisorCpuError::Nmi(e.into()))
1570     }
1571 }
1572 
1573 impl MshvVcpu {
1574     ///
1575     /// Deactivate previously used GHCB page.
1576     ///
1577     #[cfg(feature = "sev_snp")]
1578     fn disable_prev_ghcb_page(&self) -> cpu::Result<()> {
1579         let mut reg_assocs = [hv_register_assoc {
1580             name: hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
1581             ..Default::default()
1582         }];
1583         self.fd.get_reg(&mut reg_assocs).unwrap();
1584         // SAFETY: Accessing a union element from bindgen generated bindings.
1585         let prev_ghcb_gpa = unsafe { reg_assocs[0].value.reg64 };
1586 
1587         debug!("Prev GHCB GPA is {:x}", prev_ghcb_gpa);
1588 
1589         let mut ghcb_gpa = hv_x64_register_sev_ghcb::default();
1590 
1591         // SAFETY: Accessing a union element from bindgen generated bindings.
1592         unsafe {
1593             ghcb_gpa.__bindgen_anon_1.set_enabled(0);
1594             ghcb_gpa.__bindgen_anon_1.set_page_number(prev_ghcb_gpa);
1595         }
1596 
1597         // SAFETY: Accessing a union element from bindgen generated bindings.
1598         let reg_name_value = unsafe {
1599             [(
1600                 hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
1601                 ghcb_gpa.as_uint64,
1602             )]
1603         };
1604 
1605         set_registers_64!(self.fd, reg_name_value)
1606             .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
1607 
1608         Ok(())
1609     }
1610     #[cfg(target_arch = "x86_64")]
1611     ///
1612     /// X86 specific call that returns the vcpu's current "xcrs".
1613     ///
1614     fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
1615         self.fd
1616             .get_xcrs()
1617             .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
1618     }
1619 
1620     #[cfg(target_arch = "x86_64")]
1621     ///
1622     /// X86 specific call that sets the vcpu's current "xcrs".
1623     ///
1624     fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
1625         self.fd
1626             .set_xcrs(xcrs)
1627             .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
1628     }
1629 
1630     #[cfg(target_arch = "x86_64")]
1631     ///
1632     /// Returns currently pending exceptions, interrupts, and NMIs as well as related
1633     /// states of the vcpu.
1634     ///
1635     fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
1636         self.fd
1637             .get_vcpu_events()
1638             .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
1639     }
1640 
1641     #[cfg(target_arch = "x86_64")]
1642     ///
1643     /// Sets pending exceptions, interrupts, and NMIs as well as related states
1644     /// of the vcpu.
1645     ///
1646     fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
1647         self.fd
1648             .set_vcpu_events(events)
1649             .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
1650     }
1651 
1652     ///
1653     /// Clear SW_EXIT_INFO1 register for SEV-SNP guests.
1654     ///
1655     #[cfg(feature = "sev_snp")]
1656     fn clear_swexit_info1(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
1657         // Clear the SW_EXIT_INFO1 register to indicate no error
1658         // Safe to use unwrap, for sev_snp guest we already have the
1659         // GHCB pointer wrapped in the option, otherwise this place is not reached.
1660         let ghcb = self.ghcb.as_ref().unwrap().0;
1661         set_svm_field_u64_ptr!(ghcb, exit_info1, 0);
1662 
1663         Ok(cpu::VmExit::Ignore)
1664     }
1665 
1666     #[cfg(feature = "sev_snp")]
1667     fn gpa_write(&self, gpa: u64, data: &[u8]) -> cpu::Result<()> {
1668         for (gpa, chunk) in (gpa..)
1669             .step_by(HV_READ_WRITE_GPA_MAX_SIZE as usize)
1670             .zip(data.chunks(HV_READ_WRITE_GPA_MAX_SIZE as usize))
1671         {
1672             let mut data = [0; HV_READ_WRITE_GPA_MAX_SIZE as usize];
1673             data[..chunk.len()].copy_from_slice(chunk);
1674 
1675             let mut rw_gpa_arg = mshv_bindings::mshv_read_write_gpa {
1676                 base_gpa: gpa,
1677                 byte_count: chunk.len() as u32,
1678                 data,
1679                 ..Default::default()
1680             };
1681             self.fd
1682                 .gpa_write(&mut rw_gpa_arg)
1683                 .map_err(|e| cpu::HypervisorCpuError::GpaWrite(e.into()))?;
1684         }
1685 
1686         Ok(())
1687     }
1688 }
1689 
1690 /// Wrapper over Mshv VM ioctls.
1691 pub struct MshvVm {
1692     fd: Arc<VmFd>,
1693     #[cfg(target_arch = "x86_64")]
1694     msrs: Vec<MsrEntry>,
1695     dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
1696     #[cfg(feature = "sev_snp")]
1697     sev_snp_enabled: bool,
1698     #[cfg(feature = "sev_snp")]
1699     host_access_pages: ArcSwap<AtomicBitmap>,
1700 }
1701 
1702 impl MshvVm {
1703     ///
1704     /// Creates an in-kernel device.
1705     ///
1706     /// See the documentation for `MSHV_CREATE_DEVICE`.
1707     fn create_device(&self, device: &mut CreateDevice) -> vm::Result<VfioDeviceFd> {
1708         let device_fd = self
1709             .fd
1710             .create_device(device)
1711             .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
1712         Ok(VfioDeviceFd::new_from_mshv(device_fd))
1713     }
1714 }
1715 
1716 ///
1717 /// Implementation of Vm trait for Mshv
1718 ///
1719 /// # Examples
1720 ///
1721 /// ```
1722 /// extern crate hypervisor;
1723 /// use hypervisor::mshv::MshvHypervisor;
1724 /// use std::sync::Arc;
1725 /// let mshv = MshvHypervisor::new().unwrap();
1726 /// let hypervisor = Arc::new(mshv);
1727 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
1728 /// ```
1729 impl vm::Vm for MshvVm {
1730     #[cfg(target_arch = "x86_64")]
1731     ///
1732     /// Sets the address of the one-page region in the VM's address space.
1733     ///
1734     fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> {
1735         Ok(())
1736     }
1737 
1738     #[cfg(target_arch = "x86_64")]
1739     ///
1740     /// Sets the address of the three-page region in the VM's address space.
1741     ///
1742     fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
1743         Ok(())
1744     }
1745 
1746     ///
1747     /// Creates an in-kernel interrupt controller.
1748     ///
1749     fn create_irq_chip(&self) -> vm::Result<()> {
1750         Ok(())
1751     }
1752 
1753     ///
1754     /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
1755     ///
1756     fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
1757         debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);
1758 
1759         self.fd
1760             .register_irqfd(fd, gsi)
1761             .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;
1762 
1763         Ok(())
1764     }
1765 
1766     ///
1767     /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
1768     ///
1769     fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
1770         debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);
1771 
1772         self.fd
1773             .unregister_irqfd(fd, gsi)
1774             .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;
1775 
1776         Ok(())
1777     }
1778 
1779     ///
1780     /// Creates a VcpuFd object from a vcpu RawFd.
1781     ///
1782     fn create_vcpu(
1783         &self,
1784         id: u8,
1785         vm_ops: Option<Arc<dyn VmOps>>,
1786     ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
1787         let vcpu_fd = self
1788             .fd
1789             .create_vcpu(id)
1790             .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
1791 
1792         /* Map the GHCB page to the VMM(root) address space
1793          * The map is available after the vcpu creation. This address is mapped
1794          * to the overlay ghcb page of the Microsoft Hypervisor, don't have
1795          * to worry about the scenario when a guest changes the GHCB mapping.
1796          */
1797         #[cfg(feature = "sev_snp")]
1798         let ghcb = if self.sev_snp_enabled {
1799             // SAFETY: Safe to call as VCPU has this map already available upon creation
1800             let addr = unsafe {
1801                 libc::mmap(
1802                     std::ptr::null_mut(),
1803                     HV_PAGE_SIZE,
1804                     libc::PROT_READ | libc::PROT_WRITE,
1805                     libc::MAP_SHARED,
1806                     vcpu_fd.as_raw_fd(),
1807                     MSHV_VP_MMAP_OFFSET_GHCB as i64 * libc::sysconf(libc::_SC_PAGE_SIZE),
1808                 )
1809             };
1810             if addr == libc::MAP_FAILED {
1811                 // No point of continuing, without this mmap VMGEXIT will fail anyway
1812                 // Return error
1813                 return Err(vm::HypervisorVmError::MmapToRoot);
1814             }
1815             Some(Ghcb(addr as *mut svm_ghcb_base))
1816         } else {
1817             None
1818         };
1819         let vcpu = MshvVcpu {
1820             fd: vcpu_fd,
1821             vp_index: id,
1822             #[cfg(target_arch = "x86_64")]
1823             cpuid: Vec::new(),
1824             #[cfg(target_arch = "x86_64")]
1825             msrs: self.msrs.clone(),
1826             vm_ops,
1827             vm_fd: self.fd.clone(),
1828             #[cfg(feature = "sev_snp")]
1829             ghcb,
1830             #[cfg(feature = "sev_snp")]
1831             host_access_pages: ArcSwap::new(self.host_access_pages.load().clone()),
1832         };
1833         Ok(Arc::new(vcpu))
1834     }
1835 
1836     #[cfg(target_arch = "x86_64")]
1837     fn enable_split_irq(&self) -> vm::Result<()> {
1838         Ok(())
1839     }
1840 
1841     #[cfg(target_arch = "x86_64")]
1842     fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
1843         Ok(())
1844     }
1845 
1846     fn register_ioevent(
1847         &self,
1848         fd: &EventFd,
1849         addr: &IoEventAddress,
1850         datamatch: Option<DataMatch>,
1851     ) -> vm::Result<()> {
1852         #[cfg(feature = "sev_snp")]
1853         if self.sev_snp_enabled {
1854             return Ok(());
1855         }
1856 
1857         let addr = &mshv_ioctls::IoEventAddress::from(*addr);
1858         debug!(
1859             "register_ioevent fd {} addr {:x?} datamatch {:?}",
1860             fd.as_raw_fd(),
1861             addr,
1862             datamatch
1863         );
1864         if let Some(dm) = datamatch {
1865             match dm {
1866                 vm::DataMatch::DataMatch32(mshv_dm32) => self
1867                     .fd
1868                     .register_ioevent(fd, addr, mshv_dm32)
1869                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
1870                 vm::DataMatch::DataMatch64(mshv_dm64) => self
1871                     .fd
1872                     .register_ioevent(fd, addr, mshv_dm64)
1873                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
1874             }
1875         } else {
1876             self.fd
1877                 .register_ioevent(fd, addr, NoDatamatch)
1878                 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
1879         }
1880     }
1881 
1882     /// Unregister an event from a certain address it has been previously registered to.
1883     fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
1884         #[cfg(feature = "sev_snp")]
1885         if self.sev_snp_enabled {
1886             return Ok(());
1887         }
1888 
1889         let addr = &mshv_ioctls::IoEventAddress::from(*addr);
1890         debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);
1891 
1892         self.fd
1893             .unregister_ioevent(fd, addr, NoDatamatch)
1894             .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
1895     }
1896 
1897     /// Creates a guest physical memory region.
1898     fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
1899         let user_memory_region: mshv_user_mem_region = user_memory_region.into();
1900         // No matter read only or not we keep track the slots.
1901         // For readonly hypervisor can enable the dirty bits,
1902         // but a VM exit happens before setting the dirty bits
1903         self.dirty_log_slots.write().unwrap().insert(
1904             user_memory_region.guest_pfn,
1905             MshvDirtyLogSlot {
1906                 guest_pfn: user_memory_region.guest_pfn,
1907                 memory_size: user_memory_region.size,
1908             },
1909         );
1910 
1911         self.fd
1912             .map_user_memory(user_memory_region)
1913             .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
1914         Ok(())
1915     }
1916 
1917     /// Removes a guest physical memory region.
1918     fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
1919         let user_memory_region: mshv_user_mem_region = user_memory_region.into();
1920         // Remove the corresponding entry from "self.dirty_log_slots" if needed
1921         self.dirty_log_slots
1922             .write()
1923             .unwrap()
1924             .remove(&user_memory_region.guest_pfn);
1925 
1926         self.fd
1927             .unmap_user_memory(user_memory_region)
1928             .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
1929         Ok(())
1930     }
1931 
1932     fn make_user_memory_region(
1933         &self,
1934         _slot: u32,
1935         guest_phys_addr: u64,
1936         memory_size: u64,
1937         userspace_addr: u64,
1938         readonly: bool,
1939         _log_dirty_pages: bool,
1940     ) -> UserMemoryRegion {
1941         let mut flags = 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
1942         if !readonly {
1943             flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
1944         }
1945 
1946         mshv_user_mem_region {
1947             flags,
1948             guest_pfn: guest_phys_addr >> PAGE_SHIFT,
1949             size: memory_size,
1950             userspace_addr,
1951             ..Default::default()
1952         }
1953         .into()
1954     }
1955 
1956     fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
1957         let mut vfio_dev = mshv_create_device {
1958             type_: MSHV_DEV_TYPE_VFIO,
1959             fd: 0,
1960             flags: 0,
1961         };
1962 
1963         self.create_device(&mut vfio_dev)
1964             .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
1965     }
1966 
1967     ///
1968     /// Constructs a routing entry
1969     ///
1970     fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
1971         match config {
1972             InterruptSourceConfig::MsiIrq(cfg) => mshv_user_irq_entry {
1973                 gsi,
1974                 address_lo: cfg.low_addr,
1975                 address_hi: cfg.high_addr,
1976                 data: cfg.data,
1977             }
1978             .into(),
1979             _ => {
1980                 unreachable!()
1981             }
1982         }
1983     }
1984 
1985     fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
1986         let mut msi_routing =
1987             vec_with_array_field::<mshv_user_irq_table, mshv_user_irq_entry>(entries.len());
1988         msi_routing[0].nr = entries.len() as u32;
1989 
1990         let entries: Vec<mshv_user_irq_entry> = entries
1991             .iter()
1992             .map(|entry| match entry {
1993                 IrqRoutingEntry::Mshv(e) => *e,
1994                 #[allow(unreachable_patterns)]
1995                 _ => panic!("IrqRoutingEntry type is wrong"),
1996             })
1997             .collect();
1998 
1999         // SAFETY: msi_routing initialized with entries.len() and now it is being turned into
2000         // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
2001         // everything from entries.
2002         unsafe {
2003             let entries_slice: &mut [mshv_user_irq_entry] =
2004                 msi_routing[0].entries.as_mut_slice(entries.len());
2005             entries_slice.copy_from_slice(&entries);
2006         }
2007 
2008         self.fd
2009             .set_msi_routing(&msi_routing[0])
2010             .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
2011     }
2012 
2013     ///
2014     /// Start logging dirty pages
2015     ///
2016     fn start_dirty_log(&self) -> vm::Result<()> {
2017         self.fd
2018             .enable_dirty_page_tracking()
2019             .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
2020     }
2021 
2022     ///
2023     /// Stop logging dirty pages
2024     ///
2025     fn stop_dirty_log(&self) -> vm::Result<()> {
2026         let dirty_log_slots = self.dirty_log_slots.read().unwrap();
2027         // Before disabling the dirty page tracking we need
2028         // to set the dirty bits in the Hypervisor
2029         // This is a requirement from Microsoft Hypervisor
2030         for (_, s) in dirty_log_slots.iter() {
2031             self.fd
2032                 .get_dirty_log(
2033                     s.guest_pfn,
2034                     s.memory_size as usize,
2035                     MSHV_GPAP_ACCESS_OP_SET as u8,
2036                 )
2037                 .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
2038         }
2039         self.fd
2040             .disable_dirty_page_tracking()
2041             .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
2042         Ok(())
2043     }
2044 
2045     ///
2046     /// Get dirty pages bitmap (one bit per page)
2047     ///
2048     fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
2049         self.fd
2050             .get_dirty_log(
2051                 base_gpa >> PAGE_SHIFT,
2052                 memory_size as usize,
2053                 MSHV_GPAP_ACCESS_OP_CLEAR as u8,
2054             )
2055             .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
2056     }
2057 
2058     /// Retrieve guest clock.
2059     #[cfg(target_arch = "x86_64")]
2060     fn get_clock(&self) -> vm::Result<ClockData> {
2061         let val = self
2062             .fd
2063             .get_partition_property(hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME)
2064             .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?;
2065         Ok(MshvClockData { ref_time: val }.into())
2066     }
2067 
2068     /// Set guest clock.
2069     #[cfg(target_arch = "x86_64")]
2070     fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
2071         let data: MshvClockData = (*data).into();
2072         self.fd
2073             .set_partition_property(
2074                 hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME,
2075                 data.ref_time,
2076             )
2077             .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
2078     }
2079 
2080     /// Downcast to the underlying MshvVm type
2081     fn as_any(&self) -> &dyn Any {
2082         self
2083     }
2084 
2085     /// Initialize the SEV-SNP VM
2086     #[cfg(feature = "sev_snp")]
2087     fn sev_snp_init(&self) -> vm::Result<()> {
2088         self.fd
2089             .set_partition_property(
2090                 hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_STATE,
2091                 hv_partition_isolation_state_HV_PARTITION_ISOLATION_SECURE as u64,
2092             )
2093             .map_err(|e| vm::HypervisorVmError::InitializeSevSnp(e.into()))
2094     }
2095 
2096     ///
2097     /// Importing isolated pages, these pages will be used
2098     /// for the PSP(Platform Security Processor) measurement.
2099     #[cfg(feature = "sev_snp")]
2100     fn import_isolated_pages(
2101         &self,
2102         page_type: u32,
2103         page_size: u32,
2104         pages: &[u64],
2105     ) -> vm::Result<()> {
2106         debug_assert!(page_size == hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB);
2107         if pages.is_empty() {
2108             return Ok(());
2109         }
2110 
2111         let mut isolated_pages =
2112             vec_with_array_field::<mshv_import_isolated_pages, u64>(pages.len());
2113         isolated_pages[0].page_type = page_type as u8;
2114         isolated_pages[0].page_count = pages.len() as u64;
2115         // SAFETY: isolated_pages initialized with pages.len() and now it is being turned into
2116         // pages_slice with pages.len() again. It is guaranteed to be large enough to hold
2117         // everything from pages.
2118         unsafe {
2119             let pages_slice: &mut [u64] = isolated_pages[0].guest_pfns.as_mut_slice(pages.len());
2120             pages_slice.copy_from_slice(pages);
2121         }
2122         self.fd
2123             .import_isolated_pages(&isolated_pages[0])
2124             .map_err(|e| vm::HypervisorVmError::ImportIsolatedPages(e.into()))
2125     }
2126 
2127     ///
2128     /// Complete isolated import, telling the hypervisor that
2129     /// importing the pages to guest memory is complete.
2130     ///
2131     #[cfg(feature = "sev_snp")]
2132     fn complete_isolated_import(
2133         &self,
2134         snp_id_block: IGVM_VHS_SNP_ID_BLOCK,
2135         host_data: [u8; 32],
2136         id_block_enabled: u8,
2137     ) -> vm::Result<()> {
2138         let mut auth_info = hv_snp_id_auth_info {
2139             id_key_algorithm: snp_id_block.id_key_algorithm,
2140             auth_key_algorithm: snp_id_block.author_key_algorithm,
2141             ..Default::default()
2142         };
2143         // Each of r/s component is 576 bits long
2144         auth_info.id_block_signature[..SIG_R_COMPONENT_SIZE_IN_BYTES]
2145             .copy_from_slice(snp_id_block.id_key_signature.r_comp.as_ref());
2146         auth_info.id_block_signature
2147             [SIG_R_COMPONENT_SIZE_IN_BYTES..SIG_R_AND_S_COMPONENT_SIZE_IN_BYTES]
2148             .copy_from_slice(snp_id_block.id_key_signature.s_comp.as_ref());
2149         auth_info.id_key[..ECDSA_CURVE_ID_SIZE_IN_BYTES]
2150             .copy_from_slice(snp_id_block.id_public_key.curve.to_le_bytes().as_ref());
2151         auth_info.id_key[ECDSA_SIG_X_COMPONENT_START..ECDSA_SIG_X_COMPONENT_END]
2152             .copy_from_slice(snp_id_block.id_public_key.qx.as_ref());
2153         auth_info.id_key[ECDSA_SIG_Y_COMPONENT_START..ECDSA_SIG_Y_COMPONENT_END]
2154             .copy_from_slice(snp_id_block.id_public_key.qy.as_ref());
2155 
2156         let data = mshv_complete_isolated_import {
2157             import_data: hv_partition_complete_isolated_import_data {
2158                 psp_parameters: hv_psp_launch_finish_data {
2159                     id_block: hv_snp_id_block {
2160                         launch_digest: snp_id_block.ld,
2161                         family_id: snp_id_block.family_id,
2162                         image_id: snp_id_block.image_id,
2163                         version: snp_id_block.version,
2164                         guest_svn: snp_id_block.guest_svn,
2165                         policy: get_default_snp_guest_policy(),
2166                     },
2167                     id_auth_info: auth_info,
2168                     host_data,
2169                     id_block_enabled,
2170                     author_key_enabled: 0,
2171                 },
2172             },
2173         };
2174         self.fd
2175             .complete_isolated_import(&data)
2176             .map_err(|e| vm::HypervisorVmError::CompleteIsolatedImport(e.into()))
2177     }
2178 
2179     #[cfg(target_arch = "aarch64")]
2180     fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
2181         unimplemented!()
2182     }
2183 
2184     #[cfg(target_arch = "aarch64")]
2185     fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
2186         unimplemented!()
2187     }
2188 
2189     /// Pause the VM
2190     fn pause(&self) -> vm::Result<()> {
2191         // Freeze the partition
2192         self.fd
2193             .set_partition_property(
2194                 hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
2195                 1u64,
2196             )
2197             .map_err(|e| {
2198                 vm::HypervisorVmError::SetVmProperty(anyhow!(
2199                     "Failed to set partition property: {}",
2200                     e
2201                 ))
2202             })
2203     }
2204 
2205     /// Resume the VM
2206     fn resume(&self) -> vm::Result<()> {
2207         // Resuming the partition using TIME_FREEZE property
2208         self.fd
2209             .set_partition_property(
2210                 hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
2211                 0u64,
2212             )
2213             .map_err(|e| {
2214                 vm::HypervisorVmError::SetVmProperty(anyhow!(
2215                     "Failed to set partition property: {}",
2216                     e
2217                 ))
2218             })
2219     }
2220 
2221     #[cfg(feature = "sev_snp")]
2222     fn gain_page_access(&self, gpa: u64, size: u32) -> vm::Result<()> {
2223         use mshv_ioctls::set_bits;
2224         const ONE_GB: usize = 1024 * 1024 * 1024;
2225 
2226         if !self.sev_snp_enabled {
2227             return Ok(());
2228         }
2229 
2230         let start_gpfn: u64 = gpa >> PAGE_SHIFT;
2231         let end_gpfn: u64 = (gpa + size as u64 - 1) >> PAGE_SHIFT;
2232 
2233         // Enlarge the bitmap if the PFN is greater than the bitmap length
2234         if end_gpfn >= self.host_access_pages.load().as_ref().len() as u64 {
2235             self.host_access_pages.rcu(|bitmap| {
2236                 let mut bm = bitmap.as_ref().clone();
2237                 bm.enlarge(ONE_GB);
2238                 bm
2239             });
2240         }
2241 
2242         let gpas: Vec<u64> = (start_gpfn..=end_gpfn)
2243             .filter(|x| {
2244                 !self
2245                     .host_access_pages
2246                     .load()
2247                     .as_ref()
2248                     .is_bit_set(*x as usize)
2249             })
2250             .map(|x| x << PAGE_SHIFT)
2251             .collect();
2252 
2253         if !gpas.is_empty() {
2254             let mut gpa_list = vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len());
2255             gpa_list[0].page_count = gpas.len() as u64;
2256             gpa_list[0].flags = set_bits!(
2257                 u8,
2258                 MSHV_GPA_HOST_ACCESS_BIT_ACQUIRE,
2259                 MSHV_GPA_HOST_ACCESS_BIT_READABLE,
2260                 MSHV_GPA_HOST_ACCESS_BIT_WRITABLE
2261             );
2262 
2263             // SAFETY: gpa_list initialized with gpas.len() and now it is being turned into
2264             // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold
2265             // everything from gpas.
2266             unsafe {
2267                 let gpas_slice: &mut [u64] = gpa_list[0].guest_pfns.as_mut_slice(gpas.len());
2268                 gpas_slice.copy_from_slice(gpas.as_slice());
2269             }
2270 
2271             self.fd
2272                 .modify_gpa_host_access(&gpa_list[0])
2273                 .map_err(|e| vm::HypervisorVmError::ModifyGpaHostAccess(e.into()))?;
2274 
2275             for acquired_gpa in gpas {
2276                 self.host_access_pages.rcu(|bitmap| {
2277                     let bm = bitmap.clone();
2278                     bm.set_bit((acquired_gpa >> PAGE_SHIFT) as usize);
2279                     bm
2280                 });
2281             }
2282         }
2283 
2284         Ok(())
2285     }
2286 }
2287