xref: /cloud-hypervisor/hypervisor/src/mshv/mod.rs (revision 655d512523353961a67cf19cf3bc227d403898f0)
1 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
2 //
3 // Copyright © 2020, Microsoft Corporation
4 //
5 
6 use std::any::Any;
7 use std::collections::HashMap;
8 #[cfg(feature = "sev_snp")]
9 use std::num::NonZeroUsize;
10 use std::sync::{Arc, RwLock};
11 
12 #[cfg(feature = "sev_snp")]
13 use arc_swap::ArcSwap;
14 use mshv_bindings::*;
15 use mshv_ioctls::{set_registers_64, InterruptRequest, Mshv, NoDatamatch, VcpuFd, VmFd, VmType};
16 use vfio_ioctls::VfioDeviceFd;
17 use vm::DataMatch;
18 #[cfg(feature = "sev_snp")]
19 use vm_memory::bitmap::AtomicBitmap;
20 
21 use crate::arch::emulator::PlatformEmulator;
22 #[cfg(target_arch = "x86_64")]
23 use crate::arch::x86::emulator::Emulator;
24 use crate::mshv::emulator::MshvEmulatorContext;
25 use crate::vm::{self, InterruptSourceConfig, VmOps};
26 use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType};
27 #[cfg(feature = "sev_snp")]
28 mod snp_constants;
29 // x86_64 dependencies
30 #[cfg(target_arch = "x86_64")]
31 pub mod x86_64;
32 #[cfg(target_arch = "x86_64")]
33 use std::fs::File;
34 use std::os::unix::io::AsRawFd;
35 
36 #[cfg(feature = "sev_snp")]
37 use igvm_defs::IGVM_VHS_SNP_ID_BLOCK;
38 #[cfg(feature = "sev_snp")]
39 use snp_constants::*;
40 use vmm_sys_util::eventfd::EventFd;
41 #[cfg(target_arch = "x86_64")]
42 pub use x86_64::*;
43 #[cfg(target_arch = "x86_64")]
44 pub use x86_64::{emulator, VcpuMshvState};
45 ///
46 /// Export generically-named wrappers of mshv-bindings for Unix-based platforms
47 ///
48 pub use {
49     mshv_bindings::mshv_create_device as CreateDevice,
50     mshv_bindings::mshv_device_attr as DeviceAttr, mshv_ioctls, mshv_ioctls::DeviceFd,
51 };
52 
53 #[cfg(target_arch = "x86_64")]
54 use crate::arch::x86::{CpuIdEntry, FpuState, MsrEntry};
55 #[cfg(target_arch = "x86_64")]
56 use crate::ClockData;
57 use crate::{
58     CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
59     USER_MEMORY_REGION_ADJUSTABLE, USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ,
60     USER_MEMORY_REGION_WRITE,
61 };
62 
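/// Shift corresponding to the 4 KiB (1 << 12 bytes) guest page size used throughout this module.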
63 pub const PAGE_SHIFT: usize = 12;
64 
65 impl From<mshv_user_mem_region> for UserMemoryRegion {
66     fn from(region: mshv_user_mem_region) -> Self {
67         let mut flags: u32 = USER_MEMORY_REGION_READ | USER_MEMORY_REGION_ADJUSTABLE;
68         if region.flags & (1 << MSHV_SET_MEM_BIT_WRITABLE) != 0 {
69             flags |= USER_MEMORY_REGION_WRITE;
70         }
71         if region.flags & (1 << MSHV_SET_MEM_BIT_EXECUTABLE) != 0 {
72             flags |= USER_MEMORY_REGION_EXECUTE;
73         }
74 
75         UserMemoryRegion {
76             guest_phys_addr: (region.guest_pfn << PAGE_SHIFT as u64)
77                 + (region.userspace_addr & ((1 << PAGE_SHIFT) - 1)),
78             memory_size: region.size,
79             userspace_addr: region.userspace_addr,
80             flags,
81             ..Default::default()
82         }
83     }
84 }
85 
86 #[cfg(target_arch = "x86_64")]
87 impl From<MshvClockData> for ClockData {
88     fn from(d: MshvClockData) -> Self {
89         ClockData::Mshv(d)
90     }
91 }
92 
93 #[cfg(target_arch = "x86_64")]
94 impl From<ClockData> for MshvClockData {
95     fn from(ms: ClockData) -> Self {
96         match ms {
97             ClockData::Mshv(s) => s,
98             /* Needed in case other hypervisors are enabled */
99             #[allow(unreachable_patterns)]
100             _ => unreachable!("MSHV clock data is not valid"),
101         }
102     }
103 }
104 
105 impl From<UserMemoryRegion> for mshv_user_mem_region {
106     fn from(region: UserMemoryRegion) -> Self {
107         let mut flags: u8 = 0;
108         if region.flags & USER_MEMORY_REGION_WRITE != 0 {
109             flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
110         }
111         if region.flags & USER_MEMORY_REGION_EXECUTE != 0 {
112             flags |= 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
113         }
114 
115         mshv_user_mem_region {
116             guest_pfn: region.guest_phys_addr >> PAGE_SHIFT,
117             size: region.memory_size,
118             userspace_addr: region.userspace_addr,
119             flags,
120             ..Default::default()
121         }
122     }
123 }
124 
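// Editor's sketch (not part of the original file): a minimal test illustrating
// the PFN/flag round-trip performed by the two conversions above, assuming
// 4 KiB pages and a page-aligned guest physical address.
#[cfg(test)]
mod user_memory_region_conversion_sketch {
    use super::*;

    #[test]
    fn gpa_and_flags_round_trip() {
        let region = UserMemoryRegion {
            guest_phys_addr: 0x20_0000,
            memory_size: 0x1000,
            userspace_addr: 0x7f00_0000_0000,
            flags: USER_MEMORY_REGION_READ | USER_MEMORY_REGION_WRITE,
            ..Default::default()
        };

        // Forward conversion: the GPA becomes a guest PFN and WRITE maps to
        // the MSHV writable bit.
        let mshv_region: mshv_user_mem_region = region.into();
        assert_eq!(mshv_region.guest_pfn, 0x20_0000 >> PAGE_SHIFT);
        assert_ne!(mshv_region.flags & (1 << MSHV_SET_MEM_BIT_WRITABLE), 0);

        // Backward conversion recovers the original GPA and flags.
        let back: UserMemoryRegion = mshv_region.into();
        assert_eq!(back.guest_phys_addr, 0x20_0000);
        assert_ne!(back.flags & USER_MEMORY_REGION_WRITE, 0);
    }
}
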
125 impl From<mshv_ioctls::IoEventAddress> for IoEventAddress {
126     fn from(a: mshv_ioctls::IoEventAddress) -> Self {
127         match a {
128             mshv_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
129             mshv_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
130         }
131     }
132 }
133 
134 impl From<IoEventAddress> for mshv_ioctls::IoEventAddress {
135     fn from(a: IoEventAddress) -> Self {
136         match a {
137             IoEventAddress::Pio(x) => Self::Pio(x),
138             IoEventAddress::Mmio(x) => Self::Mmio(x),
139         }
140     }
141 }
142 
143 impl From<VcpuMshvState> for CpuState {
144     fn from(s: VcpuMshvState) -> Self {
145         CpuState::Mshv(s)
146     }
147 }
148 
149 impl From<CpuState> for VcpuMshvState {
150     fn from(s: CpuState) -> Self {
151         match s {
152             CpuState::Mshv(s) => s,
153             /* Needed in case other hypervisors are enabled */
154             #[allow(unreachable_patterns)]
155             _ => panic!("CpuState is not valid"),
156         }
157     }
158 }
159 
160 impl From<mshv_bindings::StandardRegisters> for crate::StandardRegisters {
161     fn from(s: mshv_bindings::StandardRegisters) -> Self {
162         crate::StandardRegisters::Mshv(s)
163     }
164 }
165 
166 impl From<crate::StandardRegisters> for mshv_bindings::StandardRegisters {
167     fn from(e: crate::StandardRegisters) -> Self {
168         match e {
169             crate::StandardRegisters::Mshv(e) => e,
170             /* Needed in case other hypervisors are enabled */
171             #[allow(unreachable_patterns)]
172             _ => panic!("StandardRegisters are not valid"),
173         }
174     }
175 }
176 
177 impl From<mshv_user_irq_entry> for IrqRoutingEntry {
178     fn from(s: mshv_user_irq_entry) -> Self {
179         IrqRoutingEntry::Mshv(s)
180     }
181 }
182 
183 impl From<IrqRoutingEntry> for mshv_user_irq_entry {
184     fn from(e: IrqRoutingEntry) -> Self {
185         match e {
186             IrqRoutingEntry::Mshv(e) => e,
187             /* Needed in case other hypervisors are enabled */
188             #[allow(unreachable_patterns)]
189             _ => panic!("IrqRoutingEntry is not valid"),
190         }
191     }
192 }
193 
194 #[cfg(target_arch = "aarch64")]
195 impl From<mshv_bindings::MshvRegList> for crate::RegList {
196     fn from(s: mshv_bindings::MshvRegList) -> Self {
197         crate::RegList::Mshv(s)
198     }
199 }
200 
201 #[cfg(target_arch = "aarch64")]
202 impl From<crate::RegList> for mshv_bindings::MshvRegList {
203     fn from(e: crate::RegList) -> Self {
204         match e {
205             crate::RegList::Mshv(e) => e,
206             /* Needed in case other hypervisors are enabled */
207             #[allow(unreachable_patterns)]
208             _ => panic!("RegList is not valid"),
209         }
210     }
211 }
212 
213 #[cfg(target_arch = "aarch64")]
214 impl From<mshv_bindings::MshvVcpuInit> for crate::VcpuInit {
215     fn from(s: mshv_bindings::MshvVcpuInit) -> Self {
216         crate::VcpuInit::Mshv(s)
217     }
218 }
219 
220 #[cfg(target_arch = "aarch64")]
221 impl From<crate::VcpuInit> for mshv_bindings::MshvVcpuInit {
222     fn from(e: crate::VcpuInit) -> Self {
223         match e {
224             crate::VcpuInit::Mshv(e) => e,
225             /* Needed in case other hypervisors are enabled */
226             #[allow(unreachable_patterns)]
227             _ => panic!("VcpuInit is not valid"),
228         }
229     }
230 }
231 
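/// Dirty-log slot for a registered guest memory region: base guest PFN and region size in bytes.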
232 struct MshvDirtyLogSlot {
233     guest_pfn: u64,
234     memory_size: u64,
235 }
236 
237 /// Wrapper over mshv system ioctls.
238 pub struct MshvHypervisor {
239     mshv: Mshv,
240 }
241 
242 impl MshvHypervisor {
243     #[cfg(target_arch = "x86_64")]
244     ///
245     /// Retrieve the list of MSRs supported by MSHV.
246     ///
247     fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
248         self.mshv
249             .get_msr_index_list()
250             .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
251     }
252 
253     fn create_vm_with_type_and_memory_int(
254         &self,
255         vm_type: u64,
256         #[cfg(feature = "sev_snp")] _mem_size: Option<u64>,
257     ) -> hypervisor::Result<Arc<dyn crate::Vm>> {
258         let mshv_vm_type: VmType = match VmType::try_from(vm_type) {
259             Ok(vm_type) => vm_type,
260             Err(_) => return Err(hypervisor::HypervisorError::UnsupportedVmType()),
261         };
262         let fd: VmFd;
263         loop {
264             match self.mshv.create_vm_with_type(mshv_vm_type) {
265                 Ok(res) => fd = res,
266                 Err(e) => {
267                     if e.errno() == libc::EINTR {
268                         // If the error returned is EINTR, which means the
269                         // ioctl has been interrupted, we have to retry as
270                         // this can't be considered a regular error.
271                         continue;
272                     } else {
273                         return Err(hypervisor::HypervisorError::VmCreate(e.into()));
274                     }
275                 }
276             }
277             break;
278         }
279 
280         // Set additional partition properties for the SEV-SNP partition.
281         #[cfg(target_arch = "x86_64")]
282         if mshv_vm_type == VmType::Snp {
283             let snp_policy = snp::get_default_snp_guest_policy();
284             let vmgexit_offloads = snp::get_default_vmgexit_offload_features();
285             // SAFETY: access union fields
286             unsafe {
287                 debug!(
288                     "Setting the partition isolation policy as: 0x{:x}",
289                     snp_policy.as_uint64
290                 );
291                 fd.set_partition_property(
292                     hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_POLICY,
293                     snp_policy.as_uint64,
294                 )
295                 .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
296                 debug!(
297                     "Setting the partition property to enable VMGEXIT offloads as : 0x{:x}",
298                     vmgexit_offloads.as_uint64
299                 );
300                 fd.set_partition_property(
301                     hv_partition_property_code_HV_PARTITION_PROPERTY_SEV_VMGEXIT_OFFLOADS,
302                     vmgexit_offloads.as_uint64,
303                 )
304                 .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
305             }
306         }
307 
308         // The default Microsoft Hypervisor behavior for an unimplemented MSR
309         // is to send a fault to the guest if it tries to access it. We
310         // override this with a more suitable option: ignore writes from the
311         // guest and return zero when the guest attempts to read an
312         // unimplemented MSR.
313         #[cfg(target_arch = "x86_64")]
314         fd.set_partition_property(
315             hv_partition_property_code_HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION,
316             hv_unimplemented_msr_action_HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO as u64,
317         )
318         .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
319 
320         // Always create a frozen partition
321         fd.set_partition_property(
322             hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
323             1u64,
324         )
325         .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
326 
327         let vm_fd = Arc::new(fd);
328 
329         #[cfg(target_arch = "x86_64")]
330         {
331             let msr_list = self.get_msr_list()?;
332             let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
333             let mut msrs: Vec<MsrEntry> = vec![
334                 MsrEntry {
335                     ..Default::default()
336                 };
337                 num_msrs
338             ];
339             let indices = msr_list.as_slice();
340             for (pos, index) in indices.iter().enumerate() {
341                 msrs[pos].index = *index;
342             }
343 
344             Ok(Arc::new(MshvVm {
345                 fd: vm_fd,
346                 msrs,
347                 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
348                 #[cfg(feature = "sev_snp")]
349                 sev_snp_enabled: mshv_vm_type == VmType::Snp,
350                 #[cfg(feature = "sev_snp")]
351                 host_access_pages: ArcSwap::new(
352                     AtomicBitmap::new(
353                         _mem_size.unwrap_or_default() as usize,
354                         NonZeroUsize::new(HV_PAGE_SIZE).unwrap(),
355                     )
356                     .into(),
357                 ),
358             }))
359         }
360 
361         #[cfg(target_arch = "aarch64")]
362         {
363             Ok(Arc::new(MshvVm {
364                 fd: vm_fd,
365                 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
366             }))
367         }
368     }
369 }
370 
371 impl MshvHypervisor {
372     /// Create a hypervisor based on Mshv
373     #[allow(clippy::new_ret_no_self)]
374     pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
375         let mshv_obj =
376             Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
377         Ok(Arc::new(MshvHypervisor { mshv: mshv_obj }))
378     }
379     /// Check if the hypervisor is available
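    ///
    /// # Examples
    ///
    /// ```
    /// use hypervisor::mshv::MshvHypervisor;
    /// // Editor's sketch: probe for /dev/mshv before creating the hypervisor.
    /// if MshvHypervisor::is_available().unwrap() {
    ///     let _mshv = MshvHypervisor::new().unwrap();
    /// }
    /// ```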
380     pub fn is_available() -> hypervisor::Result<bool> {
381         match std::fs::metadata("/dev/mshv") {
382             Ok(_) => Ok(true),
383             Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
384             Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
385                 err.into(),
386             )),
387         }
388     }
389 }
390 
391 /// Implementation of Hypervisor trait for Mshv
392 ///
393 /// # Examples
394 ///
395 /// ```
396 /// use hypervisor::mshv::MshvHypervisor;
397 /// use std::sync::Arc;
398 /// let mshv = MshvHypervisor::new().unwrap();
399 /// let hypervisor = Arc::new(mshv);
400 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
401 /// ```
402 impl hypervisor::Hypervisor for MshvHypervisor {
403     ///
404     /// Returns the type of the hypervisor
405     ///
406     fn hypervisor_type(&self) -> HypervisorType {
407         HypervisorType::Mshv
408     }
409 
410     ///
411     /// Create a Vm of a specific type using the underlying hypervisor, passing the memory size.
412     /// Returns a hypervisor-agnostic Vm trait object.
413     ///
414     /// # Examples
415     ///
416     /// ```
417     /// use hypervisor::mshv::MshvHypervisor;
418     /// let hypervisor = MshvHypervisor::new().unwrap();
419     /// #[cfg(feature = "sev_snp")]
420     /// let vm = hypervisor.create_vm_with_type_and_memory(0, 512 * 1024 * 1024).unwrap();
421     /// ```
422     fn create_vm_with_type_and_memory(
423         &self,
424         vm_type: u64,
425         #[cfg(feature = "sev_snp")] _mem_size: u64,
426     ) -> hypervisor::Result<Arc<dyn vm::Vm>> {
427         self.create_vm_with_type_and_memory_int(
428             vm_type,
429             #[cfg(feature = "sev_snp")]
430             Some(_mem_size),
431         )
432     }
433 
434     fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn crate::Vm>> {
435         self.create_vm_with_type_and_memory_int(
436             vm_type,
437             #[cfg(feature = "sev_snp")]
438             None,
439         )
440     }
441 
442     /// Create an MSHV VM object and return it as a Vm trait object
443     ///
444     /// # Examples
445     ///
446     /// ```
447     /// # extern crate hypervisor;
448     /// use hypervisor::mshv::MshvHypervisor;
449     /// use hypervisor::mshv::MshvVm;
450     /// let hypervisor = MshvHypervisor::new().unwrap();
451     /// let vm = hypervisor.create_vm().unwrap();
452     /// ```
453     fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
454         let vm_type = 0;
455         self.create_vm_with_type(vm_type)
456     }
457     #[cfg(target_arch = "x86_64")]
458     ///
459     /// Get the supported CpuID
460     ///
461     fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
462         let mut cpuid = Vec::new();
463         let functions: [u32; 2] = [0x1, 0xb];
464 
465         for function in functions {
466             cpuid.push(CpuIdEntry {
467                 function,
468                 ..Default::default()
469             });
470         }
471         Ok(cpuid)
472     }
473 
474     /// Get maximum number of vCPUs
475     fn get_max_vcpus(&self) -> u32 {
476         // TODO: Using HV_MAXIMUM_PROCESSORS would be better
477         // but the ioctl API is limited to u8
478         256
479     }
480 
481     fn get_guest_debug_hw_bps(&self) -> usize {
482         0
483     }
484 
485     #[cfg(target_arch = "aarch64")]
486     ///
487     /// Retrieve AArch64 host maximum IPA size supported by MSHV.
488     ///
489     fn get_host_ipa_limit(&self) -> i32 {
490         let host_ipa = self.mshv.get_host_partition_property(
491             hv_partition_property_code_HV_PARTITION_PROPERTY_PHYSICAL_ADDRESS_WIDTH as u64,
492         );
493 
494         match host_ipa {
495             Ok(ipa) => ipa,
496             Err(e) => {
497                 panic!("Failed to get host IPA limit: {:?}", e);
498             }
499         }
500     }
501 }
502 
503 #[cfg(feature = "sev_snp")]
504 struct Ghcb(*mut svm_ghcb_base);
505 
506 #[cfg(feature = "sev_snp")]
507 // SAFETY: the struct wraps a pointer to the GHCB page in the hypervisor,
508 // so it is safe to send across threads
509 unsafe impl Send for Ghcb {}
510 
511 #[cfg(feature = "sev_snp")]
512 // SAFETY: the struct wraps a pointer to the GHCB page in the hypervisor.
513 // Sync is only required to satisfy the Vcpu trait bounds; the shared
514 // access is never actually exercised.
515 unsafe impl Sync for Ghcb {}
516 
517 /// Vcpu struct for Microsoft Hypervisor
518 pub struct MshvVcpu {
519     fd: VcpuFd,
520     vp_index: u8,
521     #[cfg(target_arch = "x86_64")]
522     cpuid: Vec<CpuIdEntry>,
523     #[cfg(target_arch = "x86_64")]
524     msrs: Vec<MsrEntry>,
525     vm_ops: Option<Arc<dyn vm::VmOps>>,
526     vm_fd: Arc<VmFd>,
527     #[cfg(feature = "sev_snp")]
528     ghcb: Option<Ghcb>,
529     #[cfg(feature = "sev_snp")]
530     host_access_pages: ArcSwap<AtomicBitmap>,
531 }
532 
533 /// Implementation of Vcpu trait for Microsoft Hypervisor
534 ///
535 /// # Examples
536 ///
537 /// ```
538 /// use hypervisor::mshv::MshvHypervisor;
539 /// use std::sync::Arc;
540 /// let mshv = MshvHypervisor::new().unwrap();
541 /// let hypervisor = Arc::new(mshv);
542 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
543 /// let vcpu = vm.create_vcpu(0, None).unwrap();
544 /// ```
545 impl cpu::Vcpu for MshvVcpu {
546     ///
547     /// Returns StandardRegisters with default values set
548     ///
549     #[cfg(target_arch = "x86_64")]
550     fn create_standard_regs(&self) -> crate::StandardRegisters {
551         mshv_bindings::StandardRegisters::default().into()
552     }
553     #[cfg(target_arch = "x86_64")]
554     ///
555     /// Returns the vCPU general purpose registers.
556     ///
557     fn get_regs(&self) -> cpu::Result<crate::StandardRegisters> {
558         Ok(self
559             .fd
560             .get_regs()
561             .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
562             .into())
563     }
564 
565     #[cfg(target_arch = "x86_64")]
566     ///
567     /// Sets the vCPU general purpose registers.
568     ///
569     fn set_regs(&self, regs: &crate::StandardRegisters) -> cpu::Result<()> {
570         let regs = (*regs).into();
571         self.fd
572             .set_regs(&regs)
573             .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
574     }
575 
576     #[cfg(target_arch = "x86_64")]
577     ///
578     /// Returns the vCPU special registers.
579     ///
580     fn get_sregs(&self) -> cpu::Result<crate::arch::x86::SpecialRegisters> {
581         Ok(self
582             .fd
583             .get_sregs()
584             .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
585             .into())
586     }
587 
588     #[cfg(target_arch = "x86_64")]
589     ///
590     /// Sets the vCPU special registers.
591     ///
592     fn set_sregs(&self, sregs: &crate::arch::x86::SpecialRegisters) -> cpu::Result<()> {
593         let sregs = (*sregs).into();
594         self.fd
595             .set_sregs(&sregs)
596             .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
597     }
598 
599     #[cfg(target_arch = "x86_64")]
600     ///
601     /// Returns the floating point state (FPU) from the vCPU.
602     ///
603     fn get_fpu(&self) -> cpu::Result<FpuState> {
604         Ok(self
605             .fd
606             .get_fpu()
607             .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
608             .into())
609     }
610 
611     #[cfg(target_arch = "x86_64")]
612     ///
613     /// Set the floating point state (FPU) of a vCPU.
614     ///
615     fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
616         let fpu: mshv_bindings::FloatingPointUnit = (*fpu).clone().into();
617         self.fd
618             .set_fpu(&fpu)
619             .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
620     }
621 
622     #[cfg(target_arch = "x86_64")]
623     ///
624     /// Returns the model-specific registers (MSR) for this vCPU.
625     ///
626     fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
627         let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
628         let mut mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
629         let succ = self
630             .fd
631             .get_msrs(&mut mshv_msrs)
632             .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;
633 
634         msrs[..succ].copy_from_slice(
635             &mshv_msrs.as_slice()[..succ]
636                 .iter()
637                 .map(|e| (*e).into())
638                 .collect::<Vec<MsrEntry>>(),
639         );
640 
641         Ok(succ)
642     }
643 
644     #[cfg(target_arch = "x86_64")]
645     ///
646     /// Setup the model-specific registers (MSR) for this vCPU.
647     /// Returns the number of MSR entries actually written.
648     ///
649     fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
650         let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
651         let mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
652         self.fd
653             .set_msrs(&mshv_msrs)
654             .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
655     }
656 
657     #[cfg(target_arch = "x86_64")]
658     ///
659     /// X86 specific call to enable HyperV SynIC
660     ///
661     fn enable_hyperv_synic(&self) -> cpu::Result<()> {
662         /* We always have SynIC enabled on MSHV */
663         Ok(())
664     }
665 
666     #[allow(non_upper_case_globals)]
667     fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
668         match self.fd.run() {
669             Ok(x) => match x.header.message_type {
670                 hv_message_type_HVMSG_X64_HALT => {
671                     debug!("HALT");
672                     Ok(cpu::VmExit::Reset)
673                 }
674                 hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
675                     warn!("TRIPLE FAULT");
676                     Ok(cpu::VmExit::Shutdown)
677                 }
678                 #[cfg(target_arch = "x86_64")]
679                 hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
680                     let info = x.to_ioport_info().unwrap();
681                     let access_info = info.access_info;
682                     // SAFETY: access_info is valid, otherwise we won't be here
683                     let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
684                     let is_write = info.header.intercept_access_type == 1;
685                     let port = info.port_number;
686                     let mut data: [u8; 4] = [0; 4];
687                     let mut ret_rax = info.rax;
688 
689                     /*
690                      * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
691                      *
692                      * Cloud Hypervisor doesn't support fw_cfg at the moment. It does support 0x402
693                      * under the "fwdebug" feature flag. But that feature is not enabled by default
694                      * and is considered legacy.
695                      *
696                      * OVMF unconditionally pokes these IO ports with string IO.
697                      *
698                      * Instead of trying to implement string IO support, which would not buy us
699                      * much at this point, skip those ports explicitly to avoid panicking.
700                      *
701                      * Proper string IO support can be added once we gain the ability to translate
702                      * guest virtual addresses to guest physical addresses on MSHV.
703                      */
704                     match port {
705                         0x402 | 0x510 | 0x511 | 0x514 => {
706                             let insn_len = info.header.instruction_length() as u64;
707 
708                             /* Advance RIP and update RAX */
709                             let arr_reg_name_value = [
710                                 (
711                                     hv_register_name_HV_X64_REGISTER_RIP,
712                                     info.header.rip + insn_len,
713                                 ),
714                                 (hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
715                             ];
716                             set_registers_64!(self.fd, arr_reg_name_value)
717                                 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
718                             return Ok(cpu::VmExit::Ignore);
719                         }
720                         _ => {}
721                     }
722 
723                     assert!(
724                         // SAFETY: access_info is valid, otherwise we won't be here
725                         (unsafe { access_info.__bindgen_anon_1.string_op() } != 1),
726                         "String IN/OUT not supported"
727                     );
728                     assert!(
729                         // SAFETY: access_info is valid, otherwise we won't be here
730                         (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1),
731                         "Rep IN/OUT not supported"
732                     );
733 
734                     if is_write {
735                         let data = (info.rax as u32).to_le_bytes();
736                         if let Some(vm_ops) = &self.vm_ops {
737                             vm_ops
738                                 .pio_write(port.into(), &data[0..len])
739                                 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
740                         }
741                     } else {
742                         if let Some(vm_ops) = &self.vm_ops {
743                             vm_ops
744                                 .pio_read(port.into(), &mut data[0..len])
745                                 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
746                         }
747 
748                         let v = u32::from_le_bytes(data);
749                         /* Preserve high bits in EAX but clear out high bits in RAX */
750                         let mask = 0xffffffff >> (32 - len * 8);
751                         let eax = (info.rax as u32 & !mask) | (v & mask);
752                         ret_rax = eax as u64;
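                        /* Editor's note: e.g. for a 1-byte read (len == 1), mask == 0xff,
                         * so only AL takes the device data, the rest of EAX is preserved,
                         * and bits 63:32 of RAX are cleared. */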
753                     }
754 
755                     let insn_len = info.header.instruction_length() as u64;
756 
757                     /* Advance RIP and update RAX */
758                     let arr_reg_name_value = [
759                         (
760                             hv_register_name_HV_X64_REGISTER_RIP,
761                             info.header.rip + insn_len,
762                         ),
763                         (hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
764                     ];
765                     set_registers_64!(self.fd, arr_reg_name_value)
766                         .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
767                     Ok(cpu::VmExit::Ignore)
768                 }
769                 #[cfg(target_arch = "x86_64")]
770                 msg_type @ (hv_message_type_HVMSG_UNMAPPED_GPA
771                 | hv_message_type_HVMSG_GPA_INTERCEPT) => {
772                     let info = x.to_memory_info().unwrap();
773                     let insn_len = info.instruction_byte_count as usize;
774                     let gva = info.guest_virtual_address;
775                     let gpa = info.guest_physical_address;
776 
777                     debug!("Exit ({:?}) GVA {:x} GPA {:x}", msg_type, gva, gpa);
778 
779                     let mut context = MshvEmulatorContext {
780                         vcpu: self,
781                         map: (gva, gpa),
782                     };
783 
784                     // Create a new emulator.
785                     let mut emul = Emulator::new(&mut context);
786 
787                     // Emulate the trapped instruction, and only the first one.
788                     let new_state = emul
789                         .emulate_first_insn(
790                             self.vp_index as usize,
791                             &info.instruction_bytes[..insn_len],
792                         )
793                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
794 
795                     // Set CPU state back.
796                     context
797                         .set_cpu_state(self.vp_index as usize, new_state)
798                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
799 
800                     Ok(cpu::VmExit::Ignore)
801                 }
802                 #[cfg(feature = "sev_snp")]
803                 hv_message_type_HVMSG_GPA_ATTRIBUTE_INTERCEPT => {
804                     let info = x.to_gpa_attribute_info().unwrap();
805                     let host_vis = info.__bindgen_anon_1.host_visibility();
806                     if host_vis >= HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE {
807                         warn!("Ignored attribute intercept with full host visibility");
808                         return Ok(cpu::VmExit::Ignore);
809                     }
810 
811                     let num_ranges = info.__bindgen_anon_1.range_count();
812                     assert!(num_ranges >= 1);
813                     if num_ranges > 1 {
814                         return Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
815                             "Unhandled VCPU exit(GPA_ATTRIBUTE_INTERCEPT): Expected num_ranges to be 1 but found num_ranges {:?}",
816                             num_ranges
817                         )));
818                     }
819 
820                     // TODO: we could also deny the request with HvCallCompleteIntercept
821                     let mut gpas = Vec::new();
822                     let ranges = info.ranges;
823                     let (gfn_start, gfn_count) = snp::parse_gpa_range(ranges[0]).unwrap();
824                     debug!(
825                         "Releasing pages: gfn_start: {:x?}, gfn_count: {:?}",
826                         gfn_start, gfn_count
827                     );
828                     let gpa_start = gfn_start * HV_PAGE_SIZE as u64;
829                     for i in 0..gfn_count {
830                         gpas.push(gpa_start + i * HV_PAGE_SIZE as u64);
831                     }
832 
833                     let mut gpa_list =
834                         vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len());
835                     gpa_list[0].page_count = gpas.len() as u64;
836                     gpa_list[0].flags = 0;
837                     if host_vis & HV_MAP_GPA_READABLE != 0 {
838                         gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_READABLE;
839                     }
840                     if host_vis & HV_MAP_GPA_WRITABLE != 0 {
841                         gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_WRITABLE;
842                     }
843 
844                     // SAFETY: gpa_list was allocated with room for gpas.len() entries, and
845                     // gpas_slice is created over the same gpas.len() elements, so it is
846                     // guaranteed to be large enough to hold everything from gpas.
847                     unsafe {
848                         let gpas_slice: &mut [u64] =
849                             gpa_list[0].guest_pfns.as_mut_slice(gpas.len());
850                         gpas_slice.copy_from_slice(gpas.as_slice());
851                     }
852 
853                     self.vm_fd
854                         .modify_gpa_host_access(&gpa_list[0])
855                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(anyhow!(
856                             "Unhandled VCPU exit: attribute intercept - couldn't modify host access {}", e
857                         )))?;
858                     // Guest is revoking the shared access, so we need to update the bitmap
859                     self.host_access_pages.rcu(|_bitmap| {
860                         let bm = self.host_access_pages.load().as_ref().clone();
861                         bm.reset_addr_range(gpa_start as usize, gfn_count as usize * HV_PAGE_SIZE);
862                         bm
863                     });
864                     Ok(cpu::VmExit::Ignore)
865                 }
866                 #[cfg(target_arch = "x86_64")]
867                 hv_message_type_HVMSG_UNACCEPTED_GPA => {
868                     let info = x.to_memory_info().unwrap();
869                     let gva = info.guest_virtual_address;
870                     let gpa = info.guest_physical_address;
871 
872                     Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
873                         "Unhandled VCPU exit: Unaccepted GPA({:x}) found at GVA({:x})",
874                         gpa,
875                         gva,
876                     )))
877                 }
878                 #[cfg(target_arch = "x86_64")]
879                 hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
880                     let info = x.to_cpuid_info().unwrap();
881                     debug!("cpuid eax: {:x}", { info.rax });
882                     Ok(cpu::VmExit::Ignore)
883                 }
884                 #[cfg(target_arch = "x86_64")]
885                 hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
886                     let info = x.to_msr_info().unwrap();
887                     if info.header.intercept_access_type == 0 {
888                         debug!("msr read: {:x}", { info.msr_number });
889                     } else {
890                         debug!("msr write: {:x}", { info.msr_number });
891                     }
892                     Ok(cpu::VmExit::Ignore)
893                 }
894                 #[cfg(target_arch = "x86_64")]
895                 hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
896                     //TODO: Handler for VMCALL here.
897                     let info = x.to_exception_info().unwrap();
898                     debug!("Exception Info {:?}", { info.exception_vector });
899                     Ok(cpu::VmExit::Ignore)
900                 }
901                 #[cfg(target_arch = "x86_64")]
902                 hv_message_type_HVMSG_X64_APIC_EOI => {
903                     let info = x.to_apic_eoi_info().unwrap();
904                     // The kernel should dispatch the EOI to the correct thread.
905                     // Check the VP index is the same as the one we have.
906                     assert!(info.vp_index == self.vp_index as u32);
907                     // The interrupt vector in info is u32, but x86 only supports 256 vectors.
908                     // There is no good way to recover from this if the hypervisor messes around.
909                     // Just unwrap.
910                     Ok(cpu::VmExit::IoapicEoi(
911                         info.interrupt_vector.try_into().unwrap(),
912                     ))
913                 }
914                 #[cfg(feature = "sev_snp")]
915                 hv_message_type_HVMSG_X64_SEV_VMGEXIT_INTERCEPT => {
916                     let info = x.to_vmg_intercept_info().unwrap();
917                     let ghcb_data = info.ghcb_msr >> GHCB_INFO_BIT_WIDTH;
918                     let ghcb_msr = svm_ghcb_msr {
919                         as_uint64: info.ghcb_msr,
920                     };
921                     // Safe to unwrap: for a SEV-SNP guest the GHCB pointer is always
922                     // wrapped in the Option; otherwise this code path is never reached.
923                     let ghcb = self.ghcb.as_ref().unwrap().0;
924 
925                     // SAFETY: Accessing a union element from bindgen generated bindings.
926                     let ghcb_op = unsafe { ghcb_msr.__bindgen_anon_2.ghcb_info() as u32 };
927                     // Sanity check on the header fields before handling other operations.
928                     assert!(info.header.intercept_access_type == HV_INTERCEPT_ACCESS_EXECUTE as u8);
929 
930                     match ghcb_op {
931                         GHCB_INFO_HYP_FEATURE_REQUEST => {
932                             // Pre-condition: GHCB data must be zero
933                             assert!(ghcb_data == 0);
934                             let mut ghcb_response = GHCB_INFO_HYP_FEATURE_RESPONSE as u64;
935                             // Indicate support for basic SEV-SNP features
936                             ghcb_response |=
937                                 (GHCB_HYP_FEATURE_SEV_SNP << GHCB_INFO_BIT_WIDTH) as u64;
938                             // Indicate support for SEV-SNP AP creation
939                             ghcb_response |= (GHCB_HYP_FEATURE_SEV_SNP_AP_CREATION
940                                 << GHCB_INFO_BIT_WIDTH)
941                                 as u64;
942                             debug!(
943                                 "GHCB_INFO_HYP_FEATURE_REQUEST: Supported features: {:0x}",
944                                 ghcb_response
945                             );
946                             let arr_reg_name_value =
947                                 [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)];
948                             set_registers_64!(self.fd, arr_reg_name_value)
949                                 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
950                         }
951                         GHCB_INFO_REGISTER_REQUEST => {
952                             let mut ghcb_gpa = hv_x64_register_sev_ghcb::default();
953 
954                             // Disable the previously used GHCB page.
955                             self.disable_prev_ghcb_page()?;
956 
957                             // SAFETY: Accessing a union element from bindgen generated bindings.
958                             unsafe {
959                                 ghcb_gpa.__bindgen_anon_1.set_enabled(1);
960                                 ghcb_gpa
961                                     .__bindgen_anon_1
962                                     .set_page_number(ghcb_msr.__bindgen_anon_2.gpa_page_number());
963                             }
964                             // SAFETY: Accessing a union element from bindgen generated bindings.
965                             let reg_name_value = unsafe {
966                                 [(
967                                     hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
968                                     ghcb_gpa.as_uint64,
969                                 )]
970                             };
971 
972                             set_registers_64!(self.fd, reg_name_value)
973                                 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
974 
975                             let mut resp_ghcb_msr = svm_ghcb_msr::default();
976                             // SAFETY: Accessing a union element from bindgen generated bindings.
977                             unsafe {
978                                 resp_ghcb_msr
979                                     .__bindgen_anon_2
980                                     .set_ghcb_info(GHCB_INFO_REGISTER_RESPONSE as u64);
981                                 resp_ghcb_msr.__bindgen_anon_2.set_gpa_page_number(
982                                     ghcb_msr.__bindgen_anon_2.gpa_page_number(),
983                                 );
984                                 debug!("GHCB GPA is {:x}", ghcb_gpa.as_uint64);
985                             }
986                             // SAFETY: Accessing a union element from bindgen generated bindings.
987                             let reg_name_value = unsafe {
988                                 [(
989                                     hv_register_name_HV_X64_REGISTER_GHCB,
990                                     resp_ghcb_msr.as_uint64,
991                                 )]
992                             };
993 
994                             set_registers_64!(self.fd, reg_name_value)
995                                 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
996                         }
997                         GHCB_INFO_SEV_INFO_REQUEST => {
998                             let sev_cpuid_function = 0x8000_001F;
999                             let cpu_leaf = self
1000                                 .fd
1001                                 .get_cpuid_values(sev_cpuid_function, 0, 0, 0)
1002                                 .unwrap();
1003                             let ebx = cpu_leaf[1];
1004                             // First 6-byte of EBX represents page table encryption bit number
1005                             // The low 6 bits of EBX give the page table encryption bit position
1006                             let mut ghcb_response = GHCB_INFO_SEV_INFO_RESPONSE as u64;
1007 
1008                             // GHCBData[63:48] specifies the maximum GHCB protocol version supported
1009                             ghcb_response |= (GHCB_PROTOCOL_VERSION_MAX as u64) << 48;
1010                             // GHCBData[47:32] specifies the minimum GHCB protocol version supported
1011                             ghcb_response |= (GHCB_PROTOCOL_VERSION_MIN as u64) << 32;
1012                             // GHCBData[31:24] specifies the SEV page table encryption bit number.
1013                             ghcb_response |= (pbit_encryption as u64) << 24;
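                            // Editor's illustration: with GHCB_PROTOCOL_VERSION_MAX == 2,
                            // GHCB_PROTOCOL_VERSION_MIN == 1 and an encryption bit of 51
                            // (0x33), the response would read 0x0002_0001_3300_0001
                            // (assuming GHCB_INFO_SEV_INFO_RESPONSE == 0x001).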
1014 
1015                             let arr_reg_name_value =
1016                                 [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)];
1017                             set_registers_64!(self.fd, arr_reg_name_value)
1018                                 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
1019                         }
1020                         GHCB_INFO_NORMAL => {
1021                             let exit_code =
1022                                 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_code as u32;
1023 
1024                             match exit_code {
1025                                 SVM_EXITCODE_HV_DOORBELL_PAGE => {
1026                                     let exit_info1 =
1027                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32;
1028                                     match exit_info1 {
1029                                         SVM_NAE_HV_DOORBELL_PAGE_GET_PREFERRED => {
1030                                             // Hypervisor does not have any preference for doorbell GPA.
1031                                             let preferred_doorbell_gpa: u64 = 0xFFFFFFFFFFFFFFFF;
1032                                             set_svm_field_u64_ptr!(
1033                                                 ghcb,
1034                                                 exit_info2,
1035                                                 preferred_doorbell_gpa
1036                                             );
1037                                         }
1038                                         SVM_NAE_HV_DOORBELL_PAGE_SET => {
1039                                             let exit_info2 = info
1040                                                 .__bindgen_anon_2
1041                                                 .__bindgen_anon_1
1042                                                 .sw_exit_info2;
1043                                             let mut ghcb_doorbell_gpa =
1044                                                 hv_x64_register_sev_hv_doorbell::default();
1045                                             // SAFETY: Accessing a union element from bindgen generated bindings.
1046                                             unsafe {
1047                                                 ghcb_doorbell_gpa.__bindgen_anon_1.set_enabled(1);
1048                                                 ghcb_doorbell_gpa
1049                                                     .__bindgen_anon_1
1050                                                     .set_page_number(exit_info2 >> PAGE_SHIFT);
1051                                             }
1052                                             // SAFETY: Accessing a union element from bindgen generated bindings.
1053                                             let reg_names = unsafe {
1054                                                 [(
1055                                                     hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA,
1056                                                     ghcb_doorbell_gpa.as_uint64,
1057                                                 )]
1058                                             };
1059                                             set_registers_64!(self.fd, reg_names).map_err(|e| {
1060                                                 cpu::HypervisorCpuError::SetRegister(e.into())
1061                                             })?;
1062 
1063                                             set_svm_field_u64_ptr!(ghcb, exit_info2, exit_info2);
1064 
1065                                             // Clear the SW_EXIT_INFO1 register to indicate no error
1066                                             self.clear_swexit_info1()?;
1067                                         }
1068                                         SVM_NAE_HV_DOORBELL_PAGE_QUERY => {
1069                                             let mut reg_assocs = [hv_register_assoc {
1070                                                 name: hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA,
1071                                                 ..Default::default()
1072                                             }];
1073                                             self.fd.get_reg(&mut reg_assocs).unwrap();
1074                                             // SAFETY: Accessing a union element from bindgen generated bindings.
1075                                             let doorbell_gpa = unsafe { reg_assocs[0].value.reg64 };
1076 
1077                                             set_svm_field_u64_ptr!(ghcb, exit_info2, doorbell_gpa);
1078 
1079                                             // Clear the SW_EXIT_INFO1 register to indicate no error
1080                                             self.clear_swexit_info1()?;
1081                                         }
1082                                         SVM_NAE_HV_DOORBELL_PAGE_CLEAR => {
1083                                             set_svm_field_u64_ptr!(ghcb, exit_info2, 0);
1084                                         }
1085                                         _ => {
1086                                             panic!(
1087                                                 "SVM_EXITCODE_HV_DOORBELL_PAGE: Unhandled exit code: {:0x}",
1088                                                 exit_info1
1089                                             );
1090                                         }
1091                                     }
1092                                 }
1093                                 SVM_EXITCODE_IOIO_PROT => {
1094                                     let exit_info1 =
1095                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32;
1096                                     let port_info = hv_sev_vmgexit_port_info {
1097                                         as_uint32: exit_info1,
1098                                     };
1099 
1100                                     let port =
1101                                         // SAFETY: Accessing a union element from bindgen generated bindings.
1102                                         unsafe { port_info.__bindgen_anon_1.intercepted_port() };
1103                                     let mut len = 4;
1104                                     // SAFETY: Accessing a union element from bindgen generated bindings.
1105                                     unsafe {
1106                                         if port_info.__bindgen_anon_1.operand_size_16bit() == 1 {
1107                                             len = 2;
1108                                         } else if port_info.__bindgen_anon_1.operand_size_8bit()
1109                                             == 1
1110                                         {
1111                                             len = 1;
1112                                         }
1113                                     }
1114                                     let is_write =
1115                                         // SAFETY: Accessing a union element from bindgen generated bindings.
1116                                         unsafe { port_info.__bindgen_anon_1.access_type() == 0 };
1117                                     // SAFETY: Accessing the field from a mapped address
1118                                     let mut data = unsafe { (*ghcb).rax.to_le_bytes() };
1119 
1120                                     if is_write {
1121                                         if let Some(vm_ops) = &self.vm_ops {
1122                                             vm_ops.pio_write(port.into(), &data[..len]).map_err(
1123                                                 |e| cpu::HypervisorCpuError::RunVcpu(e.into()),
1124                                             )?;
1125                                         }
1126                                     } else {
1127                                         if let Some(vm_ops) = &self.vm_ops {
1128                                             vm_ops
1129                                                 .pio_read(port.into(), &mut data[..len])
1130                                                 .map_err(|e| {
1131                                                     cpu::HypervisorCpuError::RunVcpu(e.into())
1132                                                 })?;
1133                                         }
1134                                         set_svm_field_u64_ptr!(ghcb, rax, u64::from_le_bytes(data));
1135                                     }
1136 
1137                                     // Clear the SW_EXIT_INFO1 register to indicate no error
1138                                     self.clear_swexit_info1()?;
1139                                 }
1140                                 SVM_EXITCODE_MMIO_READ => {
1141                                     let src_gpa =
1142                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
1143                                     let data_len =
1144                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2
1145                                             as usize;
1146                                     // Sanity check to make sure data len is within supported range.
1147                                     assert!(data_len <= 0x8);
1148 
1149                                     let mut data: Vec<u8> = vec![0; data_len];
1150                                     if let Some(vm_ops) = &self.vm_ops {
1151                                         vm_ops.mmio_read(src_gpa, &mut data).map_err(|e| {
1152                                             cpu::HypervisorCpuError::RunVcpu(e.into())
1153                                         })?;
1154                                     }
1155                                     // Copy the data to the shared buffer of the GHCB page
1156                                     let mut buffer_data = [0; 8];
1157                                     buffer_data[..data_len].copy_from_slice(&data[..data_len]);
1158                                     // SAFETY: Updating the value of mapped area
1159                                     unsafe { (*ghcb).shared[0] = u64::from_le_bytes(buffer_data) };
1160 
1161                                     // Clear the SW_EXIT_INFO1 register to indicate no error
1162                                     self.clear_swexit_info1()?;
1163                                 }
1164                                 SVM_EXITCODE_MMIO_WRITE => {
1165                                     let dst_gpa =
1166                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
1167                                     let data_len =
1168                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2
1169                                             as usize;
1170                                     // Sanity check to make sure data len is within supported range.
1171                                     assert!(data_len <= 0x8);
1172 
1173                                     let mut data = vec![0; data_len];
1174                                     // SAFETY: Accessing data from a mapped address
1175                                     let bytes_shared_ghcb =
1176                                         unsafe { (*ghcb).shared[0].to_le_bytes() };
1177                                     data.copy_from_slice(&bytes_shared_ghcb[..data_len]);
1178 
1179                                     if let Some(vm_ops) = &self.vm_ops {
1180                                         vm_ops.mmio_write(dst_gpa, &data).map_err(|e| {
1181                                             cpu::HypervisorCpuError::RunVcpu(e.into())
1182                                         })?;
1183                                     }
1184 
1185                                     // Clear the SW_EXIT_INFO1 register to indicate no error
1186                                     self.clear_swexit_info1()?;
1187                                 }
1188                                 SVM_EXITCODE_SNP_GUEST_REQUEST
1189                                 | SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST => {
1190                                     if exit_code == SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST {
1191                                         info!("Fetching extended guest request is not supported");
1192                                         // We don't support extended guest request, so we just write empty data.
1193                                         // This matches the behavior of KVM in Linux 6.11.
1194 
1195                                         // Read RAX (data GPA) and RBX (page count) from the GHCB.
1196                                         // SAFETY: Accessing data from a mapped address
1197                                         let data_gpa = unsafe { (*ghcb).rax };
1198                                         // SAFETY: Accessing data from a mapped address
1199                                         let data_npages = unsafe { (*ghcb).rbx };
1200 
1201                                         if data_npages > 0 {
1202                                             // The certificates are terminated by 24 zero bytes.
1203                                             // TODO: Check whether data_gpa is the address of the shared buffer in the GHCB page;
1204                                             // in that case we should clear the shared buffer (24 bytes).
1205                                             self.gpa_write(data_gpa, &[0; 24])?;
1206                                         }
1207                                     }
1208 
1209                                     let req_gpa =
1210                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
1211                                     let rsp_gpa =
1212                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2;
1213 
1214                                     let mshv_psp_req =
1215                                         mshv_issue_psp_guest_request { req_gpa, rsp_gpa };
1216                                     self.vm_fd
1217                                         .psp_issue_guest_request(&mshv_psp_req)
1218                                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
1219 
1220                                     debug!(
1221                                         "SNP guest request: req_gpa {:0x} rsp_gpa {:0x}",
1222                                         req_gpa, rsp_gpa
1223                                     );
1224 
1225                                     set_svm_field_u64_ptr!(ghcb, exit_info2, 0);
1226                                 }
1227                                 SVM_EXITCODE_SNP_AP_CREATION => {
1228                                     let vmsa_gpa =
1229                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2;
1230                                     let apic_id =
1231                                         info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 >> 32;
1232                                     debug!(
1233                                         "SNP AP CREATE REQUEST with VMSA GPA {:0x}, and APIC ID {:?}",
1234                                         vmsa_gpa, apic_id
1235                                     );
1236 
1237                                     let mshv_ap_create_req = mshv_sev_snp_ap_create {
1238                                         vp_id: apic_id,
1239                                         vmsa_gpa,
1240                                     };
1241                                     self.vm_fd
1242                                         .sev_snp_ap_create(&mshv_ap_create_req)
1243                                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
1244 
1245                                     // Clear the SW_EXIT_INFO1 register to indicate no error
1246                                     self.clear_swexit_info1()?;
1247                                 }
1248                                 _ => panic!(
1249                                     "GHCB_INFO_NORMAL: Unhandled exit code: {:0x}",
1250                                     exit_code
1251                                 ),
1252                             }
1253                         }
1254                         _ => panic!("Unsupported VMGEXIT operation: {:0x}", ghcb_op),
1255                     }
1256 
1257                     Ok(cpu::VmExit::Ignore)
1258                 }
1259                 exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1260                     "Unhandled VCPU exit {:?}",
1261                     exit
1262                 ))),
1263             },
1264 
1265             Err(e) => match e.errno() {
1266                 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
1267                 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
1268                     "VCPU error {:?}",
1269                     e
1270                 ))),
1271             },
1272         }
1273     }
1274 
1275     #[cfg(target_arch = "aarch64")]
1276     fn init_pmu(&self, _irq: u32) -> cpu::Result<()> {
1277         unimplemented!()
1278     }
1279 
1280     #[cfg(target_arch = "aarch64")]
1281     fn has_pmu_support(&self) -> bool {
1282         unimplemented!()
1283     }
1284 
1285     #[cfg(target_arch = "aarch64")]
1286     fn setup_regs(&self, _cpu_id: u8, _boot_ip: u64, _fdt_start: u64) -> cpu::Result<()> {
1287         unimplemented!()
1288     }
1289 
1290     #[cfg(target_arch = "aarch64")]
1291     fn get_sys_reg(&self, _sys_reg: u32) -> cpu::Result<u64> {
1292         unimplemented!()
1293     }
1294 
1295     #[cfg(target_arch = "aarch64")]
1296     fn get_reg_list(&self, _reg_list: &mut RegList) -> cpu::Result<()> {
1297         unimplemented!()
1298     }
1299 
1300     #[cfg(target_arch = "aarch64")]
1301     fn vcpu_init(&self, _kvi: &VcpuInit) -> cpu::Result<()> {
1302         unimplemented!()
1303     }
1304 
1305     #[cfg(target_arch = "aarch64")]
1306     fn set_regs(&self, _regs: &StandardRegisters) -> cpu::Result<()> {
1307         unimplemented!()
1308     }
1309 
1310     #[cfg(target_arch = "aarch64")]
1311     fn get_regs(&self) -> cpu::Result<StandardRegisters> {
1312         unimplemented!()
1313     }
1314 
1315     #[cfg(target_arch = "aarch64")]
1316     fn vcpu_finalize(&self, _feature: i32) -> cpu::Result<()> {
1317         unimplemented!()
1318     }
1319 
1320     #[cfg(target_arch = "aarch64")]
1321     fn vcpu_get_finalized_features(&self) -> i32 {
1322         unimplemented!()
1323     }
1324 
1325     #[cfg(target_arch = "aarch64")]
1326     fn vcpu_set_processor_features(
1327         &self,
1328         _vm: &Arc<dyn crate::Vm>,
1329         _kvi: &mut crate::VcpuInit,
1330         _id: u8,
1331     ) -> cpu::Result<()> {
1332         unimplemented!()
1333     }
1334 
1335     #[cfg(target_arch = "aarch64")]
1336     fn create_vcpu_init(&self) -> crate::VcpuInit {
1337         unimplemented!();
1338     }
1339 
1340     #[cfg(target_arch = "x86_64")]
1341     ///
1342     /// X86 specific call to set up the CPUID registers.
1343     ///
1344     fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
1345         let cpuid: Vec<mshv_bindings::hv_cpuid_entry> = cpuid.iter().map(|e| (*e).into()).collect();
1346         let mshv_cpuid = <CpuId>::from_entries(&cpuid)
1347             .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;
1348 
1349         self.fd
1350             .register_intercept_result_cpuid(&mshv_cpuid)
1351             .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
1352     }
1353 
1354     #[cfg(target_arch = "x86_64")]
1355     ///
1356     /// X86 specific call to retrieve the CPUID registers.
1357     ///
1358     fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
1359         Ok(self.cpuid.clone())
1360     }
1361 
1362     #[cfg(target_arch = "x86_64")]
1363     ///
1364     /// X86 specific call to retrieve the values of a CPUID leaf
1365     ///
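    /// # Example
    ///
    /// An illustrative sketch (assumes `vcpu` is an already-created vCPU):
    ///
    /// ```ignore
    /// // Query leaf 0x1 (processor info and feature bits) with index 0 and no
    /// // XFEM/XSS state; the result comes back as [eax, ebx, ecx, edx].
    /// let [_eax, _ebx, ecx, _edx] = vcpu.get_cpuid_values(0x1, 0, 0, 0)?;
    /// let hypervisor_present = (ecx & (1 << 31)) != 0;
    /// ```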
1366     fn get_cpuid_values(
1367         &self,
1368         function: u32,
1369         index: u32,
1370         xfem: u64,
1371         xss: u64,
1372     ) -> cpu::Result<[u32; 4]> {
1373         self.fd
1374             .get_cpuid_values(function, index, xfem, xss)
1375             .map_err(|e| cpu::HypervisorCpuError::GetCpuidVales(e.into()))
1376     }
1377 
1378     #[cfg(target_arch = "x86_64")]
1379     ///
1380     /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1381     ///
1382     fn get_lapic(&self) -> cpu::Result<crate::arch::x86::LapicState> {
1383         Ok(self
1384             .fd
1385             .get_lapic()
1386             .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
1387             .into())
1388     }
1389 
1390     #[cfg(target_arch = "x86_64")]
1391     ///
1392     /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
1393     ///
1394     fn set_lapic(&self, lapic: &crate::arch::x86::LapicState) -> cpu::Result<()> {
1395         let lapic: mshv_bindings::LapicState = (*lapic).clone().into();
1396         self.fd
1397             .set_lapic(&lapic)
1398             .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
1399     }
1400 
1401     ///
1402     /// Returns the vcpu's current "multiprocessing state".
1403     ///
1404     fn get_mp_state(&self) -> cpu::Result<MpState> {
1405         Ok(MpState::Mshv)
1406     }
1407 
1408     ///
1409     /// Sets the vcpu's current "multiprocessing state".
1410     ///
1411     fn set_mp_state(&self, _mp_state: MpState) -> cpu::Result<()> {
1412         Ok(())
1413     }
1414 
1415     #[cfg(target_arch = "x86_64")]
1416     ///
1417     /// Set CPU state for x86_64 guest.
1418     ///
1419     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
1420         let mut state: VcpuMshvState = state.clone().into();
1421         self.set_msrs(&state.msrs)?;
1422         self.set_vcpu_events(&state.vcpu_events)?;
1423         self.set_regs(&state.regs.into())?;
1424         self.set_sregs(&state.sregs.into())?;
1425         self.set_fpu(&state.fpu)?;
1426         self.set_xcrs(&state.xcrs)?;
1427         // These registers are global and need to be set only for the first vCPU,
1428         // as the Microsoft Hypervisor allows setting them for only one vCPU.
1429         if self.vp_index == 0 {
1430             self.fd
1431                 .set_misc_regs(&state.misc)
1432                 .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))?
1433         }
1434         self.fd
1435             .set_debug_regs(&state.dbg)
1436             .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
1437         self.fd
1438             .set_all_vp_state_components(&mut state.vp_states)
1439             .map_err(|e| cpu::HypervisorCpuError::SetAllVpStateComponents(e.into()))?;
1440         Ok(())
1441     }
1442 
1443     #[cfg(target_arch = "aarch64")]
1444     ///
1445     /// Set CPU state for aarch64 guest.
1446     ///
1447     fn set_state(&self, _state: &CpuState) -> cpu::Result<()> {
1448         unimplemented!()
1449     }
1450 
1451     #[cfg(target_arch = "x86_64")]
1452     ///
1453     /// Get CPU State for x86_64 guest
1454     ///
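    /// # Example
    ///
    /// An illustrative save/restore sketch (`vcpu` is a hypothetical vCPU):
    ///
    /// ```ignore
    /// let snapshot = vcpu.state()?; // capture the full vCPU state
    /// // ... later, e.g. on the restore path ...
    /// vcpu.set_state(&snapshot)?;
    /// ```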
1455     fn state(&self) -> cpu::Result<CpuState> {
1456         let regs = self.get_regs()?;
1457         let sregs = self.get_sregs()?;
1458         let xcrs = self.get_xcrs()?;
1459         let fpu = self.get_fpu()?;
1460         let vcpu_events = self.get_vcpu_events()?;
1461         let mut msrs = self.msrs.clone();
1462         self.get_msrs(&mut msrs)?;
1463         let misc = self
1464             .fd
1465             .get_misc_regs()
1466             .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?;
1467         let dbg = self
1468             .fd
1469             .get_debug_regs()
1470             .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;
1471         let vp_states = self
1472             .fd
1473             .get_all_vp_state_components()
1474             .map_err(|e| cpu::HypervisorCpuError::GetAllVpStateComponents(e.into()))?;
1475 
1476         Ok(VcpuMshvState {
1477             msrs,
1478             vcpu_events,
1479             regs: regs.into(),
1480             sregs: sregs.into(),
1481             fpu,
1482             xcrs,
1483             dbg,
1484             misc,
1485             vp_states,
1486         }
1487         .into())
1488     }
1489 
1490     #[cfg(target_arch = "aarch64")]
1491     ///
1492     /// Get CPU state for aarch64 guest.
1493     ///
1494     fn state(&self) -> cpu::Result<CpuState> {
1495         unimplemented!()
1496     }
1497 
1498     #[cfg(target_arch = "x86_64")]
1499     ///
1500     /// Translate guest virtual address to guest physical address
1501     ///
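    /// # Example
    ///
    /// An illustrative sketch (`vcpu`, `gva` and `flags` are hypothetical; the
    /// flags would come from the `HV_TRANSLATE_GVA_*` control-flag constants):
    ///
    /// ```ignore
    /// let (gpa, result_code) = vcpu.translate_gva(gva, flags)?;
    /// // A result_code of 0 (HV_TRANSLATE_GVA_SUCCESS) means `gpa` is valid.
    /// ```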
1502     fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> {
1503         let r = self
1504             .fd
1505             .translate_gva(gva, flags)
1506             .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
1507 
1508         let gpa = r.0;
1509         // SAFETY: r is valid, otherwise this function would already have returned
1510         let result_code = unsafe { r.1.__bindgen_anon_1.result_code };
1511 
1512         Ok((gpa, result_code))
1513     }
1514 
1515     #[cfg(target_arch = "x86_64")]
1516     ///
1517     /// Return the list of initial MSR entries for a VCPU
1518     ///
1519     fn boot_msr_entries(&self) -> Vec<MsrEntry> {
1520         use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};
1521 
1522         [
1523             msr!(msr_index::MSR_IA32_SYSENTER_CS),
1524             msr!(msr_index::MSR_IA32_SYSENTER_ESP),
1525             msr!(msr_index::MSR_IA32_SYSENTER_EIP),
1526             msr!(msr_index::MSR_STAR),
1527             msr!(msr_index::MSR_CSTAR),
1528             msr!(msr_index::MSR_LSTAR),
1529             msr!(msr_index::MSR_KERNEL_GS_BASE),
1530             msr!(msr_index::MSR_SYSCALL_MASK),
1531             msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
1532         ]
1533         .to_vec()
1534     }
1535 
1536     ///
1537     /// Sets the vCPU's AMD-specific SEV control register.
1538     ///
1539     #[cfg(feature = "sev_snp")]
1540     fn set_sev_control_register(&self, vmsa_pfn: u64) -> cpu::Result<()> {
1541         let sev_control_reg = snp::get_sev_control_register(vmsa_pfn);
1542 
1543         self.fd
1544             .set_sev_control_register(sev_control_reg)
1545             .map_err(|e| cpu::HypervisorCpuError::SetSevControlRegister(e.into()))
1546     }

1547     #[cfg(target_arch = "x86_64")]
1548     ///
1549     /// Trigger an NMI (non-maskable interrupt)
1550     ///
1551     fn nmi(&self) -> cpu::Result<()> {
1552         let cfg = InterruptRequest {
1553             interrupt_type: hv_interrupt_type_HV_X64_INTERRUPT_TYPE_NMI,
1554             apic_id: self.vp_index as u64,
1555             level_triggered: false,
1556             vector: 0,
1557             logical_destination_mode: false,
1558             long_mode: false,
1559         };
1560         self.vm_fd
1561             .request_virtual_interrupt(&cfg)
1562             .map_err(|e| cpu::HypervisorCpuError::Nmi(e.into()))
1563     }
1564 }
1565 
1566 impl MshvVcpu {
1567     ///
1568     /// Deactivate the previously used GHCB page.
1569     ///
1570     #[cfg(feature = "sev_snp")]
1571     fn disable_prev_ghcb_page(&self) -> cpu::Result<()> {
1572         let mut reg_assocs = [hv_register_assoc {
1573             name: hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
1574             ..Default::default()
1575         }];
1576         self.fd.get_reg(&mut reg_assocs).unwrap();
1577         // SAFETY: Accessing a union element from bindgen generated bindings.
1578         let prev_ghcb_gpa = unsafe { reg_assocs[0].value.reg64 };
1579 
1580         debug!("Prev GHCB GPA is {:x}", prev_ghcb_gpa);
1581 
1582         let mut ghcb_gpa = hv_x64_register_sev_ghcb::default();
1583 
1584         // SAFETY: Accessing a union element from bindgen generated bindings.
1585         unsafe {
1586             ghcb_gpa.__bindgen_anon_1.set_enabled(0);
1587             ghcb_gpa.__bindgen_anon_1.set_page_number(prev_ghcb_gpa);
1588         }
1589 
1590         // SAFETY: Accessing a union element from bindgen generated bindings.
1591         let reg_name_value = unsafe {
1592             [(
1593                 hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
1594                 ghcb_gpa.as_uint64,
1595             )]
1596         };
1597 
1598         set_registers_64!(self.fd, reg_name_value)
1599             .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
1600 
1601         Ok(())
1602     }

1603     #[cfg(target_arch = "x86_64")]
1604     ///
1605     /// X86 specific call that returns the vcpu's current "xcrs".
1606     ///
1607     fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
1608         self.fd
1609             .get_xcrs()
1610             .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
1611     }
1612 
1613     #[cfg(target_arch = "x86_64")]
1614     ///
1615     /// X86 specific call that sets the vcpu's current "xcrs".
1616     ///
1617     fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
1618         self.fd
1619             .set_xcrs(xcrs)
1620             .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
1621     }
1622 
1623     #[cfg(target_arch = "x86_64")]
1624     ///
1625     /// Returns currently pending exceptions, interrupts, and NMIs as well as related
1626     /// states of the vcpu.
1627     ///
1628     fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
1629         self.fd
1630             .get_vcpu_events()
1631             .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
1632     }
1633 
1634     #[cfg(target_arch = "x86_64")]
1635     ///
1636     /// Sets pending exceptions, interrupts, and NMIs as well as related states
1637     /// of the vcpu.
1638     ///
1639     fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
1640         self.fd
1641             .set_vcpu_events(events)
1642             .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
1643     }
1644 
1645     ///
1646     /// Clear SW_EXIT_INFO1 register for SEV-SNP guests.
1647     ///
1648     #[cfg(feature = "sev_snp")]
1649     fn clear_swexit_info1(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
1650         // Clear the SW_EXIT_INFO1 register to indicate no error.
1651         // Safe to unwrap: for an SEV-SNP guest the GHCB pointer is always
1652         // present in the Option; otherwise this code path is never reached.
1653         let ghcb = self.ghcb.as_ref().unwrap().0;
1654         set_svm_field_u64_ptr!(ghcb, exit_info1, 0);
1655 
1656         Ok(cpu::VmExit::Ignore)
1657     }
1658 
1659     #[cfg(feature = "sev_snp")]
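    /// Write `data` to guest physical memory starting at `gpa`, splitting the
    /// buffer into `HV_READ_WRITE_GPA_MAX_SIZE`-byte chunks, since that is the
    /// most a single `gpa_write` ioctl can carry.
    ///
    /// An illustrative sketch of the chunking iterator (a 16-byte chunk size
    /// is assumed here purely for the example):
    ///
    /// ```ignore
    /// let gpa = 0x1000u64;
    /// let data = [0u8; 40];
    /// // Yields (0x1000, data[0..16]), (0x1010, data[16..32]), (0x1020, data[32..40]).
    /// for (gpa, chunk) in (gpa..).step_by(16).zip(data.chunks(16)) {
    ///     let _ = (gpa, chunk);
    /// }
    /// ```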
1660     fn gpa_write(&self, gpa: u64, data: &[u8]) -> cpu::Result<()> {
1661         for (gpa, chunk) in (gpa..)
1662             .step_by(HV_READ_WRITE_GPA_MAX_SIZE as usize)
1663             .zip(data.chunks(HV_READ_WRITE_GPA_MAX_SIZE as usize))
1664         {
1665             let mut data = [0; HV_READ_WRITE_GPA_MAX_SIZE as usize];
1666             data[..chunk.len()].copy_from_slice(chunk);
1667 
1668             let mut rw_gpa_arg = mshv_bindings::mshv_read_write_gpa {
1669                 base_gpa: gpa,
1670                 byte_count: chunk.len() as u32,
1671                 data,
1672                 ..Default::default()
1673             };
1674             self.fd
1675                 .gpa_write(&mut rw_gpa_arg)
1676                 .map_err(|e| cpu::HypervisorCpuError::GpaWrite(e.into()))?;
1677         }
1678 
1679         Ok(())
1680     }
1681 }
1682 
1683 /// Wrapper over Mshv VM ioctls.
1684 pub struct MshvVm {
1685     fd: Arc<VmFd>,
1686     #[cfg(target_arch = "x86_64")]
1687     msrs: Vec<MsrEntry>,
1688     dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
1689     #[cfg(feature = "sev_snp")]
1690     sev_snp_enabled: bool,
1691     #[cfg(feature = "sev_snp")]
1692     host_access_pages: ArcSwap<AtomicBitmap>,
1693 }
1694 
1695 impl MshvVm {
1696     ///
1697     /// Creates an in-kernel device.
1698     ///
1699     /// See the documentation for `MSHV_CREATE_DEVICE`.
1700     fn create_device(&self, device: &mut CreateDevice) -> vm::Result<VfioDeviceFd> {
1701         let device_fd = self
1702             .fd
1703             .create_device(device)
1704             .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
1705         Ok(VfioDeviceFd::new_from_mshv(device_fd))
1706     }
1707 }
1708 
1709 ///
1710 /// Implementation of Vm trait for Mshv
1711 ///
1712 /// # Examples
1713 ///
1714 /// ```
1715 /// extern crate hypervisor;
1716 /// use hypervisor::mshv::MshvHypervisor;
1717 /// use std::sync::Arc;
1718 /// let mshv = MshvHypervisor::new().unwrap();
1719 /// let hypervisor = Arc::new(mshv);
1720 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
1721 /// ```
1722 impl vm::Vm for MshvVm {
1723     #[cfg(target_arch = "x86_64")]
1724     ///
1725     /// Sets the address of the one-page region in the VM's address space.
1726     ///
1727     fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> {
1728         Ok(())
1729     }
1730 
1731     #[cfg(target_arch = "x86_64")]
1732     ///
1733     /// Sets the address of the three-page region in the VM's address space.
1734     ///
1735     fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
1736         Ok(())
1737     }
1738 
1739     ///
1740     /// Creates an in-kernel interrupt controller.
1741     ///
1742     fn create_irq_chip(&self) -> vm::Result<()> {
1743         Ok(())
1744     }
1745 
1746     ///
1747     /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
1748     ///
1749     fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
1750         debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);
1751 
1752         self.fd
1753             .register_irqfd(fd, gsi)
1754             .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;
1755 
1756         Ok(())
1757     }
1758 
1759     ///
1760     /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
1761     ///
1762     fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
1763         debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);
1764 
1765         self.fd
1766             .unregister_irqfd(fd, gsi)
1767             .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;
1768 
1769         Ok(())
1770     }
1771 
1772     ///
1773     /// Creates a vCPU for the given `id` and wraps it in an `MshvVcpu`.
1774     ///
1775     fn create_vcpu(
1776         &self,
1777         id: u8,
1778         vm_ops: Option<Arc<dyn VmOps>>,
1779     ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
1780         let vcpu_fd = self
1781             .fd
1782             .create_vcpu(id)
1783             .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
1784 
1785         /* Map the GHCB page into the VMM (root) address space.
1786          * The mapping is available after vCPU creation. This address is mapped
1787          * to the GHCB overlay page of the Microsoft Hypervisor, so we don't
1788          * have to worry about the scenario where a guest changes the GHCB mapping.
1789          */
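        // The mmap file offset below is expressed in pages: the GHCB slot
        // index (MSHV_VP_MMAP_OFFSET_GHCB) multiplied by the system page size.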
1790         #[cfg(feature = "sev_snp")]
1791         let ghcb = if self.sev_snp_enabled {
1792             // SAFETY: Safe to call as VCPU has this map already available upon creation
1793             let addr = unsafe {
1794                 libc::mmap(
1795                     std::ptr::null_mut(),
1796                     HV_PAGE_SIZE,
1797                     libc::PROT_READ | libc::PROT_WRITE,
1798                     libc::MAP_SHARED,
1799                     vcpu_fd.as_raw_fd(),
1800                     MSHV_VP_MMAP_OFFSET_GHCB as i64 * libc::sysconf(libc::_SC_PAGE_SIZE),
1801                 )
1802             };
1803             if addr == libc::MAP_FAILED {
1804                 // No point in continuing: without this mmap, VMGEXIT will fail
1805                 // anyway, so return an error.
1806                 return Err(vm::HypervisorVmError::MmapToRoot);
1807             }
1808             Some(Ghcb(addr as *mut svm_ghcb_base))
1809         } else {
1810             None
1811         };
1812         let vcpu = MshvVcpu {
1813             fd: vcpu_fd,
1814             vp_index: id,
1815             #[cfg(target_arch = "x86_64")]
1816             cpuid: Vec::new(),
1817             #[cfg(target_arch = "x86_64")]
1818             msrs: self.msrs.clone(),
1819             vm_ops,
1820             vm_fd: self.fd.clone(),
1821             #[cfg(feature = "sev_snp")]
1822             ghcb,
1823             #[cfg(feature = "sev_snp")]
1824             host_access_pages: ArcSwap::new(self.host_access_pages.load().clone()),
1825         };
1826         Ok(Arc::new(vcpu))
1827     }
1828 
1829     #[cfg(target_arch = "x86_64")]
1830     fn enable_split_irq(&self) -> vm::Result<()> {
1831         Ok(())
1832     }
1833 
1834     #[cfg(target_arch = "x86_64")]
1835     fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
1836         Ok(())
1837     }
1838 
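    /// Registers `fd` to be signalled on writes to `addr`, optionally only
    /// when the written value matches `datamatch`. For SEV-SNP guests this is
    /// currently a no-op.
    ///
    /// An illustrative sketch (`vm` is a hypothetical `MshvVm`; the address
    /// and value are made up):
    ///
    /// ```ignore
    /// let evt = EventFd::new(libc::EFD_NONBLOCK).unwrap();
    /// // Signal `evt` only when the guest writes the 32-bit value 1 to the
    /// // MMIO address 0xd000_0000.
    /// vm.register_ioevent(
    ///     &evt,
    ///     &IoEventAddress::Mmio(0xd000_0000),
    ///     Some(vm::DataMatch::DataMatch32(1)),
    /// )?;
    /// ```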
1839     fn register_ioevent(
1840         &self,
1841         fd: &EventFd,
1842         addr: &IoEventAddress,
1843         datamatch: Option<DataMatch>,
1844     ) -> vm::Result<()> {
1845         #[cfg(feature = "sev_snp")]
1846         if self.sev_snp_enabled {
1847             return Ok(());
1848         }
1849 
1850         let addr = &mshv_ioctls::IoEventAddress::from(*addr);
1851         debug!(
1852             "register_ioevent fd {} addr {:x?} datamatch {:?}",
1853             fd.as_raw_fd(),
1854             addr,
1855             datamatch
1856         );
1857         if let Some(dm) = datamatch {
1858             match dm {
1859                 vm::DataMatch::DataMatch32(mshv_dm32) => self
1860                     .fd
1861                     .register_ioevent(fd, addr, mshv_dm32)
1862                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
1863                 vm::DataMatch::DataMatch64(mshv_dm64) => self
1864                     .fd
1865                     .register_ioevent(fd, addr, mshv_dm64)
1866                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
1867             }
1868         } else {
1869             self.fd
1870                 .register_ioevent(fd, addr, NoDatamatch)
1871                 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
1872         }
1873     }
1874 
1875     /// Unregisters an event from the address it was previously registered to.
1876     fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
1877         #[cfg(feature = "sev_snp")]
1878         if self.sev_snp_enabled {
1879             return Ok(());
1880         }
1881 
1882         let addr = &mshv_ioctls::IoEventAddress::from(*addr);
1883         debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);
1884 
1885         self.fd
1886             .unregister_ioevent(fd, addr, NoDatamatch)
1887             .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
1888     }
1889 
1890     /// Creates a guest physical memory region.
1891     fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
1892         let user_memory_region: mshv_user_mem_region = user_memory_region.into();
1893         // We keep track of the slots regardless of whether they are read-only.
1894         // For a read-only slot the hypervisor can enable the dirty bits,
1895         // but a VM exit happens before the dirty bits are set.
1896         self.dirty_log_slots.write().unwrap().insert(
1897             user_memory_region.guest_pfn,
1898             MshvDirtyLogSlot {
1899                 guest_pfn: user_memory_region.guest_pfn,
1900                 memory_size: user_memory_region.size,
1901             },
1902         );
1903 
1904         self.fd
1905             .map_user_memory(user_memory_region)
1906             .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
1907         Ok(())
1908     }
1909 
1910     /// Removes a guest physical memory region.
1911     fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
1912         let user_memory_region: mshv_user_mem_region = user_memory_region.into();
1913         // Remove the corresponding entry from "self.dirty_log_slots" if needed
1914         self.dirty_log_slots
1915             .write()
1916             .unwrap()
1917             .remove(&user_memory_region.guest_pfn);
1918 
1919         self.fd
1920             .unmap_user_memory(user_memory_region)
1921             .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
1922         Ok(())
1923     }
1924 
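    /// Builds a `UserMemoryRegion` in MSHV terms: the flags always include
    /// `MSHV_SET_MEM_BIT_EXECUTABLE`, `MSHV_SET_MEM_BIT_WRITABLE` is added
    /// unless `readonly`, and the guest address is stored as a page frame
    /// number (`guest_phys_addr >> PAGE_SHIFT`).
    ///
    /// An illustrative sketch (`vm` and `host_va` are hypothetical):
    ///
    /// ```ignore
    /// // Describe 2 MiB of writable guest RAM at GPA 0x10_0000 backed by host_va.
    /// let region = vm.make_user_memory_region(0, 0x10_0000, 2 << 20, host_va, false, false);
    /// vm.create_user_memory_region(region)?;
    /// ```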
1925     fn make_user_memory_region(
1926         &self,
1927         _slot: u32,
1928         guest_phys_addr: u64,
1929         memory_size: u64,
1930         userspace_addr: u64,
1931         readonly: bool,
1932         _log_dirty_pages: bool,
1933     ) -> UserMemoryRegion {
1934         let mut flags = 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
1935         if !readonly {
1936             flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
1937         }
1938 
1939         mshv_user_mem_region {
1940             flags,
1941             guest_pfn: guest_phys_addr >> PAGE_SHIFT,
1942             size: memory_size,
1943             userspace_addr,
1944             ..Default::default()
1945         }
1946         .into()
1947     }
1948 
1949     fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
1950         let mut vfio_dev = mshv_create_device {
1951             type_: MSHV_DEV_TYPE_VFIO,
1952             fd: 0,
1953             flags: 0,
1954         };
1955 
1956         self.create_device(&mut vfio_dev)
1957             .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
1958     }
1959 
1960     ///
1961     /// Constructs a routing entry
1962     ///
1963     fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
1964         match config {
1965             InterruptSourceConfig::MsiIrq(cfg) => mshv_user_irq_entry {
1966                 gsi,
1967                 address_lo: cfg.low_addr,
1968                 address_hi: cfg.high_addr,
1969                 data: cfg.data,
1970             }
1971             .into(),
1972             _ => {
1973                 unreachable!()
1974             }
1975         }
1976     }
1977 
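    /// Replaces the partition's MSI routing table with `entries`.
    ///
    /// `mshv_user_irq_table` ends in a flexible array member, so
    /// `vec_with_array_field` allocates one header struct followed by enough
    /// trailing space for `entries.len()` routing entries, which are then
    /// copied in below.
    ///
    /// An illustrative sketch (`vm` and `msi_cfg` are hypothetical):
    ///
    /// ```ignore
    /// let entry = vm.make_routing_entry(0, &InterruptSourceConfig::MsiIrq(msi_cfg));
    /// vm.set_gsi_routing(&[entry])?;
    /// ```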
1978     fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
1979         let mut msi_routing =
1980             vec_with_array_field::<mshv_user_irq_table, mshv_user_irq_entry>(entries.len());
1981         msi_routing[0].nr = entries.len() as u32;
1982 
1983         let entries: Vec<mshv_user_irq_entry> = entries
1984             .iter()
1985             .map(|entry| match entry {
1986                 IrqRoutingEntry::Mshv(e) => *e,
1987                 #[allow(unreachable_patterns)]
1988                 _ => panic!("IrqRoutingEntry type is wrong"),
1989             })
1990             .collect();
1991 
1992         // SAFETY: msi_routing initialized with entries.len() and now it is being turned into
1993         // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
1994         // everything from entries.
1995         unsafe {
1996             let entries_slice: &mut [mshv_user_irq_entry] =
1997                 msi_routing[0].entries.as_mut_slice(entries.len());
1998             entries_slice.copy_from_slice(&entries);
1999         }
2000 
2001         self.fd
2002             .set_msi_routing(&msi_routing[0])
2003             .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
2004     }
2005 
2006     ///
2007     /// Start logging dirty pages
2008     ///
2009     fn start_dirty_log(&self) -> vm::Result<()> {
2010         self.fd
2011             .enable_dirty_page_tracking()
2012             .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
2013     }
2014 
2015     ///
2016     /// Stop logging dirty pages
2017     ///
2018     fn stop_dirty_log(&self) -> vm::Result<()> {
2019         let dirty_log_slots = self.dirty_log_slots.read().unwrap();
2020         // Before disabling dirty page tracking we need
2021         // to set the dirty bits in the hypervisor.
2022         // This is a requirement of the Microsoft Hypervisor.
2023         for (_, s) in dirty_log_slots.iter() {
2024             self.fd
2025                 .get_dirty_log(
2026                     s.guest_pfn,
2027                     s.memory_size as usize,
2028                     MSHV_GPAP_ACCESS_OP_SET as u8,
2029                 )
2030                 .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
2031         }
2032         self.fd
2033             .disable_dirty_page_tracking()
2034             .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
2035         Ok(())
2036     }
2037 
2038     ///
2039     /// Get dirty pages bitmap (one bit per page)
2040     ///
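    /// # Example
    ///
    /// An illustrative sketch counting dirty pages over the first 1 GiB of
    /// guest memory (`vm` is a hypothetical `MshvVm`):
    ///
    /// ```ignore
    /// let bitmap = vm.get_dirty_log(0, 0, 1 << 30)?;
    /// let dirty_pages: u32 = bitmap.iter().map(|word| word.count_ones()).sum();
    /// ```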
2041     fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
2042         self.fd
2043             .get_dirty_log(
2044                 base_gpa >> PAGE_SHIFT,
2045                 memory_size as usize,
2046                 MSHV_GPAP_ACCESS_OP_CLEAR as u8,
2047             )
2048             .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
2049     }
2050 
2051     /// Retrieve guest clock.
2052     #[cfg(target_arch = "x86_64")]
2053     fn get_clock(&self) -> vm::Result<ClockData> {
2054         let val = self
2055             .fd
2056             .get_partition_property(hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME)
2057             .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?;
2058         Ok(MshvClockData { ref_time: val }.into())
2059     }
2060 
2061     /// Set guest clock.
2062     #[cfg(target_arch = "x86_64")]
2063     fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
2064         let data: MshvClockData = (*data).into();
2065         self.fd
2066             .set_partition_property(
2067                 hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME,
2068                 data.ref_time,
2069             )
2070             .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
2071     }
2072 
2073     /// Downcast to the underlying MshvVm type
2074     fn as_any(&self) -> &dyn Any {
2075         self
2076     }
2077 
2078     /// Initialize the SEV-SNP VM
2079     #[cfg(feature = "sev_snp")]
2080     fn sev_snp_init(&self) -> vm::Result<()> {
2081         self.fd
2082             .set_partition_property(
2083                 hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_STATE,
2084                 hv_partition_isolation_state_HV_PARTITION_ISOLATION_SECURE as u64,
2085             )
2086             .map_err(|e| vm::HypervisorVmError::InitializeSevSnp(e.into()))
2087     }
2088 
2089     ///
2090     /// Import isolated pages; these pages will be used
2091     /// for the PSP (Platform Security Processor) measurement.
2092     #[cfg(feature = "sev_snp")]
2093     fn import_isolated_pages(
2094         &self,
2095         page_type: u32,
2096         page_size: u32,
2097         pages: &[u64],
2098     ) -> vm::Result<()> {
2099         debug_assert!(page_size == hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB);
2100         if pages.is_empty() {
2101             return Ok(());
2102         }
2103 
2104         let mut isolated_pages =
2105             vec_with_array_field::<mshv_import_isolated_pages, u64>(pages.len());
2106         isolated_pages[0].page_type = page_type as u8;
2107         isolated_pages[0].page_count = pages.len() as u64;
2108         // SAFETY: isolated_pages initialized with pages.len() and now it is being turned into
2109         // pages_slice with pages.len() again. It is guaranteed to be large enough to hold
2110         // everything from pages.
2111         unsafe {
2112             let pages_slice: &mut [u64] = isolated_pages[0].guest_pfns.as_mut_slice(pages.len());
2113             pages_slice.copy_from_slice(pages);
2114         }
2115         self.fd
2116             .import_isolated_pages(&isolated_pages[0])
2117             .map_err(|e| vm::HypervisorVmError::ImportIsolatedPages(e.into()))
2118     }
2119 
2120     ///
2121     /// Complete isolated import, telling the hypervisor that
2122     /// importing the pages to guest memory is complete.
2123     ///
2124     #[cfg(feature = "sev_snp")]
2125     fn complete_isolated_import(
2126         &self,
2127         snp_id_block: IGVM_VHS_SNP_ID_BLOCK,
2128         host_data: [u8; 32],
2129         id_block_enabled: u8,
2130     ) -> vm::Result<()> {
2131         let mut auth_info = hv_snp_id_auth_info {
2132             id_key_algorithm: snp_id_block.id_key_algorithm,
2133             auth_key_algorithm: snp_id_block.author_key_algorithm,
2134             ..Default::default()
2135         };
2136         // Each of the r and s components is 576 bits long
2137         auth_info.id_block_signature[..SIG_R_COMPONENT_SIZE_IN_BYTES]
2138             .copy_from_slice(snp_id_block.id_key_signature.r_comp.as_ref());
2139         auth_info.id_block_signature
2140             [SIG_R_COMPONENT_SIZE_IN_BYTES..SIG_R_AND_S_COMPONENT_SIZE_IN_BYTES]
2141             .copy_from_slice(snp_id_block.id_key_signature.s_comp.as_ref());
2142         auth_info.id_key[..ECDSA_CURVE_ID_SIZE_IN_BYTES]
2143             .copy_from_slice(snp_id_block.id_public_key.curve.to_le_bytes().as_ref());
2144         auth_info.id_key[ECDSA_SIG_X_COMPONENT_START..ECDSA_SIG_X_COMPONENT_END]
2145             .copy_from_slice(snp_id_block.id_public_key.qx.as_ref());
2146         auth_info.id_key[ECDSA_SIG_Y_COMPONENT_START..ECDSA_SIG_Y_COMPONENT_END]
2147             .copy_from_slice(snp_id_block.id_public_key.qy.as_ref());
2148 
2149         let data = mshv_complete_isolated_import {
2150             import_data: hv_partition_complete_isolated_import_data {
2151                 psp_parameters: hv_psp_launch_finish_data {
2152                     id_block: hv_snp_id_block {
2153                         launch_digest: snp_id_block.ld,
2154                         family_id: snp_id_block.family_id,
2155                         image_id: snp_id_block.image_id,
2156                         version: snp_id_block.version,
2157                         guest_svn: snp_id_block.guest_svn,
2158                         policy: get_default_snp_guest_policy(),
2159                     },
2160                     id_auth_info: auth_info,
2161                     host_data,
2162                     id_block_enabled,
2163                     author_key_enabled: 0,
2164                 },
2165             },
2166         };
2167         self.fd
2168             .complete_isolated_import(&data)
2169             .map_err(|e| vm::HypervisorVmError::CompleteIsolatedImport(e.into()))
2170     }
2171 
2172     #[cfg(target_arch = "aarch64")]
2173     fn create_vgic(&self, _config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
2174         unimplemented!()
2175     }
2176 
2177     #[cfg(target_arch = "aarch64")]
2178     fn get_preferred_target(&self, _kvi: &mut VcpuInit) -> vm::Result<()> {
2179         unimplemented!()
2180     }
2181 
2182     /// Pause the VM
2183     fn pause(&self) -> vm::Result<()> {
2184         // Freeze the partition
2185         self.fd
2186             .set_partition_property(
2187                 hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
2188                 1u64,
2189             )
2190             .map_err(|e| {
2191                 vm::HypervisorVmError::SetVmProperty(anyhow!(
2192                     "Failed to set partition property: {}",
2193                     e
2194                 ))
2195             })
2196     }
2197 
2198     /// Resume the VM
2199     fn resume(&self) -> vm::Result<()> {
2200         // Resume the partition by clearing the TIME_FREEZE property
2201         self.fd
2202             .set_partition_property(
2203                 hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
2204                 0u64,
2205             )
2206             .map_err(|e| {
2207                 vm::HypervisorVmError::SetVmProperty(anyhow!(
2208                     "Failed to set partition property: {}",
2209                     e
2210                 ))
2211             })
2212     }
2213 
2214     #[cfg(feature = "sev_snp")]
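    /// Grants the host (root partition) read/write access to the guest pages
    /// backing `[gpa, gpa + size)` on an SEV-SNP partition. Pages already
    /// acquired are tracked in `host_access_pages` and skipped, so each page
    /// is acquired at most once.
    ///
    /// An illustrative sketch (`vm` is a hypothetical `MshvVm`):
    ///
    /// ```ignore
    /// // Make one 4 KiB page at GPA 0x1000 accessible to the VMM.
    /// vm.gain_page_access(0x1000, 0x1000)?;
    /// ```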
2215     fn gain_page_access(&self, gpa: u64, size: u32) -> vm::Result<()> {
2216         use mshv_ioctls::set_bits;
2217         const ONE_GB: usize = 1024 * 1024 * 1024;
2218 
2219         if !self.sev_snp_enabled {
2220             return Ok(());
2221         }
2222 
2223         let start_gpfn: u64 = gpa >> PAGE_SHIFT;
2224         let end_gpfn: u64 = (gpa + size as u64 - 1) >> PAGE_SHIFT;
2225 
2226         // Enlarge the bitmap if the end PFN is beyond the current bitmap length
2227         if end_gpfn >= self.host_access_pages.load().as_ref().len() as u64 {
2228             self.host_access_pages.rcu(|bitmap| {
2229                 let mut bm = bitmap.as_ref().clone();
2230                 bm.enlarge(ONE_GB);
2231                 bm
2232             });
2233         }
2234 
2235         let gpas: Vec<u64> = (start_gpfn..=end_gpfn)
2236             .filter(|x| {
2237                 !self
2238                     .host_access_pages
2239                     .load()
2240                     .as_ref()
2241                     .is_bit_set(*x as usize)
2242             })
2243             .map(|x| x << PAGE_SHIFT)
2244             .collect();
2245 
2246         if !gpas.is_empty() {
2247             let mut gpa_list = vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len());
2248             gpa_list[0].page_count = gpas.len() as u64;
2249             gpa_list[0].flags = set_bits!(
2250                 u8,
2251                 MSHV_GPA_HOST_ACCESS_BIT_ACQUIRE,
2252                 MSHV_GPA_HOST_ACCESS_BIT_READABLE,
2253                 MSHV_GPA_HOST_ACCESS_BIT_WRITABLE
2254             );
2255 
2256             // SAFETY: gpa_list initialized with gpas.len() and now it is being turned into
2257             // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold
2258             // everything from gpas.
2259             unsafe {
2260                 let gpas_slice: &mut [u64] = gpa_list[0].guest_pfns.as_mut_slice(gpas.len());
2261                 gpas_slice.copy_from_slice(gpas.as_slice());
2262             }
2263 
2264             self.fd
2265                 .modify_gpa_host_access(&gpa_list[0])
2266                 .map_err(|e| vm::HypervisorVmError::ModifyGpaHostAccess(e.into()))?;
2267 
2268             for acquired_gpa in gpas {
2269                 self.host_access_pages.rcu(|bitmap| {
2270                     let bm = bitmap.clone();
2271                     bm.set_bit((acquired_gpa >> PAGE_SHIFT) as usize);
2272                     bm
2273                 });
2274             }
2275         }
2276 
2277         Ok(())
2278     }
2279 }
2280