// cloud-hypervisor: hypervisor/src/mshv/mod.rs (revision 686e6d50824fcc7403a51b91545899a6301d6216)
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//

use crate::arch::emulator::{PlatformEmulator, PlatformError};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::emulator::{Emulator, EmulatorCpuState};
use crate::cpu;
use crate::cpu::Vcpu;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, InterruptSourceConfig, VmOps};
use crate::HypervisorType;
pub use mshv_bindings::*;
use mshv_ioctls::{set_registers_64, Mshv, NoDatamatch, VcpuFd, VmFd};
use std::any::Any;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use vfio_ioctls::VfioDeviceFd;
use vm::DataMatch;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
use crate::{
    ClockData, CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
    USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
};
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
pub use x86_64::VcpuMshvState;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;

#[cfg(target_arch = "x86_64")]
use std::fs::File;
use std::os::unix::io::AsRawFd;

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{
    CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, StandardRegisters,
};

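// Flags passed to VmFd::get_dirty_log() below: whether retrieving the
// dirty-page bitmap should also clear the pages' dirty state, or forcibly
// mark the pages dirty, in the hypervisor.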
const DIRTY_BITMAP_CLEAR_DIRTY: u64 = 0x4;
const DIRTY_BITMAP_SET_DIRTY: u64 = 0x8;

///
/// Export generically-named wrappers of mshv-bindings for Unix-based platforms
///
pub use {
    mshv_bindings::mshv_create_device as CreateDevice,
    mshv_bindings::mshv_device_attr as DeviceAttr, mshv_ioctls::DeviceFd,
};

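/// Guest pages are 4 KiB; the guest_pfn <-> GPA conversions in this module
/// shift by this amount.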
pub const PAGE_SHIFT: usize = 12;

impl From<mshv_user_mem_region> for UserMemoryRegion {
    fn from(region: mshv_user_mem_region) -> Self {
        let mut flags: u32 = 0;
        if region.flags & HV_MAP_GPA_READABLE != 0 {
            flags |= USER_MEMORY_REGION_READ;
        }
        if region.flags & HV_MAP_GPA_WRITABLE != 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & HV_MAP_GPA_EXECUTABLE != 0 {
            flags |= USER_MEMORY_REGION_EXECUTE;
        }

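        // The GPA is the page frame number shifted up by the page size, plus
        // the sub-page offset carried in the low bits of the userspace address.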
        UserMemoryRegion {
            guest_phys_addr: (region.guest_pfn << PAGE_SHIFT as u64)
                + (region.userspace_addr & ((1 << PAGE_SHIFT) - 1)),
            memory_size: region.size,
            userspace_addr: region.userspace_addr,
            flags,
            ..Default::default()
        }
    }
}

impl From<UserMemoryRegion> for mshv_user_mem_region {
    fn from(region: UserMemoryRegion) -> Self {
        let mut flags: u32 = 0;
        if region.flags & USER_MEMORY_REGION_READ != 0 {
            flags |= HV_MAP_GPA_READABLE;
        }
        if region.flags & USER_MEMORY_REGION_WRITE != 0 {
            flags |= HV_MAP_GPA_WRITABLE;
        }
        if region.flags & USER_MEMORY_REGION_EXECUTE != 0 {
            flags |= HV_MAP_GPA_EXECUTABLE;
        }

        mshv_user_mem_region {
            guest_pfn: region.guest_phys_addr >> PAGE_SHIFT,
            size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}
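
// A minimal round-trip check of the two conversions above. This is a sketch,
// not part of the original source; the values are arbitrary and the test
// assumes a page-aligned userspace address.
#[cfg(test)]
mod user_memory_region_tests {
    use super::*;

    #[test]
    fn round_trip_conversion() {
        let mshv_region = mshv_user_mem_region {
            guest_pfn: 0x100,
            size: 0x2000,
            userspace_addr: 0x7f00_0000_0000,
            flags: HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE,
        };

        let generic: UserMemoryRegion = mshv_region.into();
        // 0x100 << 12 == 0x10_0000; the userspace address is page-aligned,
        // so it contributes no sub-page offset.
        assert_eq!(generic.guest_phys_addr, 0x100u64 << PAGE_SHIFT);
        assert_eq!(
            generic.flags,
            USER_MEMORY_REGION_READ | USER_MEMORY_REGION_WRITE
        );

        let back: mshv_user_mem_region = generic.into();
        assert_eq!(back.guest_pfn, 0x100);
        assert_eq!(back.size, 0x2000);
        assert_eq!(back.userspace_addr, 0x7f00_0000_0000);
    }
}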

impl From<mshv_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: mshv_ioctls::IoEventAddress) -> Self {
        match a {
            mshv_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            mshv_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for mshv_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuMshvState> for CpuState {
    fn from(s: VcpuMshvState) -> Self {
        CpuState::Mshv(s)
    }
}

impl From<CpuState> for VcpuMshvState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Mshv(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

impl From<mshv_msi_routing_entry> for IrqRoutingEntry {
    fn from(s: mshv_msi_routing_entry) -> Self {
        IrqRoutingEntry::Mshv(s)
    }
}

impl From<IrqRoutingEntry> for mshv_msi_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

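/// Bookkeeping for a memory slot while dirty-page logging is enabled; used by
/// stop_dirty_log() below to force the dirty bits before tracking is disabled.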
struct MshvDirtyLogSlot {
    guest_pfn: u64,
    memory_size: u64,
}

/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by MSHV.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

impl MshvHypervisor {
    /// Create a hypervisor based on Mshv
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let mshv_obj =
            Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
        Ok(Arc::new(MshvHypervisor { mshv: mshv_obj }))
    }
    /// Check if the hypervisor is available
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/mshv") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}
/// Implementation of Hypervisor trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// // MshvHypervisor::new() already returns an Arc<dyn hypervisor::Hypervisor>.
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> =
///     hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
///
impl hypervisor::Hypervisor for MshvHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Mshv
    }
    /// Create an mshv VM object and return it as a Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::MshvHypervisor;
    /// let hypervisor = MshvHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.mshv.create_vm() {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // EINTR means the ioctl was interrupted; retry, since
                        // this cannot be considered a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        // The default Microsoft Hypervisor behavior for an unimplemented MSR
        // is to send a fault to the guest when it is accessed. Override this
        // behavior with a more suitable option: ignore writes from the guest
        // and return zero when the guest attempts to read an unimplemented
        // MSR.
        fd.set_partition_property(
            hv_partition_property_code_HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION,
            hv_unimplemented_msr_action_HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;

        let msr_list = self.get_msr_list()?;
        let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
        let mut msrs: Vec<MsrEntry> = vec![
            MsrEntry {
                ..Default::default()
            };
            num_msrs
        ];
        let indices = msr_list.as_slice();
        for (pos, index) in indices.iter().enumerate() {
            msrs[pos].index = *index;
        }
        let vm_fd = Arc::new(fd);

        Ok(Arc::new(MshvVm {
            fd: vm_fd,
            msrs,
            dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
        }))
    }
    ///
    /// Get the supported CpuID
    ///
    fn get_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        Ok(Vec::new())
    }
}

/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    fd: VcpuFd,
    vp_index: u8,
    cpuid: Vec<CpuIdEntry>,
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
}

/// Implementation of Vcpu trait for Microsoft Hypervisor
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> =
///     hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// // Accessors such as vcpu.get_regs() and vcpu.set_regs(&regs) are then available.
///
impl cpu::Vcpu for MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        Ok(self
            .fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        Ok(self
            .fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: mshv_bindings::FloatingPointUnit = (*fpu).clone().into();
        self.fd
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        let succ = self
            .fd
            .get_msrs(&mut mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &mshv_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        self.fd
            .set_msrs(&mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }
    #[allow(non_upper_case_globals)]
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        let hv_message: hv_message = hv_message::default();
        match self.fd.run(hv_message) {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    // SAFETY: access_info is valid, otherwise we won't be here
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    /*
                     * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
                     *
                     * Cloud Hypervisor doesn't support fw_cfg at the moment. It does support
                     * 0x402 under the "fwdebug" feature flag, but that feature is not enabled
                     * by default and is considered legacy.
                     *
                     * OVMF unconditionally pokes these IO ports with string IO.
                     *
                     * Instead of implementing string IO support now, which would not buy us
                     * much, skip these ports explicitly to avoid panicking.
                     *
                     * Proper string IO support can be added once we gain the ability to
                     * translate guest virtual addresses to guest physical addresses on MSHV.
                     */
                    match port {
                        0x402 | 0x510 | 0x511 | 0x514 => {
                            let insn_len = info.header.instruction_length() as u64;

                            /* Advance RIP and update RAX */
                            let arr_reg_name_value = [
                                (
                                    hv_register_name::HV_X64_REGISTER_RIP,
                                    info.header.rip + insn_len,
                                ),
                                (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                            ];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                            return Ok(cpu::VmExit::Ignore);
                        }
                        _ => {}
                    }

                    // SAFETY: access_info is valid, otherwise we won't be here
                    assert!(
                        (unsafe { access_info.__bindgen_anon_1.string_op() } != 1),
                        "String IN/OUT not supported"
                    );
                    assert!(
                        (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1),
                        "Rep IN/OUT not supported"
                    );

                    if is_write {
                        let data = (info.rax as u32).to_le_bytes();
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_write(port.into(), &data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }
                    } else {
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_read(port.into(), &mut data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }

                        let v = u32::from_le_bytes(data);
                        /* Preserve high bits in EAX but clear out high bits in RAX */
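                        // Worked example (not in the original source): a
                        // 1-byte IN with info.rax = 0x1234 and v = 0xab gives
                        // mask = 0xff and eax = 0x12ab; the final cast back
                        // to u64 clears bits 32..63 of RAX.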
                        let mask = 0xffffffff >> (32 - len * 8);
                        let eax = (info.rax as u32 & !mask) | (v & mask);
                        ret_rax = eax as u64;
                    }

                    let insn_len = info.header.instruction_length() as u64;

                    /* Advance RIP and update RAX */
                    let arr_reg_name_value = [
                        (
                            hv_register_name::HV_X64_REGISTER_RIP,
                            info.header.rip + insn_len,
                        ),
                        (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                    ];
                    set_registers_64!(self.fd, arr_reg_name_value)
                        .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_UNMAPPED_GPA => {
                    let info = x.to_memory_info().unwrap();
                    let insn_len = info.instruction_byte_count as usize;
                    assert!(insn_len > 0 && insn_len <= 16);

                    let mut context = MshvEmulatorContext {
                        vcpu: self,
                        map: (info.guest_virtual_address, info.guest_physical_address),
                    };

                    // Create a new emulator.
                    let mut emul = Emulator::new(&mut context);

                    // Emulate the trapped instruction, and only the first one.
                    let new_state = emul
                        .emulate_first_insn(self.vp_index as usize, &info.instruction_bytes)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Set CPU state back.
                    context
                        .set_cpu_state(self.vp_index as usize, new_state)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
                    let info = x.to_cpuid_info().unwrap();
                    debug!("cpuid eax: {:x}", { info.rax });
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
                    let info = x.to_msr_info().unwrap();
                    if info.header.intercept_access_type == 0 {
                        debug!("msr read: {:x}", { info.msr_number });
                    } else {
                        debug!("msr write: {:x}", { info.msr_number });
                    }
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
                    //TODO: Handler for VMCALL here.
                    let info = x.to_exception_info().unwrap();
                    debug!("Exception Info {:?}", { info.exception_vector });
                    Ok(cpu::VmExit::Ignore)
                }
                exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unhandled VCPU exit {:?}",
                    exit
                ))),
            },

            Err(e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, _cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
        Ok(self.cpuid.clone())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        Ok(self
            .fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, lapic: &LapicState) -> cpu::Result<()> {
        let lapic: mshv_bindings::LapicState = (*lapic).clone().into();
        self.fd
            .set_lapic(&lapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    ///
    /// Returns the vcpu's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        Ok(MpState::Mshv)
    }
    ///
    /// Sets the vcpu's current "multiprocessing state".
    ///
    fn set_mp_state(&self, _mp_state: MpState) -> cpu::Result<()> {
        Ok(())
    }
    ///
    /// Set CPU state
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuMshvState = state.clone().into();
        self.set_msrs(&state.msrs)?;
        self.set_vcpu_events(&state.vcpu_events)?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_fpu(&state.fpu)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic)?;
        self.set_xsave(&state.xsave)?;
        // These registers are global and need to be set only for the first
        // vCPU, as the Microsoft Hypervisor allows setting this register for
        // only one vCPU.
        if self.vp_index == 0 {
            self.fd
                .set_misc_regs(&state.misc)
                .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))?
        }
        self.fd
            .set_debug_regs(&state.dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
        Ok(())
    }
    ///
    /// Get CPU State
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xcrs = self.get_xcrs()?;
        let fpu = self.get_fpu()?;
        let vcpu_events = self.get_vcpu_events()?;
        let mut msrs = self.msrs.clone();
        self.get_msrs(&mut msrs)?;
        let lapic = self.get_lapic()?;
        let xsave = self.get_xsave()?;
        let misc = self
            .fd
            .get_misc_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?;
        let dbg = self
            .fd
            .get_debug_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;

        Ok(VcpuMshvState {
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            xcrs,
            lapic,
            dbg,
            xsave,
            misc,
        }
        .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Translate guest virtual address to guest physical address
    ///
    fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> {
        let r = self
            .fd
            .translate_gva(gva, flags)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;

        let gpa = r.0;
        // SAFETY: r is valid, otherwise this function will have returned
        let result_code = unsafe { r.1.__bindgen_anon_1.result_code };

        Ok((gpa, result_code))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU
    ///
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr!(msr_index::MSR_IA32_TSC),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }
}

impl MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
}

struct MshvEmulatorContext<'a> {
    vcpu: &'a MshvVcpu,
    map: (u64, u64), // Initial GVA to GPA mapping provided by the hypervisor
}

impl<'a> MshvEmulatorContext<'a> {
    // Do the actual gva -> gpa translation
    #[allow(non_upper_case_globals)]
    fn translate(&self, gva: u64) -> Result<u64, PlatformError> {
        if self.map.0 == gva {
            return Ok(self.map.1);
        }

        // TODO: More fine-grained control for the flags
        let flags = HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE;

        let (gpa, result_code) = self
            .vcpu
            .translate_gva(gva, flags.into())
            .map_err(|e| PlatformError::TranslateVirtualAddress(anyhow!(e)))?;

        match result_code {
            hv_translate_gva_result_code_HV_TRANSLATE_GVA_SUCCESS => Ok(gpa),
            _ => Err(PlatformError::TranslateVirtualAddress(anyhow!(result_code))),
        }
    }
}

/// Platform emulation for Hyper-V
impl<'a> PlatformEmulator for MshvEmulatorContext<'a> {
    type CpuState = EmulatorCpuState;

    fn read_memory(&self, gva: u64, data: &mut [u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory read {} bytes from [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vm_ops) = &self.vcpu.vm_ops {
            if vm_ops.guest_mem_read(gpa, data).is_err() {
                vm_ops
                    .mmio_read(gpa, data)
                    .map_err(|e| PlatformError::MemoryReadFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn write_memory(&mut self, gva: u64, data: &[u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory write {} bytes at [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vm_ops) = &self.vcpu.vm_ops {
            if vm_ops.guest_mem_write(gpa, data).is_err() {
                vm_ops
                    .mmio_write(gpa, data)
                    .map_err(|e| PlatformError::MemoryWriteFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn cpu_state(&self, cpu_id: usize) -> Result<Self::CpuState, PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::GetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        let regs = self
            .vcpu
            .get_regs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;
        let sregs = self
            .vcpu
            .get_sregs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;

        debug!("mshv emulator: Getting new CPU state");
        debug!("mshv emulator: {:#x?}", regs);

        Ok(EmulatorCpuState { regs, sregs })
    }

    fn set_cpu_state(&self, cpu_id: usize, state: Self::CpuState) -> Result<(), PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::SetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        debug!("mshv emulator: Setting new CPU state");
        debug!("mshv emulator: {:#x?}", state.regs);

        self.vcpu
            .set_regs(&state.regs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))?;
        self.vcpu
            .set_sregs(&state.sregs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))
    }

    fn gva_to_gpa(&self, gva: u64) -> Result<u64, PlatformError> {
        self.translate(gva)
    }

    fn fetch(&self, _ip: u64, _instruction_bytes: &mut [u8]) -> Result<(), PlatformError> {
        Err(PlatformError::MemoryReadFailure(anyhow!("unimplemented")))
    }
}

/// Wrapper over Mshv VM ioctls.
pub struct MshvVm {
    fd: Arc<VmFd>,
    msrs: Vec<MsrEntry>,
    dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
}

impl MshvVm {
    ///
    /// Creates an in-kernel device.
    ///
    /// See the documentation for `MSHV_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<VfioDeviceFd> {
        let device_fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        Ok(VfioDeviceFd::new_from_mshv(device_fd))
    }
}

///
/// Implementation of Vm trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// # extern crate hypervisor;
/// # use hypervisor::MshvHypervisor;
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = MshvHypervisor::new().unwrap();
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// // Vm trait methods such as vm.register_irqfd(&fd, gsi) are then available.
///
impl vm::Vm for MshvVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Creates a Vcpu, with the specified id, for this VM.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vcpu_fd = self
            .fd
            .create_vcpu(id)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = MshvVcpu {
            fd: vcpu_fd,
            vp_index: id,
            cpuid: Vec::new(),
            msrs: self.msrs.clone(),
            vm_ops,
        };
        Ok(Arc::new(vcpu))
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
        Ok(())
    }
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<DataMatch>,
    ) -> vm::Result<()> {
        let addr = &mshv_ioctls::IoEventAddress::from(*addr);
        debug!(
            "register_ioevent fd {} addr {:x?} datamatch {:?}",
            fd.as_raw_fd(),
            addr,
            datamatch
        );
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(mshv_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(mshv_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }
    /// Unregister an event from a certain address it has been previously registered to.
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        let addr = &mshv_ioctls::IoEventAddress::from(*addr);
        debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);

        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    /// Creates a guest physical memory region.
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // We keep track of the slots whether they are read-only or not. Even
        // for a read-only slot the hypervisor can enable the dirty bits, but
        // a VM exit happens before the dirty bits are set.
        self.dirty_log_slots.write().unwrap().insert(
            user_memory_region.guest_pfn,
            MshvDirtyLogSlot {
                guest_pfn: user_memory_region.guest_pfn,
                memory_size: user_memory_region.size,
            },
        );

        self.fd
            .map_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
        Ok(())
    }

    /// Removes a guest physical memory region.
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots
            .write()
            .unwrap()
            .remove(&user_memory_region.guest_pfn);

        self.fd
            .unmap_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
        Ok(())
    }

    fn make_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        _log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        let mut flags = HV_MAP_GPA_READABLE | HV_MAP_GPA_EXECUTABLE;
        if !readonly {
            flags |= HV_MAP_GPA_WRITABLE;
        }

        mshv_user_mem_region {
            flags,
            guest_pfn: guest_phys_addr >> PAGE_SHIFT,
            size: memory_size,
            userspace_addr: userspace_addr as u64,
        }
        .into()
    }

    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = mshv_create_device {
            type_: mshv_device_type_MSHV_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match config {
            InterruptSourceConfig::MsiIrq(cfg) => mshv_msi_routing_entry {
                gsi,
                address_lo: cfg.low_addr,
                address_hi: cfg.high_addr,
                data: cfg.data,
            }
            .into(),
            _ => {
                unreachable!()
            }
        }
    }

    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut msi_routing =
            vec_with_array_field::<mshv_msi_routing, mshv_msi_routing_entry>(entries.len());
        msi_routing[0].nr = entries.len() as u32;

        let entries: Vec<mshv_msi_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Mshv(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: msi_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [mshv_msi_routing_entry] =
                msi_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_msi_routing(&msi_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        self.fd
            .enable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
    }
    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        // Before disabling dirty page tracking we need to set the dirty bits
        // in the hypervisor. This is a requirement of the Microsoft Hypervisor.
        for (_, s) in dirty_log_slots.iter() {
            self.fd
                .get_dirty_log(s.guest_pfn, s.memory_size as usize, DIRTY_BITMAP_SET_DIRTY)
                .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        }
        self.fd
            .disable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        Ok(())
    }
    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(
                base_gpa >> PAGE_SHIFT,
                memory_size as usize,
                DIRTY_BITMAP_CLEAR_DIRTY,
            )
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }
    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        Ok(ClockData::Mshv)
    }
    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, _data: &ClockData) -> vm::Result<()> {
        Ok(())
    }
    /// Downcast to the underlying MshvVm type
    fn as_any(&self) -> &dyn Any {
        self
    }
}