// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//

use crate::arch::emulator::{PlatformEmulator, PlatformError};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::emulator::{Emulator, EmulatorCpuState};
use crate::cpu;
use crate::cpu::Vcpu;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, VmmOps};
pub use mshv_bindings::*;
pub use mshv_ioctls::IoEventAddress;
use mshv_ioctls::{set_registers_64, Mshv, NoDatamatch, VcpuFd, VmFd};
use serde_derive::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use vm::DataMatch;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
use crate::device;
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
pub use x86_64::VcpuMshvState as CpuState;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;

#[cfg(target_arch = "x86_64")]
use std::fs::File;
use std::os::unix::io::AsRawFd;

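// Flags passed to the dirty-log ioctls below, plus the page shift used to
// convert a guest physical address into a guest page frame number
// (GPA >> PAGE_SHIFT, i.e. 4 KiB pages).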
const DIRTY_BITMAP_CLEAR_DIRTY: u64 = 0x4;
const DIRTY_BITMAP_SET_DIRTY: u64 = 0x8;
pub const PAGE_SHIFT: usize = 12;

#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
pub struct HvState {
    hypercall_page: u64,
}

pub use HvState as VmState;

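/// Bookkeeping for a memory slot, kept so that its dirty bits can be
/// force-set before dirty page tracking is disabled (see `stop_dirty_log`).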
struct MshvDirtyLogSlot {
    guest_pfn: u64,
    memory_size: u64,
}

/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    /// Create a hypervisor based on Mshv
    pub fn new() -> hypervisor::Result<MshvHypervisor> {
        let mshv_obj =
            Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
        Ok(MshvHypervisor { mshv: mshv_obj })
    }
}
/// Implementation of Hypervisor trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
///
impl hypervisor::Hypervisor for MshvHypervisor {
    /// Create an MSHV VM object and return it as a Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::MshvHypervisor;
    /// use hypervisor::MshvVm;
    /// let hypervisor = MshvHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.mshv.create_vm() {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // The ioctl was interrupted (EINTR). This cannot be
                        // considered a regular error, so retry the call.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let msr_list = self.get_msr_list()?;
        let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
        let mut msrs = MsrEntries::new(num_msrs).unwrap();
        let indices = msr_list.as_slice();
        let msr_entries = msrs.as_mut_slice();
        for (pos, index) in indices.iter().enumerate() {
            msr_entries[pos].index = *index;
        }
        let vm_fd = Arc::new(fd);

        Ok(Arc::new(MshvVm {
            fd: vm_fd,
            msrs,
            hv_state: hv_state_init(),
            vmmops: None,
            dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
        }))
    }
    ///
    /// Get the supported CPUID
    ///
    fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
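        // Note: no CPUID list is queried from MSHV here; a placeholder with
        // a single zeroed entry is returned instead.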
        Ok(CpuId::new(1).unwrap())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by MSHV.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

#[allow(dead_code)]
/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    fd: VcpuFd,
    vp_index: u8,
    cpuid: CpuId,
    msrs: MsrEntries,
    hv_state: Arc<RwLock<HvState>>, // Mshv State
    vmmops: Option<Arc<dyn vm::VmmOps>>,
}

/// Implementation of Vcpu trait for Microsoft Hypervisor
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// let regs = vcpu.get_regs().unwrap();
///
impl cpu::Vcpu for MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        self.fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        self.fd
            .set_regs(regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        self.fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        self.fd
            .set_sregs(sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        self.fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        self.fd
            .set_fpu(fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> {
        self.fd
            .get_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> {
        self.fd
            .set_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }
    #[allow(non_upper_case_globals)]
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        // SAFETY: `hv_message` is a plain data struct, so zero-initializing
        // it is safe.
        // TODO: don't zero it every time we enter this function.
        let hv_message: hv_message = unsafe { std::mem::zeroed() };
        match self.fd.run(hv_message) {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    /*
                     * XXX: Ignore the QEMU fw_cfg ports (0x510 selector,
                     * 0x511 data, 0x514 DMA) and the debug console port
                     * (0x402).
                     *
                     * Cloud Hypervisor doesn't support fw_cfg at the moment.
                     * It does support 0x402 under the "fwdebug" feature flag,
                     * but that feature is not enabled by default and is
                     * considered legacy.
                     *
                     * OVMF unconditionally pokes these IO ports with string IO.
                     *
                     * Instead of implementing string IO support, which would
                     * not accomplish much at this point, skip these ports
                     * explicitly to avoid panicking.
                     *
                     * Proper string IO support can be added once we gain the
                     * ability to translate guest virtual addresses to guest
                     * physical addresses on MSHV.
                     */
                    match port {
                        0x402 | 0x510 | 0x511 | 0x514 => {
                            let insn_len = info.header.instruction_length() as u64;

                            /* Advance RIP and update RAX */
                            let arr_reg_name_value = [
                                (
                                    hv_register_name::HV_X64_REGISTER_RIP,
                                    info.header.rip + insn_len,
                                ),
                                (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                            ];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                            return Ok(cpu::VmExit::Ignore);
                        }
                        _ => {}
                    }

                    if unsafe { access_info.__bindgen_anon_1.string_op() } == 1 {
                        panic!("String IN/OUT not supported");
                    }
                    if unsafe { access_info.__bindgen_anon_1.rep_prefix() } == 1 {
                        panic!("Rep IN/OUT not supported");
                    }

                    if is_write {
                        let data = (info.rax as u32).to_le_bytes();
                        if let Some(vmmops) = &self.vmmops {
                            vmmops
                                .pio_write(port.into(), &data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }
                    } else {
                        if let Some(vmmops) = &self.vmmops {
                            vmmops
                                .pio_read(port.into(), &mut data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }

                        let v = u32::from_le_bytes(data);
                        /* Preserve high bits in EAX but clear out high bits in RAX */
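                        // e.g. a 1-byte access gives len = 1 and mask = 0xff:
                        // only AL is replaced with the value read, EAX bits
                        // 8..31 keep their previous contents, and RAX bits
                        // 32..63 end up cleared.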
                        let mask = 0xffffffff >> (32 - len * 8);
                        let eax = (info.rax as u32 & !mask) | (v & mask);
                        ret_rax = eax as u64;
                    }

                    let insn_len = info.header.instruction_length() as u64;

                    /* Advance RIP and update RAX */
                    let arr_reg_name_value = [
                        (
                            hv_register_name::HV_X64_REGISTER_RIP,
                            info.header.rip + insn_len,
                        ),
                        (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                    ];
                    set_registers_64!(self.fd, arr_reg_name_value)
                        .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_UNMAPPED_GPA => {
                    let info = x.to_memory_info().unwrap();
                    let insn_len = info.instruction_byte_count as usize;
                    assert!(insn_len > 0 && insn_len <= 16);

                    let mut context = MshvEmulatorContext {
                        vcpu: self,
                        map: (info.guest_virtual_address, info.guest_physical_address),
                    };

                    // Create a new emulator.
                    let mut emul = Emulator::new(&mut context);

                    // Emulate the trapped instruction, and only the first one.
                    let new_state = emul
                        .emulate_first_insn(self.vp_index as usize, &info.instruction_bytes)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Set CPU state back.
                    context
                        .set_cpu_state(self.vp_index as usize, new_state)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
                    let info = x.to_cpuid_info().unwrap();
                    debug!("cpuid eax: {:x}", { info.rax });
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
                    let info = x.to_msr_info().unwrap();
                    if info.header.intercept_access_type == 0 {
                        debug!("msr read: {:x}", { info.msr_number });
                    } else {
                        debug!("msr write: {:x}", { info.msr_number });
                    }
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
                    //TODO: Handler for VMCALL here.
                    let info = x.to_exception_info().unwrap();
                    debug!("Exception Info {:?}", { info.exception_vector });
                    Ok(cpu::VmExit::Ignore)
                }
                exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unhandled VCPU exit {:?}",
                    exit
                ))),
            },

            Err(e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, _cpuid: &CpuId) -> cpu::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<CpuId> {
        Ok(self.cpuid.clone())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        self.fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, lapic: &LapicState) -> cpu::Result<()> {
        self.fd
            .set_lapic(lapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    ///
    /// Set CPU state
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        self.set_msrs(&state.msrs)?;
        self.set_vcpu_events(&state.vcpu_events)?;
        self.set_regs(&state.regs)?;
        self.set_sregs(&state.sregs)?;
        self.set_fpu(&state.fpu)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic)?;
        self.set_xsave(&state.xsave)?;
        self.fd
            .set_debug_regs(&state.dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
        Ok(())
    }
    ///
    /// Get CPU State
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xcrs = self.get_xcrs()?;
        let fpu = self.get_fpu()?;
        let vcpu_events = self.get_vcpu_events()?;
        let mut msrs = self.msrs.clone();
        self.get_msrs(&mut msrs)?;
        let lapic = self.get_lapic()?;
        let xsave = self.get_xsave()?;
        let dbg = self
            .fd
            .get_debug_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;
        Ok(CpuState {
            msrs,
            vcpu_events,
            regs,
            sregs,
            fpu,
            xcrs,
            lapic,
            dbg,
            xsave,
        })
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Translate guest virtual address to guest physical address
    ///
    fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, hv_translate_gva_result)> {
        let r = self
            .fd
            .translate_gva(gva, flags)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;

        Ok(r)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "suspend registers".
    ///
    fn get_suspend_regs(&self) -> cpu::Result<SuspendRegisters> {
        self.fd
            .get_suspend_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetSuspendRegs(e.into()))
    }
}

struct MshvEmulatorContext<'a> {
    vcpu: &'a MshvVcpu,
    map: (u64, u64), // Initial GVA to GPA mapping provided by the hypervisor
}

impl<'a> MshvEmulatorContext<'a> {
    // Do the actual gva -> gpa translation
    #[allow(non_upper_case_globals)]
    fn translate(&self, gva: u64) -> Result<u64, PlatformError> {
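        // Fast path: reuse the GVA -> GPA mapping the hypervisor supplied
        // for the faulting access before falling back to `translate_gva`.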
        if self.map.0 == gva {
            return Ok(self.map.1);
        }

        // TODO: More fine-grained control for the flags
        let flags = HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE;

        let r = self
            .vcpu
            .translate_gva(gva, flags.into())
            .map_err(|e| PlatformError::TranslateVirtualAddress(anyhow!(e)))?;

        let result_code = unsafe { r.1.__bindgen_anon_1.result_code };
        match result_code {
            hv_translate_gva_result_code_HV_TRANSLATE_GVA_SUCCESS => Ok(r.0),
            _ => Err(PlatformError::TranslateVirtualAddress(anyhow!(result_code))),
        }
    }
}

/// Platform emulation for Hyper-V
impl<'a> PlatformEmulator for MshvEmulatorContext<'a> {
    type CpuState = EmulatorCpuState;

    fn read_memory(&self, gva: u64, data: &mut [u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory read {} bytes from [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vmmops) = &self.vcpu.vmmops {
            if vmmops.guest_mem_read(gpa, data).is_err() {
                vmmops
                    .mmio_read(gpa, data)
                    .map_err(|e| PlatformError::MemoryReadFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn write_memory(&mut self, gva: u64, data: &[u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory write {} bytes at [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vmmops) = &self.vcpu.vmmops {
            if vmmops.guest_mem_write(gpa, data).is_err() {
                vmmops
                    .mmio_write(gpa, data)
                    .map_err(|e| PlatformError::MemoryWriteFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn cpu_state(&self, cpu_id: usize) -> Result<Self::CpuState, PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::GetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        let regs = self
            .vcpu
            .get_regs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;
        let sregs = self
            .vcpu
            .get_sregs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;

        debug!("mshv emulator: Getting new CPU state");
        debug!("mshv emulator: {:#x?}", regs);

        Ok(EmulatorCpuState { regs, sregs })
    }

    fn set_cpu_state(&self, cpu_id: usize, state: Self::CpuState) -> Result<(), PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::SetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        debug!("mshv emulator: Setting new CPU state");
        debug!("mshv emulator: {:#x?}", state.regs);

        self.vcpu
            .set_regs(&state.regs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))?;
        self.vcpu
            .set_sregs(&state.sregs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))
    }

    fn gva_to_gpa(&self, gva: u64) -> Result<u64, PlatformError> {
        self.translate(gva)
    }

    fn fetch(&self, _ip: u64, _instruction_bytes: &mut [u8]) -> Result<(), PlatformError> {
        Err(PlatformError::MemoryReadFailure(anyhow!("unimplemented")))
    }
}

#[allow(dead_code)]
/// Wrapper over Mshv VM ioctls.
pub struct MshvVm {
    fd: Arc<VmFd>,
    msrs: MsrEntries,
    // Hypervisor State
    hv_state: Arc<RwLock<HvState>>,
    vmmops: Option<Arc<dyn vm::VmmOps>>,
    dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
}

fn hv_state_init() -> Arc<RwLock<HvState>> {
    Arc::new(RwLock::new(HvState { hypercall_page: 0 }))
}

///
/// Implementation of Vm trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// # extern crate hypervisor;
/// # use hypervisor::MshvHypervisor;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let state = vm.state().unwrap();
///
impl vm::Vm for MshvVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    /// Not required on MSHV, so this is a no-op.
    ///
    fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Creates a vCPU with the specified id and returns it as a Vcpu trait object.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vmmops: Option<Arc<dyn VmmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vcpu_fd = self
            .fd
            .create_vcpu(id)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = MshvVcpu {
            fd: vcpu_fd,
            vp_index: id,
            cpuid: CpuId::new(1).unwrap(),
            msrs: self.msrs.clone(),
            hv_state: self.hv_state.clone(),
            vmmops,
        };
        Ok(Arc::new(vcpu))
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
        Ok(())
    }
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<DataMatch>,
    ) -> vm::Result<()> {
        debug!(
            "register_ioevent fd {} addr {:x?} datamatch {:?}",
            fd.as_raw_fd(),
            addr,
            datamatch
        );
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(mshv_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(mshv_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }
    /// Unregister an event from a certain address it has been previously registered to.
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);

        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    /// Creates a guest physical memory region.
    fn create_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        // Keep track of the slots whether they are read-only or not. Even
        // for a read-only slot the hypervisor can enable dirty-bit tracking,
        // as a VM exit happens before the dirty bits would be set.
        self.dirty_log_slots.write().unwrap().insert(
            user_memory_region.guest_pfn,
            MshvDirtyLogSlot {
                guest_pfn: user_memory_region.guest_pfn,
                memory_size: user_memory_region.size,
            },
        );

        self.fd
            .map_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
        Ok(())
    }

    /// Removes a guest physical memory region.
    fn remove_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots
            .write()
            .unwrap()
            .remove(&user_memory_region.guest_pfn);

        self.fd
            .unmap_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
        Ok(())
    }

    fn make_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        _log_dirty_pages: bool,
    ) -> MemoryRegion {
        let mut flags = HV_MAP_GPA_READABLE | HV_MAP_GPA_EXECUTABLE;
        if !readonly {
            flags |= HV_MAP_GPA_WRITABLE;
        }

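        // MSHV takes a guest frame number rather than a raw guest physical
        // address, hence the PAGE_SHIFT conversion below.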
        mshv_user_mem_region {
            flags,
            guest_pfn: guest_phys_addr >> PAGE_SHIFT,
            size: memory_size,
            userspace_addr,
        }
    }

    fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
        Err(vm::HypervisorVmError::CreatePassthroughDevice(anyhow!(
            "No passthrough support"
        )))
    }

    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
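        // `mshv_msi_routing` ends in a flexible array member, so it is
        // allocated with enough trailing space to hold all routing entries.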
        let mut msi_routing =
            vec_with_array_field::<mshv_msi_routing, mshv_msi_routing_entry>(entries.len());
        msi_routing[0].nr = entries.len() as u32;

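        // SAFETY: the allocation above reserved room for `entries.len()`
        // trailing entries, so taking a mutable slice of that length is valid.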
        unsafe {
            let entries_slice: &mut [mshv_msi_routing_entry] =
                msi_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(entries);
        }

        self.fd
            .set_msi_routing(&msi_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
    ///
    /// Get the Vm state. Return VM specific data
    ///
    fn state(&self) -> vm::Result<VmState> {
        Ok(*self.hv_state.read().unwrap())
    }
    ///
    /// Set the VM state
    ///
    fn set_state(&self, state: VmState) -> vm::Result<()> {
        self.hv_state.write().unwrap().hypercall_page = state.hypercall_page;
        Ok(())
    }
    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        self.fd
            .enable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
    }
    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        // Before disabling dirty page tracking we need to set the dirty
        // bits in the hypervisor. This is a requirement of the Microsoft
        // Hypervisor.
        for s in dirty_log_slots.values() {
            self.fd
                .get_dirty_log(s.guest_pfn, s.memory_size as usize, DIRTY_BITMAP_SET_DIRTY)
                .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        }
        self.fd
            .disable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        Ok(())
    }
    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
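        // Fetch the bitmap for the region and clear the dirty bits as part
        // of the read (DIRTY_BITMAP_CLEAR_DIRTY), so a subsequent call only
        // reports pages dirtied since this one.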
        self.fd
            .get_dirty_log(
                base_gpa >> PAGE_SHIFT,
                memory_size as usize,
                DIRTY_BITMAP_CLEAR_DIRTY,
            )
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }
}
pub use hv_cpuid_entry as CpuIdEntry;

pub type IrqRoutingEntry = mshv_msi_routing_entry;

pub const CPUID_FLAG_VALID_INDEX: u32 = 0;
949