// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//

use crate::arch::emulator::{PlatformEmulator, PlatformError};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::emulator::{Emulator, EmulatorCpuState};
use crate::cpu;
use crate::cpu::Vcpu;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, VmmOps};
pub use mshv_bindings::*;
pub use mshv_ioctls::IoEventAddress;
use mshv_ioctls::{set_registers_64, Mshv, NoDatamatch, VcpuFd, VmFd};
use serde_derive::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use vm::DataMatch;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
use crate::device;
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
pub use x86_64::VcpuMshvState as CpuState;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;

#[cfg(target_arch = "x86_64")]
use std::fs::File;
use std::os::unix::io::{AsRawFd, RawFd};

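// Bitmap operation flags passed to get_dirty_log(): DIRTY_BITMAP_CLEAR_DIRTY
// clears the dirty state of the queried pages, DIRTY_BITMAP_SET_DIRTY marks
// them dirty.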
const DIRTY_BITMAP_CLEAR_DIRTY: u64 = 0x4;
const DIRTY_BITMAP_SET_DIRTY: u64 = 0x8;

///
/// Export generically-named wrappers of mshv-bindings for Unix-based platforms
///
pub use {
    mshv_bindings::mshv_create_device as CreateDevice,
    mshv_bindings::mshv_device_attr as DeviceAttr,
    mshv_bindings::mshv_msi_routing_entry as IrqRoutingEntry, mshv_ioctls::DeviceFd,
};

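// Guest pages are 4 KiB: shifting a guest physical address right by
// PAGE_SHIFT yields its guest page frame number (PFN).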
pub const PAGE_SHIFT: usize = 12;

#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
pub struct HvState {
    hypercall_page: u64,
}

pub use HvState as VmState;

struct MshvDirtyLogSlot {
    guest_pfn: u64,
    memory_size: u64,
}

/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    /// Create a hypervisor based on Mshv
    pub fn new() -> hypervisor::Result<MshvHypervisor> {
        let mshv_obj =
            Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
        Ok(MshvHypervisor { mshv: mshv_obj })
    }
}
/// Implementation of the Hypervisor trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
///
impl hypervisor::Hypervisor for MshvHypervisor {
    /// Create an MSHV VM object and return it as a Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::MshvHypervisor;
    /// use hypervisor::MshvVm;
    /// let hypervisor = MshvHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.mshv.create_vm() {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, the ioctl was
                        // interrupted and we have to retry, as this cannot
                        // be considered a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

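        // Pre-populate an MsrEntries array with every MSR index the
        // hypervisor supports, so vCPUs can save and restore them wholesale.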
        let msr_list = self.get_msr_list()?;
        let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
        let mut msrs = MsrEntries::new(num_msrs).unwrap();
        let indices = msr_list.as_slice();
        let msr_entries = msrs.as_mut_slice();
        for (pos, index) in indices.iter().enumerate() {
            msr_entries[pos].index = *index;
        }
        let vm_fd = Arc::new(fd);

        Ok(Arc::new(MshvVm {
            fd: vm_fd,
            msrs,
            hv_state: hv_state_init(),
            vmmops: None,
            dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
        }))
    }
    ///
    /// Get the supported CpuID
    ///
    fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
        Ok(CpuId::new(1).unwrap())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by MSHV.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

#[allow(dead_code)]
/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    fd: VcpuFd,
    vp_index: u8,
    cpuid: CpuId,
    msrs: MsrEntries,
    hv_state: Arc<RwLock<HvState>>, // Mshv State
    vmmops: Option<Arc<dyn vm::VmmOps>>,
}

/// Implementation of the Vcpu trait for Microsoft Hypervisor
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// vcpu.get/set().unwrap()
///
impl cpu::Vcpu for MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        self.fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        self.fd
            .set_regs(regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        self.fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        self.fd
            .set_sregs(sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        self.fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        self.fd
            .set_fpu(fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> {
        self.fd
            .get_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Set up the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> {
        self.fd
            .set_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }
    #[allow(non_upper_case_globals)]
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        // Safe because this is only done during initialization.
        // TODO: don't zero it every time we enter this function.
        let hv_message: hv_message = unsafe { std::mem::zeroed() };
        match self.fd.run(hv_message) {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    /*
                     * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
                     *
                     * Cloud Hypervisor doesn't support fw_cfg at the moment. It does support 0x402
                     * under the "fwdebug" feature flag. But that feature is not enabled by default
                     * and is considered legacy.
                     *
                     * OVMF unconditionally pokes these IO ports with string IO.
                     *
                     * Instead of trying to implement string IO support, which would not buy us
                     * much right now, skip those ports explicitly to avoid panicking.
                     *
                     * Proper string IO support can be added once we gain the ability to translate
                     * guest virtual addresses to guest physical addresses on MSHV.
                     */
                    match port {
                        0x402 | 0x510 | 0x511 | 0x514 => {
                            let insn_len = info.header.instruction_length() as u64;

                            /* Advance RIP and update RAX */
                            let arr_reg_name_value = [
                                (
                                    hv_register_name::HV_X64_REGISTER_RIP,
                                    info.header.rip + insn_len,
                                ),
                                (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                            ];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                            return Ok(cpu::VmExit::Ignore);
                        }
                        _ => {}
                    }

                    if unsafe { access_info.__bindgen_anon_1.string_op() } == 1 {
                        panic!("String IN/OUT not supported");
                    }
                    if unsafe { access_info.__bindgen_anon_1.rep_prefix() } == 1 {
                        panic!("Rep IN/OUT not supported");
                    }

                    if is_write {
                        let data = (info.rax as u32).to_le_bytes();
                        if let Some(vmmops) = &self.vmmops {
                            vmmops
                                .pio_write(port.into(), &data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }
                    } else {
                        if let Some(vmmops) = &self.vmmops {
                            vmmops
                                .pio_read(port.into(), &mut data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }

                        let v = u32::from_le_bytes(data);
                        /* Preserve high bits in EAX but clear out high bits in RAX */
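                        // e.g. a 1-byte IN gives len = 1 and mask = 0xff: only AL is
                        // replaced, EAX bits 8..31 are preserved, and the `as u64`
                        // cast below clears RAX bits 32..63.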
                        let mask = 0xffffffff >> (32 - len * 8);
                        let eax = (info.rax as u32 & !mask) | (v & mask);
                        ret_rax = eax as u64;
                    }

                    let insn_len = info.header.instruction_length() as u64;

                    /* Advance RIP and update RAX */
                    let arr_reg_name_value = [
                        (
                            hv_register_name::HV_X64_REGISTER_RIP,
                            info.header.rip + insn_len,
                        ),
                        (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                    ];
                    set_registers_64!(self.fd, arr_reg_name_value)
                        .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_UNMAPPED_GPA => {
                    let info = x.to_memory_info().unwrap();
                    let insn_len = info.instruction_byte_count as usize;
                    assert!(insn_len > 0 && insn_len <= 16);

                    let mut context = MshvEmulatorContext {
                        vcpu: self,
                        map: (info.guest_virtual_address, info.guest_physical_address),
                    };

                    // Create a new emulator.
                    let mut emul = Emulator::new(&mut context);

                    // Emulate the trapped instruction, and only the first one.
                    let new_state = emul
                        .emulate_first_insn(self.vp_index as usize, &info.instruction_bytes)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Set CPU state back.
                    context
                        .set_cpu_state(self.vp_index as usize, new_state)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
                    let info = x.to_cpuid_info().unwrap();
                    debug!("cpuid eax: {:x}", { info.rax });
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
                    let info = x.to_msr_info().unwrap();
                    if info.header.intercept_access_type == 0 {
                        debug!("msr read: {:x}", { info.msr_number });
                    } else {
                        debug!("msr write: {:x}", { info.msr_number });
                    }
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
                    //TODO: Handler for VMCALL here.
                    let info = x.to_exception_info().unwrap();
                    debug!("Exception Info {:?}", { info.exception_vector });
                    Ok(cpu::VmExit::Ignore)
                }
                exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unhandled VCPU exit {:?}",
                    exit
                ))),
            },

            Err(e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to set up the CPUID registers.
    ///
    fn set_cpuid2(&self, _cpuid: &CpuId) -> cpu::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<CpuId> {
        Ok(self.cpuid.clone())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        self.fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, lapic: &LapicState) -> cpu::Result<()> {
        self.fd
            .set_lapic(lapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    ///
    /// Set CPU state
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        self.set_msrs(&state.msrs)?;
        self.set_vcpu_events(&state.vcpu_events)?;
        self.set_regs(&state.regs)?;
        self.set_sregs(&state.sregs)?;
        self.set_fpu(&state.fpu)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic)?;
        self.set_xsave(&state.xsave)?;
        self.fd
            .set_debug_regs(&state.dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
        Ok(())
    }
    ///
    /// Get CPU State
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xcrs = self.get_xcrs()?;
        let fpu = self.get_fpu()?;
        let vcpu_events = self.get_vcpu_events()?;
        let mut msrs = self.msrs.clone();
        self.get_msrs(&mut msrs)?;
        let lapic = self.get_lapic()?;
        let xsave = self.get_xsave()?;
        let dbg = self
            .fd
            .get_debug_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;
        Ok(CpuState {
            msrs,
            vcpu_events,
            regs,
            sregs,
            fpu,
            xcrs,
            lapic,
            dbg,
            xsave,
        })
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Translate guest virtual address to guest physical address
    ///
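    /// A sketch, assuming `vcpu` is an already-created MSHV vCPU and using
    /// the translate flags from mshv-bindings:
    /// let flags = HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE;
    /// let (gpa, _result) = vcpu.translate_gva(gva, flags.into()).unwrap();
    ///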
    fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, hv_translate_gva_result)> {
        let r = self
            .fd
            .translate_gva(gva, flags)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;

        Ok(r)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "suspend registers".
    ///
    fn get_suspend_regs(&self) -> cpu::Result<SuspendRegisters> {
        self.fd
            .get_suspend_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetSuspendRegs(e.into()))
    }
}

/// Device struct for MSHV
pub struct MshvDevice {
    fd: DeviceFd,
}

impl device::Device for MshvDevice {
    ///
    /// Set device attribute
    ///
    fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> {
        self.fd
            .set_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into()))
    }
    ///
    /// Get device attribute
    ///
    fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> {
        self.fd
            .get_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into()))
    }
}

impl AsRawFd for MshvDevice {
    fn as_raw_fd(&self) -> RawFd {
        self.fd.as_raw_fd()
    }
}

struct MshvEmulatorContext<'a> {
    vcpu: &'a MshvVcpu,
    map: (u64, u64), // Initial GVA to GPA mapping provided by the hypervisor
}

impl<'a> MshvEmulatorContext<'a> {
    // Do the actual gva -> gpa translation
    #[allow(non_upper_case_globals)]
    fn translate(&self, gva: u64) -> Result<u64, PlatformError> {
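        // Fast path: the intercept message already carries the translation
        // for the faulting GVA.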
        if self.map.0 == gva {
            return Ok(self.map.1);
        }

        // TODO: More fine-grained control for the flags
        let flags = HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE;

        let r = self
            .vcpu
            .translate_gva(gva, flags.into())
            .map_err(|e| PlatformError::TranslateVirtualAddress(anyhow!(e)))?;

        let result_code = unsafe { r.1.__bindgen_anon_1.result_code };
        match result_code {
            hv_translate_gva_result_code_HV_TRANSLATE_GVA_SUCCESS => Ok(r.0),
            _ => Err(PlatformError::TranslateVirtualAddress(anyhow!(result_code))),
        }
    }
}

/// Platform emulation for Hyper-V
impl<'a> PlatformEmulator for MshvEmulatorContext<'a> {
    type CpuState = EmulatorCpuState;

    fn read_memory(&self, gva: u64, data: &mut [u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory read {} bytes from [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

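        // Try regular guest memory first; if the GPA is not RAM-backed,
        // fall back to an MMIO read.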
        if let Some(vmmops) = &self.vcpu.vmmops {
            if vmmops.guest_mem_read(gpa, data).is_err() {
                vmmops
                    .mmio_read(gpa, data)
                    .map_err(|e| PlatformError::MemoryReadFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn write_memory(&mut self, gva: u64, data: &[u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory write {} bytes at [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

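        // Same fallback as read_memory: guest RAM first, then MMIO.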
        if let Some(vmmops) = &self.vcpu.vmmops {
            if vmmops.guest_mem_write(gpa, data).is_err() {
                vmmops
                    .mmio_write(gpa, data)
                    .map_err(|e| PlatformError::MemoryWriteFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn cpu_state(&self, cpu_id: usize) -> Result<Self::CpuState, PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::GetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        let regs = self
            .vcpu
            .get_regs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;
        let sregs = self
            .vcpu
            .get_sregs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;

        debug!("mshv emulator: Getting new CPU state");
        debug!("mshv emulator: {:#x?}", regs);

        Ok(EmulatorCpuState { regs, sregs })
    }

    fn set_cpu_state(&self, cpu_id: usize, state: Self::CpuState) -> Result<(), PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::SetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        debug!("mshv emulator: Setting new CPU state");
        debug!("mshv emulator: {:#x?}", state.regs);

        self.vcpu
            .set_regs(&state.regs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))?;
        self.vcpu
            .set_sregs(&state.sregs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))
    }

    fn gva_to_gpa(&self, gva: u64) -> Result<u64, PlatformError> {
        self.translate(gva)
    }

    fn fetch(&self, _ip: u64, _instruction_bytes: &mut [u8]) -> Result<(), PlatformError> {
        Err(PlatformError::MemoryReadFailure(anyhow!("unimplemented")))
    }
}

#[allow(dead_code)]
/// Wrapper over Mshv VM ioctls.
pub struct MshvVm {
    fd: Arc<VmFd>,
    msrs: MsrEntries,
    // Hypervisor State
    hv_state: Arc<RwLock<HvState>>,
    vmmops: Option<Arc<dyn vm::VmmOps>>,
    dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
}

fn hv_state_init() -> Arc<RwLock<HvState>> {
    Arc::new(RwLock::new(HvState { hypercall_page: 0 }))
}

///
/// Implementation of the Vm trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// # extern crate hypervisor;
/// # use hypervisor::MshvHypervisor;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// vm.set/get().unwrap()
///
impl vm::Vm for MshvVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    /// Not needed on MSHV, so this is a no-op.
    ///
    fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Creates a vCPU on the VM and returns it as a Vcpu trait object.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vmmops: Option<Arc<dyn VmmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vcpu_fd = self
            .fd
            .create_vcpu(id)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = MshvVcpu {
            fd: vcpu_fd,
            vp_index: id,
            cpuid: CpuId::new(1).unwrap(),
            msrs: self.msrs.clone(),
            hv_state: self.hv_state.clone(),
            vmmops,
        };
        Ok(Arc::new(vcpu))
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
        Ok(())
    }
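    ///
    /// Registers an eventfd to be signaled when the guest writes to the given
    /// address, optionally only when the written value matches `datamatch`.
    /// A sketch, assuming `vm` is an MshvVm and `evt` an EventFd (the MMIO
    /// address here is made up):
    /// let addr = IoEventAddress::Mmio(0xd000_0000);
    /// vm.register_ioevent(&evt, &addr, Some(vm::DataMatch::DataMatch32(1))).unwrap();
    ///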
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<DataMatch>,
    ) -> vm::Result<()> {
        debug!(
            "register_ioevent fd {} addr {:x?} datamatch {:?}",
            fd.as_raw_fd(),
            addr,
            datamatch
        );
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(mshv_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(mshv_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }
    /// Unregisters an event from the address it has previously been registered to.
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);

        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    /// Creates a guest physical memory region.
    fn create_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        // Keep track of the slots, whether they are read-only or not. Even
        // for a read-only slot the hypervisor can enable the dirty bits,
        // but a VM exit happens before the dirty bits are set.
        self.dirty_log_slots.write().unwrap().insert(
            user_memory_region.guest_pfn,
            MshvDirtyLogSlot {
                guest_pfn: user_memory_region.guest_pfn,
                memory_size: user_memory_region.size,
            },
        );

        self.fd
            .map_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
        Ok(())
    }

    /// Removes a guest physical memory region.
    fn remove_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots
            .write()
            .unwrap()
            .remove(&user_memory_region.guest_pfn);

        self.fd
            .unmap_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
        Ok(())
    }

    fn make_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        _log_dirty_pages: bool,
    ) -> MemoryRegion {
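        // Every mapping is readable and executable; write access is added
        // unless the region is read-only.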
        let mut flags = HV_MAP_GPA_READABLE | HV_MAP_GPA_EXECUTABLE;
        if !readonly {
            flags |= HV_MAP_GPA_WRITABLE;
        }

        mshv_user_mem_region {
            flags,
            guest_pfn: guest_phys_addr >> PAGE_SHIFT,
            size: memory_size,
            userspace_addr: userspace_addr as u64,
        }
    }

    ///
    /// Creates an in-kernel device.
    ///
    /// See the documentation for `MSHV_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> {
        let fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        let device = MshvDevice { fd };
        Ok(Arc::new(device))
    }

    fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
        let mut vfio_dev = mshv_create_device {
            type_: mshv_device_type_MSHV_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut msi_routing =
            vec_with_array_field::<mshv_msi_routing, mshv_msi_routing_entry>(entries.len());
        msi_routing[0].nr = entries.len() as u32;

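        // Safe because vec_with_array_field reserved enough trailing space in
        // msi_routing[0] to hold entries.len() flexible-array entries.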
        unsafe {
            let entries_slice: &mut [mshv_msi_routing_entry] =
                msi_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(entries);
        }

        self.fd
            .set_msi_routing(&msi_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
    ///
    /// Get the Vm state. Return VM specific data
    ///
    fn state(&self) -> vm::Result<VmState> {
        Ok(*self.hv_state.read().unwrap())
    }
    ///
    /// Set the VM state
    ///
    fn set_state(&self, state: VmState) -> vm::Result<()> {
        self.hv_state.write().unwrap().hypercall_page = state.hypercall_page;
        Ok(())
    }
    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        self.fd
            .enable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
    }
    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        // Before disabling dirty page tracking we have to set the dirty
        // bits in the hypervisor; this is a requirement of the Microsoft
        // Hypervisor.
        for (_, s) in dirty_log_slots.iter() {
            self.fd
                .get_dirty_log(s.guest_pfn, s.memory_size as usize, DIRTY_BITMAP_SET_DIRTY)
                .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        }
        self.fd
            .disable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        Ok(())
    }
    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(
                base_gpa >> PAGE_SHIFT,
                memory_size as usize,
                DIRTY_BITMAP_CLEAR_DIRTY,
            )
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }
}
pub use hv_cpuid_entry as CpuIdEntry;

pub const CPUID_FLAG_VALID_INDEX: u32 = 0;