xref: /cloud-hypervisor/hypervisor/src/mshv/mod.rs (revision 7d7bfb2034001d4cb15df2ddc56d2d350c8da30f)
1 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
2 //
3 // Copyright © 2020, Microsoft Corporation
4 //
5 
6 use crate::arch::emulator::{PlatformEmulator, PlatformError};
7 
8 #[cfg(target_arch = "x86_64")]
9 use crate::arch::x86::emulator::{Emulator, EmulatorCpuState};
10 use crate::cpu;
11 use crate::cpu::Vcpu;
12 use crate::hypervisor;
13 use crate::vec_with_array_field;
14 use crate::vm::{self, VmmOps};
15 pub use mshv_bindings::*;
16 pub use mshv_ioctls::IoEventAddress;
17 use mshv_ioctls::{set_registers_64, Mshv, NoDatamatch, VcpuFd, VmFd};
18 use serde_derive::{Deserialize, Serialize};
19 use std::collections::HashMap;
20 use std::sync::{Arc, RwLock};
21 use vm::DataMatch;
22 // x86_64 dependencies
23 #[cfg(target_arch = "x86_64")]
24 pub mod x86_64;
25 use crate::device;
26 use vmm_sys_util::eventfd::EventFd;
27 #[cfg(target_arch = "x86_64")]
28 pub use x86_64::VcpuMshvState as CpuState;
29 #[cfg(target_arch = "x86_64")]
30 pub use x86_64::*;
31 
32 #[cfg(target_arch = "x86_64")]
33 use std::fs::File;
34 use std::os::unix::io::{AsRawFd, RawFd};
35 
// Flag values for the MSHV dirty-page-bitmap ioctl.
// NOTE(review): not referenced in this chunk; names suggest clear-on-read vs.
// set semantics — confirm against the dirty-log implementation below.
const DIRTY_BITMAP_CLEAR_DIRTY: u64 = 0x4;
const DIRTY_BITMAP_SET_DIRTY: u64 = 0x8;
38 
39 ///
40 /// Export generically-named wrappers of mshv-bindings for Unix-based platforms
41 ///
42 pub use {
43     mshv_bindings::mshv_create_device as CreateDevice,
44     mshv_bindings::mshv_device_attr as DeviceAttr,
45     mshv_bindings::mshv_msi_routing_entry as IrqRoutingEntry, mshv_ioctls::DeviceFd,
46 };
47 
/// Page shift for guest-frame-number <-> byte-address conversion (4 KiB pages).
pub const PAGE_SHIFT: usize = 12;
49 
/// Hypervisor-wide state kept per VM; serializable so it can be saved and
/// restored (e.g. for snapshot/migration).
#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
pub struct HvState {
    // Guest address of the Hyper-V hypercall page; 0 until configured
    // (see `hv_state_init`).
    hypercall_page: u64,
}
54 
55 pub use HvState as VmState;
56 
/// Bookkeeping for a memory slot registered for dirty-page logging
/// (stored in `MshvVm::dirty_log_slots`).
struct MshvDirtyLogSlot {
    // First guest page frame number of the slot.
    guest_pfn: u64,
    // Size of the slot — presumably in bytes; TODO confirm at the use site.
    memory_size: u64,
}
61 
/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    // System-level MSHV handle opened in `MshvHypervisor::new`.
    mshv: Mshv,
}
66 
67 impl MshvHypervisor {
68     /// Create a hypervisor based on Mshv
69     pub fn new() -> hypervisor::Result<MshvHypervisor> {
70         let mshv_obj =
71             Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
72         Ok(MshvHypervisor { mshv: mshv_obj })
73     }
74 }
75 /// Implementation of Hypervisor trait for Mshv
76 /// Example:
77 /// #[cfg(feature = "mshv")]
78 /// extern crate hypervisor
79 /// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
80 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
81 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
82 ///
83 impl hypervisor::Hypervisor for MshvHypervisor {
84     /// Create a mshv vm object and return the object as Vm trait object
85     /// Example
86     /// # extern crate hypervisor;
87     /// # use hypervisor::MshvHypervisor;
88     /// use hypervisor::MshvVm;
89     /// let hypervisor = MshvHypervisor::new().unwrap();
90     /// let vm = hypervisor.create_vm().unwrap()
91     ///
92     fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
93         let fd: VmFd;
94         loop {
95             match self.mshv.create_vm() {
96                 Ok(res) => fd = res,
97                 Err(e) => {
98                     if e.errno() == libc::EINTR {
99                         // If the error returned is EINTR, which means the
100                         // ioctl has been interrupted, we have to retry as
101                         // this can't be considered as a regular error.
102                         continue;
103                     } else {
104                         return Err(hypervisor::HypervisorError::VmCreate(e.into()));
105                     }
106                 }
107             }
108             break;
109         }
110 
111         let msr_list = self.get_msr_list()?;
112         let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
113         let mut msrs = MsrEntries::new(num_msrs).unwrap();
114         let indices = msr_list.as_slice();
115         let msr_entries = msrs.as_mut_slice();
116         for (pos, index) in indices.iter().enumerate() {
117             msr_entries[pos].index = *index;
118         }
119         let vm_fd = Arc::new(fd);
120 
121         Ok(Arc::new(MshvVm {
122             fd: vm_fd,
123             msrs,
124             hv_state: hv_state_init(),
125             vmmops: None,
126             dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
127         }))
128     }
129     ///
130     /// Get the supported CpuID
131     ///
132     fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
133         Ok(CpuId::new(1).unwrap())
134     }
135     #[cfg(target_arch = "x86_64")]
136     ///
137     /// Retrieve the list of MSRs supported by MSHV.
138     ///
139     fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
140         self.mshv
141             .get_msr_index_list()
142             .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
143     }
144 }
145 
#[allow(dead_code)]
/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    // Per-vCPU ioctl handle.
    fd: VcpuFd,
    // Index of this vCPU in the VM; also used as the emulator cpu_id.
    vp_index: u8,
    // CPUID entries for this vCPU (a fixed single-entry value — see
    // `MshvVm::create_vcpu`).
    cpuid: CpuId,
    // Clone of the VM-wide MSR index list, used by `state()` to read MSRs.
    msrs: MsrEntries,
    hv_state: Arc<RwLock<HvState>>, // Mshv State
    // Callbacks into the VMM for PIO/MMIO/guest-memory access on VM exits.
    vmmops: Option<Arc<dyn vm::VmmOps>>,
}
156 
/// Implementation of Vcpu trait for Microsoft Hypervisor
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0).unwrap();
/// vcpu.get/set().unwrap()
///
impl cpu::Vcpu for MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        self.fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        self.fd
            .set_regs(regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        self.fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        self.fd
            .set_sregs(sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        self.fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        self.fd
            .set_fpu(fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> {
        self.fd
            .get_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> {
        self.fd
            .set_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }
    #[allow(non_upper_case_globals)]
    ///
    /// Run the vCPU until the next VM exit, then decode and handle the exit
    /// message. Exits handled in place return `VmExit::Ignore`.
    ///
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        // Zeroed message buffer that the hypervisor fills in on exit.
        let hv_message: hv_message = hv_message::default();
        match self.fd.run(hv_message) {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    // SAFETY: access_info is valid, otherwise we won't be here
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    // intercept_access_type == 1 means OUT (guest write).
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    /*
                     * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
                     *
                     * Cloud Hypervisor doesn't support fw_cfg at the moment. It does support 0x402
                     * under the "fwdebug" feature flag. But that feature is not enabled by default
                     * and is considered legacy.
                     *
                     * OVMF unconditionally pokes these IO ports with string IO.
                     *
                     * Instead of trying to implement string IO support now which does not do much
                     * now, skip those ports explicitly to avoid panicking.
                     *
                     * Proper string IO support can be added once we gain the ability to translate
                     * guest virtual addresses to guest physical addresses on MSHV.
                     */
                    match port {
                        0x402 | 0x510 | 0x511 | 0x514 => {
                            let insn_len = info.header.instruction_length() as u64;

                            /* Advance RIP and update RAX */
                            let arr_reg_name_value = [
                                (
                                    hv_register_name::HV_X64_REGISTER_RIP,
                                    info.header.rip + insn_len,
                                ),
                                (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                            ];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                            return Ok(cpu::VmExit::Ignore);
                        }
                        _ => {}
                    }

                    // SAFETY: access_info is valid, otherwise we won't be here
                    assert!(
                        (unsafe { access_info.__bindgen_anon_1.string_op() } != 1),
                        "String IN/OUT not supported"
                    );
                    assert!(
                        (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1),
                        "Rep IN/OUT not supported"
                    );

                    if is_write {
                        let data = (info.rax as u32).to_le_bytes();
                        if let Some(vmmops) = &self.vmmops {
                            vmmops
                                .pio_write(port.into(), &data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }
                    } else {
                        if let Some(vmmops) = &self.vmmops {
                            vmmops
                                .pio_read(port.into(), &mut data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }

                        let v = u32::from_le_bytes(data);
                        /* Preserve high bits in EAX but clear out high bits in RAX */
                        let mask = 0xffffffff >> (32 - len * 8);
                        let eax = (info.rax as u32 & !mask) | (v & mask);
                        ret_rax = eax as u64;
                    }

                    let insn_len = info.header.instruction_length() as u64;

                    /* Advance RIP and update RAX */
                    let arr_reg_name_value = [
                        (
                            hv_register_name::HV_X64_REGISTER_RIP,
                            info.header.rip + insn_len,
                        ),
                        (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                    ];
                    set_registers_64!(self.fd, arr_reg_name_value)
                        .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                    Ok(cpu::VmExit::Ignore)
                }
                // MMIO access: emulate the faulting instruction in software.
                hv_message_type_HVMSG_UNMAPPED_GPA => {
                    let info = x.to_memory_info().unwrap();
                    let insn_len = info.instruction_byte_count as usize;
                    assert!(insn_len > 0 && insn_len <= 16);

                    let mut context = MshvEmulatorContext {
                        vcpu: self,
                        map: (info.guest_virtual_address, info.guest_physical_address),
                    };

                    // Create a new emulator.
                    let mut emul = Emulator::new(&mut context);

                    // Emulate the trapped instruction, and only the first one.
                    let new_state = emul
                        .emulate_first_insn(self.vp_index as usize, &info.instruction_bytes)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Set CPU state back.
                    context
                        .set_cpu_state(self.vp_index as usize, new_state)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
                    let info = x.to_cpuid_info().unwrap();
                    debug!("cpuid eax: {:x}", { info.rax });
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
                    let info = x.to_msr_info().unwrap();
                    // intercept_access_type == 0 is a read, anything else a write.
                    if info.header.intercept_access_type == 0 {
                        debug!("msr read: {:x}", { info.msr_number });
                    } else {
                        debug!("msr write: {:x}", { info.msr_number });
                    }
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
                    //TODO: Handler for VMCALL here.
                    let info = x.to_exception_info().unwrap();
                    debug!("Exception Info {:?}", { info.exception_vector });
                    Ok(cpu::VmExit::Ignore)
                }
                exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unhandled VCPU exit {:?}",
                    exit
                ))),
            },

            // EAGAIN/EINTR are transient: report an ignorable exit so the
            // caller simply re-enters the vCPU.
            Err(e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    /// No-op: CPUID is not programmable through this interface on MSHV.
    fn set_cpuid2(&self, _cpuid: &CpuId) -> cpu::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<CpuId> {
        Ok(self.cpuid.clone())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        self.fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, lapic: &LapicState) -> cpu::Result<()> {
        self.fd
            .set_lapic(lapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    ///
    /// Set CPU state
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        self.set_msrs(&state.msrs)?;
        self.set_vcpu_events(&state.vcpu_events)?;
        self.set_regs(&state.regs)?;
        self.set_sregs(&state.sregs)?;
        self.set_fpu(&state.fpu)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic)?;
        self.set_xsave(&state.xsave)?;
        // These registers are global and need to be set only for the first
        // VCPU, as Microsoft Hypervisor allows setting this register for only
        // one VCPU.
        if self.vp_index == 0 {
            self.fd
                .set_misc_regs(&state.misc)
                .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))?
        }
        self.fd
            .set_debug_regs(&state.dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
        Ok(())
    }
    ///
    /// Get CPU State
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xcrs = self.get_xcrs()?;
        let fpu = self.get_fpu()?;
        let vcpu_events = self.get_vcpu_events()?;
        // Clone the MSR index template, then fill in the current values.
        let mut msrs = self.msrs.clone();
        self.get_msrs(&mut msrs)?;
        let lapic = self.get_lapic()?;
        let xsave = self.get_xsave()?;
        let misc = self
            .fd
            .get_misc_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?;
        let dbg = self
            .fd
            .get_debug_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;

        Ok(CpuState {
            msrs,
            vcpu_events,
            regs,
            sregs,
            fpu,
            xcrs,
            lapic,
            dbg,
            xsave,
            misc,
        })
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Translate guest virtual address to guest physical address
    ///
    /// Returns the GPA together with the raw hypervisor result code.
    fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> {
        let r = self
            .fd
            .translate_gva(gva, flags)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;

        let gpa = r.0;
        // SAFETY: r is valid, otherwise this function will have returned
        let result_code = unsafe { r.1.__bindgen_anon_1.result_code };

        Ok((gpa, result_code))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "suspend registers".
    ///
    fn get_suspend_regs(&self) -> cpu::Result<SuspendRegisters> {
        self.fd
            .get_suspend_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetSuspendRegs(e.into()))
    }
}
586 
/// Device struct for MSHV
pub struct MshvDevice {
    // Handle to the in-hypervisor device (see `CreateDevice` / `DeviceFd`).
    fd: DeviceFd,
}
591 
592 impl device::Device for MshvDevice {
593     ///
594     /// Set device attribute
595     ///
596     fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> {
597         self.fd
598             .set_device_attr(attr)
599             .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into()))
600     }
601     ///
602     /// Get device attribute
603     ///
604     fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> {
605         self.fd
606             .get_device_attr(attr)
607             .map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into()))
608     }
609 }
610 
611 impl AsRawFd for MshvDevice {
612     fn as_raw_fd(&self) -> RawFd {
613         self.fd.as_raw_fd()
614     }
615 }
616 
/// Borrowed context handed to the x86 instruction emulator when handling an
/// unmapped-GPA (MMIO) exit.
struct MshvEmulatorContext<'a> {
    // vCPU whose faulting instruction is being emulated.
    vcpu: &'a MshvVcpu,
    map: (u64, u64), // Initial GVA to GPA mapping provided by the hypervisor
}
621 
622 impl<'a> MshvEmulatorContext<'a> {
623     // Do the actual gva -> gpa translation
624     #[allow(non_upper_case_globals)]
625     fn translate(&self, gva: u64) -> Result<u64, PlatformError> {
626         if self.map.0 == gva {
627             return Ok(self.map.1);
628         }
629 
630         // TODO: More fine-grained control for the flags
631         let flags = HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE;
632 
633         let (gpa, result_code) = self
634             .vcpu
635             .translate_gva(gva, flags.into())
636             .map_err(|e| PlatformError::TranslateVirtualAddress(anyhow!(e)))?;
637 
638         match result_code {
639             hv_translate_gva_result_code_HV_TRANSLATE_GVA_SUCCESS => Ok(gpa),
640             _ => Err(PlatformError::TranslateVirtualAddress(anyhow!(result_code))),
641         }
642     }
643 }
644 
645 /// Platform emulation for Hyper-V
646 impl<'a> PlatformEmulator for MshvEmulatorContext<'a> {
647     type CpuState = EmulatorCpuState;
648 
649     fn read_memory(&self, gva: u64, data: &mut [u8]) -> Result<(), PlatformError> {
650         let gpa = self.translate(gva)?;
651         debug!(
652             "mshv emulator: memory read {} bytes from [{:#x} -> {:#x}]",
653             data.len(),
654             gva,
655             gpa
656         );
657 
658         if let Some(vmmops) = &self.vcpu.vmmops {
659             if vmmops.guest_mem_read(gpa, data).is_err() {
660                 vmmops
661                     .mmio_read(gpa, data)
662                     .map_err(|e| PlatformError::MemoryReadFailure(e.into()))?;
663             }
664         }
665 
666         Ok(())
667     }
668 
669     fn write_memory(&mut self, gva: u64, data: &[u8]) -> Result<(), PlatformError> {
670         let gpa = self.translate(gva)?;
671         debug!(
672             "mshv emulator: memory write {} bytes at [{:#x} -> {:#x}]",
673             data.len(),
674             gva,
675             gpa
676         );
677 
678         if let Some(vmmops) = &self.vcpu.vmmops {
679             if vmmops.guest_mem_write(gpa, data).is_err() {
680                 vmmops
681                     .mmio_write(gpa, data)
682                     .map_err(|e| PlatformError::MemoryWriteFailure(e.into()))?;
683             }
684         }
685 
686         Ok(())
687     }
688 
689     fn cpu_state(&self, cpu_id: usize) -> Result<Self::CpuState, PlatformError> {
690         if cpu_id != self.vcpu.vp_index as usize {
691             return Err(PlatformError::GetCpuStateFailure(anyhow!(
692                 "CPU id mismatch {:?} {:?}",
693                 cpu_id,
694                 self.vcpu.vp_index
695             )));
696         }
697 
698         let regs = self
699             .vcpu
700             .get_regs()
701             .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;
702         let sregs = self
703             .vcpu
704             .get_sregs()
705             .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;
706 
707         debug!("mshv emulator: Getting new CPU state");
708         debug!("mshv emulator: {:#x?}", regs);
709 
710         Ok(EmulatorCpuState { regs, sregs })
711     }
712 
713     fn set_cpu_state(&self, cpu_id: usize, state: Self::CpuState) -> Result<(), PlatformError> {
714         if cpu_id != self.vcpu.vp_index as usize {
715             return Err(PlatformError::SetCpuStateFailure(anyhow!(
716                 "CPU id mismatch {:?} {:?}",
717                 cpu_id,
718                 self.vcpu.vp_index
719             )));
720         }
721 
722         debug!("mshv emulator: Setting new CPU state");
723         debug!("mshv emulator: {:#x?}", state.regs);
724 
725         self.vcpu
726             .set_regs(&state.regs)
727             .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))?;
728         self.vcpu
729             .set_sregs(&state.sregs)
730             .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))
731     }
732 
733     fn gva_to_gpa(&self, gva: u64) -> Result<u64, PlatformError> {
734         self.translate(gva)
735     }
736 
737     fn fetch(&self, _ip: u64, _instruction_bytes: &mut [u8]) -> Result<(), PlatformError> {
738         Err(PlatformError::MemoryReadFailure(anyhow!("unimplemented")))
739     }
740 }
741 
#[allow(dead_code)]
/// Wrapper over Mshv VM ioctls.
pub struct MshvVm {
    // Shared VM ioctl handle.
    fd: Arc<VmFd>,
    // MSR index template cloned into every vCPU (see `create_vcpu`).
    msrs: MsrEntries,
    // Hypervisor State
    hv_state: Arc<RwLock<HvState>>,
    // VMM callbacks; None at the VM level, supplied per-vCPU in `create_vcpu`.
    vmmops: Option<Arc<dyn vm::VmmOps>>,
    // Memory slots registered for dirty-page logging — presumably keyed by
    // base GFN; TODO confirm at the insertion site.
    dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
}
752 
753 fn hv_state_init() -> Arc<RwLock<HvState>> {
754     Arc::new(RwLock::new(HvState { hypercall_page: 0 }))
755 }
756 
757 ///
758 /// Implementation of Vm trait for Mshv
759 /// Example:
760 /// #[cfg(feature = "mshv")]
761 /// # extern crate hypervisor;
762 /// # use hypervisor::MshvHypervisor;
763 /// let mshv = MshvHypervisor::new().unwrap();
764 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
765 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
766 /// vm.set/get().unwrap()
767 ///
768 impl vm::Vm for MshvVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    /// Intentionally a no-op on MSHV.
    fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    /// Intentionally a no-op on MSHV.
    fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    /// Intentionally a no-op on MSHV.
    fn create_irq_chip(&self) -> vm::Result<()> {
        Ok(())
    }
789     ///
790     /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
791     ///
792     fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
793         debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);
794 
795         self.fd
796             .register_irqfd(fd, gsi)
797             .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;
798 
799         Ok(())
800     }
801     ///
802     /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
803     ///
804     fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
805         debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);
806 
807         self.fd
808             .unregister_irqfd(fd, gsi)
809             .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;
810 
811         Ok(())
812     }
813     ///
814     /// Creates a VcpuFd object from a vcpu RawFd.
815     ///
816     fn create_vcpu(
817         &self,
818         id: u8,
819         vmmops: Option<Arc<dyn VmmOps>>,
820     ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
821         let vcpu_fd = self
822             .fd
823             .create_vcpu(id)
824             .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
825         let vcpu = MshvVcpu {
826             fd: vcpu_fd,
827             vp_index: id,
828             cpuid: CpuId::new(1).unwrap(),
829             msrs: self.msrs.clone(),
830             hv_state: self.hv_state.clone(),
831             vmmops,
832         };
833         Ok(Arc::new(vcpu))
834     }
    #[cfg(target_arch = "x86_64")]
    // No-op on MSHV: split irqchip is a KVM-specific configuration knob, so
    // this always succeeds without doing anything.
    fn enable_split_irq(&self) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    // No-op on MSHV: SGX attribute files are not supported here; the file is
    // dropped and success is reported unconditionally.
    fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
        Ok(())
    }
843     fn register_ioevent(
844         &self,
845         fd: &EventFd,
846         addr: &IoEventAddress,
847         datamatch: Option<DataMatch>,
848     ) -> vm::Result<()> {
849         debug!(
850             "register_ioevent fd {} addr {:x?} datamatch {:?}",
851             fd.as_raw_fd(),
852             addr,
853             datamatch
854         );
855         if let Some(dm) = datamatch {
856             match dm {
857                 vm::DataMatch::DataMatch32(mshv_dm32) => self
858                     .fd
859                     .register_ioevent(fd, addr, mshv_dm32)
860                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
861                 vm::DataMatch::DataMatch64(mshv_dm64) => self
862                     .fd
863                     .register_ioevent(fd, addr, mshv_dm64)
864                     .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
865             }
866         } else {
867             self.fd
868                 .register_ioevent(fd, addr, NoDatamatch)
869                 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
870         }
871     }
872     /// Unregister an event from a certain address it has been previously registered to.
873     fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
874         debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);
875 
876         self.fd
877             .unregister_ioevent(fd, addr, NoDatamatch)
878             .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
879     }
880 
881     /// Creates a guest physical memory region.
882     fn create_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
883         // No matter read only or not we keep track the slots.
884         // For readonly hypervisor can enable the dirty bits,
885         // but a VM exit happens before setting the dirty bits
886         self.dirty_log_slots.write().unwrap().insert(
887             user_memory_region.guest_pfn,
888             MshvDirtyLogSlot {
889                 guest_pfn: user_memory_region.guest_pfn,
890                 memory_size: user_memory_region.size,
891             },
892         );
893 
894         self.fd
895             .map_user_memory(user_memory_region)
896             .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
897         Ok(())
898     }
899 
900     /// Removes a guest physical memory region.
901     fn remove_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
902         // Remove the corresponding entry from "self.dirty_log_slots" if needed
903         self.dirty_log_slots
904             .write()
905             .unwrap()
906             .remove(&user_memory_region.guest_pfn);
907 
908         self.fd
909             .unmap_user_memory(user_memory_region)
910             .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
911         Ok(())
912     }
913 
914     fn make_user_memory_region(
915         &self,
916         _slot: u32,
917         guest_phys_addr: u64,
918         memory_size: u64,
919         userspace_addr: u64,
920         readonly: bool,
921         _log_dirty_pages: bool,
922     ) -> MemoryRegion {
923         let mut flags = HV_MAP_GPA_READABLE | HV_MAP_GPA_EXECUTABLE;
924         if !readonly {
925             flags |= HV_MAP_GPA_WRITABLE;
926         }
927 
928         mshv_user_mem_region {
929             flags,
930             guest_pfn: guest_phys_addr >> PAGE_SHIFT,
931             size: memory_size,
932             userspace_addr: userspace_addr as u64,
933         }
934     }
935 
936     ///
937     /// Creates an in-kernel device.
938     ///
939     /// See the documentation for `MSHV_CREATE_DEVICE`.
940     fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> {
941         let fd = self
942             .fd
943             .create_device(device)
944             .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
945         let device = MshvDevice { fd };
946         Ok(Arc::new(device))
947     }
948 
949     fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
950         let mut vfio_dev = mshv_create_device {
951             type_: mshv_device_type_MSHV_DEV_TYPE_VFIO,
952             fd: 0,
953             flags: 0,
954         };
955 
956         self.create_device(&mut vfio_dev)
957             .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
958     }
959 
    /// Replaces the VM's MSI routing table with `entries` in a single ioctl.
    ///
    /// `mshv_msi_routing` ends in a flexible array member, so the struct is
    /// allocated with enough trailing space for all entries first.
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut msi_routing =
            vec_with_array_field::<mshv_msi_routing, mshv_msi_routing_entry>(entries.len());
        msi_routing[0].nr = entries.len() as u32;

        // SAFETY: msi_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [mshv_msi_routing_entry] =
                msi_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(entries);
        }

        self.fd
            .set_msi_routing(&msi_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
978     ///
979     /// Get the Vm state. Return VM specific data
980     ///
981     fn state(&self) -> vm::Result<VmState> {
982         Ok(*self.hv_state.read().unwrap())
983     }
984     ///
985     /// Set the VM state
986     ///
987     fn set_state(&self, state: VmState) -> vm::Result<()> {
988         self.hv_state.write().unwrap().hypercall_page = state.hypercall_page;
989         Ok(())
990     }
991     ///
992     /// Start logging dirty pages
993     ///
994     fn start_dirty_log(&self) -> vm::Result<()> {
995         self.fd
996             .enable_dirty_page_tracking()
997             .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
998     }
999     ///
1000     /// Stop logging dirty pages
1001     ///
1002     fn stop_dirty_log(&self) -> vm::Result<()> {
1003         let dirty_log_slots = self.dirty_log_slots.read().unwrap();
1004         // Before disabling the dirty page tracking we need
1005         // to set the dirty bits in the Hypervisor
1006         // This is a requirement from Microsoft Hypervisor
1007         for (_, s) in dirty_log_slots.iter() {
1008             self.fd
1009                 .get_dirty_log(s.guest_pfn, s.memory_size as usize, DIRTY_BITMAP_SET_DIRTY)
1010                 .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
1011         }
1012         self.fd
1013             .disable_dirty_page_tracking()
1014             .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
1015         Ok(())
1016     }
1017     ///
1018     /// Get dirty pages bitmap (one bit per page)
1019     ///
1020     fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
1021         self.fd
1022             .get_dirty_log(
1023                 base_gpa >> PAGE_SHIFT,
1024                 memory_size as usize,
1025                 DIRTY_BITMAP_CLEAR_DIRTY,
1026             )
1027             .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
1028     }
1029 }
// Generic name for the MSHV cpuid entry type, matching the alias other
// hypervisor backends export.
pub use hv_cpuid_entry as CpuIdEntry;

// NOTE(review): presumably mirrors KVM's "significant index" cpuid flag for
// cross-hypervisor x86 code, with zero meaning the flag is never set on
// MSHV — confirm against the consumers of this constant.
pub const CPUID_FLAG_VALID_INDEX: u32 = 0;
1033