// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//

use crate::arch::emulator::{PlatformEmulator, PlatformError};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::emulator::{Emulator, EmulatorCpuState};
use crate::cpu;
use crate::cpu::Vcpu;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, InterruptSourceConfig, VmOps};
pub use mshv_bindings::*;
use mshv_ioctls::{set_registers_64, Mshv, NoDatamatch, VcpuFd, VmFd};
use std::any::Any;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use vm::DataMatch;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
use crate::device;
use crate::{
    ClockData, CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
    USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
};
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
pub use x86_64::VcpuMshvState;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;

#[cfg(target_arch = "x86_64")]
use std::fs::File;
use std::os::unix::io::AsRawFd;

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{
    CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, StandardRegisters,
};

const DIRTY_BITMAP_CLEAR_DIRTY: u64 = 0x4;
const DIRTY_BITMAP_SET_DIRTY: u64 = 0x8;

///
/// Export generically-named wrappers of mshv-bindings for Unix-based platforms
///
pub use {
    mshv_bindings::mshv_create_device as CreateDevice,
    mshv_bindings::mshv_device_attr as DeviceAttr, mshv_ioctls::DeviceFd,
};

pub const PAGE_SHIFT: usize = 12;

impl From<mshv_user_mem_region> for UserMemoryRegion {
    fn from(region: mshv_user_mem_region) -> Self {
        let mut flags: u32 = 0;
        if region.flags & HV_MAP_GPA_READABLE != 0 {
            flags |= USER_MEMORY_REGION_READ;
        }
        if region.flags & HV_MAP_GPA_WRITABLE != 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & HV_MAP_GPA_EXECUTABLE != 0 {
            flags |= USER_MEMORY_REGION_EXECUTE;
        }

        UserMemoryRegion {
            guest_phys_addr: (region.guest_pfn << PAGE_SHIFT as u64)
                + (region.userspace_addr & ((1 << PAGE_SHIFT) - 1)),
            memory_size: region.size,
            userspace_addr: region.userspace_addr,
            flags,
            ..Default::default()
        }
    }
}

impl From<UserMemoryRegion> for mshv_user_mem_region {
    fn from(region: UserMemoryRegion) -> Self {
        let mut flags: u32 = 0;
        if region.flags & USER_MEMORY_REGION_READ != 0 {
            flags |= HV_MAP_GPA_READABLE;
        }
        if region.flags & USER_MEMORY_REGION_WRITE != 0 {
            flags |= HV_MAP_GPA_WRITABLE;
        }
        if region.flags & USER_MEMORY_REGION_EXECUTE != 0 {
            flags |= HV_MAP_GPA_EXECUTABLE;
        }

        mshv_user_mem_region {
            guest_pfn: region.guest_phys_addr >> PAGE_SHIFT,
            size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}
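
// A worked example of the PFN math above, with illustrative values and the
// 4 KiB page size implied by PAGE_SHIFT == 12: guest_phys_addr 0x10_0000
// becomes guest_pfn 0x100 (0x10_0000 >> 12), and converting back yields
// (0x100 << 12) plus the sub-page offset of userspace_addr, which is zero
// for page-aligned mappings.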

impl From<mshv_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: mshv_ioctls::IoEventAddress) -> Self {
        match a {
            mshv_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            mshv_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for mshv_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuMshvState> for CpuState {
    fn from(s: VcpuMshvState) -> Self {
        CpuState::Mshv(s)
    }
}

impl From<CpuState> for VcpuMshvState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Mshv(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

impl From<mshv_msi_routing_entry> for IrqRoutingEntry {
    fn from(s: mshv_msi_routing_entry) -> Self {
        IrqRoutingEntry::Mshv(s)
    }
}

impl From<IrqRoutingEntry> for mshv_msi_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

struct MshvDirtyLogSlot {
    guest_pfn: u64,
    memory_size: u64,
}

/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by MSHV.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

impl MshvHypervisor {
    /// Create a hypervisor based on Mshv
    pub fn new() -> hypervisor::Result<MshvHypervisor> {
        let mshv_obj =
            Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
        Ok(MshvHypervisor { mshv: mshv_obj })
    }
}
/// Implementation of Hypervisor trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
///
impl hypervisor::Hypervisor for MshvHypervisor {
    /// Create an MSHV VM object and return it as a Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::MshvHypervisor;
    /// use hypervisor::MshvVm;
    /// let hypervisor = MshvHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.mshv.create_vm() {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the returned error is EINTR, the ioctl was
                        // interrupted and we have to retry, as this cannot
                        // be considered a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        // The Microsoft Hypervisor's default behavior for an unimplemented
        // MSR is to inject a fault into the guest when the MSR is accessed.
        // That behavior can be overridden with a more suitable option, i.e.
        // ignore writes from the guest and return zero when the guest
        // attempts to read an unimplemented MSR.
        fd.set_partition_property(
            hv_partition_property_code_HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION,
            hv_unimplemented_msr_action_HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;

        let msr_list = self.get_msr_list()?;
        let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
        let mut msrs: Vec<MsrEntry> = vec![
            MsrEntry {
                ..Default::default()
            };
            num_msrs
        ];
        let indices = msr_list.as_slice();
        for (pos, index) in indices.iter().enumerate() {
            msrs[pos].index = *index;
        }
        let vm_fd = Arc::new(fd);

        Ok(Arc::new(MshvVm {
            fd: vm_fd,
            msrs,
            vm_ops: None,
            dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
        }))
    }
    ///
    /// Get the supported CpuID
    ///
    fn get_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        Ok(Vec::new())
    }
}
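
// A hedged end-to-end sketch of how these pieces compose, based on the doc
// examples above; `None` for vm_ops and the exit handling shown here are
// assumptions, not part of this module's API surface:
//
//     let mshv = MshvHypervisor::new().unwrap();
//     let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
//     let vm = hypervisor.create_vm().expect("VM creation failed");
//     let vcpu = vm.create_vcpu(0, None).expect("vCPU creation failed");
//     loop {
//         match vcpu.run().expect("vCPU run failed") {
//             cpu::VmExit::Ignore => continue,
//             _exit => break, // handle Reset/Shutdown/etc. here
//         }
//     }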

#[allow(dead_code)]
/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    fd: VcpuFd,
    vp_index: u8,
    cpuid: Vec<CpuIdEntry>,
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
}

/// Implementation of Vcpu trait for Microsoft Hypervisor
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0).unwrap();
/// vcpu.get/set().unwrap()
///
impl cpu::Vcpu for MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        Ok(self
            .fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        Ok(self
            .fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: mshv_bindings::FloatingPointUnit = (*fpu).clone().into();
        self.fd
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        let succ = self
            .fd
            .get_msrs(&mut mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &mshv_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        self.fd
            .set_msrs(&mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }
    #[allow(non_upper_case_globals)]
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        let hv_message: hv_message = hv_message::default();
        match self.fd.run(hv_message) {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    // SAFETY: access_info is valid, otherwise we won't be here
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    /*
                     * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
                     *
                     * Cloud Hypervisor doesn't support fw_cfg at the moment. It does support 0x402
                     * under the "fwdebug" feature flag. But that feature is not enabled by default
                     * and is considered legacy.
                     *
                     * OVMF unconditionally pokes these IO ports with string IO.
                     *
                     * Instead of implementing string IO support now, which would not buy us much
                     * at this point, skip these ports explicitly to avoid panicking.
                     *
                     * Proper string IO support can be added once we gain the ability to translate
                     * guest virtual addresses to guest physical addresses on MSHV.
                     */
                    match port {
                        0x402 | 0x510 | 0x511 | 0x514 => {
                            let insn_len = info.header.instruction_length() as u64;

                            /* Advance RIP and update RAX */
                            let arr_reg_name_value = [
                                (
                                    hv_register_name::HV_X64_REGISTER_RIP,
                                    info.header.rip + insn_len,
                                ),
                                (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                            ];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                            return Ok(cpu::VmExit::Ignore);
                        }
                        _ => {}
                    }

                    // SAFETY: access_info is valid, otherwise we won't be here
                    assert!(
                        (unsafe { access_info.__bindgen_anon_1.string_op() } != 1),
                        "String IN/OUT not supported"
                    );
                    assert!(
                        (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1),
                        "Rep IN/OUT not supported"
                    );

                    if is_write {
                        let data = (info.rax as u32).to_le_bytes();
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_write(port.into(), &data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }
                    } else {
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_read(port.into(), &mut data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }

                        let v = u32::from_le_bytes(data);
                        /* Preserve high bits in EAX but clear out high bits in RAX */
                        let mask = 0xffffffff >> (32 - len * 8);
                        let eax = (info.rax as u32 & !mask) | (v & mask);
                        ret_rax = eax as u64;
                    }
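                    // Worked example of the masking above (illustrative
                    // values): a 1-byte IN that returns 0xab while
                    // info.rax == 0xdead_beef_1234_5678 gives mask == 0xff,
                    // eax == 0x1234_56ab and ret_rax == 0x0000_0000_1234_56ab.
                    // The read value lands in AL, the rest of EAX is
                    // preserved, and the high 32 bits of RAX are cleared.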

                    let insn_len = info.header.instruction_length() as u64;

                    /* Advance RIP and update RAX */
                    let arr_reg_name_value = [
                        (
                            hv_register_name::HV_X64_REGISTER_RIP,
                            info.header.rip + insn_len,
                        ),
                        (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                    ];
                    set_registers_64!(self.fd, arr_reg_name_value)
                        .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_UNMAPPED_GPA => {
                    let info = x.to_memory_info().unwrap();
                    let insn_len = info.instruction_byte_count as usize;
                    assert!(insn_len > 0 && insn_len <= 16);

                    let mut context = MshvEmulatorContext {
                        vcpu: self,
                        map: (info.guest_virtual_address, info.guest_physical_address),
                    };

                    // Create a new emulator.
                    let mut emul = Emulator::new(&mut context);

                    // Emulate the trapped instruction, and only the first one.
                    let new_state = emul
                        .emulate_first_insn(self.vp_index as usize, &info.instruction_bytes)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Set CPU state back.
                    context
                        .set_cpu_state(self.vp_index as usize, new_state)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
                    let info = x.to_cpuid_info().unwrap();
                    debug!("cpuid eax: {:x}", { info.rax });
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
                    let info = x.to_msr_info().unwrap();
                    if info.header.intercept_access_type == 0 {
                        debug!("msr read: {:x}", { info.msr_number });
                    } else {
                        debug!("msr write: {:x}", { info.msr_number });
                    }
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
                    //TODO: Handler for VMCALL here.
                    let info = x.to_exception_info().unwrap();
                    debug!("Exception Info {:?}", { info.exception_vector });
                    Ok(cpu::VmExit::Ignore)
                }
                exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unhandled VCPU exit {:?}",
                    exit
                ))),
            },

            Err(e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, _cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
        Ok(self.cpuid.clone())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        Ok(self
            .fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, lapic: &LapicState) -> cpu::Result<()> {
        let lapic: mshv_bindings::LapicState = (*lapic).clone().into();
        self.fd
            .set_lapic(&lapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    ///
    /// Returns the vcpu's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        Ok(MpState::Mshv)
    }
    ///
    /// Sets the vcpu's current "multiprocessing state".
    ///
    fn set_mp_state(&self, _mp_state: MpState) -> cpu::Result<()> {
        Ok(())
    }
    ///
    /// Set CPU state
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuMshvState = state.clone().into();
        self.set_msrs(&state.msrs)?;
        self.set_vcpu_events(&state.vcpu_events)?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_fpu(&state.fpu)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic)?;
        self.set_xsave(&state.xsave)?;
        // These registers are global and need to be set only for the first
        // vCPU, as the Microsoft Hypervisor allows setting this register for
        // only one vCPU.
        if self.vp_index == 0 {
            self.fd
                .set_misc_regs(&state.misc)
                .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))?
        }
        self.fd
            .set_debug_regs(&state.dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
        Ok(())
    }
    ///
    /// Get CPU State
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xcrs = self.get_xcrs()?;
        let fpu = self.get_fpu()?;
        let vcpu_events = self.get_vcpu_events()?;
        let mut msrs = self.msrs.clone();
        self.get_msrs(&mut msrs)?;
        let lapic = self.get_lapic()?;
        let xsave = self.get_xsave()?;
        let misc = self
            .fd
            .get_misc_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?;
        let dbg = self
            .fd
            .get_debug_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;

        Ok(VcpuMshvState {
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            xcrs,
            lapic,
            dbg,
            xsave,
            misc,
        }
        .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Translate guest virtual address to guest physical address
    ///
    fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> {
        let r = self
            .fd
            .translate_gva(gva, flags)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;

        let gpa = r.0;
        // SAFETY: r is valid, otherwise this function will have returned
        let result_code = unsafe { r.1.__bindgen_anon_1.result_code };

        Ok((gpa, result_code))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU
    ///
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr!(msr_index::MSR_IA32_TSC),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }
}

impl MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
}

/// Device struct for MSHV
pub type MshvDevice = DeviceFd;

impl device::Device for MshvDevice {
    ///
    /// Set device attribute
    ///
    fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> {
        self.set_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into()))
    }
    ///
    /// Get device attribute
    ///
    fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> {
        self.get_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into()))
    }
    ///
    /// Cast to the underlying MSHV device fd
    ///
    fn as_any(&self) -> &dyn Any {
        self
    }
}

struct MshvEmulatorContext<'a> {
    vcpu: &'a MshvVcpu,
    map: (u64, u64), // Initial GVA to GPA mapping provided by the hypervisor
}

impl<'a> MshvEmulatorContext<'a> {
    // Do the actual gva -> gpa translation
    #[allow(non_upper_case_globals)]
    fn translate(&self, gva: u64) -> Result<u64, PlatformError> {
        if self.map.0 == gva {
            return Ok(self.map.1);
        }

        // TODO: More fine-grained control for the flags
        let flags = HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE;

        let (gpa, result_code) = self
            .vcpu
            .translate_gva(gva, flags.into())
            .map_err(|e| PlatformError::TranslateVirtualAddress(anyhow!(e)))?;

        match result_code {
            hv_translate_gva_result_code_HV_TRANSLATE_GVA_SUCCESS => Ok(gpa),
            _ => Err(PlatformError::TranslateVirtualAddress(anyhow!(result_code))),
        }
    }
}

/// Platform emulation for Hyper-V
impl<'a> PlatformEmulator for MshvEmulatorContext<'a> {
    type CpuState = EmulatorCpuState;

    fn read_memory(&self, gva: u64, data: &mut [u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory read {} bytes from [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vm_ops) = &self.vcpu.vm_ops {
            if vm_ops.guest_mem_read(gpa, data).is_err() {
                vm_ops
                    .mmio_read(gpa, data)
                    .map_err(|e| PlatformError::MemoryReadFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn write_memory(&mut self, gva: u64, data: &[u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory write {} bytes at [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vm_ops) = &self.vcpu.vm_ops {
            if vm_ops.guest_mem_write(gpa, data).is_err() {
                vm_ops
                    .mmio_write(gpa, data)
                    .map_err(|e| PlatformError::MemoryWriteFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn cpu_state(&self, cpu_id: usize) -> Result<Self::CpuState, PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::GetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        let regs = self
            .vcpu
            .get_regs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;
        let sregs = self
            .vcpu
            .get_sregs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;

        debug!("mshv emulator: Getting new CPU state");
        debug!("mshv emulator: {:#x?}", regs);

        Ok(EmulatorCpuState { regs, sregs })
    }

    fn set_cpu_state(&self, cpu_id: usize, state: Self::CpuState) -> Result<(), PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::SetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        debug!("mshv emulator: Setting new CPU state");
        debug!("mshv emulator: {:#x?}", state.regs);

        self.vcpu
            .set_regs(&state.regs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))?;
        self.vcpu
            .set_sregs(&state.sregs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))
    }

    fn gva_to_gpa(&self, gva: u64) -> Result<u64, PlatformError> {
        self.translate(gva)
    }

    fn fetch(&self, _ip: u64, _instruction_bytes: &mut [u8]) -> Result<(), PlatformError> {
        Err(PlatformError::MemoryReadFailure(anyhow!("unimplemented")))
    }
}

#[allow(dead_code)]
/// Wrapper over Mshv VM ioctls.
pub struct MshvVm {
    fd: Arc<VmFd>,
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
}

///
/// Implementation of Vm trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// # extern crate hypervisor;
/// # use hypervisor::MshvHypervisor;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// vm.set/get().unwrap()
///
impl vm::Vm for MshvVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Creates a vCPU with the given id and returns it as a Vcpu trait object.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vcpu_fd = self
            .fd
            .create_vcpu(id)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = MshvVcpu {
            fd: vcpu_fd,
            vp_index: id,
            cpuid: Vec::new(),
            msrs: self.msrs.clone(),
            vm_ops,
        };
        Ok(Arc::new(vcpu))
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
        Ok(())
    }
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<DataMatch>,
    ) -> vm::Result<()> {
        let addr = &mshv_ioctls::IoEventAddress::from(*addr);
        debug!(
            "register_ioevent fd {} addr {:x?} datamatch {:?}",
            fd.as_raw_fd(),
            addr,
            datamatch
        );
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(mshv_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(mshv_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }
    /// Unregister an event from a certain address it has been previously registered to.
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        let addr = &mshv_ioctls::IoEventAddress::from(*addr);
        debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);

        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    /// Creates a guest physical memory region.
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // Keep track of the slots whether they are read-only or not. Even for
        // a read-only slot the hypervisor can enable the dirty bits, but a VM
        // exit happens before the dirty bits are set.
        self.dirty_log_slots.write().unwrap().insert(
            user_memory_region.guest_pfn,
            MshvDirtyLogSlot {
                guest_pfn: user_memory_region.guest_pfn,
                memory_size: user_memory_region.size,
            },
        );

        self.fd
            .map_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
        Ok(())
    }

    /// Removes a guest physical memory region.
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots
            .write()
            .unwrap()
            .remove(&user_memory_region.guest_pfn);

        self.fd
            .unmap_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
        Ok(())
    }

    fn make_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        _log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        let mut flags = HV_MAP_GPA_READABLE | HV_MAP_GPA_EXECUTABLE;
        if !readonly {
            flags |= HV_MAP_GPA_WRITABLE;
        }

        mshv_user_mem_region {
            flags,
            guest_pfn: guest_phys_addr >> PAGE_SHIFT,
            size: memory_size,
            userspace_addr: userspace_addr as u64,
        }
        .into()
    }
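
    // Example with illustrative values: make_user_memory_region(0, 0x10_0000,
    // 0x2000, host_va, true, false) yields a region with guest_pfn == 0x100
    // and flags == HV_MAP_GPA_READABLE | HV_MAP_GPA_EXECUTABLE, i.e. the
    // mapping is not writable by the guest.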

    ///
    /// Creates an in-kernel device.
    ///
    /// See the documentation for `MSHV_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> {
        let device_fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        Ok(Arc::new(device_fd))
    }

    fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
        let mut vfio_dev = mshv_create_device {
            type_: mshv_device_type_MSHV_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match config {
            InterruptSourceConfig::MsiIrq(cfg) => mshv_msi_routing_entry {
                gsi,
                address_lo: cfg.low_addr,
                address_hi: cfg.high_addr,
                data: cfg.data,
            }
            .into(),
            _ => {
                unreachable!()
            }
        }
    }

    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut msi_routing =
            vec_with_array_field::<mshv_msi_routing, mshv_msi_routing_entry>(entries.len());
        msi_routing[0].nr = entries.len() as u32;

        let entries: Vec<mshv_msi_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Mshv(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: msi_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [mshv_msi_routing_entry] =
                msi_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_msi_routing(&msi_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
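
    // A hedged usage sketch for the two methods above; the MSI config field
    // names follow make_routing_entry, and `vm` and `msi_cfg` are assumed to
    // be in scope:
    //
    //     let entry = vm.make_routing_entry(
    //         0,
    //         &InterruptSourceConfig::MsiIrq(msi_cfg),
    //     );
    //     vm.set_gsi_routing(&[entry])?;
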
    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        self.fd
            .enable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
    }
    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        // Before disabling the dirty page tracking we need to set the dirty
        // bits in the Hypervisor. This is a requirement from the Microsoft
        // Hypervisor.
        for (_, s) in dirty_log_slots.iter() {
            self.fd
                .get_dirty_log(s.guest_pfn, s.memory_size as usize, DIRTY_BITMAP_SET_DIRTY)
                .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
        }
        self.fd
            .disable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
        Ok(())
    }
    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(
                base_gpa >> PAGE_SHIFT,
                memory_size as usize,
                DIRTY_BITMAP_CLEAR_DIRTY,
            )
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }
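
    // A sketch of how a caller might walk the returned bitmap, assuming the
    // usual least-significant-bit-first packing of one bit per page (the
    // exact bit order is a property of the MSHV ioctl, not of this module):
    //
    //     let bitmap = vm.get_dirty_log(0, base_gpa, memory_size)?;
    //     for (i, word) in bitmap.iter().enumerate() {
    //         for bit in 0..64 {
    //             if word & (1 << bit) != 0 {
    //                 let dirty_gpa =
    //                     base_gpa + (((i * 64 + bit) as u64) << PAGE_SHIFT);
    //                 // The page at dirty_gpa was written since the last query.
    //             }
    //         }
    //     }
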
    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        Ok(ClockData::Mshv)
    }
    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, _data: &ClockData) -> vm::Result<()> {
        Ok(())
    }
}
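
// A minimal test sketch for the conversions at the top of this module. The
// concrete field values are illustrative, and the tests assume the 4 KiB page
// size implied by PAGE_SHIFT == 12 plus page-aligned addresses.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn user_memory_region_round_trip() {
        let region = UserMemoryRegion {
            guest_phys_addr: 0x10_0000,
            memory_size: 0x2000,
            userspace_addr: 0x7f00_0000_0000,
            flags: USER_MEMORY_REGION_READ | USER_MEMORY_REGION_WRITE,
            ..Default::default()
        };

        let mshv_region: mshv_user_mem_region = region.into();
        assert_eq!(mshv_region.guest_pfn, 0x100);
        assert_eq!(mshv_region.flags, HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE);

        // Converting back recovers the original address and size.
        let back: UserMemoryRegion = mshv_region.into();
        assert_eq!(back.guest_phys_addr, 0x10_0000);
        assert_eq!(back.memory_size, 0x2000);
    }

    #[test]
    fn io_event_address_round_trip() {
        let addr = IoEventAddress::Mmio(0xfee0_0000);
        let mshv_addr = mshv_ioctls::IoEventAddress::from(addr);
        assert!(matches!(
            IoEventAddress::from(mshv_addr),
            IoEventAddress::Mmio(0xfee0_0000)
        ));
    }
}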