xref: /cloud-hypervisor/hypervisor/src/mshv/mod.rs (revision ef3fad838885c436324c54c94d6f5b5112f18325)
1 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
2 //
3 // Copyright © 2020, Microsoft Corporation
4 //
5 
6 #![allow(dead_code)]
7 #![allow(unused_imports)]
8 #![allow(unused_variables)]
9 #![allow(unused_macros)]
10 #![allow(non_upper_case_globals)]
11 
12 use crate::arch::emulator::{EmulationError, PlatformEmulator, PlatformError};
13 #[cfg(target_arch = "x86_64")]
14 use crate::arch::x86::emulator::{Emulator, EmulatorCpuState};
15 use crate::cpu;
16 use crate::cpu::Vcpu;
17 use crate::hypervisor;
18 use crate::vm::{self, VmmOps};
19 pub use mshv_bindings::*;
20 use mshv_ioctls::{set_registers_64, InterruptRequest, Mshv, VcpuFd, VmFd};
21 use serde_derive::{Deserialize, Serialize};
22 use std::sync::Arc;
23 use vm::DataMatch;
24 // x86_64 dependencies
25 #[cfg(target_arch = "x86_64")]
26 pub mod x86_64;
27 use crate::device;
28 use std::convert::TryInto;
29 use vmm_sys_util::eventfd::EventFd;
30 #[cfg(target_arch = "x86_64")]
31 pub use x86_64::VcpuMshvState as CpuState;
32 #[cfg(target_arch = "x86_64")]
33 pub use x86_64::*;
34 // Wei: for emulating irqfd and ioeventfd
35 use std::collections::HashMap;
36 use std::fs::File;
37 use std::io;
38 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
39 use std::sync::{Mutex, RwLock};
40 use std::thread;
41 
/// Shift converting between page numbers and byte addresses (4 KiB pages).
pub const PAGE_SHIFT: usize = 12;

/// Hypervisor-wide state carried across snapshot/restore.
#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
pub struct HvState {
    // Guest address of the hypercall page; initialized to 0 (see hv_state_init).
    hypercall_page: u64,
}

/// For MSHV the saved VM state is exactly the hypervisor state above.
pub use HvState as VmState;
50 
/// Per-GSI state for the thread emulating irqfd on MSHV: it blocks on the
/// registered eventfd and injects the routed interrupt into the guest.
struct IrqfdCtrlEpollHandler {
    vm_fd: Arc<VmFd>, /* For issuing hypercall */
    irqfd: EventFd,   /* Registered by caller */
    kill: EventFd,    /* Created by us, signal thread exit */
    epoll_fd: RawFd,  /* epoll fd */
    gsi: u32,         /* GSI serviced by this handler */
    gsi_routes: Arc<RwLock<HashMap<u32, MshvIrqRoutingEntry>>>, /* shared GSI -> route table */
}
59 
/// Add `fd` to the `epoll_fd` interest list for `ev_type` events, tagging it
/// with `data` so the wait loop can tell the wakeup sources apart.
fn register_listener(
    epoll_fd: RawFd,
    fd: RawFd,
    ev_type: epoll::Events,
    data: u64,
) -> std::result::Result<(), io::Error> {
    epoll::ctl(
        epoll_fd,
        epoll::ControlOptions::EPOLL_CTL_ADD,
        fd,
        epoll::Event::new(ev_type, data),
    )
}
73 
// Tags carried in the epoll event data to identify the wakeup source.
const KILL_EVENT: u16 = 1;
const IRQFD_EVENT: u16 = 2;
76 
impl IrqfdCtrlEpollHandler {
    /// Translate the MSI routing entry `e` for this GSI into a Hyper-V
    /// `InterruptRequest` and ask the hypervisor to inject it.
    ///
    /// Panics if the MSI high address word is non-zero or the delivery
    /// mode is one `get_interrupt_type` does not map (returns `None`).
    fn assert_virtual_interrupt(&self, e: &MshvIrqRoutingEntry) -> vm::Result<()> {
        // GSI routing contains MSI information.
        // We still need to translate that to APIC ID etc

        debug!("Inject {:x?}", e);

        let MshvIrqRouting::Msi(msi) = e.route;

        /* Make an assumption here ... */
        if msi.address_hi != 0 {
            panic!("MSI high address part is not zero");
        }

        // Decompose the MSI address/data words into the fields the
        // hypervisor's interrupt request wants.
        let typ = self
            .get_interrupt_type(self.get_delivery_mode(msi.data))
            .unwrap();
        let apic_id = self.get_destination(msi.address_lo);
        let vector = self.get_vector(msi.data);
        let level_triggered = self.get_trigger_mode(msi.data);
        let logical_destination_mode = self.get_destination_mode(msi.address_lo);

        debug!(
            "{:x} {:x} {:x} {} {}",
            typ, apic_id, vector, level_triggered, logical_destination_mode
        );

        let request: InterruptRequest = InterruptRequest {
            interrupt_type: typ,
            apic_id,
            vector: vector.into(),
            level_triggered,
            logical_destination_mode,
            long_mode: false,
        };

        self.vm_fd
            .request_virtual_interrupt(&request)
            .map_err(|e| vm::HypervisorVmError::AsserttVirtualInterrupt(e.into()))?;

        Ok(())
    }
    /// Thread body: wait on the kill eventfd and the irqfd, injecting the
    /// routed interrupt each time the irqfd fires. Returns (and closes the
    /// epoll fd) when the kill eventfd is signaled.
    fn run_ctrl(&mut self) {
        self.epoll_fd = epoll::create(true).unwrap();
        // SAFETY-relevant: epoll_fd was freshly created above and is owned
        // exclusively here; the File takes ownership and closes it on drop.
        let epoll_file = unsafe { File::from_raw_fd(self.epoll_fd) };

        register_listener(
            epoll_file.as_raw_fd(),
            self.kill.as_raw_fd(),
            epoll::Events::EPOLLIN,
            u64::from(KILL_EVENT),
        )
        .unwrap_or_else(|err| {
            info!(
                "IrqfdCtrlEpollHandler: failed to register listener: {:?}",
                err
            );
        });

        register_listener(
            epoll_file.as_raw_fd(),
            self.irqfd.as_raw_fd(),
            epoll::Events::EPOLLIN,
            u64::from(IRQFD_EVENT),
        )
        .unwrap_or_else(|err| {
            info!(
                "IrqfdCtrlEpollHandler: failed to register listener: {:?}",
                err
            );
        });

        // Two registered fds, so at most two events per wakeup.
        let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); 2];

        'epoll: loop {
            let num_events = match epoll::wait(epoll_file.as_raw_fd(), -1, &mut events[..]) {
                Ok(res) => res,
                Err(e) => {
                    if e.kind() == std::io::ErrorKind::Interrupted {
                        // Signal delivery interrupted the wait; retry.
                        continue;
                    }
                    panic!("irqfd epoll ???");
                }
            };

            for event in events.iter().take(num_events) {
                // The low 16 bits of the event data carry our source tag.
                let ev_type = event.data as u16;

                match ev_type {
                    KILL_EVENT => {
                        break 'epoll;
                    }
                    IRQFD_EVENT => {
                        debug!("IRQFD_EVENT received, inject to guest");
                        // Drain the eventfd counter before injecting so the
                        // level stays clear for the next trigger.
                        let _ = self.irqfd.read().unwrap();
                        let gsi_routes = self.gsi_routes.read().unwrap();

                        if let Some(e) = gsi_routes.get(&self.gsi) {
                            self.assert_virtual_interrupt(&e).unwrap();
                        } else {
                            debug!("No routing info found for GSI {}", self.gsi);
                        }
                    }
                    _ => {
                        error!("Unknown event");
                    }
                }
            }
        }
    }

    ///
    /// See Intel SDM vol3 10.11.1
    /// We assume APIC ID and Hyper-V Vcpu ID are the same value
    ///

    fn get_destination(&self, message_address: u32) -> u64 {
        // Destination ID occupies bits 19:12 of the MSI address.
        ((message_address >> 12) & 0xff).into()
    }

    /// Destination mode flag, bit 2 of the MSI address; the result feeds
    /// `logical_destination_mode` in the interrupt request.
    fn get_destination_mode(&self, message_address: u32) -> bool {
        if (message_address >> 2) & 0x1 == 0x1 {
            return true;
        }

        false
    }

    fn get_vector(&self, message_data: u32) -> u8 {
        // Vector is the low byte of the MSI data.
        (message_data & 0xff) as u8
    }

    ///
    ///  True means level triggered
    ///
    fn get_trigger_mode(&self, message_data: u32) -> bool {
        // Trigger mode is bit 15 of the MSI data.
        if (message_data >> 15) & 0x1 == 0x1 {
            return true;
        }

        false
    }

    fn get_delivery_mode(&self, message_data: u32) -> u8 {
        // Delivery mode is bits 10:8 of the MSI data.
        ((message_data & 0x700) >> 8) as u8
    }
    ///
    ///  Translate from architectural defined delivery mode to Hyper-V type
    /// See Intel SDM vol3 10.11.2
    ///
    /// Returns `None` for reserved delivery modes (3, 6).
    fn get_interrupt_type(&self, delivery_mode: u8) -> Option<hv_interrupt_type> {
        match delivery_mode {
            0 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_FIXED),
            1 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_LOWESTPRIORITY),
            2 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_SMI),
            4 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_NMI),
            5 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_INIT),
            7 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_EXTINT),
            _ => None,
        }
    }
}
239 
/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    // System-level MSHV handle, used for hypervisor-wide operations
    // such as VM creation and MSR index enumeration.
    mshv: Mshv,
}
244 
245 impl MshvHypervisor {
246     /// Create a hypervisor based on Mshv
247     pub fn new() -> hypervisor::Result<MshvHypervisor> {
248         let mshv_obj =
249             Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
250         Ok(MshvHypervisor { mshv: mshv_obj })
251     }
252 }
253 /// Implementation of Hypervisor trait for Mshv
254 /// Example:
255 /// #[cfg(feature = "mshv")]
256 /// extern crate hypervisor
257 /// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
258 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
259 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
260 ///
261 impl hypervisor::Hypervisor for MshvHypervisor {
262     /// Create a mshv vm object and return the object as Vm trait object
263     /// Example
264     /// # extern crate hypervisor;
265     /// # use hypervisor::MshvHypervisor;
266     /// use hypervisor::MshvVm;
267     /// let hypervisor = MshvHypervisor::new().unwrap();
268     /// let vm = hypervisor.create_vm().unwrap()
269     ///
270     fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
271         let fd: VmFd;
272         loop {
273             match self.mshv.create_vm() {
274                 Ok(res) => fd = res,
275                 Err(e) => {
276                     if e.errno() == libc::EINTR {
277                         // If the error returned is EINTR, which means the
278                         // ioctl has been interrupted, we have to retry as
279                         // this can't be considered as a regular error.
280                         continue;
281                     } else {
282                         return Err(hypervisor::HypervisorError::VmCreate(e.into()));
283                     }
284                 }
285             }
286             break;
287         }
288 
289         let msr_list = self.get_msr_list()?;
290         let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
291         let mut msrs = MsrEntries::new(num_msrs);
292         let indices = msr_list.as_slice();
293         let msr_entries = msrs.as_mut_slice();
294         for (pos, index) in indices.iter().enumerate() {
295             msr_entries[pos].index = *index;
296         }
297         let vm_fd = Arc::new(fd);
298 
299         let irqfds = Mutex::new(HashMap::new());
300         let ioeventfds = Arc::new(RwLock::new(HashMap::new()));
301         let gsi_routes = Arc::new(RwLock::new(HashMap::new()));
302 
303         Ok(Arc::new(MshvVm {
304             fd: vm_fd,
305             msrs,
306             irqfds,
307             ioeventfds,
308             gsi_routes,
309             hv_state: hv_state_init(),
310             vmmops: None,
311         }))
312     }
313     ///
314     /// Get the supported CpuID
315     ///
316     fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
317         Ok(CpuId::new(1 as usize))
318     }
319     #[cfg(target_arch = "x86_64")]
320     ///
321     /// Retrieve the list of MSRs supported by KVM.
322     ///
323     fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
324         self.mshv
325             .get_msr_index_list()
326             .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
327     }
328 }
329 
#[derive(Clone)]
// A software emulated TLB.
// This is mostly used by the instruction emulator to cache gva to gpa translations
// passed from the hypervisor.
struct SoftTLB {
    // Guest-virtual -> guest-physical address cache.
    addr_map: HashMap<u64, u64>,
}
337 
338 impl SoftTLB {
339     fn new() -> SoftTLB {
340         SoftTLB {
341             addr_map: HashMap::new(),
342         }
343     }
344 
345     // Adds a gva -> gpa mapping into the TLB.
346     fn add_mapping(&mut self, gva: u64, gpa: u64) -> Result<(), PlatformError> {
347         *self.addr_map.entry(gva).or_insert(gpa) = gpa;
348         Ok(())
349     }
350 
351     // Do the actual gva -> gpa translation
352     fn translate(&self, gva: u64) -> Result<u64, PlatformError> {
353         self.addr_map
354             .get(&gva)
355             .ok_or_else(|| PlatformError::UnmappedGVA(anyhow!("{:#?}", gva)))
356             .map(|v| *v)
357 
358         // TODO Check if we could fallback to e.g. an hypercall for doing
359         // the translation for us.
360     }
361 
362     // FLush the TLB, all mappings are removed.
363     fn flush(&mut self) -> Result<(), PlatformError> {
364         self.addr_map.clear();
365 
366         Ok(())
367     }
368 }
369 
#[allow(clippy::type_complexity)]
/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    // Kernel handle for vCPU-level ioctls.
    fd: VcpuFd,
    // Virtual processor index of this vCPU.
    vp_index: u8,
    // Cached CPUID table, returned as-is by get_cpuid2().
    cpuid: CpuId,
    // MSR entry template cloned when snapshotting state (see state()).
    msrs: MsrEntries,
    // Shared ioeventfd registrations, consulted by the MMIO emulator.
    ioeventfds: Arc<RwLock<HashMap<IoEventAddress, (Option<DataMatch>, EventFd)>>>,
    // Shared GSI -> routing entry table (same one the irqfd threads use).
    gsi_routes: Arc<RwLock<HashMap<u32, MshvIrqRoutingEntry>>>,
    hv_state: Arc<RwLock<HvState>>, // Mshv State
    // Callbacks into the VMM for PIO/MMIO handling; None if not wired up.
    vmmops: Option<Arc<Box<dyn vm::VmmOps>>>,
}
382 
383 /// Implementation of Vcpu trait for Microsoft Hypervisor
384 /// Example:
385 /// #[cfg(feature = "mshv")]
386 /// extern crate hypervisor
387 /// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
388 /// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
389 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
390 /// let vcpu = vm.create_vcpu(0).unwrap();
391 /// vcpu.get/set().unwrap()
392 ///
393 impl cpu::Vcpu for MshvVcpu {
394     #[cfg(target_arch = "x86_64")]
395     ///
396     /// Returns the vCPU general purpose registers.
397     ///
398     fn get_regs(&self) -> cpu::Result<StandardRegisters> {
399         self.fd
400             .get_regs()
401             .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
402     }
403     #[cfg(target_arch = "x86_64")]
404     ///
405     /// Sets the vCPU general purpose registers.
406     ///
407     fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
408         self.fd
409             .set_regs(regs)
410             .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
411     }
412     #[cfg(target_arch = "x86_64")]
413     ///
414     /// Returns the vCPU special registers.
415     ///
416     fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
417         self.fd
418             .get_sregs()
419             .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))
420     }
421     #[cfg(target_arch = "x86_64")]
422     ///
423     /// Sets the vCPU special registers.
424     ///
425     fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
426         self.fd
427             .set_sregs(sregs)
428             .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
429     }
430     #[cfg(target_arch = "x86_64")]
431     ///
432     /// Returns the floating point state (FPU) from the vCPU.
433     ///
434     fn get_fpu(&self) -> cpu::Result<FpuState> {
435         self.fd
436             .get_fpu()
437             .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))
438     }
439     #[cfg(target_arch = "x86_64")]
440     ///
441     /// Set the floating point state (FPU) of a vCPU.
442     ///
443     fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
444         self.fd
445             .set_fpu(fpu)
446             .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
447     }
448 
449     #[cfg(target_arch = "x86_64")]
450     ///
451     /// Returns the model-specific registers (MSR) for this vCPU.
452     ///
453     fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> {
454         self.fd
455             .get_msrs(msrs)
456             .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))
457     }
458     #[cfg(target_arch = "x86_64")]
459     ///
460     /// Setup the model-specific registers (MSR) for this vCPU.
461     /// Returns the number of MSR entries actually written.
462     ///
463     fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> {
464         self.fd
465             .set_msrs(msrs)
466             .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
467     }
468 
469     #[cfg(target_arch = "x86_64")]
470     ///
471     /// X86 specific call that returns the vcpu's current "xcrs".
472     ///
473     fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
474         self.fd
475             .get_xcrs()
476             .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
477     }
478     #[cfg(target_arch = "x86_64")]
479     ///
480     /// X86 specific call that sets the vcpu's current "xcrs".
481     ///
482     fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
483         self.fd
484             .set_xcrs(&xcrs)
485             .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
486     }
487     #[cfg(target_arch = "x86_64")]
488     ///
489     /// Returns currently pending exceptions, interrupts, and NMIs as well as related
490     /// states of the vcpu.
491     ///
492     fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
493         self.fd
494             .get_vcpu_events()
495             .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
496     }
497     #[cfg(target_arch = "x86_64")]
498     ///
499     /// Sets pending exceptions, interrupts, and NMIs as well as related states
500     /// of the vcpu.
501     ///
502     fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
503         self.fd
504             .set_vcpu_events(events)
505             .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
506     }
507     #[cfg(target_arch = "x86_64")]
508     ///
509     /// X86 specific call to enable HyperV SynIC
510     ///
511     fn enable_hyperv_synic(&self) -> cpu::Result<()> {
512         /* We always have SynIC enabled on MSHV */
513         Ok(())
514     }
515     fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
516         // Safe because this is just only done during initialization.
517         // TODO don't zero it everytime we enter this function.
518         let hv_message: hv_message = unsafe { std::mem::zeroed() };
519         match self.fd.run(hv_message) {
520             Ok(x) => match x.header.message_type {
521                 hv_message_type_HVMSG_X64_HALT => {
522                     debug!("HALT");
523                     Ok(cpu::VmExit::Reset)
524                 }
525                 hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
526                     warn!("TRIPLE FAULT");
527                     Ok(cpu::VmExit::Shutdown)
528                 }
529                 hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
530                     let info = x.to_ioport_info().unwrap();
531                     let access_info = info.access_info;
532                     if unsafe { access_info.__bindgen_anon_1.string_op() } == 1 {
533                         panic!("String IN/OUT not supported");
534                     }
535                     if unsafe { access_info.__bindgen_anon_1.rep_prefix() } == 1 {
536                         panic!("Rep IN/OUT not supported");
537                     }
538                     let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
539                     let is_write = info.header.intercept_access_type == 1;
540                     let port = info.port_number;
541                     let mut data: [u8; 4] = [0; 4];
542                     let mut ret_rax = info.rax;
543 
544                     if is_write {
545                         let data = (info.rax as u32).to_le_bytes();
546                         if let Some(vmmops) = &self.vmmops {
547                             vmmops
548                                 .pio_write(port.into(), &data[0..len])
549                                 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
550                         }
551                     } else {
552                         if let Some(vmmops) = &self.vmmops {
553                             vmmops
554                                 .pio_read(port.into(), &mut data[0..len])
555                                 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
556                         }
557 
558                         let v = u32::from_le_bytes(data);
559                         /* Preserve high bits in EAX but clear out high bits in RAX */
560                         let mask = 0xffffffff >> (32 - len * 8);
561                         let eax = (info.rax as u32 & !mask) | (v & mask);
562                         ret_rax = eax as u64;
563                     }
564 
565                     let insn_len = info.header.instruction_length() as u64;
566 
567                     /* Advance RIP and update RAX */
568                     let arr_reg_name_value = [
569                         (
570                             hv_register_name::HV_X64_REGISTER_RIP,
571                             info.header.rip + insn_len,
572                         ),
573                         (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
574                     ];
575                     set_registers_64!(self.fd, arr_reg_name_value)
576                         .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
577                     Ok(cpu::VmExit::Ignore)
578                 }
579                 hv_message_type_HVMSG_UNMAPPED_GPA => {
580                     let info = x.to_memory_info().unwrap();
581                     let insn_len = info.instruction_byte_count as usize;
582                     assert!(insn_len > 0 && insn_len <= 16);
583 
584                     let mut context = MshvEmulatorContext {
585                         vcpu: self,
586                         tlb: SoftTLB::new(),
587                     };
588 
589                     // Add the GVA <-> GPA mapping.
590                     context
591                         .tlb
592                         .add_mapping(info.guest_virtual_address, info.guest_physical_address)
593                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
594 
595                     // Create a new emulator.
596                     let mut emul = Emulator::new(&mut context);
597 
598                     // Emulate the trapped instruction, and only the first one.
599                     let new_state = emul
600                         .emulate_first_insn(self.vp_index as usize, &info.instruction_bytes)
601                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
602 
603                     // Set CPU state back.
604                     context
605                         .set_cpu_state(self.vp_index as usize, new_state)
606                         .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
607 
608                     Ok(cpu::VmExit::Ignore)
609                 }
610                 hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
611                     let info = x.to_cpuid_info().unwrap();
612                     debug!("cpuid eax: {:x}", info.rax);
613                     Ok(cpu::VmExit::Ignore)
614                 }
615                 hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
616                     let info = x.to_msr_info().unwrap();
617                     if info.header.intercept_access_type == 0 as u8 {
618                         debug!("msr read: {:x}", info.msr_number);
619                     } else {
620                         debug!("msr write: {:x}", info.msr_number);
621                     }
622                     Ok(cpu::VmExit::Ignore)
623                 }
624                 hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
625                     //TODO: Handler for VMCALL here.
626                     let info = x.to_exception_info().unwrap();
627                     debug!("Exception Info {:?}", info.exception_vector);
628                     Ok(cpu::VmExit::Ignore)
629                 }
630                 exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
631                     "Unhandled VCPU exit {:?}",
632                     exit
633                 ))),
634             },
635 
636             Err(e) => match e.errno() {
637                 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
638                 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
639                     "VCPU error {:?}",
640                     e
641                 ))),
642             },
643         }
644     }
645     #[cfg(target_arch = "x86_64")]
646     ///
647     /// X86 specific call to setup the CPUID registers.
648     ///
649     fn set_cpuid2(&self, cpuid: &CpuId) -> cpu::Result<()> {
650         Ok(())
651     }
652     #[cfg(target_arch = "x86_64")]
653     ///
654     /// X86 specific call to retrieve the CPUID registers.
655     ///
656     fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<CpuId> {
657         Ok(self.cpuid.clone())
658     }
659     #[cfg(target_arch = "x86_64")]
660     ///
661     /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
662     ///
663     fn get_lapic(&self) -> cpu::Result<LapicState> {
664         self.fd
665             .get_lapic()
666             .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))
667     }
668     #[cfg(target_arch = "x86_64")]
669     ///
670     /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
671     ///
672     fn set_lapic(&self, lapic: &LapicState) -> cpu::Result<()> {
673         self.fd
674             .set_lapic(lapic)
675             .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
676     }
677     #[cfg(target_arch = "x86_64")]
678     ///
679     /// X86 specific call that returns the vcpu's current "xsave struct".
680     ///
681     fn get_xsave(&self) -> cpu::Result<Xsave> {
682         self.fd
683             .get_xsave()
684             .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
685     }
686     #[cfg(target_arch = "x86_64")]
687     ///
688     /// X86 specific call that sets the vcpu's current "xsave struct".
689     ///
690     fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
691         self.fd
692             .set_xsave(*xsave)
693             .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
694     }
695     ///
696     /// Set CPU state
697     ///
698     fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
699         self.set_msrs(&state.msrs)?;
700         self.set_vcpu_events(&state.vcpu_events)?;
701         self.set_regs(&state.regs)?;
702         self.set_sregs(&state.sregs)?;
703         self.set_fpu(&state.fpu)?;
704         self.set_xcrs(&state.xcrs)?;
705         self.set_lapic(&state.lapic)?;
706         self.set_xsave(&state.xsave)?;
707         self.fd
708             .set_debug_regs(&state.dbg)
709             .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
710         Ok(())
711     }
712     ///
713     /// Get CPU State
714     ///
715     fn state(&self) -> cpu::Result<CpuState> {
716         let regs = self.get_regs()?;
717         let sregs = self.get_sregs()?;
718         let xcrs = self.get_xcrs()?;
719         let fpu = self.get_fpu()?;
720         let vcpu_events = self.get_vcpu_events()?;
721         let mut msrs = self.msrs.clone();
722         self.get_msrs(&mut msrs)?;
723         let lapic = self.get_lapic()?;
724         let xsave = self.get_xsave()?;
725         let dbg = self
726             .fd
727             .get_debug_regs()
728             .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;
729         Ok(CpuState {
730             msrs,
731             vcpu_events,
732             regs,
733             sregs,
734             fpu,
735             xcrs,
736             lapic,
737             dbg,
738             xsave,
739         })
740     }
741 }
742 
/// Context handed to the x86 instruction emulator on unmapped-GPA exits:
/// couples a vCPU with a per-exit software TLB for address translation.
struct MshvEmulatorContext<'a> {
    vcpu: &'a MshvVcpu,
    tlb: SoftTLB,
}
747 
748 /// Platform emulation for Hyper-V
impl<'a> PlatformEmulator for MshvEmulatorContext<'a> {
    type CpuState = EmulatorCpuState;

    /// Read guest memory at `gva`: translate through the TLB, then forward
    /// the access to the VMM's MMIO read handler.
    /// NOTE(review): if `vmmops` is None, `data` is left untouched rather
    /// than failing — confirm this is intended.
    fn read_memory(&self, gva: u64, data: &mut [u8]) -> Result<(), PlatformError> {
        let gpa = self.tlb.translate(gva)?;
        debug!(
            "mshv emulator: memory read {} bytes from [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vmmops) = &self.vcpu.vmmops {
            vmmops
                .mmio_read(gpa, data)
                .map_err(|e| PlatformError::MemoryReadFailure(e.into()))?;
        }

        Ok(())
    }

    /// Write guest memory at `gva`: translate through the TLB, signal any
    /// ioeventfd registered for the target GPA, then forward the write to
    /// the VMM's MMIO handler.
    fn write_memory(&mut self, gva: u64, data: &[u8]) -> Result<(), PlatformError> {
        let gpa = self.tlb.translate(gva)?;
        debug!(
            "mshv emulator: memory write {} bytes at [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some((datamatch, efd)) = self
            .vcpu
            .ioeventfds
            .read()
            .unwrap()
            .get(&IoEventAddress::Mmio(gpa))
        {
            debug!("ioevent {:x} {:x?} {}", gpa, datamatch, efd.as_raw_fd());

            /* TODO: use datamatch to provide the correct semantics */
            efd.write(1).unwrap();
        }

        if let Some(vmmops) = &self.vcpu.vmmops {
            vmmops
                .mmio_write(gpa, data)
                .map_err(|e| PlatformError::MemoryWriteFailure(e.into()))?;
        }

        Ok(())
    }

    /// Snapshot the vCPU's standard and special registers for the emulator.
    /// Rejects requests for any CPU other than the one this context wraps.
    fn cpu_state(&self, cpu_id: usize) -> Result<Self::CpuState, PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::GetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        let regs = self
            .vcpu
            .get_regs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;
        let sregs = self
            .vcpu
            .get_sregs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;

        debug!("mshv emulator: Getting new CPU state");
        debug!("mshv emulator: {:#x?}", regs);

        Ok(EmulatorCpuState { regs, sregs })
    }

    /// Write the emulator's resulting register state back to the vCPU.
    /// Rejects requests for any CPU other than the one this context wraps.
    fn set_cpu_state(&self, cpu_id: usize, state: Self::CpuState) -> Result<(), PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::SetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        debug!("mshv emulator: Setting new CPU state");
        debug!("mshv emulator: {:#x?}", state.regs);

        self.vcpu
            .set_regs(&state.regs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))?;
        self.vcpu
            .set_sregs(&state.sregs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))
    }

    /// Translate a guest virtual address via the software TLB.
    fn gva_to_gpa(&self, gva: u64) -> Result<u64, PlatformError> {
        self.tlb.translate(gva)
    }

    /// Instruction fetch is not supported; the exit message already
    /// carries the instruction bytes, so this path should not be needed.
    fn fetch(&self, ip: u64, instruction_bytes: &mut [u8]) -> Result<(), PlatformError> {
        Err(PlatformError::MemoryReadFailure(anyhow!("unimplemented")))
    }
}
853 
#[allow(clippy::type_complexity)]
/// Wrapper over Mshv VM ioctls.
pub struct MshvVm {
    // VM-level kernel handle, shared with the irqfd handler threads.
    fd: Arc<VmFd>,
    // MSR entry list built from the supported MSR indices at VM creation.
    msrs: MsrEntries,
    // Emulate irqfd: gsi -> eventfd pair (presumably irqfd + kill fd for
    // the handler thread — registration code is elsewhere in this file).
    irqfds: Mutex<HashMap<u32, (EventFd, EventFd)>>,
    // Emulate ioeventfd: address -> (datamatch, eventfd to signal).
    ioeventfds: Arc<RwLock<HashMap<IoEventAddress, (Option<DataMatch>, EventFd)>>>,
    // GSI routing information
    gsi_routes: Arc<RwLock<HashMap<u32, MshvIrqRoutingEntry>>>,
    // Hypervisor State
    hv_state: Arc<RwLock<HvState>>,
    // Callbacks into the VMM for PIO/MMIO emulation; None until wired up.
    vmmops: Option<Arc<Box<dyn vm::VmmOps>>>,
}
869 
870 fn hv_state_init() -> Arc<RwLock<HvState>> {
871     Arc::new(RwLock::new(HvState { hypercall_page: 0 }))
872 }
873 
///
/// Implementation of the `vm::Vm` trait for Mshv (available with the
/// `mshv` feature).
///
/// # Example
///
/// ```ignore
/// # extern crate hypervisor;
/// # use hypervisor::MshvHypervisor;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// // The `Vm` trait's get/set methods can now be called on `vm`.
/// ```
///
impl vm::Vm for MshvVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    /// No-op on mshv; `offset` is ignored.
    fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    /// No-op on mshv: interrupt routing is emulated in userspace here
    /// (see register_irqfd / set_gsi_routing).
    fn create_irq_chip(&self) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    /// irqfd is emulated: a dedicated listener thread per GSI waits on the
    /// caller's eventfd and injects the interrupt via the VM fd.
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        let dup_fd = fd.try_clone().unwrap();
        // kill_fd lets unregister_irqfd() ask the listener thread to exit.
        let kill_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();

        let mut ctrl_handler = IrqfdCtrlEpollHandler {
            vm_fd: self.fd.clone(),
            kill: kill_fd.try_clone().unwrap(),
            irqfd: fd.try_clone().unwrap(),
            // 0 is a placeholder; presumably replaced when the handler sets
            // up its epoll instance — confirm in run_ctrl().
            epoll_fd: 0,
            gsi,
            gsi_routes: self.gsi_routes.clone(),
        };

        debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        // NOTE(review): eventfd clones and thread spawn all unwrap();
        // resource exhaustion panics instead of returning vm::Result.
        thread::Builder::new()
            .name(format!("irqfd_{}", gsi))
            .spawn(move || ctrl_handler.run_ctrl())
            .unwrap();

        // Keep both fds alive, indexed by GSI, for later teardown.
        self.irqfds.lock().unwrap().insert(gsi, (dup_fd, kill_fd));

        Ok(())
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    /// Signals the per-GSI listener thread to exit, then drops both eventfds.
    /// NOTE(review): panics if `gsi` was never registered (remove().unwrap()).
    fn unregister_irqfd(&self, _fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("unregister_irqfd fd {} gsi {}", _fd.as_raw_fd(), gsi);
        let (_, kill_fd) = self.irqfds.lock().unwrap().remove(&gsi).unwrap();
        // Wake the listener thread so it observes the kill event and exits.
        kill_fd.write(1).unwrap();
        Ok(())
    }
    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    /// The new vCPU shares this VM's ioeventfd table, GSI routes and
    /// hypervisor state so vCPU exits can be emulated in userspace.
    fn create_vcpu(
        &self,
        id: u8,
        vmmops: Option<Arc<Box<dyn VmmOps>>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vcpu_fd = self
            .fd
            .create_vcpu(id)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = MshvVcpu {
            fd: vcpu_fd,
            vp_index: id,
            // NOTE(review): single-entry CPUID table; the `1 as usize` cast
            // is redundant (the literal could be `1` directly).
            cpuid: CpuId::new(1 as usize),
            msrs: self.msrs.clone(),
            ioeventfds: self.ioeventfds.clone(),
            gsi_routes: self.gsi_routes.clone(),
            hv_state: self.hv_state.clone(),
            vmmops,
        };
        Ok(Arc::new(vcpu))
    }
    #[cfg(target_arch = "x86_64")]
    /// No-op on mshv; nothing to configure for a split irqchip here.
    fn enable_split_irq(&self) -> vm::Result<()> {
        Ok(())
    }
    /// Registers `fd` to be signaled on writes to `addr`, optionally only
    /// when the written value matches `datamatch`. Emulated in userspace:
    /// the table is shared with every vCPU (see create_vcpu).
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<DataMatch>,
    ) -> vm::Result<()> {
        let dup_fd = fd.try_clone().unwrap();

        debug!(
            "register_ioevent fd {} addr {:x?} datamatch {:?}",
            fd.as_raw_fd(),
            addr,
            datamatch
        );

        self.ioeventfds
            .write()
            .unwrap()
            .insert(*addr, (datamatch, dup_fd));
        Ok(())
    }
    /// Unregister an event from a certain address it has been previously registered to.
    /// NOTE(review): panics if `addr` was never registered (remove().unwrap()).
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);
        self.ioeventfds.write().unwrap().remove(addr).unwrap();
        Ok(())
    }

    /// Creates/modifies a guest physical memory slot.
    fn set_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        self.fd
            .map_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::SetUserMemory(e.into()))?;
        Ok(())
    }

    /// Build a `MemoryRegion` describing a guest physical memory slot.
    ///
    /// Regions are always mapped readable and executable; `readonly` only
    /// controls the WRITABLE flag. `_slot` and `log_dirty_pages` are ignored
    /// (dirty page logging is not implemented — see get_dirty_log).
    fn make_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> MemoryRegion {
        let mut flags = HV_MAP_GPA_READABLE | HV_MAP_GPA_EXECUTABLE;
        if !readonly {
            flags |= HV_MAP_GPA_WRITABLE;
        }

        mshv_user_mem_region {
            flags,
            // The mshv mapping API takes a page frame number, not a byte
            // address.
            guest_pfn: guest_phys_addr >> PAGE_SHIFT,
            size: memory_size,
            userspace_addr: userspace_addr as u64,
        }
    }

    /// Device passthrough (e.g. VFIO) is not supported on mshv; always errors.
    fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
        Err(vm::HypervisorVmError::CreatePassthroughDevice(anyhow!(
            "No passthrough support"
        )))
    }

    /// Replace the entire userspace GSI routing table with `irq_routing`.
    fn set_gsi_routing(&self, irq_routing: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut routes = self.gsi_routes.write().unwrap();

        // Full replacement: drop every existing route first.
        routes.drain();

        for r in irq_routing {
            debug!("gsi routing {:x?}", r);
            routes.insert(r.gsi, *r);
        }

        Ok(())
    }
    ///
    /// Get the Vm state. Return VM specific data (a copy of `HvState`).
    ///
    fn state(&self) -> vm::Result<VmState> {
        Ok(*self.hv_state.read().unwrap())
    }
    ///
    /// Set the VM state (currently only the hypercall page address).
    ///
    fn set_state(&self, state: VmState) -> vm::Result<()> {
        self.hv_state.write().unwrap().hypercall_page = state.hypercall_page;
        Ok(())
    }
    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    /// Not implemented; always returns an error.
    fn get_dirty_log(&self, slot: u32, memory_size: u64) -> vm::Result<Vec<u64>> {
        Err(vm::HypervisorVmError::GetDirtyLog(anyhow!(
            "get_dirty_log not implemented"
        )))
    }
}
1061 pub use hv_cpuid_entry as CpuIdEntry;
1062 
/// MSI routing target for a GSI: the MSI address (split into low/high 32-bit
/// halves) and data payload to deliver when the GSI fires.
#[derive(Copy, Clone, Debug)]
pub struct MshvIrqRoutingMsi {
    pub address_lo: u32,
    pub address_hi: u32,
    pub data: u32,
}
1069 
/// Kinds of interrupt routes in the userspace GSI routing table.
/// Only MSI routes are currently supported.
#[derive(Copy, Clone, Debug)]
pub enum MshvIrqRouting {
    Msi(MshvIrqRoutingMsi),
}
1074 
/// One entry of the userspace GSI routing table: maps a GSI number to the
/// route used to deliver it (see `MshvVm::set_gsi_routing`).
#[derive(Copy, Clone, Debug)]
pub struct MshvIrqRoutingEntry {
    pub gsi: u32,
    pub route: MshvIrqRouting,
}
/// Hypervisor-agnostic alias used by generic interrupt-routing code.
pub type IrqRoutingEntry = MshvIrqRoutingEntry;

/// Flag value used when building CPUID entries (always 0 here).
pub const CPUID_FLAG_VALID_INDEX: u32 = 0;
1083