// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//

use crate::arch::emulator::{PlatformEmulator, PlatformError};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::emulator::{Emulator, EmulatorCpuState};
use crate::cpu;
use crate::cpu::Vcpu;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, VmmOps};
pub use mshv_bindings::*;
pub use mshv_ioctls::IoEventAddress;
use mshv_ioctls::{set_registers_64, Mshv, NoDatamatch, VcpuFd, VmFd};
use serde_derive::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use vm::DataMatch;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
use crate::device;
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
pub use x86_64::VcpuMshvState as CpuState;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;

#[cfg(target_arch = "x86_64")]
use std::fs::File;
use std::os::unix::io::{AsRawFd, RawFd};

const DIRTY_BITMAP_CLEAR_DIRTY: u64 = 0x4;
const DIRTY_BITMAP_SET_DIRTY: u64 = 0x8;

///
/// Export generically-named wrappers of mshv-bindings for Unix-based platforms
///
pub use {
    mshv_bindings::mshv_create_device as CreateDevice,
    mshv_bindings::mshv_device_attr as DeviceAttr,
    mshv_bindings::mshv_msi_routing_entry as IrqRoutingEntry, mshv_ioctls::DeviceFd,
};

pub const PAGE_SHIFT: usize = 12;

#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
pub struct HvState {
    hypercall_page: u64,
}

pub use HvState as VmState;

struct MshvDirtyLogSlot {
    guest_pfn: u64,
    memory_size: u64,
}

/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    /// Create a hypervisor based on Mshv
    pub fn new() -> hypervisor::Result<MshvHypervisor> {
        let mshv_obj =
            Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
        Ok(MshvHypervisor { mshv: mshv_obj })
    }
}
/// Implementation of the Hypervisor trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
///
impl hypervisor::Hypervisor for MshvHypervisor {
    /// Create an MSHV VM object and return it as a Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::MshvHypervisor;
    /// use hypervisor::MshvVm;
    /// let hypervisor = MshvHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.mshv.create_vm() {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, the ioctl has been
                        // interrupted and we have to retry; this can't be
                        // considered a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let msr_list = self.get_msr_list()?;
        let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
        let mut msrs = MsrEntries::new(num_msrs).unwrap();
        let indices = msr_list.as_slice();
        let msr_entries = msrs.as_mut_slice();
        for (pos, index) in indices.iter().enumerate() {
            msr_entries[pos].index = *index;
        }
        let vm_fd = Arc::new(fd);

        Ok(Arc::new(MshvVm {
            fd: vm_fd,
            msrs,
            hv_state: hv_state_init(),
            vmmops: None,
            dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
        }))
    }
    ///
    /// Get the supported CpuID
    ///
    fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
        Ok(CpuId::new(1).unwrap())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by MSHV.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

#[allow(dead_code)]
/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    fd: VcpuFd,
    vp_index: u8,
    cpuid: CpuId,
    msrs: MsrEntries,
    hv_state: Arc<RwLock<HvState>>, // Mshv State
    vmmops: Option<Arc<dyn vm::VmmOps>>,
}

/// Implementation of the Vcpu trait for Microsoft Hypervisor
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0).unwrap();
/// vcpu.get/set().unwrap()
///
impl cpu::Vcpu for MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        self.fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        self.fd
            .set_regs(regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        self.fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        self.fd
            .set_sregs(sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        self.fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        self.fd
            .set_fpu(fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> {
        self.fd
            .get_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> {
        self.fd
            .set_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }
    #[allow(non_upper_case_globals)]
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        // Safe because this is only done during initialization.
        // TODO don't zero it every time we enter this function.
        let hv_message: hv_message = unsafe { std::mem::zeroed() };
        match self.fd.run(hv_message) {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    /*
                     * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
                     *
                     * Cloud Hypervisor doesn't support fw_cfg at the moment. It does support 0x402
                     * under the "fwdebug" feature flag. But that feature is not enabled by default
                     * and is considered legacy.
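                     *
                     * (For reference: 0x510/0x511 are the QEMU fw_cfg selector/data
                     * ports and 0x514 is the fw_cfg DMA port; 0x402 is the ISA debug
                     * console port.)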
                     *
                     * OVMF unconditionally pokes these IO ports with string IO.
                     *
                     * Instead of trying to implement string IO support, which would not
                     * accomplish much right now, skip those ports explicitly to avoid
                     * panicking.
                     *
                     * Proper string IO support can be added once we gain the ability to translate
                     * guest virtual addresses to guest physical addresses on MSHV.
                     */
                    match port {
                        0x402 | 0x510 | 0x511 | 0x514 => {
                            let insn_len = info.header.instruction_length() as u64;

                            /* Advance RIP and update RAX */
                            let arr_reg_name_value = [
                                (
                                    hv_register_name::HV_X64_REGISTER_RIP,
                                    info.header.rip + insn_len,
                                ),
                                (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                            ];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                            return Ok(cpu::VmExit::Ignore);
                        }
                        _ => {}
                    }

                    if unsafe { access_info.__bindgen_anon_1.string_op() } == 1 {
                        panic!("String IN/OUT not supported");
                    }
                    if unsafe { access_info.__bindgen_anon_1.rep_prefix() } == 1 {
                        panic!("Rep IN/OUT not supported");
                    }

                    if is_write {
                        let data = (info.rax as u32).to_le_bytes();
                        if let Some(vmmops) = &self.vmmops {
                            vmmops
                                .pio_write(port.into(), &data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }
                    } else {
                        if let Some(vmmops) = &self.vmmops {
                            vmmops
                                .pio_read(port.into(), &mut data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }

                        let v = u32::from_le_bytes(data);
                        /* Preserve high bits in EAX but clear out high bits in RAX */
                        let mask = 0xffffffff >> (32 - len * 8);
                        let eax = (info.rax as u32 & !mask) | (v & mask);
                        ret_rax = eax as u64;
                    }

                    let insn_len = info.header.instruction_length() as u64;

                    /* Advance RIP and update RAX */
                    let arr_reg_name_value = [
                        (
                            hv_register_name::HV_X64_REGISTER_RIP,
                            info.header.rip + insn_len,
                        ),
                        (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                    ];
                    set_registers_64!(self.fd, arr_reg_name_value)
                        .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_UNMAPPED_GPA => {
                    let info = x.to_memory_info().unwrap();
                    let insn_len = info.instruction_byte_count as usize;
                    assert!(insn_len > 0 && insn_len <= 16);

                    let mut context = MshvEmulatorContext {
                        vcpu: self,
                        map: (info.guest_virtual_address, info.guest_physical_address),
                    };

                    // Create a new emulator.
                    let mut emul = Emulator::new(&mut context);

                    // Emulate the trapped instruction, and only the first one.
                    let new_state = emul
                        .emulate_first_insn(self.vp_index as usize, &info.instruction_bytes)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Set CPU state back.
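                    // The emulator may have advanced RIP and changed register
                    // values; write the updated state back to the vCPU.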
                    context
                        .set_cpu_state(self.vp_index as usize, new_state)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
                    let info = x.to_cpuid_info().unwrap();
                    debug!("cpuid eax: {:x}", { info.rax });
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
                    let info = x.to_msr_info().unwrap();
                    if info.header.intercept_access_type == 0 {
                        debug!("msr read: {:x}", { info.msr_number });
                    } else {
                        debug!("msr write: {:x}", { info.msr_number });
                    }
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
                    //TODO: Handler for VMCALL here.
                    let info = x.to_exception_info().unwrap();
                    debug!("Exception Info {:?}", { info.exception_vector });
                    Ok(cpu::VmExit::Ignore)
                }
                exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unhandled VCPU exit {:?}",
                    exit
                ))),
            },

            Err(e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, _cpuid: &CpuId) -> cpu::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<CpuId> {
        Ok(self.cpuid.clone())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        self.fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, lapic: &LapicState) -> cpu::Result<()> {
        self.fd
            .set_lapic(lapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    ///
    /// Set CPU state
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        self.set_msrs(&state.msrs)?;
        self.set_vcpu_events(&state.vcpu_events)?;
        self.set_regs(&state.regs)?;
        self.set_sregs(&state.sregs)?;
        self.set_fpu(&state.fpu)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic)?;
        self.set_xsave(&state.xsave)?;
        self.fd
            .set_debug_regs(&state.dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
        Ok(())
    }
    ///
    /// Get CPU State
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xcrs = self.get_xcrs()?;
        let fpu = self.get_fpu()?;
        let vcpu_events = self.get_vcpu_events()?;
        let mut msrs = self.msrs.clone();
        self.get_msrs(&mut msrs)?;
        let lapic = self.get_lapic()?;
        let xsave = self.get_xsave()?;
        let dbg = self
            .fd
            .get_debug_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;
        Ok(CpuState {
            msrs,
            vcpu_events,
            regs,
            sregs,
            fpu,
            xcrs,
            lapic,
            dbg,
            xsave,
        })
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Translate guest virtual address to guest physical address
    ///
    fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, hv_translate_gva_result)> {
        let r = self
            .fd
            .translate_gva(gva, flags)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;

        Ok(r)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "suspend registers".
    ///
    fn get_suspend_regs(&self) -> cpu::Result<SuspendRegisters> {
        self.fd
            .get_suspend_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetSuspendRegs(e.into()))
    }
}

/// Device struct for MSHV
pub struct MshvDevice {
    fd: DeviceFd,
}

impl device::Device for MshvDevice {
    ///
    /// Set device attribute
    ///
    fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> {
        self.fd
            .set_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into()))
    }
    ///
    /// Get device attribute
    ///
    fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> {
        self.fd
            .get_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into()))
    }
}

impl AsRawFd for MshvDevice {
    fn as_raw_fd(&self) -> RawFd {
        self.fd.as_raw_fd()
    }
}

struct MshvEmulatorContext<'a> {
    vcpu: &'a MshvVcpu,
    map: (u64, u64), // Initial GVA to GPA mapping provided by the hypervisor
}

impl<'a> MshvEmulatorContext<'a> {
    // Do the actual gva -> gpa translation
    #[allow(non_upper_case_globals)]
    fn translate(&self, gva: u64) -> Result<u64, PlatformError> {
        if self.map.0 == gva {
            return Ok(self.map.1);
        }

        // TODO: More fine-grained control for the flags
        let flags = HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE;

        let r = self
            .vcpu
            .translate_gva(gva, flags.into())
            .map_err(|e| PlatformError::TranslateVirtualAddress(anyhow!(e)))?;

        let result_code = unsafe { r.1.__bindgen_anon_1.result_code };
        match result_code {
            hv_translate_gva_result_code_HV_TRANSLATE_GVA_SUCCESS => Ok(r.0),
            _ => Err(PlatformError::TranslateVirtualAddress(anyhow!(result_code))),
        }
    }
}

/// Platform emulation for Hyper-V
impl<'a> PlatformEmulator for MshvEmulatorContext<'a> {
    type CpuState = EmulatorCpuState;

    fn read_memory(&self, gva: u64, data: &mut [u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory read {} bytes from [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vmmops) = &self.vcpu.vmmops {
            if vmmops.guest_mem_read(gpa, data).is_err() {
                vmmops
                    .mmio_read(gpa, data)
                    .map_err(|e| PlatformError::MemoryReadFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn write_memory(&mut self, gva: u64, data: &[u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory write {} bytes at [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vmmops) = &self.vcpu.vmmops {
            if vmmops.guest_mem_write(gpa, data).is_err() {
                vmmops
                    .mmio_write(gpa, data)
                    .map_err(|e| PlatformError::MemoryWriteFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn cpu_state(&self, cpu_id: usize) -> Result<Self::CpuState, PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::GetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        let regs = self
            .vcpu
            .get_regs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;
        let sregs = self
            .vcpu
            .get_sregs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;

        debug!("mshv emulator: Getting new CPU state");
        debug!("mshv emulator: {:#x?}", regs);

        Ok(EmulatorCpuState { regs, sregs })
    }

    fn set_cpu_state(&self, cpu_id: usize, state: Self::CpuState) -> Result<(), PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::SetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        debug!("mshv emulator: Setting new CPU state");
        debug!("mshv emulator: {:#x?}", state.regs);

        self.vcpu
            .set_regs(&state.regs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))?;
        self.vcpu
            .set_sregs(&state.sregs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))
    }

    fn gva_to_gpa(&self, gva: u64) -> Result<u64, PlatformError> {
        self.translate(gva)
    }

    fn fetch(&self, _ip: u64, _instruction_bytes: &mut [u8]) -> Result<(), PlatformError> {
        Err(PlatformError::MemoryReadFailure(anyhow!("unimplemented")))
    }
}

#[allow(dead_code)]
/// Wrapper over Mshv VM ioctls.
pub struct MshvVm {
    fd: Arc<VmFd>,
    msrs: MsrEntries,
    // Hypervisor State
    hv_state: Arc<RwLock<HvState>>,
    vmmops: Option<Arc<dyn vm::VmmOps>>,
    dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
}

fn hv_state_init() -> Arc<RwLock<HvState>> {
    Arc::new(RwLock::new(HvState { hypercall_page: 0 }))
}

///
/// Implementation of the Vm trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// # extern crate hypervisor;
/// # use hypervisor::MshvHypervisor;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// vm.set/get().unwrap()
///
impl vm::Vm for MshvVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vmmops: Option<Arc<dyn VmmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vcpu_fd = self
            .fd
            .create_vcpu(id)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = MshvVcpu {
            fd: vcpu_fd,
            vp_index: id,
            cpuid: CpuId::new(1).unwrap(),
            msrs: self.msrs.clone(),
            hv_state: self.hv_state.clone(),
            vmmops,
        };
        Ok(Arc::new(vcpu))
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
        Ok(())
    }
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<DataMatch>,
    ) -> vm::Result<()> {
        debug!(
            "register_ioevent fd {} addr {:x?} datamatch {:?}",
            fd.as_raw_fd(),
            addr,
            datamatch
        );
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(mshv_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(mshv_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }
    /// Unregister an event from a certain address it has been previously registered to.
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);

        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    /// Creates a guest physical memory region.
    fn create_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        // Keep track of the slots regardless of whether the region is read-only.
        // For read-only regions the hypervisor can still enable dirty page
        // tracking, but a VM exit happens before the dirty bits are set.
        self.dirty_log_slots.write().unwrap().insert(
            user_memory_region.guest_pfn,
            MshvDirtyLogSlot {
                guest_pfn: user_memory_region.guest_pfn,
                memory_size: user_memory_region.size,
            },
        );

        self.fd
            .map_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
        Ok(())
    }

    /// Removes a guest physical memory region.
    fn remove_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots
            .write()
            .unwrap()
            .remove(&user_memory_region.guest_pfn);

        self.fd
            .unmap_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
        Ok(())
    }

    fn make_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        _log_dirty_pages: bool,
    ) -> MemoryRegion {
        let mut flags = HV_MAP_GPA_READABLE | HV_MAP_GPA_EXECUTABLE;
        if !readonly {
            flags |= HV_MAP_GPA_WRITABLE;
        }

        mshv_user_mem_region {
            flags,
            guest_pfn: guest_phys_addr >> PAGE_SHIFT,
            size: memory_size,
            userspace_addr: userspace_addr as u64,
        }
    }

    ///
    /// Creates an in-kernel device.
    ///
    /// See the documentation for `MSHV_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> {
        let fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        let device = MshvDevice { fd };
        Ok(Arc::new(device))
    }

    fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
        let mut vfio_dev = mshv_create_device {
            type_: mshv_device_type_MSHV_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut msi_routing =
            vec_with_array_field::<mshv_msi_routing, mshv_msi_routing_entry>(entries.len());
        msi_routing[0].nr = entries.len() as u32;

        unsafe {
            let entries_slice: &mut [mshv_msi_routing_entry] =
                msi_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(entries);
        }

        self.fd
            .set_msi_routing(&msi_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
    ///
    /// Get the Vm state. Return VM specific data
    ///
    fn state(&self) -> vm::Result<VmState> {
        Ok(*self.hv_state.read().unwrap())
    }
    ///
    /// Set the VM state
    ///
    fn set_state(&self, state: VmState) -> vm::Result<()> {
        self.hv_state.write().unwrap().hypercall_page = state.hypercall_page;
        Ok(())
    }
    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        self.fd
            .enable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
    }
    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        // Before disabling dirty page tracking we need to set the dirty
        // bits in the hypervisor. This is a requirement from Microsoft
        // Hypervisor.
        for (_, s) in dirty_log_slots.iter() {
            self.fd
                .get_dirty_log(s.guest_pfn, s.memory_size as usize, DIRTY_BITMAP_SET_DIRTY)
                .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        }
        self.fd
            .disable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        Ok(())
    }
    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(
                base_gpa >> PAGE_SHIFT,
                memory_size as usize,
                DIRTY_BITMAP_CLEAR_DIRTY,
            )
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }
}
pub use hv_cpuid_entry as CpuIdEntry;

pub const CPUID_FLAG_VALID_INDEX: u32 = 0;
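
// Usage sketch (comment only, not compiled): the typical flow a VMM would follow
// with this backend, assuming the mshv kernel driver is available. `host_addr`
// below is a hypothetical host virtual address backing the guest RAM; error
// handling is elided.
//
//     let mshv = MshvHypervisor::new().unwrap();
//     let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
//     let vm = hypervisor.create_vm().unwrap();
//
//     // Map one page of guest RAM at GPA 0.
//     let region = vm.make_user_memory_region(0, 0, 0x1000, host_addr, false, false);
//     vm.create_user_memory_region(region).unwrap();
//
//     // Create vCPU 0 and run it until the guest halts or shuts down.
//     let vcpu = vm.create_vcpu(0, None).unwrap();
//     loop {
//         match vcpu.run().unwrap() {
//             cpu::VmExit::Reset | cpu::VmExit::Shutdown => break,
//             _ => {}
//         }
//     }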