// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//

use crate::arch::emulator::{PlatformEmulator, PlatformError};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::emulator::{Emulator, EmulatorCpuState};
use crate::cpu;
use crate::cpu::Vcpu;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, VmmOps};
pub use mshv_bindings::*;
pub use mshv_ioctls::IoEventAddress;
use mshv_ioctls::{set_registers_64, Mshv, NoDatamatch, VcpuFd, VmFd};
use serde_derive::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use vm::DataMatch;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
use crate::device;
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
pub use x86_64::VcpuMshvState as CpuState;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;

#[cfg(target_arch = "x86_64")]
use std::fs::File;
use std::os::unix::io::{AsRawFd, RawFd};

const DIRTY_BITMAP_CLEAR_DIRTY: u64 = 0x4;
const DIRTY_BITMAP_SET_DIRTY: u64 = 0x8;

///
/// Export generically-named wrappers of mshv-bindings for Unix-based platforms
///
pub use {
    mshv_bindings::mshv_create_device as CreateDevice,
    mshv_bindings::mshv_device_attr as DeviceAttr,
    mshv_bindings::mshv_msi_routing_entry as IrqRoutingEntry, mshv_ioctls::DeviceFd,
};

pub const PAGE_SHIFT: usize = 12;

#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
pub struct HvState {
    hypercall_page: u64,
}

pub use HvState as VmState;

struct MshvDirtyLogSlot {
    guest_pfn: u64,
    memory_size: u64,
}

/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    /// Create a hypervisor based on Mshv
    pub fn new() -> hypervisor::Result<MshvHypervisor> {
        let mshv_obj =
            Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
        Ok(MshvHypervisor { mshv: mshv_obj })
    }
}
/// Implementation of Hypervisor trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
///
impl hypervisor::Hypervisor for MshvHypervisor {
    /// Create a mshv vm object and return the object as Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::MshvHypervisor;
    /// use hypervisor::MshvVm;
    /// let hypervisor = MshvHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.mshv.create_vm() {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

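        // Pre-populate the MSR entry array with every index the hypervisor
        // reports as supported; the corresponding values are only filled in
        // later, when the vCPU state is actually read (see MshvVcpu::state).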
        let msr_list = self.get_msr_list()?;
        let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
        let mut msrs = MsrEntries::new(num_msrs).unwrap();
        let indices = msr_list.as_slice();
        let msr_entries = msrs.as_mut_slice();
        for (pos, index) in indices.iter().enumerate() {
            msr_entries[pos].index = *index;
        }
        let vm_fd = Arc::new(fd);

        Ok(Arc::new(MshvVm {
            fd: vm_fd,
            msrs,
            hv_state: hv_state_init(),
            vmmops: None,
            dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
        }))
    }
    ///
    /// Get the supported CpuID
    ///
    fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
        Ok(CpuId::new(1).unwrap())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by MSHV.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

#[allow(dead_code)]
/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    fd: VcpuFd,
    vp_index: u8,
    cpuid: CpuId,
    msrs: MsrEntries,
    hv_state: Arc<RwLock<HvState>>, // Mshv State
    vmmops: Option<Arc<dyn vm::VmmOps>>,
}

/// Implementation of Vcpu trait for Microsoft Hypervisor
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0).unwrap();
/// vcpu.get/set().unwrap()
///
impl cpu::Vcpu for MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        self.fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        self.fd
            .set_regs(regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        self.fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        self.fd
            .set_sregs(sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        self.fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        self.fd
            .set_fpu(fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> {
        self.fd
            .get_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> {
        self.fd
            .set_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }
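
    // A minimal sketch of the batch MSR API above, mirroring how create_vm
    // builds its list: allocate the entries, set the indices of interest,
    // then let get_msrs() fill in the data fields.
    //
    //     let mut msrs = MsrEntries::new(1).unwrap();
    //     msrs.as_mut_slice()[0].index = 0x10; // IA32_TSC, illustrative
    //     let nread = vcpu.get_msrs(&mut msrs)?;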

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }
    #[allow(non_upper_case_globals)]
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        let hv_message: hv_message = hv_message::default();
        match self.fd.run(hv_message) {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    // SAFETY: access_info is valid, otherwise we won't be here
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    /*
                     * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
                     *
                     * Cloud Hypervisor doesn't support fw_cfg at the moment. It does support 0x402
                     * under the "fwdebug" feature flag. But that feature is not enabled by default
                     * and is considered legacy.
                     *
                     * OVMF unconditionally pokes these IO ports with string IO.
                     *
                     * Instead of trying to implement string IO support now, which would
                     * not be of much use yet, skip those ports explicitly to avoid
                     * panicking.
                     *
                     * Proper string IO support can be added once we gain the ability to translate
                     * guest virtual addresses to guest physical addresses on MSHV.
                     */
                    match port {
                        0x402 | 0x510 | 0x511 | 0x514 => {
                            let insn_len = info.header.instruction_length() as u64;

                            /* Advance RIP and update RAX */
                            let arr_reg_name_value = [
                                (
                                    hv_register_name::HV_X64_REGISTER_RIP,
                                    info.header.rip + insn_len,
                                ),
                                (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                            ];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                            return Ok(cpu::VmExit::Ignore);
                        }
                        _ => {}
                    }

                    // SAFETY: access_info is valid, otherwise we won't be here
                    assert!(
                        (unsafe { access_info.__bindgen_anon_1.string_op() } != 1),
                        "String IN/OUT not supported"
                    );
                    assert!(
                        (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1),
                        "Rep IN/OUT not supported"
                    );

                    if is_write {
                        let data = (info.rax as u32).to_le_bytes();
                        if let Some(vmmops) = &self.vmmops {
                            vmmops
                                .pio_write(port.into(), &data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }
                    } else {
                        if let Some(vmmops) = &self.vmmops {
                            vmmops
                                .pio_read(port.into(), &mut data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }

                        let v = u32::from_le_bytes(data);
                        /* Preserve high bits in EAX but clear out high bits in RAX */
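                        // e.g. a 1-byte IN that returns v = 0xab on a vCPU
                        // whose RAX is 0x1234_5678: mask = 0xff, so EAX
                        // becomes 0x1234_56ab, and the `as u64` cast below
                        // clears bits 63:32 of RAX.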
                        let mask = 0xffffffff >> (32 - len * 8);
                        let eax = (info.rax as u32 & !mask) | (v & mask);
                        ret_rax = eax as u64;
                    }

                    let insn_len = info.header.instruction_length() as u64;

                    /* Advance RIP and update RAX */
                    let arr_reg_name_value = [
                        (
                            hv_register_name::HV_X64_REGISTER_RIP,
                            info.header.rip + insn_len,
                        ),
                        (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                    ];
                    set_registers_64!(self.fd, arr_reg_name_value)
                        .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                    Ok(cpu::VmExit::Ignore)
                }
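                // An access to a GPA with no memory mapped behind it is how
                // MMIO surfaces on MSHV: the message carries the bytes of the
                // faulting instruction, which we decode and emulate in user
                // space before resuming the guest.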
                hv_message_type_HVMSG_UNMAPPED_GPA => {
                    let info = x.to_memory_info().unwrap();
                    let insn_len = info.instruction_byte_count as usize;
                    assert!(insn_len > 0 && insn_len <= 16);

                    let mut context = MshvEmulatorContext {
                        vcpu: self,
                        map: (info.guest_virtual_address, info.guest_physical_address),
                    };

                    // Create a new emulator.
                    let mut emul = Emulator::new(&mut context);

                    // Emulate the trapped instruction, and only the first one.
                    let new_state = emul
                        .emulate_first_insn(self.vp_index as usize, &info.instruction_bytes)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Set CPU state back.
                    context
                        .set_cpu_state(self.vp_index as usize, new_state)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
                    let info = x.to_cpuid_info().unwrap();
                    debug!("cpuid eax: {:x}", { info.rax });
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
                    let info = x.to_msr_info().unwrap();
                    if info.header.intercept_access_type == 0 {
                        debug!("msr read: {:x}", { info.msr_number });
                    } else {
                        debug!("msr write: {:x}", { info.msr_number });
                    }
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
                    //TODO: Handler for VMCALL here.
                    let info = x.to_exception_info().unwrap();
                    debug!("Exception Info {:?}", { info.exception_vector });
                    Ok(cpu::VmExit::Ignore)
                }
                exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unhandled VCPU exit {:?}",
                    exit
                ))),
            },

            Err(e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, _cpuid: &CpuId) -> cpu::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<CpuId> {
        Ok(self.cpuid.clone())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        self.fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, lapic: &LapicState) -> cpu::Result<()> {
        self.fd
            .set_lapic(lapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    ///
    /// Set CPU state
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        self.set_msrs(&state.msrs)?;
        self.set_vcpu_events(&state.vcpu_events)?;
        self.set_regs(&state.regs)?;
        self.set_sregs(&state.sregs)?;
        self.set_fpu(&state.fpu)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic)?;
        self.set_xsave(&state.xsave)?;
        // These registers are global and need to be set only for the first
        // vCPU, as the Microsoft Hypervisor allows setting this register
        // for only one vCPU.
        if self.vp_index == 0 {
            self.fd
                .set_misc_regs(&state.misc)
                .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))?
        }
        self.fd
            .set_debug_regs(&state.dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
        Ok(())
    }
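    // state() below reads back the same register set that set_state()
    // restores; the pair implements vCPU save/restore (e.g. for snapshots).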
    ///
    /// Get CPU State
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xcrs = self.get_xcrs()?;
        let fpu = self.get_fpu()?;
        let vcpu_events = self.get_vcpu_events()?;
        let mut msrs = self.msrs.clone();
        self.get_msrs(&mut msrs)?;
        let lapic = self.get_lapic()?;
        let xsave = self.get_xsave()?;
        let misc = self
            .fd
            .get_misc_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?;
        let dbg = self
            .fd
            .get_debug_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;

        Ok(CpuState {
            msrs,
            vcpu_events,
            regs,
            sregs,
            fpu,
            xcrs,
            lapic,
            dbg,
            xsave,
            misc,
        })
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Translate guest virtual address to guest physical address
    ///
    fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> {
        let r = self
            .fd
            .translate_gva(gva, flags)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;

        let gpa = r.0;
        // SAFETY: r is valid, otherwise this function will have returned
        let result_code = unsafe { r.1.__bindgen_anon_1.result_code };

        Ok((gpa, result_code))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "suspend registers".
    ///
    fn get_suspend_regs(&self) -> cpu::Result<SuspendRegisters> {
        self.fd
            .get_suspend_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetSuspendRegs(e.into()))
    }
}

/// Device struct for MSHV
pub struct MshvDevice {
    fd: DeviceFd,
}

impl device::Device for MshvDevice {
    ///
    /// Set device attribute
    ///
    fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> {
        self.fd
            .set_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into()))
    }
    ///
    /// Get device attribute
    ///
    fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> {
        self.fd
            .get_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into()))
    }
}

impl AsRawFd for MshvDevice {
    fn as_raw_fd(&self) -> RawFd {
        self.fd.as_raw_fd()
    }
}

struct MshvEmulatorContext<'a> {
    vcpu: &'a MshvVcpu,
    map: (u64, u64), // Initial GVA to GPA mapping provided by the hypervisor
}

impl<'a> MshvEmulatorContext<'a> {
    // Do the actual gva -> gpa translation
    #[allow(non_upper_case_globals)]
    fn translate(&self, gva: u64) -> Result<u64, PlatformError> {
        if self.map.0 == gva {
            return Ok(self.map.1);
        }

        // TODO: More fine-grained control for the flags
        let flags = HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE;

        let (gpa, result_code) = self
            .vcpu
            .translate_gva(gva, flags.into())
            .map_err(|e| PlatformError::TranslateVirtualAddress(anyhow!(e)))?;

        match result_code {
            hv_translate_gva_result_code_HV_TRANSLATE_GVA_SUCCESS => Ok(gpa),
            _ => Err(PlatformError::TranslateVirtualAddress(anyhow!(result_code))),
        }
    }
}

/// Platform emulation for Hyper-V
impl<'a> PlatformEmulator for MshvEmulatorContext<'a> {
    type CpuState = EmulatorCpuState;

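    // Both memory callbacks below try regular guest RAM first and fall back
    // to MMIO, since a GVA handed to the instruction emulator may resolve to
    // either kind of backing.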
    fn read_memory(&self, gva: u64, data: &mut [u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory read {} bytes from [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vmmops) = &self.vcpu.vmmops {
            if vmmops.guest_mem_read(gpa, data).is_err() {
                vmmops
                    .mmio_read(gpa, data)
                    .map_err(|e| PlatformError::MemoryReadFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn write_memory(&mut self, gva: u64, data: &[u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory write {} bytes at [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vmmops) = &self.vcpu.vmmops {
            if vmmops.guest_mem_write(gpa, data).is_err() {
                vmmops
                    .mmio_write(gpa, data)
                    .map_err(|e| PlatformError::MemoryWriteFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn cpu_state(&self, cpu_id: usize) -> Result<Self::CpuState, PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::GetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        let regs = self
            .vcpu
            .get_regs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;
        let sregs = self
            .vcpu
            .get_sregs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;

        debug!("mshv emulator: Getting new CPU state");
        debug!("mshv emulator: {:#x?}", regs);

        Ok(EmulatorCpuState { regs, sregs })
    }

    fn set_cpu_state(&self, cpu_id: usize, state: Self::CpuState) -> Result<(), PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::SetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        debug!("mshv emulator: Setting new CPU state");
        debug!("mshv emulator: {:#x?}", state.regs);

        self.vcpu
            .set_regs(&state.regs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))?;
        self.vcpu
            .set_sregs(&state.sregs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))
    }

    fn gva_to_gpa(&self, gva: u64) -> Result<u64, PlatformError> {
        self.translate(gva)
    }

    fn fetch(&self, _ip: u64, _instruction_bytes: &mut [u8]) -> Result<(), PlatformError> {
        Err(PlatformError::MemoryReadFailure(anyhow!("unimplemented")))
    }
}

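// fetch() above can stay unimplemented: the intercept message already carries
// the bytes of the faulting instruction (see the HVMSG_UNMAPPED_GPA arm in
// MshvVcpu::run), so the emulator never needs to refetch them here.
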
#[allow(dead_code)]
/// Wrapper over Mshv VM ioctls.
pub struct MshvVm {
    fd: Arc<VmFd>,
    msrs: MsrEntries,
    // Hypervisor State
    hv_state: Arc<RwLock<HvState>>,
    vmmops: Option<Arc<dyn vm::VmmOps>>,
    dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
}

fn hv_state_init() -> Arc<RwLock<HvState>> {
    Arc::new(RwLock::new(HvState { hypercall_page: 0 }))
}

///
/// Implementation of Vm trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// # extern crate hypervisor;
/// # use hypervisor::MshvHypervisor;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// vm.set/get().unwrap()
///
impl vm::Vm for MshvVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vmmops: Option<Arc<dyn VmmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vcpu_fd = self
            .fd
            .create_vcpu(id)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = MshvVcpu {
            fd: vcpu_fd,
            vp_index: id,
            cpuid: CpuId::new(1).unwrap(),
            msrs: self.msrs.clone(),
            hv_state: self.hv_state.clone(),
            vmmops,
        };
        Ok(Arc::new(vcpu))
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
        Ok(())
    }
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<DataMatch>,
    ) -> vm::Result<()> {
        debug!(
            "register_ioevent fd {} addr {:x?} datamatch {:?}",
            fd.as_raw_fd(),
            addr,
            datamatch
        );
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(mshv_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(mshv_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }
    /// Unregister an event from a certain address it has been previously registered to.
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);

        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    /// Creates a guest physical memory region.
    fn create_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        // Keep track of every slot, read-only or not: for read-only regions
        // the hypervisor can still enable dirty-bit tracking, but a VM exit
        // is taken before any dirty bit would be set.
        self.dirty_log_slots.write().unwrap().insert(
            user_memory_region.guest_pfn,
            MshvDirtyLogSlot {
                guest_pfn: user_memory_region.guest_pfn,
                memory_size: user_memory_region.size,
            },
        );

        self.fd
            .map_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
        Ok(())
    }

    /// Removes a guest physical memory region.
    fn remove_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots
            .write()
            .unwrap()
            .remove(&user_memory_region.guest_pfn);

        self.fd
            .unmap_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
        Ok(())
    }

    fn make_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        _log_dirty_pages: bool,
    ) -> MemoryRegion {
        let mut flags = HV_MAP_GPA_READABLE | HV_MAP_GPA_EXECUTABLE;
        if !readonly {
            flags |= HV_MAP_GPA_WRITABLE;
        }

        mshv_user_mem_region {
            flags,
            guest_pfn: guest_phys_addr >> PAGE_SHIFT,
            size: memory_size,
            userspace_addr: userspace_addr as u64,
        }
    }
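
    // guest_pfn above is a 4 KiB page frame number, not a byte address:
    // e.g. guest_phys_addr = 0x10_0000 (1 MiB) >> PAGE_SHIFT gives 0x100.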

    ///
    /// Creates an in-kernel device.
    ///
    /// See the documentation for `MSHV_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> {
        let fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        let device = MshvDevice { fd };
        Ok(Arc::new(device))
    }

    fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
        let mut vfio_dev = mshv_create_device {
            type_: mshv_device_type_MSHV_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut msi_routing =
            vec_with_array_field::<mshv_msi_routing, mshv_msi_routing_entry>(entries.len());
        msi_routing[0].nr = entries.len() as u32;

        // SAFETY: msi_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [mshv_msi_routing_entry] =
                msi_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(entries);
        }

        self.fd
            .set_msi_routing(&msi_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
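
    // A minimal sketch of one routing entry, assuming the gsi/address/data
    // layout of mshv_msi_routing_entry from mshv_bindings:
    //
    //     let entry = IrqRoutingEntry {
    //         gsi: 0,
    //         address_lo: 0xfee0_0000, // xAPIC MSI address window
    //         address_hi: 0,
    //         data: 0x20, // vector 0x20, fixed delivery; illustrative
    //     };
    //     vm.set_gsi_routing(&[entry])?;
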
    ///
    /// Get the Vm state. Return VM specific data
    ///
    fn state(&self) -> vm::Result<VmState> {
        Ok(*self.hv_state.read().unwrap())
    }
    ///
    /// Set the VM state
    ///
    fn set_state(&self, state: VmState) -> vm::Result<()> {
        self.hv_state.write().unwrap().hypercall_page = state.hypercall_page;
        Ok(())
    }
    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        self.fd
            .enable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
    }
    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        // Before disabling dirty page tracking we need to set the dirty
        // bits in the hypervisor; this is a requirement of the Microsoft
        // Hypervisor.
        for (_, s) in dirty_log_slots.iter() {
            self.fd
                .get_dirty_log(s.guest_pfn, s.memory_size as usize, DIRTY_BITMAP_SET_DIRTY)
                .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
        }
        self.fd
            .disable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
        Ok(())
    }
    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(
                base_gpa >> PAGE_SHIFT,
                memory_size as usize,
                DIRTY_BITMAP_CLEAR_DIRTY,
            )
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }
}
pub use hv_cpuid_entry as CpuIdEntry;

pub const CPUID_FLAG_VALID_INDEX: u32 = 0;
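
// A minimal end-to-end sketch of how the types in this module fit together
// (assumes /dev/mshv is available; error handling elided):
//
//     let mshv = MshvHypervisor::new().unwrap();
//     let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
//     let vm = hypervisor.create_vm().unwrap();
//     let vcpu = vm.create_vcpu(0, None).unwrap();
//     loop {
//         match vcpu.run() {
//             Ok(cpu::VmExit::Reset) | Ok(cpu::VmExit::Shutdown) => break,
//             Ok(_) => {}
//             Err(e) => panic!("vcpu run failed: {:?}", e),
//         }
//     }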