// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//

use crate::arch::emulator::{PlatformEmulator, PlatformError};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::emulator::{Emulator, EmulatorCpuState};
use crate::cpu;
use crate::cpu::Vcpu;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, InterruptSourceConfig, VmOps};
pub use mshv_bindings::*;
use mshv_ioctls::{set_registers_64, Mshv, NoDatamatch, VcpuFd, VmFd};
use std::any::Any;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use vm::DataMatch;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
use crate::device;
use crate::{
    ClockData, CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
    USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
};
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
pub use x86_64::VcpuMshvState;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;

#[cfg(target_arch = "x86_64")]
use std::fs::File;
use std::os::unix::io::AsRawFd;

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{
    CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, StandardRegisters,
};

const DIRTY_BITMAP_CLEAR_DIRTY: u64 = 0x4;
const DIRTY_BITMAP_SET_DIRTY: u64 = 0x8;

///
/// Export generically-named wrappers of mshv-bindings for Unix-based platforms
///
pub use {
    mshv_bindings::mshv_create_device as CreateDevice,
    mshv_bindings::mshv_device_attr as DeviceAttr, mshv_ioctls::DeviceFd,
};

pub const PAGE_SHIFT: usize = 12;

impl From<mshv_user_mem_region> for UserMemoryRegion {
    fn from(region: mshv_user_mem_region) -> Self {
        let mut flags: u32 = 0;
        if region.flags & HV_MAP_GPA_READABLE != 0 {
            flags |= USER_MEMORY_REGION_READ;
        }
        if region.flags & HV_MAP_GPA_WRITABLE != 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & HV_MAP_GPA_EXECUTABLE != 0 {
            flags |= USER_MEMORY_REGION_EXECUTE;
        }

        UserMemoryRegion {
            guest_phys_addr: (region.guest_pfn << PAGE_SHIFT as u64)
                + (region.userspace_addr & ((1 << PAGE_SHIFT) - 1)),
            memory_size: region.size,
            userspace_addr: region.userspace_addr,
            flags,
            ..Default::default()
        }
    }
}

impl From<UserMemoryRegion> for mshv_user_mem_region {
    fn from(region: UserMemoryRegion) -> Self {
        let mut flags: u32 = 0;
        if region.flags & USER_MEMORY_REGION_READ != 0 {
            flags |= HV_MAP_GPA_READABLE;
        }
        if region.flags & USER_MEMORY_REGION_WRITE != 0 {
            flags |= HV_MAP_GPA_WRITABLE;
        }
        if region.flags & USER_MEMORY_REGION_EXECUTE != 0 {
            flags |= HV_MAP_GPA_EXECUTABLE;
        }

        mshv_user_mem_region {
            guest_pfn: region.guest_phys_addr >> PAGE_SHIFT,
            size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<mshv_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: mshv_ioctls::IoEventAddress) -> Self {
        match a {
            mshv_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            mshv_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for mshv_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}
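
// A minimal, test-only sketch (not part of the upstream sources) exercising
// the From conversions above: the generic USER_MEMORY_REGION_* permission
// flags map to HV_MAP_GPA_* and back, and guest_phys_addr round-trips through
// guest_pfn for page-aligned addresses (PAGE_SHIFT = 12).
#[cfg(test)]
mod user_memory_region_conversion_tests {
    use super::*;

    #[test]
    fn round_trip_flags_and_address() {
        let region = UserMemoryRegion {
            guest_phys_addr: 0x1000,
            memory_size: 0x2000,
            userspace_addr: 0x7f00_0000_0000,
            flags: USER_MEMORY_REGION_READ | USER_MEMORY_REGION_WRITE,
            ..Default::default()
        };

        let mshv_region: mshv_user_mem_region = region.into();
        assert_eq!(mshv_region.guest_pfn, 0x1);
        assert_ne!(mshv_region.flags & HV_MAP_GPA_READABLE, 0);
        assert_ne!(mshv_region.flags & HV_MAP_GPA_WRITABLE, 0);
        assert_eq!(mshv_region.flags & HV_MAP_GPA_EXECUTABLE, 0);

        let back = UserMemoryRegion::from(mshv_region);
        assert_eq!(back.guest_phys_addr, 0x1000);
        assert_eq!(back.memory_size, 0x2000);
        assert_eq!(
            back.flags,
            USER_MEMORY_REGION_READ | USER_MEMORY_REGION_WRITE
        );
    }
}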

impl From<VcpuMshvState> for CpuState {
    fn from(s: VcpuMshvState) -> Self {
        CpuState::Mshv(s)
    }
}

impl From<CpuState> for VcpuMshvState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Mshv(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

impl From<mshv_msi_routing_entry> for IrqRoutingEntry {
    fn from(s: mshv_msi_routing_entry) -> Self {
        IrqRoutingEntry::Mshv(s)
    }
}

impl From<IrqRoutingEntry> for mshv_msi_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

struct MshvDirtyLogSlot {
    guest_pfn: u64,
    memory_size: u64,
}

/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by MSHV.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

impl MshvHypervisor {
    /// Create a hypervisor based on Mshv
    pub fn new() -> hypervisor::Result<MshvHypervisor> {
        let mshv_obj =
            Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
        Ok(MshvHypervisor { mshv: mshv_obj })
    }
}
/// Implementation of Hypervisor trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
///
impl hypervisor::Hypervisor for MshvHypervisor {
    /// Create a mshv vm object and return the object as Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::MshvHypervisor;
    /// use hypervisor::MshvVm;
    /// let hypervisor = MshvHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.mshv.create_vm() {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        // The default Microsoft Hypervisor behavior for an unimplemented MSR
        // is to send a fault to the guest when it tries to access it. It is
        // possible to override this behavior with a more suitable option,
        // i.e., ignore writes from the guest and return zero when the guest
        // attempts to read an unimplemented MSR.
        fd.set_partition_property(
            hv_partition_property_code_HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION,
            hv_unimplemented_msr_action_HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;

        let msr_list = self.get_msr_list()?;
        let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
        let mut msrs: Vec<MsrEntry> = vec![
            MsrEntry {
                ..Default::default()
            };
            num_msrs
        ];
        let indices = msr_list.as_slice();
        for (pos, index) in indices.iter().enumerate() {
            msrs[pos].index = *index;
        }
        let vm_fd = Arc::new(fd);

        Ok(Arc::new(MshvVm {
            fd: vm_fd,
            msrs,
            vm_ops: None,
            dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
        }))
    }
    ///
    /// Get the supported CpuID
    ///
    fn get_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        Ok(Vec::new())
    }
}
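
// Illustrative sketch (not used by the code in this file): the EINTR retry
// in `create_vm` above is an instance of a generic "retry interrupted
// syscalls" idiom, which could be factored out along these lines. The helper
// name and signature are hypothetical; the error type is assumed to be the
// `vmm_sys_util::errno::Error` returned by the mshv ioctl wrappers.
#[allow(dead_code)]
fn retry_on_eintr<T>(
    mut f: impl FnMut() -> std::result::Result<T, vmm_sys_util::errno::Error>,
) -> std::result::Result<T, vmm_sys_util::errno::Error> {
    loop {
        match f() {
            // The ioctl was interrupted by a signal; try again.
            Err(e) if e.errno() == libc::EINTR => continue,
            other => return other,
        }
    }
}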

#[allow(dead_code)]
/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    fd: VcpuFd,
    vp_index: u8,
    cpuid: Vec<CpuIdEntry>,
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
}

/// Implementation of Vcpu trait for Microsoft Hypervisor
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0).unwrap();
/// vcpu.get/set().unwrap()
///
impl cpu::Vcpu for MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        Ok(self
            .fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        Ok(self
            .fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: mshv_bindings::FloatingPointUnit = (*fpu).clone().into();
        self.fd
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually read.
    ///
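    /// Example (illustrative, assuming a `vcpu` handle and an MSR index
    /// constant from `crate::arch::x86::msr_index`):
    /// let mut msrs = vec![MsrEntry { index: msr_index::MSR_IA32_TSC, ..Default::default() }];
    /// let nread = vcpu.get_msrs(&mut msrs).unwrap();
    /// assert_eq!(nread, 1);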
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        let succ = self
            .fd
            .get_msrs(&mut mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &mshv_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets up the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        self.fd
            .set_msrs(&mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }
    #[allow(non_upper_case_globals)]
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        let hv_message: hv_message = hv_message::default();
        match self.fd.run(hv_message) {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    // SAFETY: access_info is valid, otherwise we won't be here
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    /*
                     * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
                     *
                     * Cloud Hypervisor doesn't support fw_cfg at the moment. It does
                     * support 0x402 under the "fwdebug" feature flag, but that feature
                     * is not enabled by default and is considered legacy.
                     *
                     * OVMF unconditionally pokes these IO ports with string IO.
                     *
                     * Instead of implementing string IO support, which would not
                     * accomplish much at this point, skip those ports explicitly to
                     * avoid panicking.
                     *
                     * Proper string IO support can be added once we gain the ability
                     * to translate guest virtual addresses to guest physical addresses
                     * on MSHV.
                     */
                    match port {
                        0x402 | 0x510 | 0x511 | 0x514 => {
                            let insn_len = info.header.instruction_length() as u64;

                            /* Advance RIP and update RAX */
                            let arr_reg_name_value = [
                                (
                                    hv_register_name::HV_X64_REGISTER_RIP,
                                    info.header.rip + insn_len,
                                ),
                                (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                            ];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                            return Ok(cpu::VmExit::Ignore);
                        }
                        _ => {}
                    }

                    // SAFETY: access_info is valid, otherwise we won't be here
                    assert!(
                        (unsafe { access_info.__bindgen_anon_1.string_op() } != 1),
                        "String IN/OUT not supported"
                    );
                    assert!(
                        (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1),
                        "Rep IN/OUT not supported"
                    );

                    if is_write {
                        let data = (info.rax as u32).to_le_bytes();
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_write(port.into(), &data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }
                    } else {
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_read(port.into(), &mut data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }

                        let v = u32::from_le_bytes(data);
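                        // Worked example (illustrative): for a one-byte IN
                        // (len = 1), mask = 0xff, so bits 8..32 of EAX keep
                        // their old value while bits 0..8 come from the port
                        // read; the upper 32 bits of RAX are cleared.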
                        /* Preserve high bits in EAX but clear out high bits in RAX */
                        let mask = 0xffffffff >> (32 - len * 8);
                        let eax = (info.rax as u32 & !mask) | (v & mask);
                        ret_rax = eax as u64;
                    }

                    let insn_len = info.header.instruction_length() as u64;

                    /* Advance RIP and update RAX */
                    let arr_reg_name_value = [
                        (
                            hv_register_name::HV_X64_REGISTER_RIP,
                            info.header.rip + insn_len,
                        ),
                        (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                    ];
                    set_registers_64!(self.fd, arr_reg_name_value)
                        .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                    Ok(cpu::VmExit::Ignore)
                }
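                // The guest touched a GPA with no RAM mapping behind it,
                // which is how emulated MMIO accesses surface on MSHV: fetch
                // the trapped instruction bytes and emulate the access
                // through the MshvEmulatorContext defined further below.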
                hv_message_type_HVMSG_UNMAPPED_GPA => {
                    let info = x.to_memory_info().unwrap();
                    let insn_len = info.instruction_byte_count as usize;
                    assert!(insn_len > 0 && insn_len <= 16);

                    let mut context = MshvEmulatorContext {
                        vcpu: self,
                        map: (info.guest_virtual_address, info.guest_physical_address),
                    };

                    // Create a new emulator.
                    let mut emul = Emulator::new(&mut context);

                    // Emulate the trapped instruction, and only the first one.
                    let new_state = emul
                        .emulate_first_insn(self.vp_index as usize, &info.instruction_bytes)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Set CPU state back.
                    context
                        .set_cpu_state(self.vp_index as usize, new_state)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
                    let info = x.to_cpuid_info().unwrap();
                    debug!("cpuid eax: {:x}", { info.rax });
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
                    let info = x.to_msr_info().unwrap();
                    if info.header.intercept_access_type == 0 {
                        debug!("msr read: {:x}", { info.msr_number });
                    } else {
                        debug!("msr write: {:x}", { info.msr_number });
                    }
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
                    // TODO: Handler for VMCALL here.
                    let info = x.to_exception_info().unwrap();
                    debug!("Exception Info {:?}", { info.exception_vector });
                    Ok(cpu::VmExit::Ignore)
                }
                exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unhandled VCPU exit {:?}",
                    exit
                ))),
            },

            Err(e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, _cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
        Ok(self.cpuid.clone())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        Ok(self
            .fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, lapic: &LapicState) -> cpu::Result<()> {
        let lapic: mshv_bindings::LapicState = (*lapic).clone().into();
        self.fd
            .set_lapic(&lapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    ///
    /// Returns the vcpu's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        Ok(MpState::Mshv)
    }
    ///
    /// Sets the vcpu's current "multiprocessing state".
    ///
    fn set_mp_state(&self, _mp_state: MpState) -> cpu::Result<()> {
        Ok(())
    }
    ///
    /// Set CPU state
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuMshvState = state.clone().into();
        self.set_msrs(&state.msrs)?;
        self.set_vcpu_events(&state.vcpu_events)?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_fpu(&state.fpu)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic)?;
        self.set_xsave(&state.xsave)?;
        // These registers are global and need to be set only for the first
        // vCPU, as Microsoft Hypervisor allows setting this register on only
        // one vCPU.
        if self.vp_index == 0 {
            self.fd
                .set_misc_regs(&state.misc)
                .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))?
        }
        self.fd
            .set_debug_regs(&state.dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
        Ok(())
    }
    ///
    /// Get CPU State
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xcrs = self.get_xcrs()?;
        let fpu = self.get_fpu()?;
        let vcpu_events = self.get_vcpu_events()?;
        let mut msrs = self.msrs.clone();
        self.get_msrs(&mut msrs)?;
        let lapic = self.get_lapic()?;
        let xsave = self.get_xsave()?;
        let misc = self
            .fd
            .get_misc_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?;
        let dbg = self
            .fd
            .get_debug_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;

        Ok(VcpuMshvState {
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            xcrs,
            lapic,
            dbg,
            xsave,
            misc,
        }
        .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Translate guest virtual address to guest physical address
    ///
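    /// Example (illustrative): validating a GVA for read access, with the
    /// flag constant coming from mshv-bindings:
    /// let (gpa, result_code) =
    ///     vcpu.translate_gva(gva, HV_TRANSLATE_GVA_VALIDATE_READ.into())?;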
    fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> {
        let r = self
            .fd
            .translate_gva(gva, flags)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;

        let gpa = r.0;
        // SAFETY: r is valid, otherwise this function will have returned
        let result_code = unsafe { r.1.__bindgen_anon_1.result_code };

        Ok((gpa, result_code))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU
    ///
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

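        // msr!(index) expands to an MsrEntry with a zeroed data field, while
        // msr_data! carries an explicit value; the MTRRdefType entry enables
        // MTRRs with write-back as the default memory type.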
        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr!(msr_index::MSR_IA32_TSC),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }
}

impl MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
}

/// Device struct for MSHV
pub type MshvDevice = DeviceFd;

impl device::Device for MshvDevice {
    ///
    /// Set device attribute
    ///
    fn set_device_attr(&self, attr: &DeviceAttr) -> device::Result<()> {
        self.set_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::SetDeviceAttribute(e.into()))
    }
    ///
    /// Get device attribute
    ///
    fn get_device_attr(&self, attr: &mut DeviceAttr) -> device::Result<()> {
        self.get_device_attr(attr)
            .map_err(|e| device::HypervisorDeviceError::GetDeviceAttribute(e.into()))
    }
    ///
    /// Cast to the underlying MSHV device fd
    ///
    fn as_any(&self) -> &dyn Any {
        self
    }
}

struct MshvEmulatorContext<'a> {
    vcpu: &'a MshvVcpu,
    map: (u64, u64), // Initial GVA to GPA mapping provided by the hypervisor
}

impl<'a> MshvEmulatorContext<'a> {
    // Do the actual gva -> gpa translation
    #[allow(non_upper_case_globals)]
    fn translate(&self, gva: u64) -> Result<u64, PlatformError> {
        if self.map.0 == gva {
            return Ok(self.map.1);
        }

        // TODO: More fine-grained control for the flags
        let flags = HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE;

        let (gpa, result_code) = self
            .vcpu
            .translate_gva(gva, flags.into())
            .map_err(|e| PlatformError::TranslateVirtualAddress(anyhow!(e)))?;

        match result_code {
            hv_translate_gva_result_code_HV_TRANSLATE_GVA_SUCCESS => Ok(gpa),
            _ => Err(PlatformError::TranslateVirtualAddress(anyhow!(result_code))),
        }
    }
}

/// Platform emulation for Hyper-V
impl<'a> PlatformEmulator for MshvEmulatorContext<'a> {
    type CpuState = EmulatorCpuState;

    fn read_memory(&self, gva: u64, data: &mut [u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory read {} bytes from [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

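        // Prefer plain guest RAM; if the GPA is not backed by RAM, the access
        // is assumed to target an emulated device and is forwarded to the
        // MMIO handler instead. write_memory below follows the same pattern.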
        if let Some(vm_ops) = &self.vcpu.vm_ops {
            if vm_ops.guest_mem_read(gpa, data).is_err() {
                vm_ops
                    .mmio_read(gpa, data)
                    .map_err(|e| PlatformError::MemoryReadFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn write_memory(&mut self, gva: u64, data: &[u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory write {} bytes at [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vm_ops) = &self.vcpu.vm_ops {
            if vm_ops.guest_mem_write(gpa, data).is_err() {
                vm_ops
                    .mmio_write(gpa, data)
                    .map_err(|e| PlatformError::MemoryWriteFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn cpu_state(&self, cpu_id: usize) -> Result<Self::CpuState, PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::GetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        let regs = self
            .vcpu
            .get_regs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;
        let sregs = self
            .vcpu
            .get_sregs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;

        debug!("mshv emulator: Getting new CPU state");
        debug!("mshv emulator: {:#x?}", regs);

        Ok(EmulatorCpuState { regs, sregs })
    }

    fn set_cpu_state(&self, cpu_id: usize, state: Self::CpuState) -> Result<(), PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::SetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        debug!("mshv emulator: Setting new CPU state");
        debug!("mshv emulator: {:#x?}", state.regs);

        self.vcpu
            .set_regs(&state.regs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))?;
        self.vcpu
            .set_sregs(&state.sregs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))
    }

    fn gva_to_gpa(&self, gva: u64) -> Result<u64, PlatformError> {
        self.translate(gva)
    }

    fn fetch(&self, _ip: u64, _instruction_bytes: &mut [u8]) -> Result<(), PlatformError> {
        Err(PlatformError::MemoryReadFailure(anyhow!("unimplemented")))
    }
}

#[allow(dead_code)]
/// Wrapper over Mshv VM ioctls.
pub struct MshvVm {
    fd: Arc<VmFd>,
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
}

///
/// Implementation of Vm trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// # extern crate hypervisor;
/// # use hypervisor::MshvHypervisor;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// vm.set/get().unwrap()
///
impl vm::Vm for MshvVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vcpu_fd = self
            .fd
            .create_vcpu(id)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = MshvVcpu {
            fd: vcpu_fd,
            vp_index: id,
            cpuid: Vec::new(),
            msrs: self.msrs.clone(),
            vm_ops,
        };
        Ok(Arc::new(vcpu))
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
        Ok(())
    }
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<DataMatch>,
    ) -> vm::Result<()> {
        let addr = &mshv_ioctls::IoEventAddress::from(*addr);
        debug!(
            "register_ioevent fd {} addr {:x?} datamatch {:?}",
            fd.as_raw_fd(),
            addr,
            datamatch
        );
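        // With a datamatch, the eventfd fires only when the guest writes the
        // given 32- or 64-bit value to the address; with NoDatamatch, any
        // write to the address triggers it.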
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(mshv_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(mshv_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }
    /// Unregister an event from a certain address it has been previously registered to.
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        let addr = &mshv_ioctls::IoEventAddress::from(*addr);
        debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);

        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    /// Creates a guest physical memory region.
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // Keep track of every slot, read-only or not. For a read-only slot
        // the hypervisor can still enable dirty-bit tracking, but a VM exit
        // happens before the dirty bits are actually set.
        self.dirty_log_slots.write().unwrap().insert(
            user_memory_region.guest_pfn,
            MshvDirtyLogSlot {
                guest_pfn: user_memory_region.guest_pfn,
                memory_size: user_memory_region.size,
            },
        );

        self.fd
            .map_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
        Ok(())
    }

    /// Removes a guest physical memory region.
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots
            .write()
            .unwrap()
            .remove(&user_memory_region.guest_pfn);

        self.fd
            .unmap_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
        Ok(())
    }

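    /// Builds a `UserMemoryRegion` for MSHV. Regions are always mapped
    /// readable and executable here; `readonly` only controls the writable
    /// flag.
    /// Example (illustrative): a 2 MiB read-write region backed by
    /// `host_addr`:
    /// let region = vm.make_user_memory_region(0, 0x10_0000, 0x20_0000, host_addr, false, false);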
    fn make_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        _log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        let mut flags = HV_MAP_GPA_READABLE | HV_MAP_GPA_EXECUTABLE;
        if !readonly {
            flags |= HV_MAP_GPA_WRITABLE;
        }

        mshv_user_mem_region {
            flags,
            guest_pfn: guest_phys_addr >> PAGE_SHIFT,
            size: memory_size,
            userspace_addr: userspace_addr as u64,
        }
        .into()
    }

    ///
    /// Creates an in-kernel device.
    ///
    /// See the documentation for `MSHV_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<Arc<dyn device::Device>> {
        let device_fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        Ok(Arc::new(device_fd))
    }

    fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
        let mut vfio_dev = mshv_create_device {
            type_: mshv_device_type_MSHV_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match config {
            InterruptSourceConfig::MsiIrq(cfg) => mshv_msi_routing_entry {
                gsi,
                address_lo: cfg.low_addr,
                address_hi: cfg.high_addr,
                data: cfg.data,
            }
            .into(),
            _ => {
                unreachable!()
            }
        }
    }

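    // `mshv_msi_routing` ends in a flexible array member, so the routing
    // table below is allocated via vec_with_array_field and the entries are
    // copied into the trailing array afterwards.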
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut msi_routing =
            vec_with_array_field::<mshv_msi_routing, mshv_msi_routing_entry>(entries.len());
        msi_routing[0].nr = entries.len() as u32;

        let entries: Vec<mshv_msi_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Mshv(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: msi_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [mshv_msi_routing_entry] =
                msi_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_msi_routing(&msi_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        self.fd
            .enable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
    }
    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        // Before disabling the dirty page tracking we need
        // to set the dirty bits in the Hypervisor.
        // This is a requirement from Microsoft Hypervisor.
        for (_, s) in dirty_log_slots.iter() {
            self.fd
                .get_dirty_log(s.guest_pfn, s.memory_size as usize, DIRTY_BITMAP_SET_DIRTY)
                .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        }
        self.fd
            .disable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        Ok(())
    }
    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(
                base_gpa >> PAGE_SHIFT,
                memory_size as usize,
                DIRTY_BITMAP_CLEAR_DIRTY,
            )
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }
    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        Ok(ClockData::Mshv)
    }
    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, _data: &ClockData) -> vm::Result<()> {
        Ok(())
    }
}
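
// A minimal, test-only sketch (not from the upstream sources) checking the
// RAX-merge mask used in the PIO read path of `MshvVcpu::run` for the three
// legal access sizes.
#[cfg(test)]
mod pio_mask_tests {
    #[test]
    fn rax_merge_mask() {
        // Same expression as in the PIO read path above.
        let mask = |len: usize| 0xffffffffu32 >> (32 - len * 8);
        assert_eq!(mask(1), 0xff);
        assert_eq!(mask(2), 0xffff);
        assert_eq!(mask(4), 0xffff_ffff);
    }
}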