// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//

use crate::arch::emulator::{PlatformEmulator, PlatformError};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::emulator::{Emulator, EmulatorCpuState};
use crate::cpu;
use crate::cpu::Vcpu;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, InterruptSourceConfig, VmOps};
use crate::HypervisorType;
pub use mshv_bindings::*;
use mshv_ioctls::{set_registers_64, Mshv, NoDatamatch, VcpuFd, VmFd};
use std::any::Any;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use vfio_ioctls::VfioDeviceFd;
use vm::DataMatch;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
use crate::{
    ClockData, CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
    USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
};
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
pub use x86_64::VcpuMshvState;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;

#[cfg(target_arch = "x86_64")]
use std::fs::File;
use std::os::unix::io::AsRawFd;

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{
    CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, StandardRegisters,
};

const DIRTY_BITMAP_CLEAR_DIRTY: u64 = 0x4;
const DIRTY_BITMAP_SET_DIRTY: u64 = 0x8;

///
/// Export generically-named wrappers of mshv-bindings for Unix-based platforms
///
pub use {
    mshv_bindings::mshv_create_device as CreateDevice,
    mshv_bindings::mshv_device_attr as DeviceAttr, mshv_ioctls::DeviceFd,
};

pub const PAGE_SHIFT: usize = 12;

impl From<mshv_user_mem_region> for UserMemoryRegion {
    fn from(region: mshv_user_mem_region) -> Self {
        let mut flags: u32 = 0;
        if region.flags & HV_MAP_GPA_READABLE != 0 {
            flags |= USER_MEMORY_REGION_READ;
        }
        if region.flags & HV_MAP_GPA_WRITABLE != 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & HV_MAP_GPA_EXECUTABLE != 0 {
            flags |= USER_MEMORY_REGION_EXECUTE;
        }

        UserMemoryRegion {
            guest_phys_addr: (region.guest_pfn << PAGE_SHIFT as u64)
                + (region.userspace_addr & ((1 << PAGE_SHIFT) - 1)),
            memory_size: region.size,
            userspace_addr: region.userspace_addr,
            flags,
            ..Default::default()
        }
    }
}

impl From<UserMemoryRegion> for mshv_user_mem_region {
    fn from(region: UserMemoryRegion) -> Self {
        let mut flags: u32 = 0;
        if region.flags & USER_MEMORY_REGION_READ != 0 {
            flags |= HV_MAP_GPA_READABLE;
        }
        if region.flags & USER_MEMORY_REGION_WRITE != 0 {
            flags |= HV_MAP_GPA_WRITABLE;
        }
        if region.flags & USER_MEMORY_REGION_EXECUTE != 0 {
            flags |= HV_MAP_GPA_EXECUTABLE;
        }

        mshv_user_mem_region {
            guest_pfn: region.guest_phys_addr >> PAGE_SHIFT,
            size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<mshv_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: mshv_ioctls::IoEventAddress) -> Self {
        match a {
            mshv_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            mshv_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for mshv_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}
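// A minimal, test-only sketch of the conversions above, using made-up example
// values. It illustrates how guest_pfn/PAGE_SHIFT and the HV_MAP_GPA_* flags map
// onto the generic UserMemoryRegion representation; it is not exercised by the
// hypervisor at runtime.
#[cfg(test)]
mod user_memory_region_conversion_tests {
    use super::*;

    #[test]
    fn mshv_region_round_trips_through_user_memory_region() {
        let mshv_region = mshv_user_mem_region {
            guest_pfn: 0x100,
            size: 0x2000,
            userspace_addr: 0x7000_0000,
            flags: HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE,
        };

        let generic: UserMemoryRegion = mshv_region.into();
        // The PFN is shifted back into a byte address; the page offset of
        // userspace_addr (zero here) is added on top.
        assert_eq!(generic.guest_phys_addr, 0x100 << PAGE_SHIFT);
        assert_eq!(generic.memory_size, 0x2000);
        assert_ne!(generic.flags & USER_MEMORY_REGION_READ, 0);
        assert_ne!(generic.flags & USER_MEMORY_REGION_WRITE, 0);
        assert_eq!(generic.flags & USER_MEMORY_REGION_EXECUTE, 0);

        let back: mshv_user_mem_region = generic.into();
        assert_eq!(back.guest_pfn, 0x100);
        assert_eq!(back.size, 0x2000);
        assert_ne!(back.flags & HV_MAP_GPA_WRITABLE, 0);
    }
}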
impl From<VcpuMshvState> for CpuState {
    fn from(s: VcpuMshvState) -> Self {
        CpuState::Mshv(s)
    }
}

impl From<CpuState> for VcpuMshvState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Mshv(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

impl From<mshv_msi_routing_entry> for IrqRoutingEntry {
    fn from(s: mshv_msi_routing_entry) -> Self {
        IrqRoutingEntry::Mshv(s)
    }
}

impl From<IrqRoutingEntry> for mshv_msi_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

struct MshvDirtyLogSlot {
    guest_pfn: u64,
    memory_size: u64,
}

/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by MSHV.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

impl MshvHypervisor {
    /// Create a hypervisor based on Mshv
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let mshv_obj =
            Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
        Ok(Arc::new(MshvHypervisor { mshv: mshv_obj }))
    }
    /// Check if the hypervisor is available
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/mshv") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}
/// Implementation of Hypervisor trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
///
impl hypervisor::Hypervisor for MshvHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Mshv
    }
    /// Create an MSHV VM object and return it as a Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::MshvHypervisor;
    /// use hypervisor::MshvVm;
    /// let hypervisor = MshvHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.mshv.create_vm() {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        // The default Microsoft Hypervisor behavior for an unimplemented MSR is to
        // send a fault to the guest when it tries to access it. It is possible
        // to override this behavior with a more suitable option, i.e. ignore
        // writes from the guest and return zero when the guest attempts to read an
        // unimplemented MSR.
        fd.set_partition_property(
            hv_partition_property_code_HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION,
            hv_unimplemented_msr_action_HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;

        let msr_list = self.get_msr_list()?;
        let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
        let mut msrs: Vec<MsrEntry> = vec![
            MsrEntry {
                ..Default::default()
            };
            num_msrs
        ];
        let indices = msr_list.as_slice();
        for (pos, index) in indices.iter().enumerate() {
            msrs[pos].index = *index;
        }
        let vm_fd = Arc::new(fd);

        Ok(Arc::new(MshvVm {
            fd: vm_fd,
            msrs,
            dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
        }))
    }
    ///
    /// Get the supported CpuID
    ///
    fn get_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        Ok(Vec::new())
    }
}

/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    fd: VcpuFd,
    vp_index: u8,
    cpuid: Vec<CpuIdEntry>,
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
}

/// Implementation of Vcpu trait for Microsoft Hypervisor
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0).unwrap();
/// vcpu.get/set().unwrap()
///
impl cpu::Vcpu for MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        Ok(self
            .fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        Ok(self
            .fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: mshv_bindings::FloatingPointUnit = (*fpu).clone().into();
        self.fd
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        let succ = self
            .fd
            .get_msrs(&mut mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &mshv_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        self.fd
            .set_msrs(&mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }
    #[allow(non_upper_case_globals)]
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        let hv_message: hv_message = hv_message::default();
        match self.fd.run(hv_message) {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    // SAFETY: access_info is valid, otherwise we won't be here
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    /*
                     * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
                     *
                     * Cloud Hypervisor doesn't support fw_cfg at the moment. It does support 0x402
                     * under the "fwdebug" feature flag. But that feature is not enabled by default
                     * and is considered legacy.
                     *
                     * OVMF unconditionally pokes these IO ports with string IO.
                     *
                     * Instead of trying to implement string IO support now, which would not buy us
                     * much at this point, skip those ports explicitly to avoid panicking.
                     *
                     * Proper string IO support can be added once we gain the ability to translate
                     * guest virtual addresses to guest physical addresses on MSHV.
                     */
                    match port {
                        0x402 | 0x510 | 0x511 | 0x514 => {
                            let insn_len = info.header.instruction_length() as u64;

                            /* Advance RIP and update RAX */
                            let arr_reg_name_value = [
                                (
                                    hv_register_name::HV_X64_REGISTER_RIP,
                                    info.header.rip + insn_len,
                                ),
                                (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                            ];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                            return Ok(cpu::VmExit::Ignore);
                        }
                        _ => {}
                    }

                    // SAFETY: access_info is valid, otherwise we won't be here
                    assert!(
                        (unsafe { access_info.__bindgen_anon_1.string_op() } != 1),
                        "String IN/OUT not supported"
                    );
                    assert!(
                        (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1),
                        "Rep IN/OUT not supported"
                    );

                    if is_write {
                        let data = (info.rax as u32).to_le_bytes();
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_write(port.into(), &data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }
                    } else {
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_read(port.into(), &mut data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }

                        let v = u32::from_le_bytes(data);
                        /* Preserve high bits in EAX but clear out high bits in RAX */
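                        // Worked example (made-up values): a 1-byte IN (len = 1) that reads
                        // v = 0x42 while info.rax = 0x1122_3344_5566_7788 gives mask = 0xff,
                        // eax = (0x5566_7788 & !0xff) | 0x42 = 0x5566_7742, and
                        // ret_rax = 0x0000_0000_5566_7742.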
                        let mask = 0xffffffff >> (32 - len * 8);
                        let eax = (info.rax as u32 & !mask) | (v & mask);
                        ret_rax = eax as u64;
                    }

                    let insn_len = info.header.instruction_length() as u64;

                    /* Advance RIP and update RAX */
                    let arr_reg_name_value = [
                        (
                            hv_register_name::HV_X64_REGISTER_RIP,
                            info.header.rip + insn_len,
                        ),
                        (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                    ];
                    set_registers_64!(self.fd, arr_reg_name_value)
                        .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_UNMAPPED_GPA => {
                    let info = x.to_memory_info().unwrap();
                    let insn_len = info.instruction_byte_count as usize;
                    assert!(insn_len > 0 && insn_len <= 16);

                    let mut context = MshvEmulatorContext {
                        vcpu: self,
                        map: (info.guest_virtual_address, info.guest_physical_address),
                    };

                    // Create a new emulator.
                    let mut emul = Emulator::new(&mut context);

                    // Emulate the trapped instruction, and only the first one.
                    let new_state = emul
                        .emulate_first_insn(self.vp_index as usize, &info.instruction_bytes)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Set CPU state back.
                    context
                        .set_cpu_state(self.vp_index as usize, new_state)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
                    let info = x.to_cpuid_info().unwrap();
                    debug!("cpuid eax: {:x}", { info.rax });
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
                    let info = x.to_msr_info().unwrap();
                    if info.header.intercept_access_type == 0 {
                        debug!("msr read: {:x}", { info.msr_number });
                    } else {
                        debug!("msr write: {:x}", { info.msr_number });
                    }
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
                    //TODO: Handler for VMCALL here.
                    let info = x.to_exception_info().unwrap();
                    debug!("Exception Info {:?}", { info.exception_vector });
                    Ok(cpu::VmExit::Ignore)
                }
                exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unhandled VCPU exit {:?}",
                    exit
                ))),
            },

            Err(e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, _cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
        Ok(self.cpuid.clone())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        Ok(self
            .fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, lapic: &LapicState) -> cpu::Result<()> {
        let lapic: mshv_bindings::LapicState = (*lapic).clone().into();
        self.fd
            .set_lapic(&lapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    ///
    /// Returns the vcpu's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        Ok(MpState::Mshv)
    }
    ///
    /// Sets the vcpu's current "multiprocessing state".
    ///
    fn set_mp_state(&self, _mp_state: MpState) -> cpu::Result<()> {
        Ok(())
    }
    ///
    /// Set CPU state
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuMshvState = state.clone().into();
        self.set_msrs(&state.msrs)?;
        self.set_vcpu_events(&state.vcpu_events)?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_fpu(&state.fpu)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic)?;
        self.set_xsave(&state.xsave)?;
        // These registers are global and need to be set only for the first vCPU,
        // as the Microsoft Hypervisor allows setting this register for only one vCPU.
        if self.vp_index == 0 {
            self.fd
                .set_misc_regs(&state.misc)
                .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))?
        }
        self.fd
            .set_debug_regs(&state.dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
        Ok(())
    }
    ///
    /// Get CPU State
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xcrs = self.get_xcrs()?;
        let fpu = self.get_fpu()?;
        let vcpu_events = self.get_vcpu_events()?;
        let mut msrs = self.msrs.clone();
        self.get_msrs(&mut msrs)?;
        let lapic = self.get_lapic()?;
        let xsave = self.get_xsave()?;
        let misc = self
            .fd
            .get_misc_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?;
        let dbg = self
            .fd
            .get_debug_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;

        Ok(VcpuMshvState {
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            xcrs,
            lapic,
            dbg,
            xsave,
            misc,
        }
        .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Translate guest virtual address to guest physical address
    ///
    fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> {
        let r = self
            .fd
            .translate_gva(gva, flags)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;

        let gpa = r.0;
        // SAFETY: r is valid, otherwise this function would have returned already
        let result_code = unsafe { r.1.__bindgen_anon_1.result_code };

        Ok((gpa, result_code))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU
    ///
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr!(msr_index::MSR_IA32_TSC),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }
}

impl MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
}

struct MshvEmulatorContext<'a> {
    vcpu: &'a MshvVcpu,
    map: (u64, u64), // Initial GVA to GPA mapping provided by the hypervisor
}

impl<'a> MshvEmulatorContext<'a> {
    // Do the actual gva -> gpa translation
    #[allow(non_upper_case_globals)]
    fn translate(&self, gva: u64) -> Result<u64, PlatformError> {
        if self.map.0 == gva {
            return Ok(self.map.1);
        }

        // TODO: More fine-grained control for the flags
        let flags = HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE;

        let (gpa, result_code) = self
            .vcpu
            .translate_gva(gva, flags.into())
            .map_err(|e| PlatformError::TranslateVirtualAddress(anyhow!(e)))?;

        match result_code {
            hv_translate_gva_result_code_HV_TRANSLATE_GVA_SUCCESS => Ok(gpa),
            _ => Err(PlatformError::TranslateVirtualAddress(anyhow!(result_code))),
        }
    }
}

/// Platform emulation for Hyper-V
impl<'a> PlatformEmulator for MshvEmulatorContext<'a> {
    type CpuState = EmulatorCpuState;

    fn read_memory(&self, gva: u64, data: &mut [u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory read {} bytes from [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vm_ops) = &self.vcpu.vm_ops {
            if vm_ops.guest_mem_read(gpa, data).is_err() {
                vm_ops
                    .mmio_read(gpa, data)
                    .map_err(|e| PlatformError::MemoryReadFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn write_memory(&mut self, gva: u64, data: &[u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory write {} bytes at [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vm_ops) = &self.vcpu.vm_ops {
            if vm_ops.guest_mem_write(gpa, data).is_err() {
                vm_ops
                    .mmio_write(gpa, data)
                    .map_err(|e| PlatformError::MemoryWriteFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn cpu_state(&self, cpu_id: usize) -> Result<Self::CpuState, PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::GetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        let regs = self
            .vcpu
            .get_regs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;
        let sregs = self
            .vcpu
            .get_sregs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;

        debug!("mshv emulator: Getting new CPU state");
        debug!("mshv emulator: {:#x?}", regs);

        Ok(EmulatorCpuState { regs, sregs })
    }

    fn set_cpu_state(&self, cpu_id: usize, state: Self::CpuState) -> Result<(), PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::SetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        debug!("mshv emulator: Setting new CPU state");
        debug!("mshv emulator: {:#x?}", state.regs);

        self.vcpu
            .set_regs(&state.regs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))?;
        self.vcpu
            .set_sregs(&state.sregs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))
    }

    fn gva_to_gpa(&self, gva: u64) -> Result<u64, PlatformError> {
        self.translate(gva)
    }

    fn fetch(&self, _ip: u64, _instruction_bytes: &mut [u8]) -> Result<(), PlatformError> {
        Err(PlatformError::MemoryReadFailure(anyhow!("unimplemented")))
    }
}

/// Wrapper over Mshv VM ioctls.
pub struct MshvVm {
    fd: Arc<VmFd>,
    msrs: Vec<MsrEntry>,
    dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
}

impl MshvVm {
    ///
    /// Creates an in-kernel device.
    ///
    /// See the documentation for `MSHV_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<VfioDeviceFd> {
        let device_fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        Ok(VfioDeviceFd::new_from_mshv(device_fd))
    }
}

///
/// Implementation of Vm trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// # extern crate hypervisor;
/// # use hypervisor::MshvHypervisor;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// vm.set/get().unwrap()
///
impl vm::Vm for MshvVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vcpu_fd = self
            .fd
            .create_vcpu(id)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = MshvVcpu {
            fd: vcpu_fd,
            vp_index: id,
            cpuid: Vec::new(),
            msrs: self.msrs.clone(),
            vm_ops,
        };
        Ok(Arc::new(vcpu))
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
        Ok(())
    }
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<DataMatch>,
    ) -> vm::Result<()> {
        let addr = &mshv_ioctls::IoEventAddress::from(*addr);
        debug!(
            "register_ioevent fd {} addr {:x?} datamatch {:?}",
            fd.as_raw_fd(),
            addr,
            datamatch
        );
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(mshv_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(mshv_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }
    /// Unregister an event from a certain address it has been previously registered to.
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        let addr = &mshv_ioctls::IoEventAddress::from(*addr);
        debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);

        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    /// Creates a guest physical memory region.
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // Keep track of the slots regardless of whether they are read-only.
        // For a read-only slot the hypervisor can still enable dirty-bit tracking,
        // but a VM exit happens before the dirty bits are set.
        self.dirty_log_slots.write().unwrap().insert(
            user_memory_region.guest_pfn,
            MshvDirtyLogSlot {
                guest_pfn: user_memory_region.guest_pfn,
                memory_size: user_memory_region.size,
            },
        );

        self.fd
            .map_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
        Ok(())
    }

    /// Removes a guest physical memory region.
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots
            .write()
            .unwrap()
            .remove(&user_memory_region.guest_pfn);

        self.fd
            .unmap_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
        Ok(())
    }

    fn make_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        _log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        let mut flags = HV_MAP_GPA_READABLE | HV_MAP_GPA_EXECUTABLE;
        if !readonly {
            flags |= HV_MAP_GPA_WRITABLE;
        }

        mshv_user_mem_region {
            flags,
            guest_pfn: guest_phys_addr >> PAGE_SHIFT,
            size: memory_size,
            userspace_addr: userspace_addr as u64,
        }
        .into()
    }

    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = mshv_create_device {
            type_: mshv_device_type_MSHV_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match config {
            InterruptSourceConfig::MsiIrq(cfg) => mshv_msi_routing_entry {
                gsi,
                address_lo: cfg.low_addr,
                address_hi: cfg.high_addr,
                data: cfg.data,
            }
            .into(),
            _ => {
                unreachable!()
            }
        }
    }

    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut msi_routing =
            vec_with_array_field::<mshv_msi_routing, mshv_msi_routing_entry>(entries.len());
        msi_routing[0].nr = entries.len() as u32;

        let entries: Vec<mshv_msi_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Mshv(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: msi_routing is allocated with room for entries.len() entries, and
        // entries_slice is created from it with entries.len() again, so it is
        // guaranteed to be large enough to hold everything from entries.
        unsafe {
            let entries_slice: &mut [mshv_msi_routing_entry] =
                msi_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_msi_routing(&msi_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        self.fd
            .enable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
    }
    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        // Before disabling dirty page tracking we need to set the dirty bits
        // in the hypervisor. This is a requirement from the Microsoft Hypervisor.
        for (_, s) in dirty_log_slots.iter() {
            self.fd
                .get_dirty_log(s.guest_pfn, s.memory_size as usize, DIRTY_BITMAP_SET_DIRTY)
                .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        }
        self.fd
            .disable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        Ok(())
    }
    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(
                base_gpa >> PAGE_SHIFT,
                memory_size as usize,
                DIRTY_BITMAP_CLEAR_DIRTY,
            )
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }
    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        Ok(ClockData::Mshv)
    }
    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, _data: &ClockData) -> vm::Result<()> {
        Ok(())
    }
    /// Downcast to the underlying MshvVm type
    fn as_any(&self) -> &dyn Any {
        self
    }
}
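
// A minimal, test-only sketch exercising the IoEventAddress conversions defined
// near the top of this file (and used by register_ioevent/unregister_ioevent).
// The port and MMIO addresses are arbitrary example values; the test does not
// touch /dev/mshv.
#[cfg(test)]
mod io_event_address_conversion_tests {
    use super::*;

    #[test]
    fn pio_and_mmio_addresses_round_trip() {
        let pio: mshv_ioctls::IoEventAddress = IoEventAddress::Pio(0x3f8).into();
        assert!(matches!(pio, mshv_ioctls::IoEventAddress::Pio(0x3f8)));

        let mmio: IoEventAddress = mshv_ioctls::IoEventAddress::Mmio(0xfee0_0000).into();
        assert!(matches!(mmio, IoEventAddress::Mmio(0xfee0_0000)));
    }
}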