// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//

use crate::arch::emulator::{PlatformEmulator, PlatformError};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::emulator::{Emulator, EmulatorCpuState};
use crate::cpu;
use crate::cpu::Vcpu;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, InterruptSourceConfig, VmOps};
use crate::HypervisorType;
pub use mshv_bindings::*;
use mshv_ioctls::{set_registers_64, Mshv, NoDatamatch, VcpuFd, VmFd};
use std::any::Any;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use vfio_ioctls::VfioDeviceFd;
use vm::DataMatch;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
use crate::{
    ClockData, CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
    USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
};
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
pub use x86_64::VcpuMshvState;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;

#[cfg(target_arch = "x86_64")]
use std::fs::File;
use std::os::unix::io::AsRawFd;

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{CpuIdEntry, FpuState, MsrEntry};

const DIRTY_BITMAP_CLEAR_DIRTY: u64 = 0x4;
const DIRTY_BITMAP_SET_DIRTY: u64 = 0x8;

///
/// Export generically-named wrappers of mshv-bindings for Unix-based platforms
///
pub use {
    mshv_bindings::mshv_create_device as CreateDevice,
    mshv_bindings::mshv_device_attr as DeviceAttr, mshv_ioctls::DeviceFd,
};

pub const PAGE_SHIFT: usize = 12;

impl From<mshv_user_mem_region> for UserMemoryRegion {
    fn from(region: mshv_user_mem_region) -> Self {
        let mut flags: u32 = 0;
        if region.flags & HV_MAP_GPA_READABLE != 0 {
            flags |= USER_MEMORY_REGION_READ;
        }
        if region.flags & HV_MAP_GPA_WRITABLE != 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & HV_MAP_GPA_EXECUTABLE != 0 {
            flags |= USER_MEMORY_REGION_EXECUTE;
        }

        UserMemoryRegion {
            guest_phys_addr: (region.guest_pfn << PAGE_SHIFT as u64)
                + (region.userspace_addr & ((1 << PAGE_SHIFT) - 1)),
            memory_size: region.size,
            userspace_addr: region.userspace_addr,
            flags,
            ..Default::default()
        }
    }
}

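/// Converts a generic `UserMemoryRegion` into the MSHV representation.
///
/// # Example
///
/// A minimal sketch of the conversion, using made-up, page-aligned values:
/// ```
/// # use hypervisor::{UserMemoryRegion, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE};
/// # use hypervisor::mshv::{mshv_user_mem_region, PAGE_SHIFT};
/// let region = UserMemoryRegion {
///     guest_phys_addr: 0x10_0000,
///     memory_size: 0x2000,
///     userspace_addr: 0x7f00_0000_0000,
///     flags: USER_MEMORY_REGION_READ | USER_MEMORY_REGION_WRITE,
///     ..Default::default()
/// };
/// let mshv_region: mshv_user_mem_region = region.into();
/// assert_eq!(mshv_region.guest_pfn, 0x10_0000 >> PAGE_SHIFT);
/// ```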
impl From<UserMemoryRegion> for mshv_user_mem_region {
    fn from(region: UserMemoryRegion) -> Self {
        let mut flags: u32 = 0;
        if region.flags & USER_MEMORY_REGION_READ != 0 {
            flags |= HV_MAP_GPA_READABLE;
        }
        if region.flags & USER_MEMORY_REGION_WRITE != 0 {
            flags |= HV_MAP_GPA_WRITABLE;
        }
        if region.flags & USER_MEMORY_REGION_EXECUTE != 0 {
            flags |= HV_MAP_GPA_EXECUTABLE;
        }

        mshv_user_mem_region {
            guest_pfn: region.guest_phys_addr >> PAGE_SHIFT,
            size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<mshv_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: mshv_ioctls::IoEventAddress) -> Self {
        match a {
            mshv_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            mshv_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for mshv_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuMshvState> for CpuState {
    fn from(s: VcpuMshvState) -> Self {
        CpuState::Mshv(s)
    }
}

impl From<CpuState> for VcpuMshvState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Mshv(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

impl From<mshv_msi_routing_entry> for IrqRoutingEntry {
    fn from(s: mshv_msi_routing_entry) -> Self {
        IrqRoutingEntry::Mshv(s)
    }
}

impl From<IrqRoutingEntry> for mshv_msi_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

struct MshvDirtyLogSlot {
    guest_pfn: u64,
    memory_size: u64,
}

/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by MSHV.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

impl MshvHypervisor {
    /// Create a hypervisor based on Mshv
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let mshv_obj =
            Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
        Ok(Arc::new(MshvHypervisor { mshv: mshv_obj }))
    }
    /// Check if the hypervisor is available
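    ///
    /// # Example
    ///
    /// A minimal sketch; this only checks for /dev/mshv and succeeds either way:
    /// ```
    /// use hypervisor::mshv::MshvHypervisor;
    /// if MshvHypervisor::is_available().unwrap() {
    ///     let hypervisor = MshvHypervisor::new().unwrap();
    /// }
    /// ```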
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/mshv") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}
/// Implementation of Hypervisor trait for Mshv
///
/// # Examples
///
/// ```
/// # use hypervisor::mshv::MshvHypervisor;
/// # use std::sync::Arc;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl hypervisor::Hypervisor for MshvHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Mshv
    }
    /// Create an MSHV VM object and return it as a Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # extern crate hypervisor;
    /// # use hypervisor::mshv::MshvHypervisor;
    /// use hypervisor::mshv::MshvVm;
    /// let hypervisor = MshvHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// ```
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.mshv.create_vm() {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        // The default Microsoft Hypervisor behavior for an unimplemented MSR
        // is to send a fault to the guest when it is accessed. It is possible
        // to override this behavior with a more suitable option, i.e. ignore
        // writes from the guest and return zero on attempts to read an
        // unimplemented MSR.
        fd.set_partition_property(
            hv_partition_property_code_HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION,
            hv_unimplemented_msr_action_HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;

        let msr_list = self.get_msr_list()?;
        let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
        let mut msrs: Vec<MsrEntry> = vec![
            MsrEntry {
                ..Default::default()
            };
            num_msrs
        ];
        let indices = msr_list.as_slice();
        for (pos, index) in indices.iter().enumerate() {
            msrs[pos].index = *index;
        }
        let vm_fd = Arc::new(fd);

        Ok(Arc::new(MshvVm {
            fd: vm_fd,
            msrs,
            dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
        }))
    }
    ///
    /// Get the supported CpuID
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        Ok(Vec::new())
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        // TODO: Using HV_MAXIMUM_PROCESSORS would be better
        // but the ioctl API is limited to u8
        256
    }
}

/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    fd: VcpuFd,
    vp_index: u8,
    cpuid: Vec<CpuIdEntry>,
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
}

/// Implementation of Vcpu trait for Microsoft Hypervisor
///
/// # Examples
///
/// ```
/// # use hypervisor::mshv::MshvHypervisor;
/// # use std::sync::Arc;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// ```
impl cpu::Vcpu for MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<crate::arch::x86::StandardRegisters> {
        Ok(self
            .fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
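    /// # Example
    ///
    /// A minimal read-modify-write sketch (requires /dev/mshv):
    /// ```
    /// # use hypervisor::mshv::MshvHypervisor;
    /// # use std::sync::Arc;
    /// let mshv = MshvHypervisor::new().unwrap();
    /// let hypervisor = Arc::new(mshv);
    /// let vm = hypervisor.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let mut regs = vcpu.get_regs().unwrap();
    /// regs.rflags |= 0x2; // bit 1 of RFLAGS is reserved and must stay set
    /// vcpu.set_regs(&regs).unwrap();
    /// ```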
    fn set_regs(&self, regs: &crate::arch::x86::StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<crate::arch::x86::SpecialRegisters> {
        Ok(self
            .fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &crate::arch::x86::SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: mshv_bindings::FloatingPointUnit = (*fpu).clone().into();
        self.fd
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        let succ = self
            .fd
            .get_msrs(&mut mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &mshv_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
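    /// # Example
    ///
    /// A minimal sketch writing the boot-time MSRs back (requires /dev/mshv):
    /// ```
    /// # use hypervisor::mshv::MshvHypervisor;
    /// # use std::sync::Arc;
    /// let mshv = MshvHypervisor::new().unwrap();
    /// let hypervisor = Arc::new(mshv);
    /// let vm = hypervisor.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let msrs = vcpu.boot_msr_entries();
    /// let written = vcpu.set_msrs(&msrs).unwrap();
    /// ```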
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        self.fd
            .set_msrs(&mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }
    #[allow(non_upper_case_globals)]
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        let hv_message: hv_message = hv_message::default();
        match self.fd.run(hv_message) {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    // SAFETY: access_info is valid, otherwise we won't be here
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    /*
                     * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
                     *
                     * Cloud Hypervisor doesn't support fw_cfg at the moment. It does
                     * support 0x402 under the "fwdebug" feature flag, but that feature
                     * is not enabled by default and is considered legacy.
                     *
                     * OVMF unconditionally pokes these IO ports with string IO.
                     *
                     * Instead of trying to implement string IO support, which would not
                     * accomplish much right now, skip those ports explicitly to avoid
                     * panicking.
                     *
                     * Proper string IO support can be added once we gain the ability to
                     * translate guest virtual addresses to guest physical addresses on
                     * MSHV.
                     */
                    match port {
                        0x402 | 0x510 | 0x511 | 0x514 => {
                            let insn_len = info.header.instruction_length() as u64;

                            /* Advance RIP and update RAX */
                            let arr_reg_name_value = [
                                (
                                    hv_register_name_HV_X64_REGISTER_RIP,
                                    info.header.rip + insn_len,
                                ),
                                (hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
                            ];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                            return Ok(cpu::VmExit::Ignore);
                        }
                        _ => {}
                    }

                    assert!(
                        // SAFETY: access_info is valid, otherwise we won't be here
                        (unsafe { access_info.__bindgen_anon_1.string_op() } != 1),
                        "String IN/OUT not supported"
                    );
                    assert!(
                        // SAFETY: access_info is valid, otherwise we won't be here
                        (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1),
                        "Rep IN/OUT not supported"
                    );

                    if is_write {
                        let data = (info.rax as u32).to_le_bytes();
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_write(port.into(), &data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }
                    } else {
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_read(port.into(), &mut data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }

                        let v = u32::from_le_bytes(data);
                        /* Preserve high bits in EAX but clear out high bits in RAX */
                        let mask = 0xffffffff >> (32 - len * 8);
                        let eax = (info.rax as u32 & !mask) | (v & mask);
                        ret_rax = eax as u64;
                    }

                    let insn_len = info.header.instruction_length() as u64;

                    /* Advance RIP and update RAX */
                    let arr_reg_name_value = [
                        (
                            hv_register_name_HV_X64_REGISTER_RIP,
                            info.header.rip + insn_len,
                        ),
                        (hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
                    ];
                    set_registers_64!(self.fd, arr_reg_name_value)
                        .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                    Ok(cpu::VmExit::Ignore)
                }
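                // An access to a GPA without a backing mapping (typically MMIO)
                // lands here. The hypervisor provides the faulting instruction
                // bytes and an initial GVA -> GPA mapping; the instruction is
                // decoded and emulated in userspace, then the resulting CPU
                // state is written back before the guest is resumed.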
                hv_message_type_HVMSG_UNMAPPED_GPA => {
                    let info = x.to_memory_info().unwrap();
                    let insn_len = info.instruction_byte_count as usize;
                    assert!(insn_len > 0 && insn_len <= 16);

                    let mut context = MshvEmulatorContext {
                        vcpu: self,
                        map: (info.guest_virtual_address, info.guest_physical_address),
                    };

                    // Create a new emulator.
                    let mut emul = Emulator::new(&mut context);

                    // Emulate the trapped instruction, and only the first one.
                    let new_state = emul
                        .emulate_first_insn(self.vp_index as usize, &info.instruction_bytes)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Set CPU state back.
                    context
                        .set_cpu_state(self.vp_index as usize, new_state)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
                    let info = x.to_cpuid_info().unwrap();
                    debug!("cpuid eax: {:x}", { info.rax });
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
                    let info = x.to_msr_info().unwrap();
                    if info.header.intercept_access_type == 0 {
                        debug!("msr read: {:x}", { info.msr_number });
                    } else {
                        debug!("msr write: {:x}", { info.msr_number });
                    }
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
                    // TODO: Handler for VMCALL here.
                    let info = x.to_exception_info().unwrap();
                    debug!("Exception Info {:?}", { info.exception_vector });
                    Ok(cpu::VmExit::Ignore)
                }
                exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unhandled VCPU exit {:?}",
                    exit
                ))),
            },

            Err(e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
        let cpuid: Vec<mshv_bindings::hv_cpuid_entry> = cpuid.iter().map(|e| (*e).into()).collect();
        let mshv_cpuid = <CpuId>::from_entries(&cpuid)
            .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;

        self.fd
            .register_intercept_result_cpuid(&mshv_cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
        Ok(self.cpuid.clone())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<crate::arch::x86::LapicState> {
        Ok(self
            .fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
            .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, lapic: &crate::arch::x86::LapicState) -> cpu::Result<()> {
        let lapic: mshv_bindings::LapicState = (*lapic).clone().into();
        self.fd
            .set_lapic(&lapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    ///
    /// Returns the vcpu's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        Ok(MpState::Mshv)
    }
    ///
    /// Sets the vcpu's current "multiprocessing state".
    ///
    fn set_mp_state(&self, _mp_state: MpState) -> cpu::Result<()> {
        Ok(())
    }
    ///
    /// Set CPU state
    ///
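    /// # Example
    ///
    /// A minimal save/restore sketch (requires /dev/mshv):
    /// ```
    /// # use hypervisor::mshv::MshvHypervisor;
    /// # use std::sync::Arc;
    /// let mshv = MshvHypervisor::new().unwrap();
    /// let hypervisor = Arc::new(mshv);
    /// let vm = hypervisor.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```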
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuMshvState = state.clone().into();
        self.set_msrs(&state.msrs)?;
        self.set_vcpu_events(&state.vcpu_events)?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_fpu(&state.fpu)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic)?;
        self.set_xsave(&state.xsave)?;
        // These registers are global and need to be set only for the first
        // vCPU, as Microsoft Hypervisor allows setting this register for only
        // one vCPU.
        if self.vp_index == 0 {
            self.fd
                .set_misc_regs(&state.misc)
                .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))?
        }
        self.fd
            .set_debug_regs(&state.dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
        Ok(())
    }
    ///
    /// Get CPU State
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xcrs = self.get_xcrs()?;
        let fpu = self.get_fpu()?;
        let vcpu_events = self.get_vcpu_events()?;
        let mut msrs = self.msrs.clone();
        self.get_msrs(&mut msrs)?;
        let lapic = self.get_lapic()?;
        let xsave = self.get_xsave()?;
        let misc = self
            .fd
            .get_misc_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?;
        let dbg = self
            .fd
            .get_debug_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;

        Ok(VcpuMshvState {
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            xcrs,
            lapic,
            dbg,
            xsave,
            misc,
        }
        .into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Translate guest virtual address to guest physical address
    ///
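    /// # Example
    ///
    /// A sketch only; a meaningful translation requires guest paging to be
    /// set up, so this is not run as a doctest:
    /// ```no_run
    /// # use hypervisor::mshv::MshvHypervisor;
    /// # use hypervisor::mshv::HV_TRANSLATE_GVA_VALIDATE_READ;
    /// # use std::sync::Arc;
    /// let mshv = MshvHypervisor::new().unwrap();
    /// let hypervisor = Arc::new(mshv);
    /// let vm = hypervisor.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let (gpa, result_code) = vcpu
    ///     .translate_gva(0x1000, HV_TRANSLATE_GVA_VALIDATE_READ.into())
    ///     .unwrap();
    /// ```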
    fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> {
        let r = self
            .fd
            .translate_gva(gva, flags)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;

        let gpa = r.0;
        // SAFETY: r is valid, otherwise this function will have returned
        let result_code = unsafe { r.1.__bindgen_anon_1.result_code };

        Ok((gpa, result_code))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU
    ///
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }
}

impl MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
}

struct MshvEmulatorContext<'a> {
    vcpu: &'a MshvVcpu,
    map: (u64, u64), // Initial GVA to GPA mapping provided by the hypervisor
}

impl<'a> MshvEmulatorContext<'a> {
    // Do the actual gva -> gpa translation
    #[allow(non_upper_case_globals)]
    fn translate(&self, gva: u64) -> Result<u64, PlatformError> {
        if self.map.0 == gva {
            return Ok(self.map.1);
        }

        // TODO: More fine-grained control for the flags
        let flags = HV_TRANSLATE_GVA_VALIDATE_READ | HV_TRANSLATE_GVA_VALIDATE_WRITE;

        let (gpa, result_code) = self
            .vcpu
            .translate_gva(gva, flags.into())
            .map_err(|e| PlatformError::TranslateVirtualAddress(anyhow!(e)))?;

        match result_code {
            hv_translate_gva_result_code_HV_TRANSLATE_GVA_SUCCESS => Ok(gpa),
            _ => Err(PlatformError::TranslateVirtualAddress(anyhow!(result_code))),
        }
    }
}

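// Guest memory accesses from the emulator go through VmOps: plain RAM is
// served by guest_mem_read/guest_mem_write, and when those fail the address
// is assumed to be MMIO and the access is forwarded to the device model via
// mmio_read/mmio_write.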
/// Platform emulation for Hyper-V
impl<'a> PlatformEmulator for MshvEmulatorContext<'a> {
    type CpuState = EmulatorCpuState;

    fn read_memory(&self, gva: u64, data: &mut [u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory read {} bytes from [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vm_ops) = &self.vcpu.vm_ops {
            if vm_ops.guest_mem_read(gpa, data).is_err() {
                vm_ops
                    .mmio_read(gpa, data)
                    .map_err(|e| PlatformError::MemoryReadFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn write_memory(&mut self, gva: u64, data: &[u8]) -> Result<(), PlatformError> {
        let gpa = self.translate(gva)?;
        debug!(
            "mshv emulator: memory write {} bytes at [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vm_ops) = &self.vcpu.vm_ops {
            if vm_ops.guest_mem_write(gpa, data).is_err() {
                vm_ops
                    .mmio_write(gpa, data)
                    .map_err(|e| PlatformError::MemoryWriteFailure(e.into()))?;
            }
        }

        Ok(())
    }

    fn cpu_state(&self, cpu_id: usize) -> Result<Self::CpuState, PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::GetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        let regs = self
            .vcpu
            .get_regs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;
        let sregs = self
            .vcpu
            .get_sregs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;

        debug!("mshv emulator: Getting new CPU state");
        debug!("mshv emulator: {:#x?}", regs);

        Ok(EmulatorCpuState { regs, sregs })
    }

    fn set_cpu_state(&self, cpu_id: usize, state: Self::CpuState) -> Result<(), PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::SetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        debug!("mshv emulator: Setting new CPU state");
        debug!("mshv emulator: {:#x?}", state.regs);

        self.vcpu
            .set_regs(&state.regs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))?;
        self.vcpu
            .set_sregs(&state.sregs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))
    }

    fn gva_to_gpa(&self, gva: u64) -> Result<u64, PlatformError> {
        self.translate(gva)
    }

    fn fetch(&self, _ip: u64, _instruction_bytes: &mut [u8]) -> Result<(), PlatformError> {
        Err(PlatformError::MemoryReadFailure(anyhow!("unimplemented")))
    }
}

/// Wrapper over Mshv VM ioctls.
pub struct MshvVm {
    fd: Arc<VmFd>,
    msrs: Vec<MsrEntry>,
    dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
}

impl MshvVm {
    ///
    /// Creates an in-kernel device.
    ///
    /// See the documentation for `MSHV_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<VfioDeviceFd> {
        let device_fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        Ok(VfioDeviceFd::new_from_mshv(device_fd))
    }
}

///
/// Implementation of Vm trait for Mshv
///
/// # Examples
///
/// ```
/// # extern crate hypervisor;
/// # use hypervisor::mshv::MshvHypervisor;
/// # use std::sync::Arc;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl vm::Vm for MshvVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
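    /// # Example
    ///
    /// A sketch with an arbitrary GSI; not run as a doctest:
    /// ```no_run
    /// # use hypervisor::mshv::MshvHypervisor;
    /// # use std::sync::Arc;
    /// # use vmm_sys_util::eventfd::EventFd;
    /// let mshv = MshvHypervisor::new().unwrap();
    /// let hypervisor = Arc::new(mshv);
    /// let vm = hypervisor.create_vm().unwrap();
    /// let evt = EventFd::new(libc::EFD_NONBLOCK).unwrap();
    /// vm.register_irqfd(&evt, 30).unwrap();
    /// ```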
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;

        Ok(())
    }
    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vcpu_fd = self
            .fd
            .create_vcpu(id)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = MshvVcpu {
            fd: vcpu_fd,
            vp_index: id,
            cpuid: Vec::new(),
            msrs: self.msrs.clone(),
            vm_ops,
        };
        Ok(Arc::new(vcpu))
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
        Ok(())
    }
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<DataMatch>,
    ) -> vm::Result<()> {
        let addr = &mshv_ioctls::IoEventAddress::from(*addr);
        debug!(
            "register_ioevent fd {} addr {:x?} datamatch {:?}",
            fd.as_raw_fd(),
            addr,
            datamatch
        );
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(mshv_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(mshv_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }
    /// Unregister an event from a certain address it has been previously registered to.
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        let addr = &mshv_ioctls::IoEventAddress::from(*addr);
        debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);

        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    /// Creates a guest physical memory region.
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // Keep track of the slots regardless of whether they are read-only.
        // For a read-only slot the hypervisor can still enable the dirty bits,
        // but a VM exit happens before the dirty bits are set.
        self.dirty_log_slots.write().unwrap().insert(
            user_memory_region.guest_pfn,
            MshvDirtyLogSlot {
                guest_pfn: user_memory_region.guest_pfn,
                memory_size: user_memory_region.size,
            },
        );

        self.fd
            .map_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
        Ok(())
    }

    /// Removes a guest physical memory region.
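    ///
    /// # Example
    ///
    /// A sketch with hypothetical, page-aligned addresses; not run as a doctest:
    /// ```no_run
    /// # use hypervisor::mshv::MshvHypervisor;
    /// # use std::sync::Arc;
    /// let mshv = MshvHypervisor::new().unwrap();
    /// let hypervisor = Arc::new(mshv);
    /// let vm = hypervisor.create_vm().unwrap();
    /// let backing = vec![0u8; 0x2000];
    /// let host_addr = backing.as_ptr() as u64;
    /// vm.create_user_memory_region(vm.make_user_memory_region(
    ///     0, 0x10_0000, 0x2000, host_addr, false, false,
    /// ))
    /// .unwrap();
    /// vm.remove_user_memory_region(vm.make_user_memory_region(
    ///     0, 0x10_0000, 0x2000, host_addr, false, false,
    /// ))
    /// .unwrap();
    /// ```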
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots
            .write()
            .unwrap()
            .remove(&user_memory_region.guest_pfn);

        self.fd
            .unmap_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
        Ok(())
    }

    fn make_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        _log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        let mut flags = HV_MAP_GPA_READABLE | HV_MAP_GPA_EXECUTABLE;
        if !readonly {
            flags |= HV_MAP_GPA_WRITABLE;
        }

        mshv_user_mem_region {
            flags,
            guest_pfn: guest_phys_addr >> PAGE_SHIFT,
            size: memory_size,
            userspace_addr,
        }
        .into()
    }

    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = mshv_create_device {
            type_: mshv_device_type_MSHV_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match config {
            InterruptSourceConfig::MsiIrq(cfg) => mshv_msi_routing_entry {
                gsi,
                address_lo: cfg.low_addr,
                address_hi: cfg.high_addr,
                data: cfg.data,
            }
            .into(),
            _ => {
                unreachable!()
            }
        }
    }

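    // mshv_msi_routing is a flexible-array-member struct: a header carrying
    // `nr` followed by `nr` mshv_msi_routing_entry items. It therefore has to
    // be allocated through vec_with_array_field and filled in manually below.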
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut msi_routing =
            vec_with_array_field::<mshv_msi_routing, mshv_msi_routing_entry>(entries.len());
        msi_routing[0].nr = entries.len() as u32;

        let entries: Vec<mshv_msi_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Mshv(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: msi_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [mshv_msi_routing_entry] =
                msi_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_msi_routing(&msi_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }
    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        self.fd
            .enable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
    }
    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        // Before disabling dirty page tracking we need to set the dirty bits
        // in the hypervisor. This is a requirement from Microsoft Hypervisor.
        for (_, s) in dirty_log_slots.iter() {
            self.fd
                .get_dirty_log(s.guest_pfn, s.memory_size as usize, DIRTY_BITMAP_SET_DIRTY)
                .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        }
        self.fd
            .disable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        Ok(())
    }
    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(
                base_gpa >> PAGE_SHIFT,
                memory_size as usize,
                DIRTY_BITMAP_CLEAR_DIRTY,
            )
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }
    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        Ok(ClockData::Mshv)
    }
    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, _data: &ClockData) -> vm::Result<()> {
        Ok(())
    }
    /// Downcast to the underlying MshvVm type
    fn as_any(&self) -> &dyn Any {
        self
    }
}
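
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal sketch of the UserMemoryRegion <-> mshv_user_mem_region round
    // trip, using made-up, page-aligned values. It exercises only the pure
    // conversion code and needs no /dev/mshv access.
    #[test]
    fn test_user_memory_region_conversion() {
        let mshv_region = mshv_user_mem_region {
            flags: HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE,
            guest_pfn: 0x100,
            size: 0x2000,
            userspace_addr: 0x7f00_0000_0000,
        };

        let region: UserMemoryRegion = mshv_region.into();
        assert_eq!(region.guest_phys_addr, 0x100 << PAGE_SHIFT);
        assert_ne!(region.flags & USER_MEMORY_REGION_READ, 0);
        assert_ne!(region.flags & USER_MEMORY_REGION_WRITE, 0);
        assert_eq!(region.flags & USER_MEMORY_REGION_EXECUTE, 0);

        let back: mshv_user_mem_region = region.into();
        assert_eq!(back.guest_pfn, mshv_region.guest_pfn);
        assert_eq!(back.size, mshv_region.size);
        assert_eq!(back.userspace_addr, mshv_region.userspace_addr);
    }
}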