// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//

#![allow(dead_code)]
#![allow(unused_imports)]
#![allow(unused_variables)]
#![allow(unused_macros)]
#![allow(non_upper_case_globals)]

use crate::arch::emulator::{EmulationError, PlatformEmulator, PlatformError};
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::emulator::{Emulator, EmulatorCpuState};
use crate::cpu;
use crate::cpu::Vcpu;
use crate::hypervisor;
use crate::vm::{self, VmmOps};
pub use mshv_bindings::*;
use mshv_ioctls::{set_registers_64, InterruptRequest, Mshv, VcpuFd, VmFd};
use serde_derive::{Deserialize, Serialize};
use std::sync::Arc;
use vm::DataMatch;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
use crate::device;
use std::convert::TryInto;
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
pub use x86_64::VcpuMshvState as CpuState;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;
// For emulating irqfd and ioeventfd
use std::collections::HashMap;
use std::fs::File;
use std::io;
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::sync::{Mutex, RwLock};
use std::thread;

pub const PAGE_SHIFT: usize = 12;

#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)]
pub struct HvState {
    hypercall_page: u64,
}

pub use HvState as VmState;

struct IrqfdCtrlEpollHandler {
    vm_fd: Arc<VmFd>, /* For issuing hypercall */
    irqfd: EventFd,   /* Registered by caller */
    kill: EventFd,    /* Created by us, signals thread exit */
    epoll_fd: RawFd,  /* epoll fd */
    gsi: u32,
    gsi_routes: Arc<RwLock<HashMap<u32, MshvIrqRoutingEntry>>>,
}

fn register_listener(
    epoll_fd: RawFd,
    fd: RawFd,
    ev_type: epoll::Events,
    data: u64,
) -> std::result::Result<(), io::Error> {
    epoll::ctl(
        epoll_fd,
        epoll::ControlOptions::EPOLL_CTL_ADD,
        fd,
        epoll::Event::new(ev_type, data),
    )
}

const KILL_EVENT: u16 = 1;
const IRQFD_EVENT: u16 = 2;

impl IrqfdCtrlEpollHandler {
    fn assert_virtual_interrupt(&self, e: &MshvIrqRoutingEntry) -> vm::Result<()> {
        // The GSI routing entry contains raw MSI information. We still need
        // to translate that into an APIC ID, vector, etc.

        debug!("Inject {:x?}", e);

        let MshvIrqRouting::Msi(msi) = e.route;

        /* Make an assumption here: the high word of the MSI address is
         * always zero, since the standard 0xfeexxxxx region sits below
         * 4 GiB. */
        if msi.address_hi != 0 {
            panic!("MSI high address part is not zero");
        }

        let typ = self
            .get_interrupt_type(self.get_delivery_mode(msi.data))
            .unwrap();
        let apic_id = self.get_destination(msi.address_lo);
        let vector = self.get_vector(msi.data);
        let level_triggered = self.get_trigger_mode(msi.data);
        let logical_destination_mode = self.get_destination_mode(msi.address_lo);

        debug!(
            "{:x} {:x} {:x} {} {}",
            typ, apic_id, vector, level_triggered, logical_destination_mode
        );

        let request = InterruptRequest {
            interrupt_type: typ,
            apic_id,
            vector: vector.into(),
            level_triggered,
            logical_destination_mode,
            long_mode: false,
        };

        self.vm_fd
            .request_virtual_interrupt(&request)
            .map_err(|e| vm::HypervisorVmError::AsserttVirtualInterrupt(e.into()))?;

        Ok(())
    }

    fn run_ctrl(&mut self) {
        self.epoll_fd = epoll::create(true).unwrap();
        // SAFETY: we just created this fd and nothing else owns it.
        let epoll_file = unsafe { File::from_raw_fd(self.epoll_fd) };

        register_listener(
            epoll_file.as_raw_fd(),
            self.kill.as_raw_fd(),
            epoll::Events::EPOLLIN,
            u64::from(KILL_EVENT),
        )
        .unwrap_or_else(|err| {
            error!(
                "IrqfdCtrlEpollHandler: failed to register kill listener: {:?}",
                err
            );
        });

        register_listener(
            epoll_file.as_raw_fd(),
            self.irqfd.as_raw_fd(),
            epoll::Events::EPOLLIN,
            u64::from(IRQFD_EVENT),
        )
        .unwrap_or_else(|err| {
            error!(
                "IrqfdCtrlEpollHandler: failed to register irqfd listener: {:?}",
                err
            );
        });

        let mut events = vec![epoll::Event::new(epoll::Events::empty(), 0); 2];

        'epoll: loop {
            let num_events = match epoll::wait(epoll_file.as_raw_fd(), -1, &mut events[..]) {
                Ok(res) => res,
                Err(e) => {
                    if e.kind() == std::io::ErrorKind::Interrupted {
                        continue;
                    }
                    panic!("irqfd epoll wait failed: {:?}", e);
                }
            };

            for event in events.iter().take(num_events) {
                let ev_type = event.data as u16;

                match ev_type {
                    KILL_EVENT => {
                        break 'epoll;
                    }
                    IRQFD_EVENT => {
                        debug!("IRQFD_EVENT received, injecting into the guest");
                        let _ = self.irqfd.read().unwrap();
                        let gsi_routes = self.gsi_routes.read().unwrap();

                        if let Some(e) = gsi_routes.get(&self.gsi) {
                            self.assert_virtual_interrupt(e).unwrap();
                        } else {
                            debug!("No routing info found for GSI {}", self.gsi);
                        }
                    }
                    _ => {
                        error!("Unknown event");
                    }
                }
            }
        }
    }

    ///
    /// See Intel SDM vol3 10.11.1
    /// We assume the APIC ID and the Hyper-V Vcpu ID are the same value
    ///
    fn get_destination(&self, message_address: u32) -> u64 {
        ((message_address >> 12) & 0xff).into()
    }

    fn get_destination_mode(&self, message_address: u32) -> bool {
        if (message_address >> 2) & 0x1 == 0x1 {
            return true;
        }

        false
    }

    fn get_vector(&self, message_data: u32) -> u8 {
        (message_data & 0xff) as u8
    }

    ///
    /// True means level triggered
    ///
    fn get_trigger_mode(&self, message_data: u32) -> bool {
        if (message_data >> 15) & 0x1 == 0x1 {
            return true;
        }

        false
    }

    fn get_delivery_mode(&self, message_data: u32) -> u8 {
        ((message_data & 0x700) >> 8) as u8
    }
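
    // The helpers above slice the architectural MSI address/data words
    // apart. A worked example with made-up, illustrative values: for an MSI
    // with address_lo = 0xfee1_f00c and data = 0x0000_80b1,
    //
    //   get_destination      -> (0xfee1_f00c >> 12) & 0xff = 0x1f (APIC ID 31)
    //   get_destination_mode -> bit 2 is set, so logical destination mode
    //   get_vector           -> 0xb1
    //   get_trigger_mode     -> bit 15 is set, so level triggered
    //   get_delivery_mode    -> (0x80b1 & 0x700) >> 8 = 0 (fixed)
    //
    // get_interrupt_type() below then maps delivery mode 0 to
    // hv_interrupt_type_HV_X64_INTERRUPT_TYPE_FIXED.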
    ///
    /// Translate from the architecturally defined delivery mode to the
    /// Hyper-V type. See Intel SDM vol3 10.11.2
    ///
    fn get_interrupt_type(&self, delivery_mode: u8) -> Option<hv_interrupt_type> {
        match delivery_mode {
            0 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_FIXED),
            1 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_LOWESTPRIORITY),
            2 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_SMI),
            4 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_NMI),
            5 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_INIT),
            7 => Some(hv_interrupt_type_HV_X64_INTERRUPT_TYPE_EXTINT),
            _ => None,
        }
    }
}

/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    /// Create a hypervisor based on Mshv
    pub fn new() -> hypervisor::Result<MshvHypervisor> {
        let mshv_obj =
            Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
        Ok(MshvHypervisor { mshv: mshv_obj })
    }
}

/// Implementation of the Hypervisor trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
///
impl hypervisor::Hypervisor for MshvHypervisor {
    /// Create an mshv VM object and return it as a Vm trait object
    /// Example
    /// # extern crate hypervisor;
    /// # use hypervisor::MshvHypervisor;
    /// use hypervisor::MshvVm;
    /// let hypervisor = MshvHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    ///
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.mshv.create_vm() {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        // Pre-populate the MSR entries with the indices supported by the
        // hypervisor; the values are filled in when vCPU state is saved.
        let msr_list = self.get_msr_list()?;
        let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
        let mut msrs = MsrEntries::new(num_msrs);
        let indices = msr_list.as_slice();
        let msr_entries = msrs.as_mut_slice();
        for (pos, index) in indices.iter().enumerate() {
            msr_entries[pos].index = *index;
        }
        let vm_fd = Arc::new(fd);

        let irqfds = Mutex::new(HashMap::new());
        let ioeventfds = Arc::new(RwLock::new(HashMap::new()));
        let gsi_routes = Arc::new(RwLock::new(HashMap::new()));

        Ok(Arc::new(MshvVm {
            fd: vm_fd,
            msrs,
            irqfds,
            ioeventfds,
            gsi_routes,
            hv_state: hv_state_init(),
            vmmops: None,
        }))
    }
    ///
    /// Get the supported CpuID
    ///
    fn get_cpuid(&self) -> hypervisor::Result<CpuId> {
        Ok(CpuId::new(1))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by the hypervisor.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

// A software emulated TLB.
// This is mostly used by the instruction emulator to cache gva to gpa
// translations passed from the hypervisor.
#[derive(Clone)]
struct SoftTLB {
    addr_map: HashMap<u64, u64>,
}

impl SoftTLB {
    fn new() -> SoftTLB {
        SoftTLB {
            addr_map: HashMap::new(),
        }
    }

    // Adds a gva -> gpa mapping into the TLB.
    fn add_mapping(&mut self, gva: u64, gpa: u64) -> Result<(), PlatformError> {
        self.addr_map.insert(gva, gpa);
        Ok(())
    }

    // Do the actual gva -> gpa translation
    fn translate(&self, gva: u64) -> Result<u64, PlatformError> {
        self.addr_map
            .get(&gva)
            .copied()
            .ok_or_else(|| PlatformError::UnmappedGVA(anyhow!("{:#?}", gva)))

        // TODO Check if we could fall back to e.g. a hypercall for doing
        // the translation for us.
    }

    // Flush the TLB; all mappings are removed.
    fn flush(&mut self) -> Result<(), PlatformError> {
        self.addr_map.clear();

        Ok(())
    }
}

#[allow(clippy::type_complexity)]
/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    fd: VcpuFd,
    vp_index: u8,
    cpuid: CpuId,
    msrs: MsrEntries,
    ioeventfds: Arc<RwLock<HashMap<IoEventAddress, (Option<DataMatch>, EventFd)>>>,
    gsi_routes: Arc<RwLock<HashMap<u32, MshvIrqRoutingEntry>>>,
    hv_state: Arc<RwLock<HvState>>, // Mshv State
    vmmops: Option<Arc<Box<dyn vm::VmmOps>>>,
}

/// Implementation of the Vcpu trait for Microsoft Hypervisor
/// Example:
/// #[cfg(feature = "mshv")]
/// extern crate hypervisor;
/// let mshv = hypervisor::mshv::MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0).unwrap();
/// let regs = vcpu.get_regs().unwrap();
///
impl cpu::Vcpu for MshvVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        self.fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        self.fd
            .set_regs(regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        self.fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        self.fd
            .set_sregs(sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        self.fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        self.fd
            .set_fpu(fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut MsrEntries) -> cpu::Result<usize> {
        self.fd
            .get_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &MsrEntries) -> cpu::Result<usize> {
        self.fd
            .set_msrs(msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        // SAFETY: hv_message is a plain-old-data struct, so an all-zero
        // value is valid; the hypervisor fills it in on each exit.
        // TODO don't zero it every time we enter this function.
        let hv_message: hv_message = unsafe { std::mem::zeroed() };
        match self.fd.run(hv_message) {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    if unsafe { access_info.__bindgen_anon_1.string_op() } == 1 {
                        panic!("String IN/OUT not supported");
                    }
                    if unsafe { access_info.__bindgen_anon_1.rep_prefix() } == 1 {
                        panic!("Rep IN/OUT not supported");
                    }
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    if is_write {
                        let data = (info.rax as u32).to_le_bytes();
                        if let Some(vmmops) = &self.vmmops {
                            vmmops
                                .pio_write(port.into(), &data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }
                    } else {
                        if let Some(vmmops) = &self.vmmops {
                            vmmops
                                .pio_read(port.into(), &mut data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }

                        let v = u32::from_le_bytes(data);
                        /* Preserve the untouched high bits in EAX but clear
                         * out the high bits in RAX. E.g. for a one byte read
                         * (len == 1) with rax == 0xdead_beef and v == 0x42,
                         * mask is 0xff, so eax becomes 0xdead_be42 and
                         * ret_rax becomes 0x0000_0000_dead_be42. */
                        let mask = 0xffffffff >> (32 - len * 8);
                        let eax = (info.rax as u32 & !mask) | (v & mask);
                        ret_rax = eax as u64;
                    }

                    let insn_len = info.header.instruction_length() as u64;

                    /* Advance RIP and update RAX */
                    let arr_reg_name_value = [
                        (
                            hv_register_name::HV_X64_REGISTER_RIP,
                            info.header.rip + insn_len,
                        ),
                        (hv_register_name::HV_X64_REGISTER_RAX, ret_rax),
                    ];
                    set_registers_64!(self.fd, arr_reg_name_value)
                        .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                    Ok(cpu::VmExit::Ignore)
                }
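                // MMIO accesses surface as unmapped-GPA intercepts. The arm
                // below handles them in three steps: seed a fresh SoftTLB
                // with the GVA -> GPA translation the hypervisor provides,
                // emulate only the trapped instruction against that context,
                // then write the resulting CPU state back to the vCPU.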
                hv_message_type_HVMSG_UNMAPPED_GPA => {
                    let info = x.to_memory_info().unwrap();
                    let insn_len = info.instruction_byte_count as usize;
                    assert!(insn_len > 0 && insn_len <= 16);

                    let mut context = MshvEmulatorContext {
                        vcpu: self,
                        tlb: SoftTLB::new(),
                    };

                    // Add the GVA <-> GPA mapping.
                    context
                        .tlb
                        .add_mapping(info.guest_virtual_address, info.guest_physical_address)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Create a new emulator.
                    let mut emul = Emulator::new(&mut context);

                    // Emulate the trapped instruction, and only the first one.
                    let new_state = emul
                        .emulate_first_insn(self.vp_index as usize, &info.instruction_bytes)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Set the CPU state back.
                    context
                        .set_cpu_state(self.vp_index as usize, new_state)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
                    let info = x.to_cpuid_info().unwrap();
                    debug!("cpuid eax: {:x}", info.rax);
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
                    let info = x.to_msr_info().unwrap();
                    if info.header.intercept_access_type == 0 {
                        debug!("msr read: {:x}", info.msr_number);
                    } else {
                        debug!("msr write: {:x}", info.msr_number);
                    }
                    Ok(cpu::VmExit::Ignore)
                }
                hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
                    // TODO: Handler for VMCALL here.
                    let info = x.to_exception_info().unwrap();
                    debug!("Exception Info {:?}", info.exception_vector);
                    Ok(cpu::VmExit::Ignore)
                }
                exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unhandled VCPU exit {:?}",
                    exit
                ))),
            },

            Err(e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, _cpuid: &CpuId) -> cpu::Result<()> {
        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<CpuId> {
        Ok(self.cpuid.clone())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        self.fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, lapic: &LapicState) -> cpu::Result<()> {
        self.fd
            .set_lapic(lapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<Xsave> {
        self.fd
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &Xsave) -> cpu::Result<()> {
        self.fd
            .set_xsave(*xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }
    ///
    /// Set CPU state
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        self.set_msrs(&state.msrs)?;
        self.set_vcpu_events(&state.vcpu_events)?;
        self.set_regs(&state.regs)?;
        self.set_sregs(&state.sregs)?;
        self.set_fpu(&state.fpu)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic)?;
        self.set_xsave(&state.xsave)?;
        self.fd
            .set_debug_regs(&state.dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
        Ok(())
    }
    ///
    /// Get CPU State
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xcrs = self.get_xcrs()?;
        let fpu = self.get_fpu()?;
        let vcpu_events = self.get_vcpu_events()?;
        let mut msrs = self.msrs.clone();
        self.get_msrs(&mut msrs)?;
        let lapic = self.get_lapic()?;
        let xsave = self.get_xsave()?;
        let dbg = self
            .fd
            .get_debug_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;
        Ok(CpuState {
            msrs,
            vcpu_events,
            regs,
            sregs,
            fpu,
            xcrs,
            lapic,
            dbg,
            xsave,
        })
    }
}

struct MshvEmulatorContext<'a> {
    vcpu: &'a MshvVcpu,
    tlb: SoftTLB,
}

/// Platform emulation for Hyper-V
impl<'a> PlatformEmulator for MshvEmulatorContext<'a> {
    type CpuState = EmulatorCpuState;

    fn read_memory(&self, gva: u64, data: &mut [u8]) -> Result<(), PlatformError> {
        let gpa = self.tlb.translate(gva)?;
        debug!(
            "mshv emulator: memory read {} bytes from [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some(vmmops) = &self.vcpu.vmmops {
            vmmops
                .mmio_read(gpa, data)
                .map_err(|e| PlatformError::MemoryReadFailure(e.into()))?;
        }

        Ok(())
    }

    fn write_memory(&mut self, gva: u64, data: &[u8]) -> Result<(), PlatformError> {
        let gpa = self.tlb.translate(gva)?;
        debug!(
            "mshv emulator: memory write {} bytes at [{:#x} -> {:#x}]",
            data.len(),
            gva,
            gpa
        );

        if let Some((datamatch, efd)) = self
            .vcpu
            .ioeventfds
            .read()
            .unwrap()
            .get(&IoEventAddress::Mmio(gpa))
        {
            debug!("ioevent {:x} {:x?} {}", gpa, datamatch, efd.as_raw_fd());

            /* TODO: use datamatch to provide the correct semantics */
            efd.write(1).unwrap();
        }

        if let Some(vmmops) = &self.vcpu.vmmops {
            vmmops
                .mmio_write(gpa, data)
                .map_err(|e| PlatformError::MemoryWriteFailure(e.into()))?;
        }

        Ok(())
    }

    fn cpu_state(&self, cpu_id: usize) -> Result<Self::CpuState, PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::GetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        let regs = self
            .vcpu
            .get_regs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;
        let sregs = self
            .vcpu
            .get_sregs()
            .map_err(|e| PlatformError::GetCpuStateFailure(e.into()))?;

        debug!("mshv emulator: Getting new CPU state");
        debug!("mshv emulator: {:#x?}", regs);

        Ok(EmulatorCpuState { regs, sregs })
    }

    fn set_cpu_state(&self, cpu_id: usize, state: Self::CpuState) -> Result<(), PlatformError> {
        if cpu_id != self.vcpu.vp_index as usize {
            return Err(PlatformError::SetCpuStateFailure(anyhow!(
                "CPU id mismatch {:?} {:?}",
                cpu_id,
                self.vcpu.vp_index
            )));
        }

        debug!("mshv emulator: Setting new CPU state");
        debug!("mshv emulator: {:#x?}", state.regs);

        self.vcpu
            .set_regs(&state.regs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))?;
        self.vcpu
            .set_sregs(&state.sregs)
            .map_err(|e| PlatformError::SetCpuStateFailure(e.into()))
    }

    fn gva_to_gpa(&self, gva: u64) -> Result<u64, PlatformError> {
        self.tlb.translate(gva)
    }

    fn fetch(&self, _ip: u64, _instruction_bytes: &mut [u8]) -> Result<(), PlatformError> {
        Err(PlatformError::MemoryReadFailure(anyhow!("unimplemented")))
    }
}

#[allow(clippy::type_complexity)]
/// Wrapper over Mshv VM ioctls.
pub struct MshvVm {
    fd: Arc<VmFd>,
    msrs: MsrEntries,
    // Emulated irqfds
    irqfds: Mutex<HashMap<u32, (EventFd, EventFd)>>,
    // Emulated ioeventfds
    ioeventfds: Arc<RwLock<HashMap<IoEventAddress, (Option<DataMatch>, EventFd)>>>,
    // GSI routing information
    gsi_routes: Arc<RwLock<HashMap<u32, MshvIrqRoutingEntry>>>,
    // Hypervisor State
    hv_state: Arc<RwLock<HvState>>,
    vmmops: Option<Arc<Box<dyn vm::VmmOps>>>,
}

fn hv_state_init() -> Arc<RwLock<HvState>> {
    Arc::new(RwLock::new(HvState { hypercall_page: 0 }))
}

///
/// Implementation of the Vm trait for Mshv
/// Example:
/// #[cfg(feature = "mshv")]
/// # extern crate hypervisor;
/// # use hypervisor::MshvHypervisor;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor: Arc<dyn hypervisor::Hypervisor> = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let state = vm.state().unwrap();
///
impl vm::Vm for MshvVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    /// Not required on MSHV, so this is a no-op.
    ///
    fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Creates an in-kernel interrupt controller.
    /// Not required on MSHV, so this is a no-op.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        Ok(())
    }
    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        let dup_fd = fd.try_clone().unwrap();
        let kill_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();

        let mut ctrl_handler = IrqfdCtrlEpollHandler {
            vm_fd: self.fd.clone(),
            kill: kill_fd.try_clone().unwrap(),
            irqfd: fd.try_clone().unwrap(),
            epoll_fd: 0,
            gsi,
            gsi_routes: self.gsi_routes.clone(),
        };

        debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        thread::Builder::new()
            .name(format!("irqfd_{}", gsi))
            .spawn(move || ctrl_handler.run_ctrl())
            .unwrap();

        self.irqfds.lock().unwrap().insert(gsi, (dup_fd, kill_fd));

        Ok(())
    }
    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, _fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("unregister_irqfd fd {} gsi {}", _fd.as_raw_fd(), gsi);
        let (_, kill_fd) = self.irqfds.lock().unwrap().remove(&gsi).unwrap();
        kill_fd.write(1).unwrap();
        Ok(())
    }
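
    // Sketch of the emulated irqfd lifecycle, with illustrative GSI and MSI
    // values (the `vm` handle is assumed to be an MshvVm behind the Vm
    // trait):
    //
    //   let fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
    //   vm.set_gsi_routing(&[MshvIrqRoutingEntry {
    //       gsi: 5,
    //       route: MshvIrqRouting::Msi(MshvIrqRoutingMsi {
    //           address_lo: 0xfee0_0000,
    //           address_hi: 0,
    //           data: 0x20,
    //       }),
    //   }])
    //   .unwrap();
    //   vm.register_irqfd(&fd, 5).unwrap();
    //   // Writing to the EventFd wakes the per-GSI epoll thread, which
    //   // looks up the route and calls request_virtual_interrupt().
    //   fd.write(1).unwrap();
    //   vm.unregister_irqfd(&fd, 5).unwrap();
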
    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vmmops: Option<Arc<Box<dyn VmmOps>>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vcpu_fd = self
            .fd
            .create_vcpu(id)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = MshvVcpu {
            fd: vcpu_fd,
            vp_index: id,
            cpuid: CpuId::new(1),
            msrs: self.msrs.clone(),
            ioeventfds: self.ioeventfds.clone(),
            gsi_routes: self.gsi_routes.clone(),
            hv_state: self.hv_state.clone(),
            vmmops,
        };
        Ok(Arc::new(vcpu))
    }
    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        Ok(())
    }
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<DataMatch>,
    ) -> vm::Result<()> {
        let dup_fd = fd.try_clone().unwrap();

        debug!(
            "register_ioevent fd {} addr {:x?} datamatch {:?}",
            fd.as_raw_fd(),
            addr,
            datamatch
        );

        self.ioeventfds
            .write()
            .unwrap()
            .insert(*addr, (datamatch, dup_fd));
        Ok(())
    }
    /// Unregister an event from a certain address it has been previously registered to.
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);
        self.ioeventfds.write().unwrap().remove(addr).unwrap();
        Ok(())
    }

    /// Creates/modifies a guest physical memory slot.
    fn set_user_memory_region(&self, user_memory_region: MemoryRegion) -> vm::Result<()> {
        self.fd
            .map_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::SetUserMemory(e.into()))?;
        Ok(())
    }

    fn make_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        _log_dirty_pages: bool,
    ) -> MemoryRegion {
        let mut flags = HV_MAP_GPA_READABLE | HV_MAP_GPA_EXECUTABLE;
        if !readonly {
            flags |= HV_MAP_GPA_WRITABLE;
        }

        mshv_user_mem_region {
            flags,
            // MSHV expects a page frame number, e.g. a guest_phys_addr of
            // 0x10_0000 becomes guest_pfn 0x100 with PAGE_SHIFT == 12.
            guest_pfn: guest_phys_addr >> PAGE_SHIFT,
            size: memory_size,
            userspace_addr,
        }
    }

    fn create_passthrough_device(&self) -> vm::Result<Arc<dyn device::Device>> {
        Err(vm::HypervisorVmError::CreatePassthroughDevice(anyhow!(
            "No passthrough support"
        )))
    }

    fn set_gsi_routing(&self, irq_routing: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut routes = self.gsi_routes.write().unwrap();

        routes.clear();

        for r in irq_routing {
            debug!("gsi routing {:x?}", r);
            routes.insert(r.gsi, *r);
        }

        Ok(())
    }
    ///
    /// Get the Vm state. Returns VM specific data.
    ///
    fn state(&self) -> vm::Result<VmState> {
        Ok(*self.hv_state.read().unwrap())
    }
    ///
    /// Set the VM state
    ///
    fn set_state(&self, state: VmState) -> vm::Result<()> {
        self.hv_state.write().unwrap().hypercall_page = state.hypercall_page;
        Ok(())
    }
    ///
    /// Get the dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, _slot: u32, _memory_size: u64) -> vm::Result<Vec<u64>> {
        Err(vm::HypervisorVmError::GetDirtyLog(anyhow!(
            "get_dirty_log not implemented"
        )))
    }
}

pub use hv_cpuid_entry as CpuIdEntry;

#[derive(Copy, Clone, Debug)]
pub struct MshvIrqRoutingMsi {
    pub address_lo: u32,
    pub address_hi: u32,
    pub data: u32,
}

#[derive(Copy, Clone, Debug)]
pub enum MshvIrqRouting {
    Msi(MshvIrqRoutingMsi),
}

#[derive(Copy, Clone, Debug)]
pub struct MshvIrqRoutingEntry {
    pub gsi: u32,
    pub route: MshvIrqRouting,
}

pub type IrqRoutingEntry = MshvIrqRoutingEntry;

pub const CPUID_FLAG_VALID_INDEX: u32 = 0;
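
// A minimal unit test for the SoftTLB above. It only exercises the
// in-process HashMap cache, so it needs no /dev/mshv access and should run
// anywhere the crate builds (assuming PlatformError implements Debug, as
// its use with unwrap() elsewhere in this crate suggests).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_soft_tlb_translations() {
        let mut tlb = SoftTLB::new();

        // A translation is only available once a mapping has been added.
        assert!(tlb.translate(0x1000).is_err());
        tlb.add_mapping(0x1000, 0x8000).unwrap();
        assert_eq!(tlb.translate(0x1000).unwrap(), 0x8000);

        // Re-adding a GVA overwrites the cached GPA.
        tlb.add_mapping(0x1000, 0x9000).unwrap();
        assert_eq!(tlb.translate(0x1000).unwrap(), 0x9000);

        // Flushing drops all cached translations.
        tlb.flush().unwrap();
        assert!(tlb.translate(0x1000).is_err());
    }
}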