1 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause 2 // 3 // Copyright © 2020, Microsoft Corporation 4 // 5 6 use std::any::Any; 7 use std::collections::HashMap; 8 #[cfg(feature = "sev_snp")] 9 use std::num::NonZeroUsize; 10 use std::sync::{Arc, RwLock}; 11 12 #[cfg(feature = "sev_snp")] 13 use arc_swap::ArcSwap; 14 use mshv_bindings::*; 15 use mshv_ioctls::{set_registers_64, InterruptRequest, Mshv, NoDatamatch, VcpuFd, VmFd, VmType}; 16 use vfio_ioctls::VfioDeviceFd; 17 use vm::DataMatch; 18 #[cfg(feature = "sev_snp")] 19 use vm_memory::bitmap::AtomicBitmap; 20 21 use crate::arch::emulator::PlatformEmulator; 22 #[cfg(target_arch = "x86_64")] 23 use crate::arch::x86::emulator::Emulator; 24 use crate::mshv::emulator::MshvEmulatorContext; 25 use crate::vm::{self, InterruptSourceConfig, VmOps}; 26 use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType}; 27 #[cfg(feature = "sev_snp")] 28 mod snp_constants; 29 // x86_64 dependencies 30 #[cfg(target_arch = "x86_64")] 31 pub mod x86_64; 32 // aarch64 dependencies 33 #[cfg(target_arch = "aarch64")] 34 pub mod aarch64; 35 #[cfg(target_arch = "x86_64")] 36 use std::fs::File; 37 use std::os::unix::io::AsRawFd; 38 #[cfg(target_arch = "aarch64")] 39 use std::sync::Mutex; 40 41 #[cfg(target_arch = "aarch64")] 42 pub use aarch64::VcpuMshvState; 43 #[cfg(feature = "sev_snp")] 44 use igvm_defs::IGVM_VHS_SNP_ID_BLOCK; 45 #[cfg(feature = "sev_snp")] 46 use snp_constants::*; 47 use vmm_sys_util::eventfd::EventFd; 48 #[cfg(target_arch = "x86_64")] 49 pub use x86_64::*; 50 #[cfg(target_arch = "x86_64")] 51 pub use x86_64::{emulator, VcpuMshvState}; 52 /// 53 /// Export generically-named wrappers of mshv-bindings for Unix-based platforms 54 /// 55 pub use { 56 mshv_bindings::mshv_create_device as CreateDevice, 57 mshv_bindings::mshv_device_attr as DeviceAttr, mshv_ioctls, mshv_ioctls::DeviceFd, 58 }; 59 60 #[cfg(target_arch = "x86_64")] 61 use crate::arch::x86::{CpuIdEntry, FpuState, MsrEntry}; 62 #[cfg(target_arch = "x86_64")] 63 use crate::ClockData; 64 use crate::{ 65 CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion, 66 USER_MEMORY_REGION_ADJUSTABLE, USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ, 67 USER_MEMORY_REGION_WRITE, 68 }; 69 70 pub const PAGE_SHIFT: usize = 12; 71 72 impl From<mshv_user_mem_region> for UserMemoryRegion { 73 fn from(region: mshv_user_mem_region) -> Self { 74 let mut flags: u32 = USER_MEMORY_REGION_READ | USER_MEMORY_REGION_ADJUSTABLE; 75 if region.flags & (1 << MSHV_SET_MEM_BIT_WRITABLE) != 0 { 76 flags |= USER_MEMORY_REGION_WRITE; 77 } 78 if region.flags & (1 << MSHV_SET_MEM_BIT_EXECUTABLE) != 0 { 79 flags |= USER_MEMORY_REGION_EXECUTE; 80 } 81 82 UserMemoryRegion { 83 guest_phys_addr: (region.guest_pfn << PAGE_SHIFT as u64) 84 + (region.userspace_addr & ((1 << PAGE_SHIFT) - 1)), 85 memory_size: region.size, 86 userspace_addr: region.userspace_addr, 87 flags, 88 ..Default::default() 89 } 90 } 91 } 92 93 #[cfg(target_arch = "x86_64")] 94 impl From<MshvClockData> for ClockData { 95 fn from(d: MshvClockData) -> Self { 96 ClockData::Mshv(d) 97 } 98 } 99 100 #[cfg(target_arch = "x86_64")] 101 impl From<ClockData> for MshvClockData { 102 fn from(ms: ClockData) -> Self { 103 match ms { 104 ClockData::Mshv(s) => s, 105 /* Needed in case other hypervisors are enabled */ 106 #[allow(unreachable_patterns)] 107 _ => unreachable!("MSHV clock data is not valid"), 108 } 109 } 110 } 111 112 impl From<UserMemoryRegion> for mshv_user_mem_region { 113 fn from(region: UserMemoryRegion) -> Self { 114 let mut 
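        // Worked example (hypothetical values): with PAGE_SHIFT == 12, a region
        // whose guest_phys_addr is 0x1_0000_2000 maps to guest_pfn 0x10_0002
        // below, while the `From<mshv_user_mem_region>` impl above reconstructs
        // the GPA as (guest_pfn << PAGE_SHIFT) + (userspace_addr & 0xfff).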
flags: u8 = 0; 115 if region.flags & USER_MEMORY_REGION_WRITE != 0 { 116 flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE; 117 } 118 if region.flags & USER_MEMORY_REGION_EXECUTE != 0 { 119 flags |= 1 << MSHV_SET_MEM_BIT_EXECUTABLE; 120 } 121 122 mshv_user_mem_region { 123 guest_pfn: region.guest_phys_addr >> PAGE_SHIFT, 124 size: region.memory_size, 125 userspace_addr: region.userspace_addr, 126 flags, 127 ..Default::default() 128 } 129 } 130 } 131 132 impl From<mshv_ioctls::IoEventAddress> for IoEventAddress { 133 fn from(a: mshv_ioctls::IoEventAddress) -> Self { 134 match a { 135 mshv_ioctls::IoEventAddress::Pio(x) => Self::Pio(x), 136 mshv_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x), 137 } 138 } 139 } 140 141 impl From<IoEventAddress> for mshv_ioctls::IoEventAddress { 142 fn from(a: IoEventAddress) -> Self { 143 match a { 144 IoEventAddress::Pio(x) => Self::Pio(x), 145 IoEventAddress::Mmio(x) => Self::Mmio(x), 146 } 147 } 148 } 149 150 impl From<VcpuMshvState> for CpuState { 151 fn from(s: VcpuMshvState) -> Self { 152 CpuState::Mshv(s) 153 } 154 } 155 156 impl From<CpuState> for VcpuMshvState { 157 fn from(s: CpuState) -> Self { 158 match s { 159 CpuState::Mshv(s) => s, 160 /* Needed in case other hypervisors are enabled */ 161 #[allow(unreachable_patterns)] 162 _ => panic!("CpuState is not valid"), 163 } 164 } 165 } 166 167 impl From<mshv_bindings::StandardRegisters> for crate::StandardRegisters { 168 fn from(s: mshv_bindings::StandardRegisters) -> Self { 169 crate::StandardRegisters::Mshv(s) 170 } 171 } 172 173 impl From<crate::StandardRegisters> for mshv_bindings::StandardRegisters { 174 fn from(e: crate::StandardRegisters) -> Self { 175 match e { 176 crate::StandardRegisters::Mshv(e) => e, 177 /* Needed in case other hypervisors are enabled */ 178 #[allow(unreachable_patterns)] 179 _ => panic!("StandardRegisters are not valid"), 180 } 181 } 182 } 183 184 impl From<mshv_user_irq_entry> for IrqRoutingEntry { 185 fn from(s: mshv_user_irq_entry) -> Self { 186 IrqRoutingEntry::Mshv(s) 187 } 188 } 189 190 impl From<IrqRoutingEntry> for mshv_user_irq_entry { 191 fn from(e: IrqRoutingEntry) -> Self { 192 match e { 193 IrqRoutingEntry::Mshv(e) => e, 194 /* Needed in case other hypervisors are enabled */ 195 #[allow(unreachable_patterns)] 196 _ => panic!("IrqRoutingEntry is not valid"), 197 } 198 } 199 } 200 201 #[cfg(target_arch = "aarch64")] 202 impl From<mshv_bindings::MshvRegList> for crate::RegList { 203 fn from(s: mshv_bindings::MshvRegList) -> Self { 204 crate::RegList::Mshv(s) 205 } 206 } 207 208 #[cfg(target_arch = "aarch64")] 209 impl From<crate::RegList> for mshv_bindings::MshvRegList { 210 fn from(e: crate::RegList) -> Self { 211 match e { 212 crate::RegList::Mshv(e) => e, 213 /* Needed in case other hypervisors are enabled */ 214 #[allow(unreachable_patterns)] 215 _ => panic!("RegList is not valid"), 216 } 217 } 218 } 219 220 #[cfg(target_arch = "aarch64")] 221 impl From<mshv_bindings::MshvVcpuInit> for crate::VcpuInit { 222 fn from(s: mshv_bindings::MshvVcpuInit) -> Self { 223 crate::VcpuInit::Mshv(s) 224 } 225 } 226 227 #[cfg(target_arch = "aarch64")] 228 impl From<crate::VcpuInit> for mshv_bindings::MshvVcpuInit { 229 fn from(e: crate::VcpuInit) -> Self { 230 match e { 231 crate::VcpuInit::Mshv(e) => e, 232 /* Needed in case other hypervisors are enabled */ 233 #[allow(unreachable_patterns)] 234 _ => panic!("VcpuInit is not valid"), 235 } 236 } 237 } 238 239 struct MshvDirtyLogSlot { 240 guest_pfn: u64, 241 memory_size: u64, 242 } 243 244 /// Wrapper over mshv system ioctls. 
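/// Holds the `Mshv` system handle backed by the `/dev/mshv` device node
/// (see [`MshvHypervisor::is_available`]).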
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by MSHV.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }

    fn create_vm_with_type_and_memory_int(
        &self,
        vm_type: u64,
        #[cfg(feature = "sev_snp")] _mem_size: Option<u64>,
    ) -> hypervisor::Result<Arc<dyn crate::Vm>> {
        let mshv_vm_type: VmType = match VmType::try_from(vm_type) {
            Ok(vm_type) => vm_type,
            Err(_) => return Err(hypervisor::HypervisorError::UnsupportedVmType()),
        };
        let fd: VmFd;
        loop {
            match self.mshv.create_vm_with_type(mshv_vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error is EINTR, the ioctl was interrupted and
                        // we have to retry; it can't be considered a regular
                        // error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        // Set additional partition properties for an SEV-SNP partition.
        #[cfg(target_arch = "x86_64")]
        if mshv_vm_type == VmType::Snp {
            let snp_policy = snp::get_default_snp_guest_policy();
            let vmgexit_offloads = snp::get_default_vmgexit_offload_features();
            // SAFETY: access union fields
            unsafe {
                debug!(
                    "Setting the partition isolation policy as: 0x{:x}",
                    snp_policy.as_uint64
                );
                fd.set_partition_property(
                    hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_POLICY,
                    snp_policy.as_uint64,
                )
                .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
                debug!(
                    "Setting the partition property to enable VMGEXIT offloads as: 0x{:x}",
                    vmgexit_offloads.as_uint64
                );
                fd.set_partition_property(
                    hv_partition_property_code_HV_PARTITION_PROPERTY_SEV_VMGEXIT_OFFLOADS,
                    vmgexit_offloads.as_uint64,
                )
                .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
            }
        }

        // The Microsoft Hypervisor's default behavior for an unimplemented MSR is
        // to send a fault to the guest when it tries to access it. It is possible
        // to override this behavior with a more suitable option, i.e. ignore
        // writes from the guest and return zero on attempts to read an
        // unimplemented MSR.
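        // With this policy a guest RDMSR of such an MSR simply reads back zero
        // and a WRMSR to it is silently dropped, instead of the guest taking a
        // fault.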
320 #[cfg(target_arch = "x86_64")] 321 fd.set_partition_property( 322 hv_partition_property_code_HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION, 323 hv_unimplemented_msr_action_HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO as u64, 324 ) 325 .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?; 326 327 // Always create a frozen partition 328 fd.set_partition_property( 329 hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE, 330 1u64, 331 ) 332 .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?; 333 334 let vm_fd = Arc::new(fd); 335 336 #[cfg(target_arch = "x86_64")] 337 { 338 let msr_list = self.get_msr_list()?; 339 let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize; 340 let mut msrs: Vec<MsrEntry> = vec![ 341 MsrEntry { 342 ..Default::default() 343 }; 344 num_msrs 345 ]; 346 let indices = msr_list.as_slice(); 347 for (pos, index) in indices.iter().enumerate() { 348 msrs[pos].index = *index; 349 } 350 351 Ok(Arc::new(MshvVm { 352 fd: vm_fd, 353 msrs, 354 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())), 355 #[cfg(feature = "sev_snp")] 356 sev_snp_enabled: mshv_vm_type == VmType::Snp, 357 #[cfg(feature = "sev_snp")] 358 host_access_pages: ArcSwap::new( 359 AtomicBitmap::new( 360 _mem_size.unwrap_or_default() as usize, 361 NonZeroUsize::new(HV_PAGE_SIZE).unwrap(), 362 ) 363 .into(), 364 ), 365 })) 366 } 367 368 #[cfg(target_arch = "aarch64")] 369 { 370 Ok(Arc::new(MshvVm { 371 fd: vm_fd, 372 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())), 373 })) 374 } 375 } 376 } 377 378 impl MshvHypervisor { 379 /// Create a hypervisor based on Mshv 380 #[allow(clippy::new_ret_no_self)] 381 pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> { 382 let mshv_obj = 383 Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?; 384 Ok(Arc::new(MshvHypervisor { mshv: mshv_obj })) 385 } 386 /// Check if the hypervisor is available 387 pub fn is_available() -> hypervisor::Result<bool> { 388 match std::fs::metadata("/dev/mshv") { 389 Ok(_) => Ok(true), 390 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false), 391 Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck( 392 err.into(), 393 )), 394 } 395 } 396 } 397 398 /// Implementation of Hypervisor trait for Mshv 399 /// 400 /// # Examples 401 /// 402 /// ``` 403 /// use hypervisor::mshv::MshvHypervisor; 404 /// use std::sync::Arc; 405 /// let mshv = MshvHypervisor::new().unwrap(); 406 /// let hypervisor = Arc::new(mshv); 407 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 408 /// ``` 409 impl hypervisor::Hypervisor for MshvHypervisor { 410 /// 411 /// Returns the type of the hypervisor 412 /// 413 fn hypervisor_type(&self) -> HypervisorType { 414 HypervisorType::Mshv 415 } 416 417 /// 418 /// Create a Vm of a specific type using the underlying hypervisor, passing memory size 419 /// Return a hypervisor-agnostic Vm trait object 420 /// 421 /// # Examples 422 /// 423 /// ``` 424 /// use hypervisor::kvm::KvmHypervisor; 425 /// use hypervisor::kvm::KvmVm; 426 /// let hypervisor = KvmHypervisor::new().unwrap(); 427 /// let vm = hypervisor.create_vm_with_type(0, 512*1024*1024).unwrap(); 428 /// ``` 429 fn create_vm_with_type_and_memory( 430 &self, 431 vm_type: u64, 432 #[cfg(feature = "sev_snp")] _mem_size: u64, 433 ) -> hypervisor::Result<Arc<dyn vm::Vm>> { 434 self.create_vm_with_type_and_memory_int( 435 vm_type, 436 #[cfg(feature = "sev_snp")] 437 Some(_mem_size), 438 ) 439 } 440 441 fn 
create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn crate::Vm>> { 442 self.create_vm_with_type_and_memory_int( 443 vm_type, 444 #[cfg(feature = "sev_snp")] 445 None, 446 ) 447 } 448 449 /// Create a mshv vm object and return the object as Vm trait object 450 /// 451 /// # Examples 452 /// 453 /// ``` 454 /// # extern crate hypervisor; 455 /// use hypervisor::mshv::MshvHypervisor; 456 /// use hypervisor::mshv::MshvVm; 457 /// let hypervisor = MshvHypervisor::new().unwrap(); 458 /// let vm = hypervisor.create_vm().unwrap(); 459 /// ``` 460 fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> { 461 let vm_type = 0; 462 self.create_vm_with_type(vm_type) 463 } 464 #[cfg(target_arch = "x86_64")] 465 /// 466 /// Get the supported CpuID 467 /// 468 fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> { 469 let mut cpuid = Vec::new(); 470 let functions: [u32; 2] = [0x1, 0xb]; 471 472 for function in functions { 473 cpuid.push(CpuIdEntry { 474 function, 475 ..Default::default() 476 }); 477 } 478 Ok(cpuid) 479 } 480 481 /// Get maximum number of vCPUs 482 fn get_max_vcpus(&self) -> u32 { 483 // TODO: Using HV_MAXIMUM_PROCESSORS would be better 484 // but the ioctl API is limited to u8 485 256 486 } 487 488 fn get_guest_debug_hw_bps(&self) -> usize { 489 0 490 } 491 492 #[cfg(target_arch = "aarch64")] 493 /// 494 /// Retrieve AArch64 host maximum IPA size supported by MSHV. 495 /// 496 fn get_host_ipa_limit(&self) -> i32 { 497 let host_ipa = self.mshv.get_host_partition_property( 498 hv_partition_property_code_HV_PARTITION_PROPERTY_PHYSICAL_ADDRESS_WIDTH as u64, 499 ); 500 501 match host_ipa { 502 Ok(ipa) => ipa, 503 Err(e) => { 504 panic!("Failed to get host IPA limit: {:?}", e); 505 } 506 } 507 } 508 } 509 510 #[cfg(feature = "sev_snp")] 511 struct Ghcb(*mut svm_ghcb_base); 512 513 #[cfg(feature = "sev_snp")] 514 // SAFETY: struct is based on GHCB page in the hypervisor, 515 // safe to Send across threads 516 unsafe impl Send for Ghcb {} 517 518 #[cfg(feature = "sev_snp")] 519 // SAFETY: struct is based on GHCB page in the hypervisor, 520 // safe to Sync across threads as this is only required for Vcpu trait 521 // functionally not used anyway 522 unsafe impl Sync for Ghcb {} 523 524 /// Vcpu struct for Microsoft Hypervisor 525 pub struct MshvVcpu { 526 fd: VcpuFd, 527 vp_index: u8, 528 #[cfg(target_arch = "x86_64")] 529 cpuid: Vec<CpuIdEntry>, 530 #[cfg(target_arch = "x86_64")] 531 msrs: Vec<MsrEntry>, 532 vm_ops: Option<Arc<dyn vm::VmOps>>, 533 vm_fd: Arc<VmFd>, 534 #[cfg(feature = "sev_snp")] 535 ghcb: Option<Ghcb>, 536 #[cfg(feature = "sev_snp")] 537 host_access_pages: ArcSwap<AtomicBitmap>, 538 } 539 540 /// Implementation of Vcpu trait for Microsoft Hypervisor 541 /// 542 /// # Examples 543 /// 544 /// ``` 545 /// use hypervisor::mshv::MshvHypervisor; 546 /// use std::sync::Arc; 547 /// let mshv = MshvHypervisor::new().unwrap(); 548 /// let hypervisor = Arc::new(mshv); 549 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 550 /// let vcpu = vm.create_vcpu(0, None).unwrap(); 551 /// ``` 552 impl cpu::Vcpu for MshvVcpu { 553 /// 554 /// Returns StandardRegisters with default value set 555 /// 556 #[cfg(target_arch = "x86_64")] 557 fn create_standard_regs(&self) -> crate::StandardRegisters { 558 mshv_bindings::StandardRegisters::default().into() 559 } 560 #[cfg(target_arch = "x86_64")] 561 /// 562 /// Returns the vCPU general purpose registers. 
563 /// 564 fn get_regs(&self) -> cpu::Result<crate::StandardRegisters> { 565 Ok(self 566 .fd 567 .get_regs() 568 .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))? 569 .into()) 570 } 571 572 #[cfg(target_arch = "x86_64")] 573 /// 574 /// Sets the vCPU general purpose registers. 575 /// 576 fn set_regs(&self, regs: &crate::StandardRegisters) -> cpu::Result<()> { 577 let regs = (*regs).into(); 578 self.fd 579 .set_regs(®s) 580 .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into())) 581 } 582 583 #[cfg(target_arch = "x86_64")] 584 /// 585 /// Returns the vCPU special registers. 586 /// 587 fn get_sregs(&self) -> cpu::Result<crate::arch::x86::SpecialRegisters> { 588 Ok(self 589 .fd 590 .get_sregs() 591 .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))? 592 .into()) 593 } 594 595 #[cfg(target_arch = "x86_64")] 596 /// 597 /// Sets the vCPU special registers. 598 /// 599 fn set_sregs(&self, sregs: &crate::arch::x86::SpecialRegisters) -> cpu::Result<()> { 600 let sregs = (*sregs).into(); 601 self.fd 602 .set_sregs(&sregs) 603 .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into())) 604 } 605 606 #[cfg(target_arch = "x86_64")] 607 /// 608 /// Returns the floating point state (FPU) from the vCPU. 609 /// 610 fn get_fpu(&self) -> cpu::Result<FpuState> { 611 Ok(self 612 .fd 613 .get_fpu() 614 .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))? 615 .into()) 616 } 617 618 #[cfg(target_arch = "x86_64")] 619 /// 620 /// Set the floating point state (FPU) of a vCPU. 621 /// 622 fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> { 623 let fpu: mshv_bindings::FloatingPointUnit = (*fpu).clone().into(); 624 self.fd 625 .set_fpu(&fpu) 626 .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into())) 627 } 628 629 #[cfg(target_arch = "x86_64")] 630 /// 631 /// Returns the model-specific registers (MSR) for this vCPU. 632 /// 633 fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> { 634 let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect(); 635 let mut mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap(); 636 let succ = self 637 .fd 638 .get_msrs(&mut mshv_msrs) 639 .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?; 640 641 msrs[..succ].copy_from_slice( 642 &mshv_msrs.as_slice()[..succ] 643 .iter() 644 .map(|e| (*e).into()) 645 .collect::<Vec<MsrEntry>>(), 646 ); 647 648 Ok(succ) 649 } 650 651 #[cfg(target_arch = "x86_64")] 652 /// 653 /// Setup the model-specific registers (MSR) for this vCPU. 654 /// Returns the number of MSR entries actually written. 
655 /// 656 fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> { 657 let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect(); 658 let mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap(); 659 self.fd 660 .set_msrs(&mshv_msrs) 661 .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into())) 662 } 663 664 #[cfg(target_arch = "x86_64")] 665 /// 666 /// X86 specific call to enable HyperV SynIC 667 /// 668 fn enable_hyperv_synic(&self) -> cpu::Result<()> { 669 /* We always have SynIC enabled on MSHV */ 670 Ok(()) 671 } 672 673 #[allow(non_upper_case_globals)] 674 fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> { 675 match self.fd.run() { 676 Ok(x) => match x.header.message_type { 677 hv_message_type_HVMSG_X64_HALT => { 678 debug!("HALT"); 679 Ok(cpu::VmExit::Reset) 680 } 681 hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => { 682 warn!("TRIPLE FAULT"); 683 Ok(cpu::VmExit::Shutdown) 684 } 685 #[cfg(target_arch = "x86_64")] 686 hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => { 687 let info = x.to_ioport_info().unwrap(); 688 let access_info = info.access_info; 689 // SAFETY: access_info is valid, otherwise we won't be here 690 let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize; 691 let is_write = info.header.intercept_access_type == 1; 692 let port = info.port_number; 693 let mut data: [u8; 4] = [0; 4]; 694 let mut ret_rax = info.rax; 695 696 /* 697 * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports. 698 * 699 * Cloud Hypervisor doesn't support fw_cfg at the moment. It does support 0x402 700 * under the "fwdebug" feature flag. But that feature is not enabled by default 701 * and is considered legacy. 702 * 703 * OVMF unconditionally pokes these IO ports with string IO. 704 * 705 * Instead of trying to implement string IO support now which does not do much 706 * now, skip those ports explicitly to avoid panicking. 707 * 708 * Proper string IO support can be added once we gain the ability to translate 709 * guest virtual addresses to guest physical addresses on MSHV. 
710 */ 711 match port { 712 0x402 | 0x510 | 0x511 | 0x514 => { 713 let insn_len = info.header.instruction_length() as u64; 714 715 /* Advance RIP and update RAX */ 716 let arr_reg_name_value = [ 717 ( 718 hv_register_name_HV_X64_REGISTER_RIP, 719 info.header.rip + insn_len, 720 ), 721 (hv_register_name_HV_X64_REGISTER_RAX, ret_rax), 722 ]; 723 set_registers_64!(self.fd, arr_reg_name_value) 724 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; 725 return Ok(cpu::VmExit::Ignore); 726 } 727 _ => {} 728 } 729 730 assert!( 731 // SAFETY: access_info is valid, otherwise we won't be here 732 (unsafe { access_info.__bindgen_anon_1.string_op() } != 1), 733 "String IN/OUT not supported" 734 ); 735 assert!( 736 // SAFETY: access_info is valid, otherwise we won't be here 737 (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1), 738 "Rep IN/OUT not supported" 739 ); 740 741 if is_write { 742 let data = (info.rax as u32).to_le_bytes(); 743 if let Some(vm_ops) = &self.vm_ops { 744 vm_ops 745 .pio_write(port.into(), &data[0..len]) 746 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; 747 } 748 } else { 749 if let Some(vm_ops) = &self.vm_ops { 750 vm_ops 751 .pio_read(port.into(), &mut data[0..len]) 752 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; 753 } 754 755 let v = u32::from_le_bytes(data); 756 /* Preserve high bits in EAX but clear out high bits in RAX */ 757 let mask = 0xffffffff >> (32 - len * 8); 758 let eax = (info.rax as u32 & !mask) | (v & mask); 759 ret_rax = eax as u64; 760 } 761 762 let insn_len = info.header.instruction_length() as u64; 763 764 /* Advance RIP and update RAX */ 765 let arr_reg_name_value = [ 766 ( 767 hv_register_name_HV_X64_REGISTER_RIP, 768 info.header.rip + insn_len, 769 ), 770 (hv_register_name_HV_X64_REGISTER_RAX, ret_rax), 771 ]; 772 set_registers_64!(self.fd, arr_reg_name_value) 773 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; 774 Ok(cpu::VmExit::Ignore) 775 } 776 #[cfg(target_arch = "x86_64")] 777 msg_type @ (hv_message_type_HVMSG_UNMAPPED_GPA 778 | hv_message_type_HVMSG_GPA_INTERCEPT) => { 779 let info = x.to_memory_info().unwrap(); 780 let insn_len = info.instruction_byte_count as usize; 781 let gva = info.guest_virtual_address; 782 let gpa = info.guest_physical_address; 783 784 debug!("Exit ({:?}) GVA {:x} GPA {:x}", msg_type, gva, gpa); 785 786 let mut context = MshvEmulatorContext { 787 vcpu: self, 788 map: (gva, gpa), 789 }; 790 791 // Create a new emulator. 792 let mut emul = Emulator::new(&mut context); 793 794 // Emulate the trapped instruction, and only the first one. 795 let new_state = emul 796 .emulate_first_insn( 797 self.vp_index as usize, 798 &info.instruction_bytes[..insn_len], 799 ) 800 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; 801 802 // Set CPU state back. 
803 context 804 .set_cpu_state(self.vp_index as usize, new_state) 805 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; 806 807 Ok(cpu::VmExit::Ignore) 808 } 809 #[cfg(feature = "sev_snp")] 810 hv_message_type_HVMSG_GPA_ATTRIBUTE_INTERCEPT => { 811 let info = x.to_gpa_attribute_info().unwrap(); 812 let host_vis = info.__bindgen_anon_1.host_visibility(); 813 if host_vis >= HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE { 814 warn!("Ignored attribute intercept with full host visibility"); 815 return Ok(cpu::VmExit::Ignore); 816 } 817 818 let num_ranges = info.__bindgen_anon_1.range_count(); 819 assert!(num_ranges >= 1); 820 if num_ranges > 1 { 821 return Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 822 "Unhandled VCPU exit(GPA_ATTRIBUTE_INTERCEPT): Expected num_ranges to be 1 but found num_ranges {:?}", 823 num_ranges 824 ))); 825 } 826 827 // TODO: we could also deny the request with HvCallCompleteIntercept 828 let mut gpas = Vec::new(); 829 let ranges = info.ranges; 830 let (gfn_start, gfn_count) = snp::parse_gpa_range(ranges[0]).unwrap(); 831 debug!( 832 "Releasing pages: gfn_start: {:x?}, gfn_count: {:?}", 833 gfn_start, gfn_count 834 ); 835 let gpa_start = gfn_start * HV_PAGE_SIZE as u64; 836 for i in 0..gfn_count { 837 gpas.push(gpa_start + i * HV_PAGE_SIZE as u64); 838 } 839 840 let mut gpa_list = 841 vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len()); 842 gpa_list[0].page_count = gpas.len() as u64; 843 gpa_list[0].flags = 0; 844 if host_vis & HV_MAP_GPA_READABLE != 0 { 845 gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_READABLE; 846 } 847 if host_vis & HV_MAP_GPA_WRITABLE != 0 { 848 gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_WRITABLE; 849 } 850 851 // SAFETY: gpa_list initialized with gpas.len() and now it is being turned into 852 // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold 853 // everything from gpas. 
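                    // (`mshv_modify_gpa_host_access` ends in a flexible-array
                    // member, which is why it is allocated through
                    // `vec_with_array_field` above and the PFN list is copied in
                    // manually below.)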
854 unsafe { 855 let gpas_slice: &mut [u64] = 856 gpa_list[0].guest_pfns.as_mut_slice(gpas.len()); 857 gpas_slice.copy_from_slice(gpas.as_slice()); 858 } 859 860 self.vm_fd 861 .modify_gpa_host_access(&gpa_list[0]) 862 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(anyhow!( 863 "Unhandled VCPU exit: attribute intercept - couldn't modify host access {}", e 864 )))?; 865 // Guest is revoking the shared access, so we need to update the bitmap 866 self.host_access_pages.rcu(|_bitmap| { 867 let bm = self.host_access_pages.load().as_ref().clone(); 868 bm.reset_addr_range(gpa_start as usize, gfn_count as usize); 869 bm 870 }); 871 Ok(cpu::VmExit::Ignore) 872 } 873 #[cfg(target_arch = "x86_64")] 874 hv_message_type_HVMSG_UNACCEPTED_GPA => { 875 let info = x.to_memory_info().unwrap(); 876 let gva = info.guest_virtual_address; 877 let gpa = info.guest_physical_address; 878 879 Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 880 "Unhandled VCPU exit: Unaccepted GPA({:x}) found at GVA({:x})", 881 gpa, 882 gva, 883 ))) 884 } 885 #[cfg(target_arch = "x86_64")] 886 hv_message_type_HVMSG_X64_CPUID_INTERCEPT => { 887 let info = x.to_cpuid_info().unwrap(); 888 debug!("cpuid eax: {:x}", { info.rax }); 889 Ok(cpu::VmExit::Ignore) 890 } 891 #[cfg(target_arch = "x86_64")] 892 hv_message_type_HVMSG_X64_MSR_INTERCEPT => { 893 let info = x.to_msr_info().unwrap(); 894 if info.header.intercept_access_type == 0 { 895 debug!("msr read: {:x}", { info.msr_number }); 896 } else { 897 debug!("msr write: {:x}", { info.msr_number }); 898 } 899 Ok(cpu::VmExit::Ignore) 900 } 901 #[cfg(target_arch = "x86_64")] 902 hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => { 903 //TODO: Handler for VMCALL here. 904 let info = x.to_exception_info().unwrap(); 905 debug!("Exception Info {:?}", { info.exception_vector }); 906 Ok(cpu::VmExit::Ignore) 907 } 908 #[cfg(target_arch = "x86_64")] 909 hv_message_type_HVMSG_X64_APIC_EOI => { 910 let info = x.to_apic_eoi_info().unwrap(); 911 // The kernel should dispatch the EOI to the correct thread. 912 // Check the VP index is the same as the one we have. 913 assert!(info.vp_index == self.vp_index as u32); 914 // The interrupt vector in info is u32, but x86 only supports 256 vectors. 915 // There is no good way to recover from this if the hypervisor messes around. 916 // Just unwrap. 917 Ok(cpu::VmExit::IoapicEoi( 918 info.interrupt_vector.try_into().unwrap(), 919 )) 920 } 921 #[cfg(feature = "sev_snp")] 922 hv_message_type_HVMSG_X64_SEV_VMGEXIT_INTERCEPT => { 923 let info = x.to_vmg_intercept_info().unwrap(); 924 let ghcb_data = info.ghcb_msr >> GHCB_INFO_BIT_WIDTH; 925 let ghcb_msr = svm_ghcb_msr { 926 as_uint64: info.ghcb_msr, 927 }; 928 // Safe to use unwrap, for sev_snp guest we already have the 929 // GHCB pointer wrapped in the option, otherwise this place is not reached. 930 let ghcb = self.ghcb.as_ref().unwrap().0; 931 932 // SAFETY: Accessing a union element from bindgen generated bindings. 933 let ghcb_op = unsafe { ghcb_msr.__bindgen_anon_2.ghcb_info() as u32 }; 934 // Sanity check on the header fields before handling other operations. 
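                    // GHCB MSR protocol recap: the low GHCB_INFO_BIT_WIDTH bits
                    // of the MSR carry the operation code (GHCB_INFO_*) and the
                    // remaining upper bits carry that operation's payload, which
                    // is what `ghcb_data` above was extracted from.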
935 assert!(info.header.intercept_access_type == HV_INTERCEPT_ACCESS_EXECUTE as u8); 936 937 match ghcb_op { 938 GHCB_INFO_HYP_FEATURE_REQUEST => { 939 // Pre-condition: GHCB data must be zero 940 assert!(ghcb_data == 0); 941 let mut ghcb_response = GHCB_INFO_HYP_FEATURE_RESPONSE as u64; 942 // Indicate support for basic SEV-SNP features 943 ghcb_response |= 944 (GHCB_HYP_FEATURE_SEV_SNP << GHCB_INFO_BIT_WIDTH) as u64; 945 // Indicate support for SEV-SNP AP creation 946 ghcb_response |= (GHCB_HYP_FEATURE_SEV_SNP_AP_CREATION 947 << GHCB_INFO_BIT_WIDTH) 948 as u64; 949 debug!( 950 "GHCB_INFO_HYP_FEATURE_REQUEST: Supported features: {:0x}", 951 ghcb_response 952 ); 953 let arr_reg_name_value = 954 [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)]; 955 set_registers_64!(self.fd, arr_reg_name_value) 956 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; 957 } 958 GHCB_INFO_REGISTER_REQUEST => { 959 let mut ghcb_gpa = hv_x64_register_sev_ghcb::default(); 960 961 // Disable the previously used GHCB page. 962 self.disable_prev_ghcb_page()?; 963 964 // SAFETY: Accessing a union element from bindgen generated bindings. 965 unsafe { 966 ghcb_gpa.__bindgen_anon_1.set_enabled(1); 967 ghcb_gpa 968 .__bindgen_anon_1 969 .set_page_number(ghcb_msr.__bindgen_anon_2.gpa_page_number()); 970 } 971 // SAFETY: Accessing a union element from bindgen generated bindings. 972 let reg_name_value = unsafe { 973 [( 974 hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA, 975 ghcb_gpa.as_uint64, 976 )] 977 }; 978 979 set_registers_64!(self.fd, reg_name_value) 980 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; 981 982 let mut resp_ghcb_msr = svm_ghcb_msr::default(); 983 // SAFETY: Accessing a union element from bindgen generated bindings. 984 unsafe { 985 resp_ghcb_msr 986 .__bindgen_anon_2 987 .set_ghcb_info(GHCB_INFO_REGISTER_RESPONSE as u64); 988 resp_ghcb_msr.__bindgen_anon_2.set_gpa_page_number( 989 ghcb_msr.__bindgen_anon_2.gpa_page_number(), 990 ); 991 debug!("GHCB GPA is {:x}", ghcb_gpa.as_uint64); 992 } 993 // SAFETY: Accessing a union element from bindgen generated bindings. 994 let reg_name_value = unsafe { 995 [( 996 hv_register_name_HV_X64_REGISTER_GHCB, 997 resp_ghcb_msr.as_uint64, 998 )] 999 }; 1000 1001 set_registers_64!(self.fd, reg_name_value) 1002 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; 1003 } 1004 GHCB_INFO_SEV_INFO_REQUEST => { 1005 let sev_cpuid_function = 0x8000_001F; 1006 let cpu_leaf = self 1007 .fd 1008 .get_cpuid_values(sev_cpuid_function, 0, 0, 0) 1009 .unwrap(); 1010 let ebx = cpu_leaf[1]; 1011 // First 6-byte of EBX represents page table encryption bit number 1012 let pbit_encryption = (ebx & 0x3f) as u8; 1013 let mut ghcb_response = GHCB_INFO_SEV_INFO_RESPONSE as u64; 1014 1015 // GHCBData[63:48] specifies the maximum GHCB protocol version supported 1016 ghcb_response |= (GHCB_PROTOCOL_VERSION_MAX as u64) << 48; 1017 // GHCBData[47:32] specifies the minimum GHCB protocol version supported 1018 ghcb_response |= (GHCB_PROTOCOL_VERSION_MIN as u64) << 32; 1019 // GHCBData[31:24] specifies the SEV page table encryption bit number. 
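                            // Worked example (illustrative values only): with
                            // GHCB_PROTOCOL_VERSION_MAX == 2,
                            // GHCB_PROTOCOL_VERSION_MIN == 1 and a C-bit position
                            // of 51, the response becomes
                            // (2 << 48) | (1 << 32) | (51 << 24) | GHCB_INFO_SEV_INFO_RESPONSE.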
1020 ghcb_response |= (pbit_encryption as u64) << 24; 1021 1022 let arr_reg_name_value = 1023 [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)]; 1024 set_registers_64!(self.fd, arr_reg_name_value) 1025 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; 1026 } 1027 GHCB_INFO_NORMAL => { 1028 let exit_code = 1029 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_code as u32; 1030 1031 match exit_code { 1032 SVM_EXITCODE_HV_DOORBELL_PAGE => { 1033 let exit_info1 = 1034 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32; 1035 match exit_info1 { 1036 SVM_NAE_HV_DOORBELL_PAGE_GET_PREFERRED => { 1037 // Hypervisor does not have any preference for doorbell GPA. 1038 let preferred_doorbell_gpa: u64 = 0xFFFFFFFFFFFFFFFF; 1039 set_svm_field_u64_ptr!( 1040 ghcb, 1041 exit_info2, 1042 preferred_doorbell_gpa 1043 ); 1044 } 1045 SVM_NAE_HV_DOORBELL_PAGE_SET => { 1046 let exit_info2 = info 1047 .__bindgen_anon_2 1048 .__bindgen_anon_1 1049 .sw_exit_info2; 1050 let mut ghcb_doorbell_gpa = 1051 hv_x64_register_sev_hv_doorbell::default(); 1052 // SAFETY: Accessing a union element from bindgen generated bindings. 1053 unsafe { 1054 ghcb_doorbell_gpa.__bindgen_anon_1.set_enabled(1); 1055 ghcb_doorbell_gpa 1056 .__bindgen_anon_1 1057 .set_page_number(exit_info2 >> PAGE_SHIFT); 1058 } 1059 // SAFETY: Accessing a union element from bindgen generated bindings. 1060 let reg_names = unsafe { 1061 [( 1062 hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA, 1063 ghcb_doorbell_gpa.as_uint64, 1064 )] 1065 }; 1066 set_registers_64!(self.fd, reg_names).map_err(|e| { 1067 cpu::HypervisorCpuError::SetRegister(e.into()) 1068 })?; 1069 1070 set_svm_field_u64_ptr!(ghcb, exit_info2, exit_info2); 1071 1072 // Clear the SW_EXIT_INFO1 register to indicate no error 1073 self.clear_swexit_info1()?; 1074 } 1075 SVM_NAE_HV_DOORBELL_PAGE_QUERY => { 1076 let mut reg_assocs = [ hv_register_assoc { 1077 name: hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA, 1078 ..Default::default() 1079 } ]; 1080 self.fd.get_reg(&mut reg_assocs).unwrap(); 1081 // SAFETY: Accessing a union element from bindgen generated bindings. 1082 let doorbell_gpa = unsafe { reg_assocs[0].value.reg64 }; 1083 1084 set_svm_field_u64_ptr!(ghcb, exit_info2, doorbell_gpa); 1085 1086 // Clear the SW_EXIT_INFO1 register to indicate no error 1087 self.clear_swexit_info1()?; 1088 } 1089 SVM_NAE_HV_DOORBELL_PAGE_CLEAR => { 1090 set_svm_field_u64_ptr!(ghcb, exit_info2, 0); 1091 } 1092 _ => { 1093 panic!( 1094 "SVM_EXITCODE_HV_DOORBELL_PAGE: Unhandled exit code: {:0x}", 1095 exit_info1 1096 ); 1097 } 1098 } 1099 } 1100 SVM_EXITCODE_IOIO_PROT => { 1101 let exit_info1 = 1102 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32; 1103 let port_info = hv_sev_vmgexit_port_info { 1104 as_uint32: exit_info1, 1105 }; 1106 1107 let port = 1108 // SAFETY: Accessing a union element from bindgen generated bindings. 1109 unsafe { port_info.__bindgen_anon_1.intercepted_port() }; 1110 let mut len = 4; 1111 // SAFETY: Accessing a union element from bindgen generated bindings. 1112 unsafe { 1113 if port_info.__bindgen_anon_1.operand_size_16bit() == 1 { 1114 len = 2; 1115 } else if port_info.__bindgen_anon_1.operand_size_8bit() 1116 == 1 1117 { 1118 len = 1; 1119 } 1120 } 1121 let is_write = 1122 // SAFETY: Accessing a union element from bindgen generated bindings. 
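                                // In the IOIO exit information an access type of
                                // 0 denotes an OUT (guest write) and 1 an IN
                                // (guest read), hence the comparison with 0 that
                                // follows.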
1123 unsafe { port_info.__bindgen_anon_1.access_type() == 0 }; 1124 // SAFETY: Accessing the field from a mapped address 1125 let mut data = unsafe { (*ghcb).rax.to_le_bytes() }; 1126 1127 if is_write { 1128 if let Some(vm_ops) = &self.vm_ops { 1129 vm_ops.pio_write(port.into(), &data[..len]).map_err( 1130 |e| cpu::HypervisorCpuError::RunVcpu(e.into()), 1131 )?; 1132 } 1133 } else { 1134 if let Some(vm_ops) = &self.vm_ops { 1135 vm_ops 1136 .pio_read(port.into(), &mut data[..len]) 1137 .map_err(|e| { 1138 cpu::HypervisorCpuError::RunVcpu(e.into()) 1139 })?; 1140 } 1141 set_svm_field_u64_ptr!(ghcb, rax, u64::from_le_bytes(data)); 1142 } 1143 1144 // Clear the SW_EXIT_INFO1 register to indicate no error 1145 self.clear_swexit_info1()?; 1146 } 1147 SVM_EXITCODE_MMIO_READ => { 1148 let src_gpa = 1149 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1; 1150 let data_len = 1151 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2 1152 as usize; 1153 // Sanity check to make sure data len is within supported range. 1154 assert!(data_len <= 0x8); 1155 1156 let mut data: Vec<u8> = vec![0; data_len]; 1157 if let Some(vm_ops) = &self.vm_ops { 1158 vm_ops.mmio_read(src_gpa, &mut data).map_err(|e| { 1159 cpu::HypervisorCpuError::RunVcpu(e.into()) 1160 })?; 1161 } 1162 // Copy the data to the shared buffer of the GHCB page 1163 let mut buffer_data = [0; 8]; 1164 buffer_data[..data_len].copy_from_slice(&data[..data_len]); 1165 // SAFETY: Updating the value of mapped area 1166 unsafe { (*ghcb).shared[0] = u64::from_le_bytes(buffer_data) }; 1167 1168 // Clear the SW_EXIT_INFO1 register to indicate no error 1169 self.clear_swexit_info1()?; 1170 } 1171 SVM_EXITCODE_MMIO_WRITE => { 1172 let dst_gpa = 1173 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1; 1174 let data_len = 1175 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2 1176 as usize; 1177 // Sanity check to make sure data len is within supported range. 1178 assert!(data_len <= 0x8); 1179 1180 let mut data = vec![0; data_len]; 1181 // SAFETY: Accessing data from a mapped address 1182 let bytes_shared_ghcb = 1183 unsafe { (*ghcb).shared[0].to_le_bytes() }; 1184 data.copy_from_slice(&bytes_shared_ghcb[..data_len]); 1185 1186 if let Some(vm_ops) = &self.vm_ops { 1187 vm_ops.mmio_write(dst_gpa, &data).map_err(|e| { 1188 cpu::HypervisorCpuError::RunVcpu(e.into()) 1189 })?; 1190 } 1191 1192 // Clear the SW_EXIT_INFO1 register to indicate no error 1193 self.clear_swexit_info1()?; 1194 } 1195 SVM_EXITCODE_SNP_GUEST_REQUEST 1196 | SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST => { 1197 if exit_code == SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST { 1198 info!("Fetching extended guest request is not supported"); 1199 // We don't support extended guest request, so we just write empty data. 1200 // This matches the behavior of KVM in Linux 6.11. 1201 1202 // Read RBX from the GHCB. 1203 // SAFETY: Accessing data from a mapped address 1204 let data_gpa = unsafe { (*ghcb).rax }; 1205 // SAFETY: Accessing data from a mapped address 1206 let data_npages = unsafe { (*ghcb).rbx }; 1207 1208 if data_npages > 0 { 1209 // The certificates are terminated by 24 zero bytes. 
1210 // TODO: Need to check if data_gpa is the address of the shared buffer in the GHCB page 1211 // in that case we should clear the shared buffer(24 bytes) 1212 self.gpa_write(data_gpa, &[0; 24])?; 1213 } 1214 } 1215 1216 let req_gpa = 1217 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1; 1218 let rsp_gpa = 1219 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2; 1220 1221 let mshv_psp_req = 1222 mshv_issue_psp_guest_request { req_gpa, rsp_gpa }; 1223 self.vm_fd 1224 .psp_issue_guest_request(&mshv_psp_req) 1225 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; 1226 1227 debug!( 1228 "SNP guest request: req_gpa {:0x} rsp_gpa {:0x}", 1229 req_gpa, rsp_gpa 1230 ); 1231 1232 set_svm_field_u64_ptr!(ghcb, exit_info2, 0); 1233 } 1234 SVM_EXITCODE_SNP_AP_CREATION => { 1235 let vmsa_gpa = 1236 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2; 1237 let apic_id = 1238 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 >> 32; 1239 debug!( 1240 "SNP AP CREATE REQUEST with VMSA GPA {:0x}, and APIC ID {:?}", 1241 vmsa_gpa, apic_id 1242 ); 1243 1244 let mshv_ap_create_req = mshv_sev_snp_ap_create { 1245 vp_id: apic_id, 1246 vmsa_gpa, 1247 }; 1248 self.vm_fd 1249 .sev_snp_ap_create(&mshv_ap_create_req) 1250 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; 1251 1252 // Clear the SW_EXIT_INFO1 register to indicate no error 1253 self.clear_swexit_info1()?; 1254 } 1255 _ => panic!( 1256 "GHCB_INFO_NORMAL: Unhandled exit code: {:0x}", 1257 exit_code 1258 ), 1259 } 1260 } 1261 _ => panic!("Unsupported VMGEXIT operation: {:0x}", ghcb_op), 1262 } 1263 1264 Ok(cpu::VmExit::Ignore) 1265 } 1266 exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1267 "Unhandled VCPU exit {:?}", 1268 exit 1269 ))), 1270 }, 1271 1272 Err(e) => match e.errno() { 1273 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore), 1274 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1275 "VCPU error {:?}", 1276 e 1277 ))), 1278 }, 1279 } 1280 } 1281 1282 #[cfg(target_arch = "aarch64")] 1283 fn init_pmu(&self, irq: u32) -> cpu::Result<()> { 1284 unimplemented!() 1285 } 1286 1287 #[cfg(target_arch = "aarch64")] 1288 fn has_pmu_support(&self) -> bool { 1289 unimplemented!() 1290 } 1291 1292 #[cfg(target_arch = "aarch64")] 1293 fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> { 1294 unimplemented!() 1295 } 1296 1297 #[cfg(target_arch = "aarch64")] 1298 fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> { 1299 unimplemented!() 1300 } 1301 1302 #[cfg(target_arch = "aarch64")] 1303 fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> { 1304 unimplemented!() 1305 } 1306 1307 #[cfg(target_arch = "aarch64")] 1308 fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> { 1309 unimplemented!() 1310 } 1311 1312 #[cfg(target_arch = "aarch64")] 1313 fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> { 1314 unimplemented!() 1315 } 1316 1317 #[cfg(target_arch = "aarch64")] 1318 fn get_regs(&self) -> cpu::Result<StandardRegisters> { 1319 unimplemented!() 1320 } 1321 1322 #[cfg(target_arch = "aarch64")] 1323 fn vcpu_finalize(&self, _feature: i32) -> cpu::Result<()> { 1324 unimplemented!() 1325 } 1326 1327 #[cfg(target_arch = "aarch64")] 1328 fn vcpu_get_finalized_features(&self) -> i32 { 1329 unimplemented!() 1330 } 1331 1332 #[cfg(target_arch = "aarch64")] 1333 fn vcpu_set_processor_features( 1334 &self, 1335 _vm: &Arc<dyn crate::Vm>, 1336 _kvi: &mut crate::VcpuInit, 1337 _id: u8, 1338 ) -> cpu::Result<()> { 1339 unimplemented!() 1340 } 1341 1342 #[cfg(target_arch 
= "aarch64")] 1343 fn create_vcpu_init(&self) -> crate::VcpuInit { 1344 unimplemented!(); 1345 } 1346 1347 #[cfg(target_arch = "x86_64")] 1348 /// 1349 /// X86 specific call to setup the CPUID registers. 1350 /// 1351 fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> { 1352 let cpuid: Vec<mshv_bindings::hv_cpuid_entry> = cpuid.iter().map(|e| (*e).into()).collect(); 1353 let mshv_cpuid = <CpuId>::from_entries(&cpuid) 1354 .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?; 1355 1356 self.fd 1357 .register_intercept_result_cpuid(&mshv_cpuid) 1358 .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into())) 1359 } 1360 1361 #[cfg(target_arch = "x86_64")] 1362 /// 1363 /// X86 specific call to retrieve the CPUID registers. 1364 /// 1365 fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> { 1366 Ok(self.cpuid.clone()) 1367 } 1368 1369 #[cfg(target_arch = "x86_64")] 1370 /// 1371 /// X86 specific call to retrieve cpuid leaf 1372 /// 1373 fn get_cpuid_values( 1374 &self, 1375 function: u32, 1376 index: u32, 1377 xfem: u64, 1378 xss: u64, 1379 ) -> cpu::Result<[u32; 4]> { 1380 self.fd 1381 .get_cpuid_values(function, index, xfem, xss) 1382 .map_err(|e| cpu::HypervisorCpuError::GetCpuidVales(e.into())) 1383 } 1384 1385 #[cfg(target_arch = "x86_64")] 1386 /// 1387 /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 1388 /// 1389 fn get_lapic(&self) -> cpu::Result<crate::arch::x86::LapicState> { 1390 Ok(self 1391 .fd 1392 .get_lapic() 1393 .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))? 1394 .into()) 1395 } 1396 1397 #[cfg(target_arch = "x86_64")] 1398 /// 1399 /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 1400 /// 1401 fn set_lapic(&self, lapic: &crate::arch::x86::LapicState) -> cpu::Result<()> { 1402 let lapic: mshv_bindings::LapicState = (*lapic).clone().into(); 1403 self.fd 1404 .set_lapic(&lapic) 1405 .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into())) 1406 } 1407 1408 /// 1409 /// Returns the vcpu's current "multiprocessing state". 1410 /// 1411 fn get_mp_state(&self) -> cpu::Result<MpState> { 1412 Ok(MpState::Mshv) 1413 } 1414 1415 /// 1416 /// Sets the vcpu's current "multiprocessing state". 1417 /// 1418 fn set_mp_state(&self, _mp_state: MpState) -> cpu::Result<()> { 1419 Ok(()) 1420 } 1421 1422 #[cfg(target_arch = "x86_64")] 1423 /// 1424 /// Set CPU state for x86_64 guest. 1425 /// 1426 fn set_state(&self, state: &CpuState) -> cpu::Result<()> { 1427 let mut state: VcpuMshvState = state.clone().into(); 1428 self.set_msrs(&state.msrs)?; 1429 self.set_vcpu_events(&state.vcpu_events)?; 1430 self.set_regs(&state.regs.into())?; 1431 self.set_sregs(&state.sregs.into())?; 1432 self.set_fpu(&state.fpu)?; 1433 self.set_xcrs(&state.xcrs)?; 1434 // These registers are global and needed to be set only for first VCPU 1435 // as Microsoft Hypervisor allows setting this register for only one VCPU 1436 if self.vp_index == 0 { 1437 self.fd 1438 .set_misc_regs(&state.misc) 1439 .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))? 1440 } 1441 self.fd 1442 .set_debug_regs(&state.dbg) 1443 .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?; 1444 self.fd 1445 .set_all_vp_state_components(&mut state.vp_states) 1446 .map_err(|e| cpu::HypervisorCpuError::SetAllVpStateComponents(e.into()))?; 1447 Ok(()) 1448 } 1449 1450 #[cfg(target_arch = "aarch64")] 1451 /// 1452 /// Set CPU state for aarch64 guest. 
1453 /// 1454 fn set_state(&self, state: &CpuState) -> cpu::Result<()> { 1455 unimplemented!() 1456 } 1457 1458 #[cfg(target_arch = "x86_64")] 1459 /// 1460 /// Get CPU State for x86_64 guest 1461 /// 1462 fn state(&self) -> cpu::Result<CpuState> { 1463 let regs = self.get_regs()?; 1464 let sregs = self.get_sregs()?; 1465 let xcrs = self.get_xcrs()?; 1466 let fpu = self.get_fpu()?; 1467 let vcpu_events = self.get_vcpu_events()?; 1468 let mut msrs = self.msrs.clone(); 1469 self.get_msrs(&mut msrs)?; 1470 let misc = self 1471 .fd 1472 .get_misc_regs() 1473 .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?; 1474 let dbg = self 1475 .fd 1476 .get_debug_regs() 1477 .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?; 1478 let vp_states = self 1479 .fd 1480 .get_all_vp_state_components() 1481 .map_err(|e| cpu::HypervisorCpuError::GetAllVpStateComponents(e.into()))?; 1482 1483 Ok(VcpuMshvState { 1484 msrs, 1485 vcpu_events, 1486 regs: regs.into(), 1487 sregs: sregs.into(), 1488 fpu, 1489 xcrs, 1490 dbg, 1491 misc, 1492 vp_states, 1493 } 1494 .into()) 1495 } 1496 1497 #[cfg(target_arch = "aarch64")] 1498 /// 1499 /// Get CPU state for aarch64 guest. 1500 /// 1501 fn state(&self) -> cpu::Result<CpuState> { 1502 unimplemented!() 1503 } 1504 1505 #[cfg(target_arch = "x86_64")] 1506 /// 1507 /// Translate guest virtual address to guest physical address 1508 /// 1509 fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> { 1510 let r = self 1511 .fd 1512 .translate_gva(gva, flags) 1513 .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?; 1514 1515 let gpa = r.0; 1516 // SAFETY: r is valid, otherwise this function will have returned 1517 let result_code = unsafe { r.1.__bindgen_anon_1.result_code }; 1518 1519 Ok((gpa, result_code)) 1520 } 1521 1522 #[cfg(target_arch = "x86_64")] 1523 /// 1524 /// Return the list of initial MSR entries for a VCPU 1525 /// 1526 fn boot_msr_entries(&self) -> Vec<MsrEntry> { 1527 use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB}; 1528 1529 [ 1530 msr!(msr_index::MSR_IA32_SYSENTER_CS), 1531 msr!(msr_index::MSR_IA32_SYSENTER_ESP), 1532 msr!(msr_index::MSR_IA32_SYSENTER_EIP), 1533 msr!(msr_index::MSR_STAR), 1534 msr!(msr_index::MSR_CSTAR), 1535 msr!(msr_index::MSR_LSTAR), 1536 msr!(msr_index::MSR_KERNEL_GS_BASE), 1537 msr!(msr_index::MSR_SYSCALL_MASK), 1538 msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB), 1539 ] 1540 .to_vec() 1541 } 1542 1543 /// 1544 /// Sets the AMD specific vcpu's sev control register. 1545 /// 1546 #[cfg(feature = "sev_snp")] 1547 fn set_sev_control_register(&self, vmsa_pfn: u64) -> cpu::Result<()> { 1548 let sev_control_reg = snp::get_sev_control_register(vmsa_pfn); 1549 1550 self.fd 1551 .set_sev_control_register(sev_control_reg) 1552 .map_err(|e| cpu::HypervisorCpuError::SetSevControlRegister(e.into())) 1553 } 1554 #[cfg(target_arch = "x86_64")] 1555 /// 1556 /// Trigger NMI interrupt 1557 /// 1558 fn nmi(&self) -> cpu::Result<()> { 1559 let cfg = InterruptRequest { 1560 interrupt_type: hv_interrupt_type_HV_X64_INTERRUPT_TYPE_NMI, 1561 apic_id: self.vp_index as u64, 1562 level_triggered: false, 1563 vector: 0, 1564 logical_destination_mode: false, 1565 long_mode: false, 1566 }; 1567 self.vm_fd 1568 .request_virtual_interrupt(&cfg) 1569 .map_err(|e| cpu::HypervisorCpuError::Nmi(e.into())) 1570 } 1571 } 1572 1573 impl MshvVcpu { 1574 /// 1575 /// Deactivate previously used GHCB page. 
1576 /// 1577 #[cfg(feature = "sev_snp")] 1578 fn disable_prev_ghcb_page(&self) -> cpu::Result<()> { 1579 let mut reg_assocs = [hv_register_assoc { 1580 name: hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA, 1581 ..Default::default() 1582 }]; 1583 self.fd.get_reg(&mut reg_assocs).unwrap(); 1584 // SAFETY: Accessing a union element from bindgen generated bindings. 1585 let prev_ghcb_gpa = unsafe { reg_assocs[0].value.reg64 }; 1586 1587 debug!("Prev GHCB GPA is {:x}", prev_ghcb_gpa); 1588 1589 let mut ghcb_gpa = hv_x64_register_sev_ghcb::default(); 1590 1591 // SAFETY: Accessing a union element from bindgen generated bindings. 1592 unsafe { 1593 ghcb_gpa.__bindgen_anon_1.set_enabled(0); 1594 ghcb_gpa.__bindgen_anon_1.set_page_number(prev_ghcb_gpa); 1595 } 1596 1597 // SAFETY: Accessing a union element from bindgen generated bindings. 1598 let reg_name_value = unsafe { 1599 [( 1600 hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA, 1601 ghcb_gpa.as_uint64, 1602 )] 1603 }; 1604 1605 set_registers_64!(self.fd, reg_name_value) 1606 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; 1607 1608 Ok(()) 1609 } 1610 #[cfg(target_arch = "x86_64")] 1611 /// 1612 /// X86 specific call that returns the vcpu's current "xcrs". 1613 /// 1614 fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> { 1615 self.fd 1616 .get_xcrs() 1617 .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into())) 1618 } 1619 1620 #[cfg(target_arch = "x86_64")] 1621 /// 1622 /// X86 specific call that sets the vcpu's current "xcrs". 1623 /// 1624 fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> { 1625 self.fd 1626 .set_xcrs(xcrs) 1627 .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into())) 1628 } 1629 1630 #[cfg(target_arch = "x86_64")] 1631 /// 1632 /// Returns currently pending exceptions, interrupts, and NMIs as well as related 1633 /// states of the vcpu. 1634 /// 1635 fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> { 1636 self.fd 1637 .get_vcpu_events() 1638 .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into())) 1639 } 1640 1641 #[cfg(target_arch = "x86_64")] 1642 /// 1643 /// Sets pending exceptions, interrupts, and NMIs as well as related states 1644 /// of the vcpu. 1645 /// 1646 fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> { 1647 self.fd 1648 .set_vcpu_events(events) 1649 .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into())) 1650 } 1651 1652 /// 1653 /// Clear SW_EXIT_INFO1 register for SEV-SNP guests. 1654 /// 1655 #[cfg(feature = "sev_snp")] 1656 fn clear_swexit_info1(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> { 1657 // Clear the SW_EXIT_INFO1 register to indicate no error 1658 // Safe to use unwrap, for sev_snp guest we already have the 1659 // GHCB pointer wrapped in the option, otherwise this place is not reached. 1660 let ghcb = self.ghcb.as_ref().unwrap().0; 1661 set_svm_field_u64_ptr!(ghcb, exit_info1, 0); 1662 1663 Ok(cpu::VmExit::Ignore) 1664 } 1665 1666 #[cfg(feature = "sev_snp")] 1667 fn gpa_write(&self, gpa: u64, data: &[u8]) -> cpu::Result<()> { 1668 for (gpa, chunk) in (gpa..) 
1669 .step_by(HV_READ_WRITE_GPA_MAX_SIZE as usize) 1670 .zip(data.chunks(HV_READ_WRITE_GPA_MAX_SIZE as usize)) 1671 { 1672 let mut data = [0; HV_READ_WRITE_GPA_MAX_SIZE as usize]; 1673 data[..chunk.len()].copy_from_slice(chunk); 1674 1675 let mut rw_gpa_arg = mshv_bindings::mshv_read_write_gpa { 1676 base_gpa: gpa, 1677 byte_count: chunk.len() as u32, 1678 data, 1679 ..Default::default() 1680 }; 1681 self.fd 1682 .gpa_write(&mut rw_gpa_arg) 1683 .map_err(|e| cpu::HypervisorCpuError::GpaWrite(e.into()))?; 1684 } 1685 1686 Ok(()) 1687 } 1688 } 1689 1690 /// Wrapper over Mshv VM ioctls. 1691 pub struct MshvVm { 1692 fd: Arc<VmFd>, 1693 #[cfg(target_arch = "x86_64")] 1694 msrs: Vec<MsrEntry>, 1695 dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>, 1696 #[cfg(feature = "sev_snp")] 1697 sev_snp_enabled: bool, 1698 #[cfg(feature = "sev_snp")] 1699 host_access_pages: ArcSwap<AtomicBitmap>, 1700 } 1701 1702 impl MshvVm { 1703 /// 1704 /// Creates an in-kernel device. 1705 /// 1706 /// See the documentation for `MSHV_CREATE_DEVICE`. 1707 fn create_device(&self, device: &mut CreateDevice) -> vm::Result<VfioDeviceFd> { 1708 let device_fd = self 1709 .fd 1710 .create_device(device) 1711 .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?; 1712 Ok(VfioDeviceFd::new_from_mshv(device_fd)) 1713 } 1714 } 1715 1716 /// 1717 /// Implementation of Vm trait for Mshv 1718 /// 1719 /// # Examples 1720 /// 1721 /// ``` 1722 /// extern crate hypervisor; 1723 /// use hypervisor::mshv::MshvHypervisor; 1724 /// use std::sync::Arc; 1725 /// let mshv = MshvHypervisor::new().unwrap(); 1726 /// let hypervisor = Arc::new(mshv); 1727 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 1728 /// ``` 1729 impl vm::Vm for MshvVm { 1730 #[cfg(target_arch = "x86_64")] 1731 /// 1732 /// Sets the address of the one-page region in the VM's address space. 1733 /// 1734 fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> { 1735 Ok(()) 1736 } 1737 1738 #[cfg(target_arch = "x86_64")] 1739 /// 1740 /// Sets the address of the three-page region in the VM's address space. 1741 /// 1742 fn set_tss_address(&self, _offset: usize) -> vm::Result<()> { 1743 Ok(()) 1744 } 1745 1746 /// 1747 /// Creates an in-kernel interrupt controller. 1748 /// 1749 fn create_irq_chip(&self) -> vm::Result<()> { 1750 Ok(()) 1751 } 1752 1753 /// 1754 /// Registers an event that will, when signaled, trigger the `gsi` IRQ. 1755 /// 1756 fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 1757 debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi); 1758 1759 self.fd 1760 .register_irqfd(fd, gsi) 1761 .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?; 1762 1763 Ok(()) 1764 } 1765 1766 /// 1767 /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ. 1768 /// 1769 fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 1770 debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi); 1771 1772 self.fd 1773 .unregister_irqfd(fd, gsi) 1774 .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?; 1775 1776 Ok(()) 1777 } 1778 1779 /// 1780 /// Creates a VcpuFd object from a vcpu RawFd. 
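    ///
    /// For SEV-SNP guests this also maps the vCPU's GHCB overlay page into the
    /// VMM's address space so that later VMGEXIT handling can read and write it
    /// directly.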
1781 /// 1782 fn create_vcpu( 1783 &self, 1784 id: u8, 1785 vm_ops: Option<Arc<dyn VmOps>>, 1786 ) -> vm::Result<Arc<dyn cpu::Vcpu>> { 1787 let vcpu_fd = self 1788 .fd 1789 .create_vcpu(id) 1790 .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?; 1791 1792 /* Map the GHCB page to the VMM(root) address space 1793 * The map is available after the vcpu creation. This address is mapped 1794 * to the overlay ghcb page of the Microsoft Hypervisor, don't have 1795 * to worry about the scenario when a guest changes the GHCB mapping. 1796 */ 1797 #[cfg(feature = "sev_snp")] 1798 let ghcb = if self.sev_snp_enabled { 1799 // SAFETY: Safe to call as VCPU has this map already available upon creation 1800 let addr = unsafe { 1801 libc::mmap( 1802 std::ptr::null_mut(), 1803 HV_PAGE_SIZE, 1804 libc::PROT_READ | libc::PROT_WRITE, 1805 libc::MAP_SHARED, 1806 vcpu_fd.as_raw_fd(), 1807 MSHV_VP_MMAP_OFFSET_GHCB as i64 * libc::sysconf(libc::_SC_PAGE_SIZE), 1808 ) 1809 }; 1810 if addr == libc::MAP_FAILED { 1811 // No point of continuing, without this mmap VMGEXIT will fail anyway 1812 // Return error 1813 return Err(vm::HypervisorVmError::MmapToRoot); 1814 } 1815 Some(Ghcb(addr as *mut svm_ghcb_base)) 1816 } else { 1817 None 1818 }; 1819 let vcpu = MshvVcpu { 1820 fd: vcpu_fd, 1821 vp_index: id, 1822 #[cfg(target_arch = "x86_64")] 1823 cpuid: Vec::new(), 1824 #[cfg(target_arch = "x86_64")] 1825 msrs: self.msrs.clone(), 1826 vm_ops, 1827 vm_fd: self.fd.clone(), 1828 #[cfg(feature = "sev_snp")] 1829 ghcb, 1830 #[cfg(feature = "sev_snp")] 1831 host_access_pages: ArcSwap::new(self.host_access_pages.load().clone()), 1832 }; 1833 Ok(Arc::new(vcpu)) 1834 } 1835 1836 #[cfg(target_arch = "x86_64")] 1837 fn enable_split_irq(&self) -> vm::Result<()> { 1838 Ok(()) 1839 } 1840 1841 #[cfg(target_arch = "x86_64")] 1842 fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> { 1843 Ok(()) 1844 } 1845 1846 fn register_ioevent( 1847 &self, 1848 fd: &EventFd, 1849 addr: &IoEventAddress, 1850 datamatch: Option<DataMatch>, 1851 ) -> vm::Result<()> { 1852 #[cfg(feature = "sev_snp")] 1853 if self.sev_snp_enabled { 1854 return Ok(()); 1855 } 1856 1857 let addr = &mshv_ioctls::IoEventAddress::from(*addr); 1858 debug!( 1859 "register_ioevent fd {} addr {:x?} datamatch {:?}", 1860 fd.as_raw_fd(), 1861 addr, 1862 datamatch 1863 ); 1864 if let Some(dm) = datamatch { 1865 match dm { 1866 vm::DataMatch::DataMatch32(mshv_dm32) => self 1867 .fd 1868 .register_ioevent(fd, addr, mshv_dm32) 1869 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 1870 vm::DataMatch::DataMatch64(mshv_dm64) => self 1871 .fd 1872 .register_ioevent(fd, addr, mshv_dm64) 1873 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 1874 } 1875 } else { 1876 self.fd 1877 .register_ioevent(fd, addr, NoDatamatch) 1878 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())) 1879 } 1880 } 1881 1882 /// Unregister an event from a certain address it has been previously registered to. 1883 fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> { 1884 #[cfg(feature = "sev_snp")] 1885 if self.sev_snp_enabled { 1886 return Ok(()); 1887 } 1888 1889 let addr = &mshv_ioctls::IoEventAddress::from(*addr); 1890 debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr); 1891 1892 self.fd 1893 .unregister_ioevent(fd, addr, NoDatamatch) 1894 .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into())) 1895 } 1896 1897 /// Creates a guest physical memory region. 
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // Keep track of every slot, writable or read-only.
        // For a read-only slot the hypervisor can still enable dirty-bit
        // tracking, but a VM exit happens before the dirty bits are set.
        self.dirty_log_slots.write().unwrap().insert(
            user_memory_region.guest_pfn,
            MshvDirtyLogSlot {
                guest_pfn: user_memory_region.guest_pfn,
                memory_size: user_memory_region.size,
            },
        );

        self.fd
            .map_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
        Ok(())
    }

    /// Removes a guest physical memory region.
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots
            .write()
            .unwrap()
            .remove(&user_memory_region.guest_pfn);

        self.fd
            .unmap_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
        Ok(())
    }

    fn make_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        _log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        let mut flags = 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
        if !readonly {
            flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
        }

        mshv_user_mem_region {
            flags,
            guest_pfn: guest_phys_addr >> PAGE_SHIFT,
            size: memory_size,
            userspace_addr,
            ..Default::default()
        }
        .into()
    }

    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = mshv_create_device {
            type_: MSHV_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match config {
            InterruptSourceConfig::MsiIrq(cfg) => mshv_user_irq_entry {
                gsi,
                address_lo: cfg.low_addr,
                address_hi: cfg.high_addr,
                data: cfg.data,
            }
            .into(),
            _ => {
                unreachable!()
            }
        }
    }
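
    /// # Example
    ///
    /// A hedged sketch of building one MSI routing entry and committing the
    /// whole routing table in a single call; `msi_cfg` stands in for an
    /// `InterruptSourceConfig::MsiIrq(..)` value built by the interrupt
    /// manager elsewhere:
    ///
    /// ```ignore
    /// let entry = vm.make_routing_entry(0, &msi_cfg);
    /// vm.set_gsi_routing(&[entry])?;
    /// ```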
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut msi_routing =
            vec_with_array_field::<mshv_user_irq_table, mshv_user_irq_entry>(entries.len());
        msi_routing[0].nr = entries.len() as u32;

        let entries: Vec<mshv_user_irq_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Mshv(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: msi_routing is initialized with entries.len() and is now being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [mshv_user_irq_entry] =
                msi_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_msi_routing(&msi_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }

    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        self.fd
            .enable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
    }

    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        // Before disabling dirty page tracking we need to set the dirty bits
        // in the hypervisor; this is a requirement of the Microsoft Hypervisor.
        for (_, s) in dirty_log_slots.iter() {
            self.fd
                .get_dirty_log(
                    s.guest_pfn,
                    s.memory_size as usize,
                    MSHV_GPAP_ACCESS_OP_SET as u8,
                )
                .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        }
        self.fd
            .disable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        Ok(())
    }

    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(
                base_gpa >> PAGE_SHIFT,
                memory_size as usize,
                MSHV_GPAP_ACCESS_OP_CLEAR as u8,
            )
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }

    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        let val = self
            .fd
            .get_partition_property(hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME)
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?;
        Ok(MshvClockData { ref_time: val }.into())
    }

    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        let data: MshvClockData = (*data).into();
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME,
                data.ref_time,
            )
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }

    /// Downcast to the underlying MshvVm type
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Initialize the SEV-SNP VM
    #[cfg(feature = "sev_snp")]
    fn sev_snp_init(&self) -> vm::Result<()> {
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_STATE,
                hv_partition_isolation_state_HV_PARTITION_ISOLATION_SECURE as u64,
            )
            .map_err(|e| vm::HypervisorVmError::InitializeSevSnp(e.into()))
    }

    ///
    /// Import isolated pages; these pages will be used
    /// for the PSP (Platform Security Processor) measurement.
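    ///
    /// # Example
    ///
    /// A hedged sketch of importing a small run of guest pages for measurement;
    /// the PFN range is illustrative and the page type constant is assumed to
    /// be provided by mshv-bindings alongside the page size constant used in
    /// the debug assertion below:
    ///
    /// ```ignore
    /// let pfns: Vec<u64> = (0x100..0x110).collect();
    /// vm.import_isolated_pages(
    ///     hv_isolated_page_type_HV_ISOLATED_PAGE_TYPE_NORMAL,
    ///     hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB,
    ///     &pfns,
    /// )?;
    /// ```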
    #[cfg(feature = "sev_snp")]
    fn import_isolated_pages(
        &self,
        page_type: u32,
        page_size: u32,
        pages: &[u64],
    ) -> vm::Result<()> {
        debug_assert!(page_size == hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB);
        if pages.is_empty() {
            return Ok(());
        }

        let mut isolated_pages =
            vec_with_array_field::<mshv_import_isolated_pages, u64>(pages.len());
        isolated_pages[0].page_type = page_type as u8;
        isolated_pages[0].page_count = pages.len() as u64;
        // SAFETY: isolated_pages is initialized with pages.len() and is now being turned into
        // pages_slice with pages.len() again. It is guaranteed to be large enough to hold
        // everything from pages.
        unsafe {
            let pages_slice: &mut [u64] = isolated_pages[0].guest_pfns.as_mut_slice(pages.len());
            pages_slice.copy_from_slice(pages);
        }
        self.fd
            .import_isolated_pages(&isolated_pages[0])
            .map_err(|e| vm::HypervisorVmError::ImportIsolatedPages(e.into()))
    }

    ///
    /// Complete the isolated import, telling the hypervisor that
    /// importing the pages into guest memory is complete.
    ///
    #[cfg(feature = "sev_snp")]
    fn complete_isolated_import(
        &self,
        snp_id_block: IGVM_VHS_SNP_ID_BLOCK,
        host_data: [u8; 32],
        id_block_enabled: u8,
    ) -> vm::Result<()> {
        let mut auth_info = hv_snp_id_auth_info {
            id_key_algorithm: snp_id_block.id_key_algorithm,
            auth_key_algorithm: snp_id_block.author_key_algorithm,
            ..Default::default()
        };
        // Each of the r/s signature components is 576 bits long
        auth_info.id_block_signature[..SIG_R_COMPONENT_SIZE_IN_BYTES]
            .copy_from_slice(snp_id_block.id_key_signature.r_comp.as_ref());
        auth_info.id_block_signature
            [SIG_R_COMPONENT_SIZE_IN_BYTES..SIG_R_AND_S_COMPONENT_SIZE_IN_BYTES]
            .copy_from_slice(snp_id_block.id_key_signature.s_comp.as_ref());
        auth_info.id_key[..ECDSA_CURVE_ID_SIZE_IN_BYTES]
            .copy_from_slice(snp_id_block.id_public_key.curve.to_le_bytes().as_ref());
        auth_info.id_key[ECDSA_SIG_X_COMPONENT_START..ECDSA_SIG_X_COMPONENT_END]
            .copy_from_slice(snp_id_block.id_public_key.qx.as_ref());
        auth_info.id_key[ECDSA_SIG_Y_COMPONENT_START..ECDSA_SIG_Y_COMPONENT_END]
            .copy_from_slice(snp_id_block.id_public_key.qy.as_ref());

        let data = mshv_complete_isolated_import {
            import_data: hv_partition_complete_isolated_import_data {
                psp_parameters: hv_psp_launch_finish_data {
                    id_block: hv_snp_id_block {
                        launch_digest: snp_id_block.ld,
                        family_id: snp_id_block.family_id,
                        image_id: snp_id_block.image_id,
                        version: snp_id_block.version,
                        guest_svn: snp_id_block.guest_svn,
                        policy: get_default_snp_guest_policy(),
                    },
                    id_auth_info: auth_info,
                    host_data,
                    id_block_enabled,
                    author_key_enabled: 0,
                },
            },
        };
        self.fd
            .complete_isolated_import(&data)
            .map_err(|e| vm::HypervisorVmError::CompleteIsolatedImport(e.into()))
    }

    #[cfg(target_arch = "aarch64")]
    fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
        unimplemented!()
    }

    #[cfg(target_arch = "aarch64")]
    fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
        unimplemented!()
    }

    /// Pause the VM
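    ///
    /// # Example
    ///
    /// A hedged sketch of the freeze/thaw pairing typically used around
    /// snapshotting or migration phases:
    ///
    /// ```ignore
    /// vm.pause()?; // sets HV_PARTITION_PROPERTY_TIME_FREEZE to 1
    /// // ... capture or transfer state ...
    /// vm.resume()?; // sets HV_PARTITION_PROPERTY_TIME_FREEZE back to 0
    /// ```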
    fn pause(&self) -> vm::Result<()> {
        // Freeze the partition
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
                1u64,
            )
            .map_err(|e| {
                vm::HypervisorVmError::SetVmProperty(anyhow!(
                    "Failed to set partition property: {}",
                    e
                ))
            })
    }

    /// Resume the VM
    fn resume(&self) -> vm::Result<()> {
        // Unfreeze the partition by clearing the TIME_FREEZE property
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
                0u64,
            )
            .map_err(|e| {
                vm::HypervisorVmError::SetVmProperty(anyhow!(
                    "Failed to set partition property: {}",
                    e
                ))
            })
    }

    #[cfg(feature = "sev_snp")]
    fn gain_page_access(&self, gpa: u64, size: u32) -> vm::Result<()> {
        use mshv_ioctls::set_bits;
        const ONE_GB: usize = 1024 * 1024 * 1024;

        if !self.sev_snp_enabled {
            return Ok(());
        }

        let start_gpfn: u64 = gpa >> PAGE_SHIFT;
        let end_gpfn: u64 = (gpa + size as u64 - 1) >> PAGE_SHIFT;

        // Enlarge the bitmap if the PFN is greater than the bitmap length
        if end_gpfn >= self.host_access_pages.load().as_ref().len() as u64 {
            self.host_access_pages.rcu(|bitmap| {
                let mut bm = bitmap.as_ref().clone();
                bm.enlarge(ONE_GB);
                bm
            });
        }

        let gpas: Vec<u64> = (start_gpfn..=end_gpfn)
            .filter(|x| {
                !self
                    .host_access_pages
                    .load()
                    .as_ref()
                    .is_bit_set(*x as usize)
            })
            .map(|x| x << PAGE_SHIFT)
            .collect();

        if !gpas.is_empty() {
            let mut gpa_list =
                vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len());
            gpa_list[0].page_count = gpas.len() as u64;
            gpa_list[0].flags = set_bits!(
                u8,
                MSHV_GPA_HOST_ACCESS_BIT_ACQUIRE,
                MSHV_GPA_HOST_ACCESS_BIT_READABLE,
                MSHV_GPA_HOST_ACCESS_BIT_WRITABLE
            );

            // SAFETY: gpa_list is initialized with gpas.len() and is now being turned into
            // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold
            // everything from gpas.
            unsafe {
                let gpas_slice: &mut [u64] = gpa_list[0].guest_pfns.as_mut_slice(gpas.len());
                gpas_slice.copy_from_slice(gpas.as_slice());
            }

            self.fd
                .modify_gpa_host_access(&gpa_list[0])
                .map_err(|e| vm::HypervisorVmError::ModifyGpaHostAccess(e.into()))?;

            for acquired_gpa in gpas {
                self.host_access_pages.rcu(|bitmap| {
                    let bm = bitmap.clone();
                    bm.set_bit((acquired_gpa >> PAGE_SHIFT) as usize);
                    bm
                });
            }
        }

        Ok(())
    }
}
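// Note on the `gain_page_access` PFN math above (a hedged worked example): for
// `gpa = 0x1000_0800` and `size = 0x2000`, `start_gpfn = 0x1000_0800 >> 12 = 0x10000`
// and `end_gpfn = (0x1000_0800 + 0x2000 - 1) >> 12 = 0x10002`, so the inclusive
// range covers the three 4 KiB pages the request touches; only the PFNs whose
// bit is not already set in `host_access_pages` are submitted to the hypervisor.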