1 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause 2 // 3 // Copyright © 2020, Microsoft Corporation 4 // 5 6 use std::any::Any; 7 use std::collections::HashMap; 8 #[cfg(feature = "sev_snp")] 9 use std::num::NonZeroUsize; 10 use std::sync::{Arc, RwLock}; 11 12 #[cfg(feature = "sev_snp")] 13 use arc_swap::ArcSwap; 14 use mshv_bindings::*; 15 use mshv_ioctls::{set_registers_64, InterruptRequest, Mshv, NoDatamatch, VcpuFd, VmFd, VmType}; 16 use vfio_ioctls::VfioDeviceFd; 17 use vm::DataMatch; 18 #[cfg(feature = "sev_snp")] 19 use vm_memory::bitmap::AtomicBitmap; 20 21 use crate::arch::emulator::PlatformEmulator; 22 #[cfg(target_arch = "x86_64")] 23 use crate::arch::x86::emulator::Emulator; 24 use crate::mshv::emulator::MshvEmulatorContext; 25 use crate::vm::{self, InterruptSourceConfig, VmOps}; 26 use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType}; 27 #[cfg(feature = "sev_snp")] 28 mod snp_constants; 29 // x86_64 dependencies 30 #[cfg(target_arch = "x86_64")] 31 pub mod x86_64; 32 #[cfg(target_arch = "x86_64")] 33 use std::fs::File; 34 use std::os::unix::io::AsRawFd; 35 36 #[cfg(feature = "sev_snp")] 37 use igvm_defs::IGVM_VHS_SNP_ID_BLOCK; 38 #[cfg(feature = "sev_snp")] 39 use snp_constants::*; 40 use vmm_sys_util::eventfd::EventFd; 41 #[cfg(target_arch = "x86_64")] 42 pub use x86_64::*; 43 #[cfg(target_arch = "x86_64")] 44 pub use x86_64::{emulator, VcpuMshvState}; 45 /// 46 /// Export generically-named wrappers of mshv-bindings for Unix-based platforms 47 /// 48 pub use { 49 mshv_bindings::mshv_create_device as CreateDevice, 50 mshv_bindings::mshv_device_attr as DeviceAttr, mshv_ioctls, mshv_ioctls::DeviceFd, 51 }; 52 53 #[cfg(target_arch = "x86_64")] 54 use crate::arch::x86::{CpuIdEntry, FpuState, MsrEntry}; 55 #[cfg(target_arch = "x86_64")] 56 use crate::ClockData; 57 use crate::{ 58 CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion, 59 USER_MEMORY_REGION_ADJUSTABLE, USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ, 60 USER_MEMORY_REGION_WRITE, 61 }; 62 63 pub const PAGE_SHIFT: usize = 12; 64 65 impl From<mshv_user_mem_region> for UserMemoryRegion { 66 fn from(region: mshv_user_mem_region) -> Self { 67 let mut flags: u32 = USER_MEMORY_REGION_READ | USER_MEMORY_REGION_ADJUSTABLE; 68 if region.flags & (1 << MSHV_SET_MEM_BIT_WRITABLE) != 0 { 69 flags |= USER_MEMORY_REGION_WRITE; 70 } 71 if region.flags & (1 << MSHV_SET_MEM_BIT_EXECUTABLE) != 0 { 72 flags |= USER_MEMORY_REGION_EXECUTE; 73 } 74 75 UserMemoryRegion { 76 guest_phys_addr: (region.guest_pfn << PAGE_SHIFT as u64) 77 + (region.userspace_addr & ((1 << PAGE_SHIFT) - 1)), 78 memory_size: region.size, 79 userspace_addr: region.userspace_addr, 80 flags, 81 ..Default::default() 82 } 83 } 84 } 85 86 #[cfg(target_arch = "x86_64")] 87 impl From<MshvClockData> for ClockData { 88 fn from(d: MshvClockData) -> Self { 89 ClockData::Mshv(d) 90 } 91 } 92 93 #[cfg(target_arch = "x86_64")] 94 impl From<ClockData> for MshvClockData { 95 fn from(ms: ClockData) -> Self { 96 match ms { 97 ClockData::Mshv(s) => s, 98 /* Needed in case other hypervisors are enabled */ 99 #[allow(unreachable_patterns)] 100 _ => unreachable!("MSHV clock data is not valid"), 101 } 102 } 103 } 104 105 impl From<UserMemoryRegion> for mshv_user_mem_region { 106 fn from(region: UserMemoryRegion) -> Self { 107 let mut flags: u8 = 0; 108 if region.flags & USER_MEMORY_REGION_WRITE != 0 { 109 flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE; 110 } 111 if region.flags & USER_MEMORY_REGION_EXECUTE != 0 { 112 flags |= 1 << MSHV_SET_MEM_BIT_EXECUTABLE; 
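        // Worked example of the PFN math used in these two conversions
        // (illustrative values, not from a real mapping): with PAGE_SHIFT = 12,
        // a guest_phys_addr of 0x1_0000_2345 becomes guest_pfn 0x10_0002
        // (0x1_0000_2345 >> 12). The reverse conversion above restores the low
        // 12 bits from userspace_addr, so the two impls only round-trip exactly
        // when the host mapping shares the guest's sub-page offset.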
113 } 114 115 mshv_user_mem_region { 116 guest_pfn: region.guest_phys_addr >> PAGE_SHIFT, 117 size: region.memory_size, 118 userspace_addr: region.userspace_addr, 119 flags, 120 ..Default::default() 121 } 122 } 123 } 124 125 impl From<mshv_ioctls::IoEventAddress> for IoEventAddress { 126 fn from(a: mshv_ioctls::IoEventAddress) -> Self { 127 match a { 128 mshv_ioctls::IoEventAddress::Pio(x) => Self::Pio(x), 129 mshv_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x), 130 } 131 } 132 } 133 134 impl From<IoEventAddress> for mshv_ioctls::IoEventAddress { 135 fn from(a: IoEventAddress) -> Self { 136 match a { 137 IoEventAddress::Pio(x) => Self::Pio(x), 138 IoEventAddress::Mmio(x) => Self::Mmio(x), 139 } 140 } 141 } 142 143 impl From<VcpuMshvState> for CpuState { 144 fn from(s: VcpuMshvState) -> Self { 145 CpuState::Mshv(s) 146 } 147 } 148 149 impl From<CpuState> for VcpuMshvState { 150 fn from(s: CpuState) -> Self { 151 match s { 152 CpuState::Mshv(s) => s, 153 /* Needed in case other hypervisors are enabled */ 154 #[allow(unreachable_patterns)] 155 _ => panic!("CpuState is not valid"), 156 } 157 } 158 } 159 160 impl From<mshv_bindings::StandardRegisters> for crate::StandardRegisters { 161 fn from(s: mshv_bindings::StandardRegisters) -> Self { 162 crate::StandardRegisters::Mshv(s) 163 } 164 } 165 166 impl From<crate::StandardRegisters> for mshv_bindings::StandardRegisters { 167 fn from(e: crate::StandardRegisters) -> Self { 168 match e { 169 crate::StandardRegisters::Mshv(e) => e, 170 /* Needed in case other hypervisors are enabled */ 171 #[allow(unreachable_patterns)] 172 _ => panic!("StandardRegisters are not valid"), 173 } 174 } 175 } 176 177 impl From<mshv_user_irq_entry> for IrqRoutingEntry { 178 fn from(s: mshv_user_irq_entry) -> Self { 179 IrqRoutingEntry::Mshv(s) 180 } 181 } 182 183 impl From<IrqRoutingEntry> for mshv_user_irq_entry { 184 fn from(e: IrqRoutingEntry) -> Self { 185 match e { 186 IrqRoutingEntry::Mshv(e) => e, 187 /* Needed in case other hypervisors are enabled */ 188 #[allow(unreachable_patterns)] 189 _ => panic!("IrqRoutingEntry is not valid"), 190 } 191 } 192 } 193 194 #[cfg(target_arch = "aarch64")] 195 impl From<mshv_bindings::MshvRegList> for crate::RegList { 196 fn from(s: mshv_bindings::MshvRegList) -> Self { 197 crate::RegList::Mshv(s) 198 } 199 } 200 201 #[cfg(target_arch = "aarch64")] 202 impl From<crate::RegList> for mshv_bindings::MshvRegList { 203 fn from(e: crate::RegList) -> Self { 204 match e { 205 crate::RegList::Mshv(e) => e, 206 /* Needed in case other hypervisors are enabled */ 207 #[allow(unreachable_patterns)] 208 _ => panic!("RegList is not valid"), 209 } 210 } 211 } 212 213 #[cfg(target_arch = "aarch64")] 214 impl From<mshv_bindings::MshvVcpuInit> for crate::VcpuInit { 215 fn from(s: mshv_bindings::MshvVcpuInit) -> Self { 216 crate::VcpuInit::Mshv(s) 217 } 218 } 219 220 #[cfg(target_arch = "aarch64")] 221 impl From<crate::VcpuInit> for mshv_bindings::MshvVcpuInit { 222 fn from(e: crate::VcpuInit) -> Self { 223 match e { 224 crate::VcpuInit::Mshv(e) => e, 225 /* Needed in case other hypervisors are enabled */ 226 #[allow(unreachable_patterns)] 227 _ => panic!("VcpuInit is not valid"), 228 } 229 } 230 } 231 232 struct MshvDirtyLogSlot { 233 guest_pfn: u64, 234 memory_size: u64, 235 } 236 237 /// Wrapper over mshv system ioctls. 238 pub struct MshvHypervisor { 239 mshv: Mshv, 240 } 241 242 impl MshvHypervisor { 243 #[cfg(target_arch = "x86_64")] 244 /// 245 /// Retrieve the list of MSRs supported by MSHV. 
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }

    fn create_vm_with_type_and_memory_int(
        &self,
        vm_type: u64,
        #[cfg(feature = "sev_snp")] _mem_size: Option<u64>,
    ) -> hypervisor::Result<Arc<dyn crate::Vm>> {
        let mshv_vm_type: VmType = match VmType::try_from(vm_type) {
            Ok(vm_type) => vm_type,
            Err(_) => return Err(hypervisor::HypervisorError::UnsupportedVmType()),
        };
        let fd: VmFd;
        loop {
            match self.mshv.create_vm_with_type(mshv_vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // EINTR means the ioctl was interrupted; retry rather
                        // than treating it as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        // Set additional partition property for SEV-SNP partition.
        #[cfg(target_arch = "x86_64")]
        if mshv_vm_type == VmType::Snp {
            let snp_policy = snp::get_default_snp_guest_policy();
            let vmgexit_offloads = snp::get_default_vmgexit_offload_features();
            // SAFETY: access union fields
            unsafe {
                debug!(
                    "Setting the partition isolation policy as: 0x{:x}",
                    snp_policy.as_uint64
                );
                fd.set_partition_property(
                    hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_POLICY,
                    snp_policy.as_uint64,
                )
                .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
                debug!(
                    "Setting the partition property to enable VMGEXIT offloads as: 0x{:x}",
                    vmgexit_offloads.as_uint64
                );
                fd.set_partition_property(
                    hv_partition_property_code_HV_PARTITION_PROPERTY_SEV_VMGEXIT_OFFLOADS,
                    vmgexit_offloads.as_uint64,
                )
                .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
            }
        }

        // By default, the Microsoft Hypervisor injects a fault into the guest
        // when an unimplemented MSR is accessed. Override this with a more
        // suitable option: ignore writes from the guest and return zero for
        // reads of unimplemented MSRs.
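        // In guest-visible terms (assumed from the property name set below):
        // RDMSR of an unknown MSR reads as 0 and WRMSR to it is silently
        // dropped, rather than a #GP being injected into the guest.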
313 #[cfg(target_arch = "x86_64")] 314 fd.set_partition_property( 315 hv_partition_property_code_HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION, 316 hv_unimplemented_msr_action_HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO as u64, 317 ) 318 .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?; 319 320 // Always create a frozen partition 321 fd.set_partition_property( 322 hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE, 323 1u64, 324 ) 325 .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?; 326 327 let vm_fd = Arc::new(fd); 328 329 #[cfg(target_arch = "x86_64")] 330 { 331 let msr_list = self.get_msr_list()?; 332 let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize; 333 let mut msrs: Vec<MsrEntry> = vec![ 334 MsrEntry { 335 ..Default::default() 336 }; 337 num_msrs 338 ]; 339 let indices = msr_list.as_slice(); 340 for (pos, index) in indices.iter().enumerate() { 341 msrs[pos].index = *index; 342 } 343 344 Ok(Arc::new(MshvVm { 345 fd: vm_fd, 346 msrs, 347 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())), 348 #[cfg(feature = "sev_snp")] 349 sev_snp_enabled: mshv_vm_type == VmType::Snp, 350 #[cfg(feature = "sev_snp")] 351 host_access_pages: ArcSwap::new( 352 AtomicBitmap::new( 353 _mem_size.unwrap_or_default() as usize, 354 NonZeroUsize::new(HV_PAGE_SIZE).unwrap(), 355 ) 356 .into(), 357 ), 358 })) 359 } 360 361 #[cfg(target_arch = "aarch64")] 362 { 363 Ok(Arc::new(MshvVm { 364 fd: vm_fd, 365 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())), 366 })) 367 } 368 } 369 } 370 371 impl MshvHypervisor { 372 /// Create a hypervisor based on Mshv 373 #[allow(clippy::new_ret_no_self)] 374 pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> { 375 let mshv_obj = 376 Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?; 377 Ok(Arc::new(MshvHypervisor { mshv: mshv_obj })) 378 } 379 /// Check if the hypervisor is available 380 pub fn is_available() -> hypervisor::Result<bool> { 381 match std::fs::metadata("/dev/mshv") { 382 Ok(_) => Ok(true), 383 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false), 384 Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck( 385 err.into(), 386 )), 387 } 388 } 389 } 390 391 /// Implementation of Hypervisor trait for Mshv 392 /// 393 /// # Examples 394 /// 395 /// ``` 396 /// use hypervisor::mshv::MshvHypervisor; 397 /// use std::sync::Arc; 398 /// let mshv = MshvHypervisor::new().unwrap(); 399 /// let hypervisor = Arc::new(mshv); 400 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 401 /// ``` 402 impl hypervisor::Hypervisor for MshvHypervisor { 403 /// 404 /// Returns the type of the hypervisor 405 /// 406 fn hypervisor_type(&self) -> HypervisorType { 407 HypervisorType::Mshv 408 } 409 410 /// 411 /// Create a Vm of a specific type using the underlying hypervisor, passing memory size 412 /// Return a hypervisor-agnostic Vm trait object 413 /// 414 /// # Examples 415 /// 416 /// ``` 417 /// use hypervisor::kvm::KvmHypervisor; 418 /// use hypervisor::kvm::KvmVm; 419 /// let hypervisor = KvmHypervisor::new().unwrap(); 420 /// let vm = hypervisor.create_vm_with_type(0, 512*1024*1024).unwrap(); 421 /// ``` 422 fn create_vm_with_type_and_memory( 423 &self, 424 vm_type: u64, 425 #[cfg(feature = "sev_snp")] _mem_size: u64, 426 ) -> hypervisor::Result<Arc<dyn vm::Vm>> { 427 self.create_vm_with_type_and_memory_int( 428 vm_type, 429 #[cfg(feature = "sev_snp")] 430 Some(_mem_size), 431 ) 432 } 433 434 fn 
create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn crate::Vm>> { 435 self.create_vm_with_type_and_memory_int( 436 vm_type, 437 #[cfg(feature = "sev_snp")] 438 None, 439 ) 440 } 441 442 /// Create a mshv vm object and return the object as Vm trait object 443 /// 444 /// # Examples 445 /// 446 /// ``` 447 /// # extern crate hypervisor; 448 /// use hypervisor::mshv::MshvHypervisor; 449 /// use hypervisor::mshv::MshvVm; 450 /// let hypervisor = MshvHypervisor::new().unwrap(); 451 /// let vm = hypervisor.create_vm().unwrap(); 452 /// ``` 453 fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> { 454 let vm_type = 0; 455 self.create_vm_with_type(vm_type) 456 } 457 #[cfg(target_arch = "x86_64")] 458 /// 459 /// Get the supported CpuID 460 /// 461 fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> { 462 let mut cpuid = Vec::new(); 463 let functions: [u32; 2] = [0x1, 0xb]; 464 465 for function in functions { 466 cpuid.push(CpuIdEntry { 467 function, 468 ..Default::default() 469 }); 470 } 471 Ok(cpuid) 472 } 473 474 /// Get maximum number of vCPUs 475 fn get_max_vcpus(&self) -> u32 { 476 // TODO: Using HV_MAXIMUM_PROCESSORS would be better 477 // but the ioctl API is limited to u8 478 256 479 } 480 481 fn get_guest_debug_hw_bps(&self) -> usize { 482 0 483 } 484 485 #[cfg(target_arch = "aarch64")] 486 /// 487 /// Retrieve AArch64 host maximum IPA size supported by MSHV. 488 /// 489 fn get_host_ipa_limit(&self) -> i32 { 490 let host_ipa = self.mshv.get_host_partition_property( 491 hv_partition_property_code_HV_PARTITION_PROPERTY_PHYSICAL_ADDRESS_WIDTH as u64, 492 ); 493 494 match host_ipa { 495 Ok(ipa) => ipa, 496 Err(e) => { 497 panic!("Failed to get host IPA limit: {:?}", e); 498 } 499 } 500 } 501 } 502 503 #[cfg(feature = "sev_snp")] 504 struct Ghcb(*mut svm_ghcb_base); 505 506 #[cfg(feature = "sev_snp")] 507 // SAFETY: struct is based on GHCB page in the hypervisor, 508 // safe to Send across threads 509 unsafe impl Send for Ghcb {} 510 511 #[cfg(feature = "sev_snp")] 512 // SAFETY: struct is based on GHCB page in the hypervisor, 513 // safe to Sync across threads as this is only required for Vcpu trait 514 // functionally not used anyway 515 unsafe impl Sync for Ghcb {} 516 517 /// Vcpu struct for Microsoft Hypervisor 518 pub struct MshvVcpu { 519 fd: VcpuFd, 520 vp_index: u8, 521 #[cfg(target_arch = "x86_64")] 522 cpuid: Vec<CpuIdEntry>, 523 #[cfg(target_arch = "x86_64")] 524 msrs: Vec<MsrEntry>, 525 vm_ops: Option<Arc<dyn vm::VmOps>>, 526 vm_fd: Arc<VmFd>, 527 #[cfg(feature = "sev_snp")] 528 ghcb: Option<Ghcb>, 529 #[cfg(feature = "sev_snp")] 530 host_access_pages: ArcSwap<AtomicBitmap>, 531 } 532 533 /// Implementation of Vcpu trait for Microsoft Hypervisor 534 /// 535 /// # Examples 536 /// 537 /// ``` 538 /// use hypervisor::mshv::MshvHypervisor; 539 /// use std::sync::Arc; 540 /// let mshv = MshvHypervisor::new().unwrap(); 541 /// let hypervisor = Arc::new(mshv); 542 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 543 /// let vcpu = vm.create_vcpu(0, None).unwrap(); 544 /// ``` 545 impl cpu::Vcpu for MshvVcpu { 546 /// 547 /// Returns StandardRegisters with default value set 548 /// 549 #[cfg(target_arch = "x86_64")] 550 fn create_standard_regs(&self) -> crate::StandardRegisters { 551 mshv_bindings::StandardRegisters::default().into() 552 } 553 #[cfg(target_arch = "x86_64")] 554 /// 555 /// Returns the vCPU general purpose registers. 
    ///
    fn get_regs(&self) -> cpu::Result<crate::StandardRegisters> {
        Ok(self
            .fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
    fn set_regs(&self, regs: &crate::StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<crate::arch::x86::SpecialRegisters> {
        Ok(self
            .fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &crate::arch::x86::SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: mshv_bindings::FloatingPointUnit = (*fpu).clone().into();
        self.fd
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        let succ = self
            .fd
            .get_msrs(&mut mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &mshv_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }
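    // Illustrative use of the MSR accessors above and below (sketch only, with a
    // hypothetical caller `vcpu`; not part of this module):
    //
    //     let mut msrs = vec![MsrEntry { index: 0xC000_0080, ..Default::default() }]; // IA32_EFER
    //     let read = vcpu.get_msrs(&mut msrs)?; // fills `data` for the first `read` entries
    //     msrs[0].data |= 1 << 0;               // e.g. set SCE (syscall enable)
    //     vcpu.set_msrs(&msrs[..read])?;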
    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        self.fd
            .set_msrs(&mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }

    #[allow(non_upper_case_globals)]
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.run() {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    // SAFETY: access_info is valid, otherwise we won't be here
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    /*
                     * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
                     *
                     * Cloud Hypervisor doesn't support fw_cfg at the moment. It does
                     * support 0x402 under the "fwdebug" feature flag, but that feature
                     * is not enabled by default and is considered legacy.
                     *
                     * OVMF unconditionally pokes these IO ports with string IO.
                     *
                     * Rather than implementing string IO support now, which would not
                     * accomplish much, skip these ports explicitly to avoid panicking.
                     *
                     * Proper string IO support can be added once we gain the ability
                     * to translate guest virtual addresses to guest physical addresses
                     * on MSHV.
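                     *
                     * For reference (assumed port map, matching QEMU's defaults):
                     * 0x510/0x511 are the fw_cfg selector/data ports, 0x514 is the
                     * fw_cfg DMA address port, and 0x402 is the ISA debug console.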
703 */ 704 match port { 705 0x402 | 0x510 | 0x511 | 0x514 => { 706 let insn_len = info.header.instruction_length() as u64; 707 708 /* Advance RIP and update RAX */ 709 let arr_reg_name_value = [ 710 ( 711 hv_register_name_HV_X64_REGISTER_RIP, 712 info.header.rip + insn_len, 713 ), 714 (hv_register_name_HV_X64_REGISTER_RAX, ret_rax), 715 ]; 716 set_registers_64!(self.fd, arr_reg_name_value) 717 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; 718 return Ok(cpu::VmExit::Ignore); 719 } 720 _ => {} 721 } 722 723 assert!( 724 // SAFETY: access_info is valid, otherwise we won't be here 725 (unsafe { access_info.__bindgen_anon_1.string_op() } != 1), 726 "String IN/OUT not supported" 727 ); 728 assert!( 729 // SAFETY: access_info is valid, otherwise we won't be here 730 (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1), 731 "Rep IN/OUT not supported" 732 ); 733 734 if is_write { 735 let data = (info.rax as u32).to_le_bytes(); 736 if let Some(vm_ops) = &self.vm_ops { 737 vm_ops 738 .pio_write(port.into(), &data[0..len]) 739 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; 740 } 741 } else { 742 if let Some(vm_ops) = &self.vm_ops { 743 vm_ops 744 .pio_read(port.into(), &mut data[0..len]) 745 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; 746 } 747 748 let v = u32::from_le_bytes(data); 749 /* Preserve high bits in EAX but clear out high bits in RAX */ 750 let mask = 0xffffffff >> (32 - len * 8); 751 let eax = (info.rax as u32 & !mask) | (v & mask); 752 ret_rax = eax as u64; 753 } 754 755 let insn_len = info.header.instruction_length() as u64; 756 757 /* Advance RIP and update RAX */ 758 let arr_reg_name_value = [ 759 ( 760 hv_register_name_HV_X64_REGISTER_RIP, 761 info.header.rip + insn_len, 762 ), 763 (hv_register_name_HV_X64_REGISTER_RAX, ret_rax), 764 ]; 765 set_registers_64!(self.fd, arr_reg_name_value) 766 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; 767 Ok(cpu::VmExit::Ignore) 768 } 769 #[cfg(target_arch = "x86_64")] 770 msg_type @ (hv_message_type_HVMSG_UNMAPPED_GPA 771 | hv_message_type_HVMSG_GPA_INTERCEPT) => { 772 let info = x.to_memory_info().unwrap(); 773 let insn_len = info.instruction_byte_count as usize; 774 let gva = info.guest_virtual_address; 775 let gpa = info.guest_physical_address; 776 777 debug!("Exit ({:?}) GVA {:x} GPA {:x}", msg_type, gva, gpa); 778 779 let mut context = MshvEmulatorContext { 780 vcpu: self, 781 map: (gva, gpa), 782 }; 783 784 // Create a new emulator. 785 let mut emul = Emulator::new(&mut context); 786 787 // Emulate the trapped instruction, and only the first one. 788 let new_state = emul 789 .emulate_first_insn( 790 self.vp_index as usize, 791 &info.instruction_bytes[..insn_len], 792 ) 793 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; 794 795 // Set CPU state back. 
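                    // set_cpu_state() writes the emulator's updated register state
                    // back to the vCPU through the hypervisor, so the guest can
                    // resume past the emulated instruction instead of re-faulting.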
796 context 797 .set_cpu_state(self.vp_index as usize, new_state) 798 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; 799 800 Ok(cpu::VmExit::Ignore) 801 } 802 #[cfg(feature = "sev_snp")] 803 hv_message_type_HVMSG_GPA_ATTRIBUTE_INTERCEPT => { 804 let info = x.to_gpa_attribute_info().unwrap(); 805 let host_vis = info.__bindgen_anon_1.host_visibility(); 806 if host_vis >= HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE { 807 warn!("Ignored attribute intercept with full host visibility"); 808 return Ok(cpu::VmExit::Ignore); 809 } 810 811 let num_ranges = info.__bindgen_anon_1.range_count(); 812 assert!(num_ranges >= 1); 813 if num_ranges > 1 { 814 return Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 815 "Unhandled VCPU exit(GPA_ATTRIBUTE_INTERCEPT): Expected num_ranges to be 1 but found num_ranges {:?}", 816 num_ranges 817 ))); 818 } 819 820 // TODO: we could also deny the request with HvCallCompleteIntercept 821 let mut gpas = Vec::new(); 822 let ranges = info.ranges; 823 let (gfn_start, gfn_count) = snp::parse_gpa_range(ranges[0]).unwrap(); 824 debug!( 825 "Releasing pages: gfn_start: {:x?}, gfn_count: {:?}", 826 gfn_start, gfn_count 827 ); 828 let gpa_start = gfn_start * HV_PAGE_SIZE as u64; 829 for i in 0..gfn_count { 830 gpas.push(gpa_start + i * HV_PAGE_SIZE as u64); 831 } 832 833 let mut gpa_list = 834 vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len()); 835 gpa_list[0].page_count = gpas.len() as u64; 836 gpa_list[0].flags = 0; 837 if host_vis & HV_MAP_GPA_READABLE != 0 { 838 gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_READABLE; 839 } 840 if host_vis & HV_MAP_GPA_WRITABLE != 0 { 841 gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_WRITABLE; 842 } 843 844 // SAFETY: gpa_list initialized with gpas.len() and now it is being turned into 845 // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold 846 // everything from gpas. 
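                        // (vec_with_array_field allocates a Vec of the header struct
                        // with enough trailing zeroed elements to cover gpas.len()
                        // u64 entries of the flexible-array member, so taking a
                        // slice of length gpas.len() from gpa_list[0] stays in
                        // bounds.)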
847 unsafe { 848 let gpas_slice: &mut [u64] = 849 gpa_list[0].guest_pfns.as_mut_slice(gpas.len()); 850 gpas_slice.copy_from_slice(gpas.as_slice()); 851 } 852 853 self.vm_fd 854 .modify_gpa_host_access(&gpa_list[0]) 855 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(anyhow!( 856 "Unhandled VCPU exit: attribute intercept - couldn't modify host access {}", e 857 )))?; 858 // Guest is revoking the shared access, so we need to update the bitmap 859 self.host_access_pages.rcu(|_bitmap| { 860 let bm = self.host_access_pages.load().as_ref().clone(); 861 bm.reset_addr_range(gpa_start as usize, gfn_count as usize); 862 bm 863 }); 864 Ok(cpu::VmExit::Ignore) 865 } 866 #[cfg(target_arch = "x86_64")] 867 hv_message_type_HVMSG_UNACCEPTED_GPA => { 868 let info = x.to_memory_info().unwrap(); 869 let gva = info.guest_virtual_address; 870 let gpa = info.guest_physical_address; 871 872 Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 873 "Unhandled VCPU exit: Unaccepted GPA({:x}) found at GVA({:x})", 874 gpa, 875 gva, 876 ))) 877 } 878 #[cfg(target_arch = "x86_64")] 879 hv_message_type_HVMSG_X64_CPUID_INTERCEPT => { 880 let info = x.to_cpuid_info().unwrap(); 881 debug!("cpuid eax: {:x}", { info.rax }); 882 Ok(cpu::VmExit::Ignore) 883 } 884 #[cfg(target_arch = "x86_64")] 885 hv_message_type_HVMSG_X64_MSR_INTERCEPT => { 886 let info = x.to_msr_info().unwrap(); 887 if info.header.intercept_access_type == 0 { 888 debug!("msr read: {:x}", { info.msr_number }); 889 } else { 890 debug!("msr write: {:x}", { info.msr_number }); 891 } 892 Ok(cpu::VmExit::Ignore) 893 } 894 #[cfg(target_arch = "x86_64")] 895 hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => { 896 //TODO: Handler for VMCALL here. 897 let info = x.to_exception_info().unwrap(); 898 debug!("Exception Info {:?}", { info.exception_vector }); 899 Ok(cpu::VmExit::Ignore) 900 } 901 #[cfg(target_arch = "x86_64")] 902 hv_message_type_HVMSG_X64_APIC_EOI => { 903 let info = x.to_apic_eoi_info().unwrap(); 904 // The kernel should dispatch the EOI to the correct thread. 905 // Check the VP index is the same as the one we have. 906 assert!(info.vp_index == self.vp_index as u32); 907 // The interrupt vector in info is u32, but x86 only supports 256 vectors. 908 // There is no good way to recover from this if the hypervisor messes around. 909 // Just unwrap. 910 Ok(cpu::VmExit::IoapicEoi( 911 info.interrupt_vector.try_into().unwrap(), 912 )) 913 } 914 #[cfg(feature = "sev_snp")] 915 hv_message_type_HVMSG_X64_SEV_VMGEXIT_INTERCEPT => { 916 let info = x.to_vmg_intercept_info().unwrap(); 917 let ghcb_data = info.ghcb_msr >> GHCB_INFO_BIT_WIDTH; 918 let ghcb_msr = svm_ghcb_msr { 919 as_uint64: info.ghcb_msr, 920 }; 921 // Safe to use unwrap, for sev_snp guest we already have the 922 // GHCB pointer wrapped in the option, otherwise this place is not reached. 923 let ghcb = self.ghcb.as_ref().unwrap().0; 924 925 // SAFETY: Accessing a union element from bindgen generated bindings. 926 let ghcb_op = unsafe { ghcb_msr.__bindgen_anon_2.ghcb_info() as u32 }; 927 // Sanity check on the header fields before handling other operations. 
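                    // GHCB MSR protocol refresher (illustrative, per the GHCB spec):
                    // the low GHCB_INFO_BIT_WIDTH bits of ghcb_msr carry the
                    // operation (ghcb_info), the remaining upper bits carry
                    // ghcb_data. E.g. a hypothetical value 0x0000_0000_0008_2002
                    // with a 12-bit info field decodes to ghcb_info = 0x002 and
                    // ghcb_data = 0x82.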
928 assert!(info.header.intercept_access_type == HV_INTERCEPT_ACCESS_EXECUTE as u8); 929 930 match ghcb_op { 931 GHCB_INFO_HYP_FEATURE_REQUEST => { 932 // Pre-condition: GHCB data must be zero 933 assert!(ghcb_data == 0); 934 let mut ghcb_response = GHCB_INFO_HYP_FEATURE_RESPONSE as u64; 935 // Indicate support for basic SEV-SNP features 936 ghcb_response |= 937 (GHCB_HYP_FEATURE_SEV_SNP << GHCB_INFO_BIT_WIDTH) as u64; 938 // Indicate support for SEV-SNP AP creation 939 ghcb_response |= (GHCB_HYP_FEATURE_SEV_SNP_AP_CREATION 940 << GHCB_INFO_BIT_WIDTH) 941 as u64; 942 debug!( 943 "GHCB_INFO_HYP_FEATURE_REQUEST: Supported features: {:0x}", 944 ghcb_response 945 ); 946 let arr_reg_name_value = 947 [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)]; 948 set_registers_64!(self.fd, arr_reg_name_value) 949 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; 950 } 951 GHCB_INFO_REGISTER_REQUEST => { 952 let mut ghcb_gpa = hv_x64_register_sev_ghcb::default(); 953 954 // Disable the previously used GHCB page. 955 self.disable_prev_ghcb_page()?; 956 957 // SAFETY: Accessing a union element from bindgen generated bindings. 958 unsafe { 959 ghcb_gpa.__bindgen_anon_1.set_enabled(1); 960 ghcb_gpa 961 .__bindgen_anon_1 962 .set_page_number(ghcb_msr.__bindgen_anon_2.gpa_page_number()); 963 } 964 // SAFETY: Accessing a union element from bindgen generated bindings. 965 let reg_name_value = unsafe { 966 [( 967 hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA, 968 ghcb_gpa.as_uint64, 969 )] 970 }; 971 972 set_registers_64!(self.fd, reg_name_value) 973 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; 974 975 let mut resp_ghcb_msr = svm_ghcb_msr::default(); 976 // SAFETY: Accessing a union element from bindgen generated bindings. 977 unsafe { 978 resp_ghcb_msr 979 .__bindgen_anon_2 980 .set_ghcb_info(GHCB_INFO_REGISTER_RESPONSE as u64); 981 resp_ghcb_msr.__bindgen_anon_2.set_gpa_page_number( 982 ghcb_msr.__bindgen_anon_2.gpa_page_number(), 983 ); 984 debug!("GHCB GPA is {:x}", ghcb_gpa.as_uint64); 985 } 986 // SAFETY: Accessing a union element from bindgen generated bindings. 987 let reg_name_value = unsafe { 988 [( 989 hv_register_name_HV_X64_REGISTER_GHCB, 990 resp_ghcb_msr.as_uint64, 991 )] 992 }; 993 994 set_registers_64!(self.fd, reg_name_value) 995 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; 996 } 997 GHCB_INFO_SEV_INFO_REQUEST => { 998 let sev_cpuid_function = 0x8000_001F; 999 let cpu_leaf = self 1000 .fd 1001 .get_cpuid_values(sev_cpuid_function, 0, 0, 0) 1002 .unwrap(); 1003 let ebx = cpu_leaf[1]; 1004 // First 6-byte of EBX represents page table encryption bit number 1005 let pbit_encryption = (ebx & 0x3f) as u8; 1006 let mut ghcb_response = GHCB_INFO_SEV_INFO_RESPONSE as u64; 1007 1008 // GHCBData[63:48] specifies the maximum GHCB protocol version supported 1009 ghcb_response |= (GHCB_PROTOCOL_VERSION_MAX as u64) << 48; 1010 // GHCBData[47:32] specifies the minimum GHCB protocol version supported 1011 ghcb_response |= (GHCB_PROTOCOL_VERSION_MIN as u64) << 32; 1012 // GHCBData[31:24] specifies the SEV page table encryption bit number. 
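                                    // Example encoding (illustrative values only): with
                                    // GHCB_PROTOCOL_VERSION_MAX = 2, MIN = 1 and an
                                    // encryption bit of 51 (0x33), the response would be
                                    // 0x0002_0001_3300_0000 | GHCB_INFO_SEV_INFO_RESPONSE.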
1013 ghcb_response |= (pbit_encryption as u64) << 24; 1014 1015 let arr_reg_name_value = 1016 [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)]; 1017 set_registers_64!(self.fd, arr_reg_name_value) 1018 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; 1019 } 1020 GHCB_INFO_NORMAL => { 1021 let exit_code = 1022 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_code as u32; 1023 1024 match exit_code { 1025 SVM_EXITCODE_HV_DOORBELL_PAGE => { 1026 let exit_info1 = 1027 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32; 1028 match exit_info1 { 1029 SVM_NAE_HV_DOORBELL_PAGE_GET_PREFERRED => { 1030 // Hypervisor does not have any preference for doorbell GPA. 1031 let preferred_doorbell_gpa: u64 = 0xFFFFFFFFFFFFFFFF; 1032 set_svm_field_u64_ptr!( 1033 ghcb, 1034 exit_info2, 1035 preferred_doorbell_gpa 1036 ); 1037 } 1038 SVM_NAE_HV_DOORBELL_PAGE_SET => { 1039 let exit_info2 = info 1040 .__bindgen_anon_2 1041 .__bindgen_anon_1 1042 .sw_exit_info2; 1043 let mut ghcb_doorbell_gpa = 1044 hv_x64_register_sev_hv_doorbell::default(); 1045 // SAFETY: Accessing a union element from bindgen generated bindings. 1046 unsafe { 1047 ghcb_doorbell_gpa.__bindgen_anon_1.set_enabled(1); 1048 ghcb_doorbell_gpa 1049 .__bindgen_anon_1 1050 .set_page_number(exit_info2 >> PAGE_SHIFT); 1051 } 1052 // SAFETY: Accessing a union element from bindgen generated bindings. 1053 let reg_names = unsafe { 1054 [( 1055 hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA, 1056 ghcb_doorbell_gpa.as_uint64, 1057 )] 1058 }; 1059 set_registers_64!(self.fd, reg_names).map_err(|e| { 1060 cpu::HypervisorCpuError::SetRegister(e.into()) 1061 })?; 1062 1063 set_svm_field_u64_ptr!(ghcb, exit_info2, exit_info2); 1064 1065 // Clear the SW_EXIT_INFO1 register to indicate no error 1066 self.clear_swexit_info1()?; 1067 } 1068 SVM_NAE_HV_DOORBELL_PAGE_QUERY => { 1069 let mut reg_assocs = [ hv_register_assoc { 1070 name: hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA, 1071 ..Default::default() 1072 } ]; 1073 self.fd.get_reg(&mut reg_assocs).unwrap(); 1074 // SAFETY: Accessing a union element from bindgen generated bindings. 1075 let doorbell_gpa = unsafe { reg_assocs[0].value.reg64 }; 1076 1077 set_svm_field_u64_ptr!(ghcb, exit_info2, doorbell_gpa); 1078 1079 // Clear the SW_EXIT_INFO1 register to indicate no error 1080 self.clear_swexit_info1()?; 1081 } 1082 SVM_NAE_HV_DOORBELL_PAGE_CLEAR => { 1083 set_svm_field_u64_ptr!(ghcb, exit_info2, 0); 1084 } 1085 _ => { 1086 panic!( 1087 "SVM_EXITCODE_HV_DOORBELL_PAGE: Unhandled exit code: {:0x}", 1088 exit_info1 1089 ); 1090 } 1091 } 1092 } 1093 SVM_EXITCODE_IOIO_PROT => { 1094 let exit_info1 = 1095 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32; 1096 let port_info = hv_sev_vmgexit_port_info { 1097 as_uint32: exit_info1, 1098 }; 1099 1100 let port = 1101 // SAFETY: Accessing a union element from bindgen generated bindings. 1102 unsafe { port_info.__bindgen_anon_1.intercepted_port() }; 1103 let mut len = 4; 1104 // SAFETY: Accessing a union element from bindgen generated bindings. 1105 unsafe { 1106 if port_info.__bindgen_anon_1.operand_size_16bit() == 1 { 1107 len = 2; 1108 } else if port_info.__bindgen_anon_1.operand_size_8bit() 1109 == 1 1110 { 1111 len = 1; 1112 } 1113 } 1114 let is_write = 1115 // SAFETY: Accessing a union element from bindgen generated bindings. 
1116 unsafe { port_info.__bindgen_anon_1.access_type() == 0 }; 1117 // SAFETY: Accessing the field from a mapped address 1118 let mut data = unsafe { (*ghcb).rax.to_le_bytes() }; 1119 1120 if is_write { 1121 if let Some(vm_ops) = &self.vm_ops { 1122 vm_ops.pio_write(port.into(), &data[..len]).map_err( 1123 |e| cpu::HypervisorCpuError::RunVcpu(e.into()), 1124 )?; 1125 } 1126 } else { 1127 if let Some(vm_ops) = &self.vm_ops { 1128 vm_ops 1129 .pio_read(port.into(), &mut data[..len]) 1130 .map_err(|e| { 1131 cpu::HypervisorCpuError::RunVcpu(e.into()) 1132 })?; 1133 } 1134 set_svm_field_u64_ptr!(ghcb, rax, u64::from_le_bytes(data)); 1135 } 1136 1137 // Clear the SW_EXIT_INFO1 register to indicate no error 1138 self.clear_swexit_info1()?; 1139 } 1140 SVM_EXITCODE_MMIO_READ => { 1141 let src_gpa = 1142 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1; 1143 let data_len = 1144 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2 1145 as usize; 1146 // Sanity check to make sure data len is within supported range. 1147 assert!(data_len <= 0x8); 1148 1149 let mut data: Vec<u8> = vec![0; data_len]; 1150 if let Some(vm_ops) = &self.vm_ops { 1151 vm_ops.mmio_read(src_gpa, &mut data).map_err(|e| { 1152 cpu::HypervisorCpuError::RunVcpu(e.into()) 1153 })?; 1154 } 1155 // Copy the data to the shared buffer of the GHCB page 1156 let mut buffer_data = [0; 8]; 1157 buffer_data[..data_len].copy_from_slice(&data[..data_len]); 1158 // SAFETY: Updating the value of mapped area 1159 unsafe { (*ghcb).shared[0] = u64::from_le_bytes(buffer_data) }; 1160 1161 // Clear the SW_EXIT_INFO1 register to indicate no error 1162 self.clear_swexit_info1()?; 1163 } 1164 SVM_EXITCODE_MMIO_WRITE => { 1165 let dst_gpa = 1166 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1; 1167 let data_len = 1168 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2 1169 as usize; 1170 // Sanity check to make sure data len is within supported range. 1171 assert!(data_len <= 0x8); 1172 1173 let mut data = vec![0; data_len]; 1174 // SAFETY: Accessing data from a mapped address 1175 let bytes_shared_ghcb = 1176 unsafe { (*ghcb).shared[0].to_le_bytes() }; 1177 data.copy_from_slice(&bytes_shared_ghcb[..data_len]); 1178 1179 if let Some(vm_ops) = &self.vm_ops { 1180 vm_ops.mmio_write(dst_gpa, &data).map_err(|e| { 1181 cpu::HypervisorCpuError::RunVcpu(e.into()) 1182 })?; 1183 } 1184 1185 // Clear the SW_EXIT_INFO1 register to indicate no error 1186 self.clear_swexit_info1()?; 1187 } 1188 SVM_EXITCODE_SNP_GUEST_REQUEST 1189 | SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST => { 1190 if exit_code == SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST { 1191 info!("Fetching extended guest request is not supported"); 1192 // We don't support extended guest request, so we just write empty data. 1193 // This matches the behavior of KVM in Linux 6.11. 1194 1195 // Read RBX from the GHCB. 1196 // SAFETY: Accessing data from a mapped address 1197 let data_gpa = unsafe { (*ghcb).rax }; 1198 // SAFETY: Accessing data from a mapped address 1199 let data_npages = unsafe { (*ghcb).rbx }; 1200 1201 if data_npages > 0 { 1202 // The certificates are terminated by 24 zero bytes. 
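                                    // (A certificate table entry is 24 bytes: a 16-byte GUID
                                    // plus 32-bit offset and length fields, so an all-zero
                                    // entry acts as the table terminator. This assumes the
                                    // GHCB extended guest request certificate format.)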
1203 // TODO: Need to check if data_gpa is the address of the shared buffer in the GHCB page 1204 // in that case we should clear the shared buffer(24 bytes) 1205 self.gpa_write(data_gpa, &[0; 24])?; 1206 } 1207 } 1208 1209 let req_gpa = 1210 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1; 1211 let rsp_gpa = 1212 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2; 1213 1214 let mshv_psp_req = 1215 mshv_issue_psp_guest_request { req_gpa, rsp_gpa }; 1216 self.vm_fd 1217 .psp_issue_guest_request(&mshv_psp_req) 1218 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; 1219 1220 debug!( 1221 "SNP guest request: req_gpa {:0x} rsp_gpa {:0x}", 1222 req_gpa, rsp_gpa 1223 ); 1224 1225 set_svm_field_u64_ptr!(ghcb, exit_info2, 0); 1226 } 1227 SVM_EXITCODE_SNP_AP_CREATION => { 1228 let vmsa_gpa = 1229 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2; 1230 let apic_id = 1231 info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 >> 32; 1232 debug!( 1233 "SNP AP CREATE REQUEST with VMSA GPA {:0x}, and APIC ID {:?}", 1234 vmsa_gpa, apic_id 1235 ); 1236 1237 let mshv_ap_create_req = mshv_sev_snp_ap_create { 1238 vp_id: apic_id, 1239 vmsa_gpa, 1240 }; 1241 self.vm_fd 1242 .sev_snp_ap_create(&mshv_ap_create_req) 1243 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?; 1244 1245 // Clear the SW_EXIT_INFO1 register to indicate no error 1246 self.clear_swexit_info1()?; 1247 } 1248 _ => panic!( 1249 "GHCB_INFO_NORMAL: Unhandled exit code: {:0x}", 1250 exit_code 1251 ), 1252 } 1253 } 1254 _ => panic!("Unsupported VMGEXIT operation: {:0x}", ghcb_op), 1255 } 1256 1257 Ok(cpu::VmExit::Ignore) 1258 } 1259 exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1260 "Unhandled VCPU exit {:?}", 1261 exit 1262 ))), 1263 }, 1264 1265 Err(e) => match e.errno() { 1266 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore), 1267 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1268 "VCPU error {:?}", 1269 e 1270 ))), 1271 }, 1272 } 1273 } 1274 1275 #[cfg(target_arch = "aarch64")] 1276 fn init_pmu(&self, irq: u32) -> cpu::Result<()> { 1277 unimplemented!() 1278 } 1279 1280 #[cfg(target_arch = "aarch64")] 1281 fn has_pmu_support(&self) -> bool { 1282 unimplemented!() 1283 } 1284 1285 #[cfg(target_arch = "aarch64")] 1286 fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> { 1287 unimplemented!() 1288 } 1289 1290 #[cfg(target_arch = "aarch64")] 1291 fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> { 1292 unimplemented!() 1293 } 1294 1295 #[cfg(target_arch = "aarch64")] 1296 fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> { 1297 unimplemented!() 1298 } 1299 1300 #[cfg(target_arch = "aarch64")] 1301 fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> { 1302 unimplemented!() 1303 } 1304 1305 #[cfg(target_arch = "aarch64")] 1306 fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> { 1307 unimplemented!() 1308 } 1309 1310 #[cfg(target_arch = "aarch64")] 1311 fn get_regs(&self) -> cpu::Result<StandardRegisters> { 1312 unimplemented!() 1313 } 1314 1315 #[cfg(target_arch = "aarch64")] 1316 fn vcpu_finalize(&self, _feature: i32) -> cpu::Result<()> { 1317 unimplemented!() 1318 } 1319 1320 #[cfg(target_arch = "aarch64")] 1321 fn vcpu_get_finalized_features(&self) -> i32 { 1322 unimplemented!() 1323 } 1324 1325 #[cfg(target_arch = "aarch64")] 1326 fn vcpu_set_processor_features( 1327 &self, 1328 _vm: &Arc<dyn crate::Vm>, 1329 _kvi: &mut crate::VcpuInit, 1330 _id: u8, 1331 ) -> cpu::Result<()> { 1332 unimplemented!() 1333 } 1334 1335 #[cfg(target_arch 
= "aarch64")] 1336 fn create_vcpu_init(&self) -> crate::VcpuInit { 1337 unimplemented!(); 1338 } 1339 1340 #[cfg(target_arch = "x86_64")] 1341 /// 1342 /// X86 specific call to setup the CPUID registers. 1343 /// 1344 fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> { 1345 let cpuid: Vec<mshv_bindings::hv_cpuid_entry> = cpuid.iter().map(|e| (*e).into()).collect(); 1346 let mshv_cpuid = <CpuId>::from_entries(&cpuid) 1347 .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?; 1348 1349 self.fd 1350 .register_intercept_result_cpuid(&mshv_cpuid) 1351 .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into())) 1352 } 1353 1354 #[cfg(target_arch = "x86_64")] 1355 /// 1356 /// X86 specific call to retrieve the CPUID registers. 1357 /// 1358 fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> { 1359 Ok(self.cpuid.clone()) 1360 } 1361 1362 #[cfg(target_arch = "x86_64")] 1363 /// 1364 /// X86 specific call to retrieve cpuid leaf 1365 /// 1366 fn get_cpuid_values( 1367 &self, 1368 function: u32, 1369 index: u32, 1370 xfem: u64, 1371 xss: u64, 1372 ) -> cpu::Result<[u32; 4]> { 1373 self.fd 1374 .get_cpuid_values(function, index, xfem, xss) 1375 .map_err(|e| cpu::HypervisorCpuError::GetCpuidVales(e.into())) 1376 } 1377 1378 #[cfg(target_arch = "x86_64")] 1379 /// 1380 /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 1381 /// 1382 fn get_lapic(&self) -> cpu::Result<crate::arch::x86::LapicState> { 1383 Ok(self 1384 .fd 1385 .get_lapic() 1386 .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))? 1387 .into()) 1388 } 1389 1390 #[cfg(target_arch = "x86_64")] 1391 /// 1392 /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 1393 /// 1394 fn set_lapic(&self, lapic: &crate::arch::x86::LapicState) -> cpu::Result<()> { 1395 let lapic: mshv_bindings::LapicState = (*lapic).clone().into(); 1396 self.fd 1397 .set_lapic(&lapic) 1398 .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into())) 1399 } 1400 1401 /// 1402 /// Returns the vcpu's current "multiprocessing state". 1403 /// 1404 fn get_mp_state(&self) -> cpu::Result<MpState> { 1405 Ok(MpState::Mshv) 1406 } 1407 1408 /// 1409 /// Sets the vcpu's current "multiprocessing state". 1410 /// 1411 fn set_mp_state(&self, _mp_state: MpState) -> cpu::Result<()> { 1412 Ok(()) 1413 } 1414 1415 #[cfg(target_arch = "x86_64")] 1416 /// 1417 /// Set CPU state for x86_64 guest. 1418 /// 1419 fn set_state(&self, state: &CpuState) -> cpu::Result<()> { 1420 let mut state: VcpuMshvState = state.clone().into(); 1421 self.set_msrs(&state.msrs)?; 1422 self.set_vcpu_events(&state.vcpu_events)?; 1423 self.set_regs(&state.regs.into())?; 1424 self.set_sregs(&state.sregs.into())?; 1425 self.set_fpu(&state.fpu)?; 1426 self.set_xcrs(&state.xcrs)?; 1427 // These registers are global and needed to be set only for first VCPU 1428 // as Microsoft Hypervisor allows setting this register for only one VCPU 1429 if self.vp_index == 0 { 1430 self.fd 1431 .set_misc_regs(&state.misc) 1432 .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))? 1433 } 1434 self.fd 1435 .set_debug_regs(&state.dbg) 1436 .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?; 1437 self.fd 1438 .set_all_vp_state_components(&mut state.vp_states) 1439 .map_err(|e| cpu::HypervisorCpuError::SetAllVpStateComponents(e.into()))?; 1440 Ok(()) 1441 } 1442 1443 #[cfg(target_arch = "aarch64")] 1444 /// 1445 /// Set CPU state for aarch64 guest. 
1446 /// 1447 fn set_state(&self, state: &CpuState) -> cpu::Result<()> { 1448 unimplemented!() 1449 } 1450 1451 #[cfg(target_arch = "x86_64")] 1452 /// 1453 /// Get CPU State for x86_64 guest 1454 /// 1455 fn state(&self) -> cpu::Result<CpuState> { 1456 let regs = self.get_regs()?; 1457 let sregs = self.get_sregs()?; 1458 let xcrs = self.get_xcrs()?; 1459 let fpu = self.get_fpu()?; 1460 let vcpu_events = self.get_vcpu_events()?; 1461 let mut msrs = self.msrs.clone(); 1462 self.get_msrs(&mut msrs)?; 1463 let misc = self 1464 .fd 1465 .get_misc_regs() 1466 .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?; 1467 let dbg = self 1468 .fd 1469 .get_debug_regs() 1470 .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?; 1471 let vp_states = self 1472 .fd 1473 .get_all_vp_state_components() 1474 .map_err(|e| cpu::HypervisorCpuError::GetAllVpStateComponents(e.into()))?; 1475 1476 Ok(VcpuMshvState { 1477 msrs, 1478 vcpu_events, 1479 regs: regs.into(), 1480 sregs: sregs.into(), 1481 fpu, 1482 xcrs, 1483 dbg, 1484 misc, 1485 vp_states, 1486 } 1487 .into()) 1488 } 1489 1490 #[cfg(target_arch = "aarch64")] 1491 /// 1492 /// Get CPU state for aarch64 guest. 1493 /// 1494 fn state(&self) -> cpu::Result<CpuState> { 1495 unimplemented!() 1496 } 1497 1498 #[cfg(target_arch = "x86_64")] 1499 /// 1500 /// Translate guest virtual address to guest physical address 1501 /// 1502 fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> { 1503 let r = self 1504 .fd 1505 .translate_gva(gva, flags) 1506 .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?; 1507 1508 let gpa = r.0; 1509 // SAFETY: r is valid, otherwise this function will have returned 1510 let result_code = unsafe { r.1.__bindgen_anon_1.result_code }; 1511 1512 Ok((gpa, result_code)) 1513 } 1514 1515 #[cfg(target_arch = "x86_64")] 1516 /// 1517 /// Return the list of initial MSR entries for a VCPU 1518 /// 1519 fn boot_msr_entries(&self) -> Vec<MsrEntry> { 1520 use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB}; 1521 1522 [ 1523 msr!(msr_index::MSR_IA32_SYSENTER_CS), 1524 msr!(msr_index::MSR_IA32_SYSENTER_ESP), 1525 msr!(msr_index::MSR_IA32_SYSENTER_EIP), 1526 msr!(msr_index::MSR_STAR), 1527 msr!(msr_index::MSR_CSTAR), 1528 msr!(msr_index::MSR_LSTAR), 1529 msr!(msr_index::MSR_KERNEL_GS_BASE), 1530 msr!(msr_index::MSR_SYSCALL_MASK), 1531 msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB), 1532 ] 1533 .to_vec() 1534 } 1535 1536 /// 1537 /// Sets the AMD specific vcpu's sev control register. 1538 /// 1539 #[cfg(feature = "sev_snp")] 1540 fn set_sev_control_register(&self, vmsa_pfn: u64) -> cpu::Result<()> { 1541 let sev_control_reg = snp::get_sev_control_register(vmsa_pfn); 1542 1543 self.fd 1544 .set_sev_control_register(sev_control_reg) 1545 .map_err(|e| cpu::HypervisorCpuError::SetSevControlRegister(e.into())) 1546 } 1547 #[cfg(target_arch = "x86_64")] 1548 /// 1549 /// Trigger NMI interrupt 1550 /// 1551 fn nmi(&self) -> cpu::Result<()> { 1552 let cfg = InterruptRequest { 1553 interrupt_type: hv_interrupt_type_HV_X64_INTERRUPT_TYPE_NMI, 1554 apic_id: self.vp_index as u64, 1555 level_triggered: false, 1556 vector: 0, 1557 logical_destination_mode: false, 1558 long_mode: false, 1559 }; 1560 self.vm_fd 1561 .request_virtual_interrupt(&cfg) 1562 .map_err(|e| cpu::HypervisorCpuError::Nmi(e.into())) 1563 } 1564 } 1565 1566 impl MshvVcpu { 1567 /// 1568 /// Deactivate previously used GHCB page. 
1569 /// 1570 #[cfg(feature = "sev_snp")] 1571 fn disable_prev_ghcb_page(&self) -> cpu::Result<()> { 1572 let mut reg_assocs = [hv_register_assoc { 1573 name: hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA, 1574 ..Default::default() 1575 }]; 1576 self.fd.get_reg(&mut reg_assocs).unwrap(); 1577 // SAFETY: Accessing a union element from bindgen generated bindings. 1578 let prev_ghcb_gpa = unsafe { reg_assocs[0].value.reg64 }; 1579 1580 debug!("Prev GHCB GPA is {:x}", prev_ghcb_gpa); 1581 1582 let mut ghcb_gpa = hv_x64_register_sev_ghcb::default(); 1583 1584 // SAFETY: Accessing a union element from bindgen generated bindings. 1585 unsafe { 1586 ghcb_gpa.__bindgen_anon_1.set_enabled(0); 1587 ghcb_gpa.__bindgen_anon_1.set_page_number(prev_ghcb_gpa); 1588 } 1589 1590 // SAFETY: Accessing a union element from bindgen generated bindings. 1591 let reg_name_value = unsafe { 1592 [( 1593 hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA, 1594 ghcb_gpa.as_uint64, 1595 )] 1596 }; 1597 1598 set_registers_64!(self.fd, reg_name_value) 1599 .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?; 1600 1601 Ok(()) 1602 } 1603 #[cfg(target_arch = "x86_64")] 1604 /// 1605 /// X86 specific call that returns the vcpu's current "xcrs". 1606 /// 1607 fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> { 1608 self.fd 1609 .get_xcrs() 1610 .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into())) 1611 } 1612 1613 #[cfg(target_arch = "x86_64")] 1614 /// 1615 /// X86 specific call that sets the vcpu's current "xcrs". 1616 /// 1617 fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> { 1618 self.fd 1619 .set_xcrs(xcrs) 1620 .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into())) 1621 } 1622 1623 #[cfg(target_arch = "x86_64")] 1624 /// 1625 /// Returns currently pending exceptions, interrupts, and NMIs as well as related 1626 /// states of the vcpu. 1627 /// 1628 fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> { 1629 self.fd 1630 .get_vcpu_events() 1631 .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into())) 1632 } 1633 1634 #[cfg(target_arch = "x86_64")] 1635 /// 1636 /// Sets pending exceptions, interrupts, and NMIs as well as related states 1637 /// of the vcpu. 1638 /// 1639 fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> { 1640 self.fd 1641 .set_vcpu_events(events) 1642 .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into())) 1643 } 1644 1645 /// 1646 /// Clear SW_EXIT_INFO1 register for SEV-SNP guests. 1647 /// 1648 #[cfg(feature = "sev_snp")] 1649 fn clear_swexit_info1(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> { 1650 // Clear the SW_EXIT_INFO1 register to indicate no error 1651 // Safe to use unwrap, for sev_snp guest we already have the 1652 // GHCB pointer wrapped in the option, otherwise this place is not reached. 1653 let ghcb = self.ghcb.as_ref().unwrap().0; 1654 set_svm_field_u64_ptr!(ghcb, exit_info1, 0); 1655 1656 Ok(cpu::VmExit::Ignore) 1657 } 1658 1659 #[cfg(feature = "sev_snp")] 1660 fn gpa_write(&self, gpa: u64, data: &[u8]) -> cpu::Result<()> { 1661 for (gpa, chunk) in (gpa..) 
1662 .step_by(HV_READ_WRITE_GPA_MAX_SIZE as usize) 1663 .zip(data.chunks(HV_READ_WRITE_GPA_MAX_SIZE as usize)) 1664 { 1665 let mut data = [0; HV_READ_WRITE_GPA_MAX_SIZE as usize]; 1666 data[..chunk.len()].copy_from_slice(chunk); 1667 1668 let mut rw_gpa_arg = mshv_bindings::mshv_read_write_gpa { 1669 base_gpa: gpa, 1670 byte_count: chunk.len() as u32, 1671 data, 1672 ..Default::default() 1673 }; 1674 self.fd 1675 .gpa_write(&mut rw_gpa_arg) 1676 .map_err(|e| cpu::HypervisorCpuError::GpaWrite(e.into()))?; 1677 } 1678 1679 Ok(()) 1680 } 1681 } 1682 1683 /// Wrapper over Mshv VM ioctls. 1684 pub struct MshvVm { 1685 fd: Arc<VmFd>, 1686 #[cfg(target_arch = "x86_64")] 1687 msrs: Vec<MsrEntry>, 1688 dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>, 1689 #[cfg(feature = "sev_snp")] 1690 sev_snp_enabled: bool, 1691 #[cfg(feature = "sev_snp")] 1692 host_access_pages: ArcSwap<AtomicBitmap>, 1693 } 1694 1695 impl MshvVm { 1696 /// 1697 /// Creates an in-kernel device. 1698 /// 1699 /// See the documentation for `MSHV_CREATE_DEVICE`. 1700 fn create_device(&self, device: &mut CreateDevice) -> vm::Result<VfioDeviceFd> { 1701 let device_fd = self 1702 .fd 1703 .create_device(device) 1704 .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?; 1705 Ok(VfioDeviceFd::new_from_mshv(device_fd)) 1706 } 1707 } 1708 1709 /// 1710 /// Implementation of Vm trait for Mshv 1711 /// 1712 /// # Examples 1713 /// 1714 /// ``` 1715 /// extern crate hypervisor; 1716 /// use hypervisor::mshv::MshvHypervisor; 1717 /// use std::sync::Arc; 1718 /// let mshv = MshvHypervisor::new().unwrap(); 1719 /// let hypervisor = Arc::new(mshv); 1720 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 1721 /// ``` 1722 impl vm::Vm for MshvVm { 1723 #[cfg(target_arch = "x86_64")] 1724 /// 1725 /// Sets the address of the one-page region in the VM's address space. 1726 /// 1727 fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> { 1728 Ok(()) 1729 } 1730 1731 #[cfg(target_arch = "x86_64")] 1732 /// 1733 /// Sets the address of the three-page region in the VM's address space. 1734 /// 1735 fn set_tss_address(&self, _offset: usize) -> vm::Result<()> { 1736 Ok(()) 1737 } 1738 1739 /// 1740 /// Creates an in-kernel interrupt controller. 1741 /// 1742 fn create_irq_chip(&self) -> vm::Result<()> { 1743 Ok(()) 1744 } 1745 1746 /// 1747 /// Registers an event that will, when signaled, trigger the `gsi` IRQ. 1748 /// 1749 fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 1750 debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi); 1751 1752 self.fd 1753 .register_irqfd(fd, gsi) 1754 .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?; 1755 1756 Ok(()) 1757 } 1758 1759 /// 1760 /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ. 1761 /// 1762 fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 1763 debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi); 1764 1765 self.fd 1766 .unregister_irqfd(fd, gsi) 1767 .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?; 1768 1769 Ok(()) 1770 } 1771 1772 /// 1773 /// Creates a VcpuFd object from a vcpu RawFd. 
1774 /// 1775 fn create_vcpu( 1776 &self, 1777 id: u8, 1778 vm_ops: Option<Arc<dyn VmOps>>, 1779 ) -> vm::Result<Arc<dyn cpu::Vcpu>> { 1780 let vcpu_fd = self 1781 .fd 1782 .create_vcpu(id) 1783 .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?; 1784 1785 /* Map the GHCB page to the VMM(root) address space 1786 * The map is available after the vcpu creation. This address is mapped 1787 * to the overlay ghcb page of the Microsoft Hypervisor, don't have 1788 * to worry about the scenario when a guest changes the GHCB mapping. 1789 */ 1790 #[cfg(feature = "sev_snp")] 1791 let ghcb = if self.sev_snp_enabled { 1792 // SAFETY: Safe to call as VCPU has this map already available upon creation 1793 let addr = unsafe { 1794 libc::mmap( 1795 std::ptr::null_mut(), 1796 HV_PAGE_SIZE, 1797 libc::PROT_READ | libc::PROT_WRITE, 1798 libc::MAP_SHARED, 1799 vcpu_fd.as_raw_fd(), 1800 MSHV_VP_MMAP_OFFSET_GHCB as i64 * libc::sysconf(libc::_SC_PAGE_SIZE), 1801 ) 1802 }; 1803 if addr == libc::MAP_FAILED { 1804 // No point of continuing, without this mmap VMGEXIT will fail anyway 1805 // Return error 1806 return Err(vm::HypervisorVmError::MmapToRoot); 1807 } 1808 Some(Ghcb(addr as *mut svm_ghcb_base)) 1809 } else { 1810 None 1811 }; 1812 let vcpu = MshvVcpu { 1813 fd: vcpu_fd, 1814 vp_index: id, 1815 #[cfg(target_arch = "x86_64")] 1816 cpuid: Vec::new(), 1817 #[cfg(target_arch = "x86_64")] 1818 msrs: self.msrs.clone(), 1819 vm_ops, 1820 vm_fd: self.fd.clone(), 1821 #[cfg(feature = "sev_snp")] 1822 ghcb, 1823 #[cfg(feature = "sev_snp")] 1824 host_access_pages: ArcSwap::new(self.host_access_pages.load().clone()), 1825 }; 1826 Ok(Arc::new(vcpu)) 1827 } 1828 1829 #[cfg(target_arch = "x86_64")] 1830 fn enable_split_irq(&self) -> vm::Result<()> { 1831 Ok(()) 1832 } 1833 1834 #[cfg(target_arch = "x86_64")] 1835 fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> { 1836 Ok(()) 1837 } 1838 1839 fn register_ioevent( 1840 &self, 1841 fd: &EventFd, 1842 addr: &IoEventAddress, 1843 datamatch: Option<DataMatch>, 1844 ) -> vm::Result<()> { 1845 #[cfg(feature = "sev_snp")] 1846 if self.sev_snp_enabled { 1847 return Ok(()); 1848 } 1849 1850 let addr = &mshv_ioctls::IoEventAddress::from(*addr); 1851 debug!( 1852 "register_ioevent fd {} addr {:x?} datamatch {:?}", 1853 fd.as_raw_fd(), 1854 addr, 1855 datamatch 1856 ); 1857 if let Some(dm) = datamatch { 1858 match dm { 1859 vm::DataMatch::DataMatch32(mshv_dm32) => self 1860 .fd 1861 .register_ioevent(fd, addr, mshv_dm32) 1862 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 1863 vm::DataMatch::DataMatch64(mshv_dm64) => self 1864 .fd 1865 .register_ioevent(fd, addr, mshv_dm64) 1866 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 1867 } 1868 } else { 1869 self.fd 1870 .register_ioevent(fd, addr, NoDatamatch) 1871 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())) 1872 } 1873 } 1874 1875 /// Unregister an event from a certain address it has been previously registered to. 1876 fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> { 1877 #[cfg(feature = "sev_snp")] 1878 if self.sev_snp_enabled { 1879 return Ok(()); 1880 } 1881 1882 let addr = &mshv_ioctls::IoEventAddress::from(*addr); 1883 debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr); 1884 1885 self.fd 1886 .unregister_ioevent(fd, addr, NoDatamatch) 1887 .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into())) 1888 } 1889 1890 /// Creates a guest physical memory region. 
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // Keep track of the slot regardless of whether it is read-only.
        // For a read-only slot the hypervisor can still enable dirty-bit
        // tracking, but a VM exit happens before the dirty bits are set.
        self.dirty_log_slots.write().unwrap().insert(
            user_memory_region.guest_pfn,
            MshvDirtyLogSlot {
                guest_pfn: user_memory_region.guest_pfn,
                memory_size: user_memory_region.size,
            },
        );

        self.fd
            .map_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
        Ok(())
    }

    /// Removes a guest physical memory region.
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots
            .write()
            .unwrap()
            .remove(&user_memory_region.guest_pfn);

        self.fd
            .unmap_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
        Ok(())
    }

    fn make_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        _log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        let mut flags = 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
        if !readonly {
            flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
        }

        mshv_user_mem_region {
            flags,
            guest_pfn: guest_phys_addr >> PAGE_SHIFT,
            size: memory_size,
            userspace_addr,
            ..Default::default()
        }
        .into()
    }

    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = mshv_create_device {
            type_: MSHV_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match config {
            InterruptSourceConfig::MsiIrq(cfg) => mshv_user_irq_entry {
                gsi,
                address_lo: cfg.low_addr,
                address_hi: cfg.high_addr,
                data: cfg.data,
            }
            .into(),
            _ => {
                unreachable!()
            }
        }
    }

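    ///
    /// Sets the MSI routing table for the VM.
    ///
    /// A minimal usage sketch (marked `ignore`, so it is not compiled as a
    /// doctest); it assumes `vm` is an existing `MshvVm`, and the address/data
    /// values are placeholders.
    ///
    /// ```ignore
    /// use hypervisor::IrqRoutingEntry;
    ///
    /// let entry = IrqRoutingEntry::Mshv(mshv_bindings::mshv_user_irq_entry {
    ///     gsi: 0,
    ///     address_lo: 0xfee0_0000,
    ///     address_hi: 0,
    ///     data: 0x20,
    /// });
    /// vm.set_gsi_routing(&[entry]).unwrap();
    /// ```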
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut msi_routing =
            vec_with_array_field::<mshv_user_irq_table, mshv_user_irq_entry>(entries.len());
        msi_routing[0].nr = entries.len() as u32;

        let entries: Vec<mshv_user_irq_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Mshv(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: msi_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [mshv_user_irq_entry] =
                msi_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_msi_routing(&msi_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }

    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        self.fd
            .enable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
    }

    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        // Before disabling dirty page tracking we need to set the dirty
        // bits in the hypervisor. This is a requirement of the Microsoft
        // Hypervisor.
        for (_, s) in dirty_log_slots.iter() {
            self.fd
                .get_dirty_log(
                    s.guest_pfn,
                    s.memory_size as usize,
                    MSHV_GPAP_ACCESS_OP_SET as u8,
                )
                .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        }
        self.fd
            .disable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        Ok(())
    }

    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(
                base_gpa >> PAGE_SHIFT,
                memory_size as usize,
                MSHV_GPAP_ACCESS_OP_CLEAR as u8,
            )
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }

    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        let val = self
            .fd
            .get_partition_property(hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME)
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?;
        Ok(MshvClockData { ref_time: val }.into())
    }

    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        let data: MshvClockData = (*data).into();
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME,
                data.ref_time,
            )
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }

    /// Downcast to the underlying MshvVm type
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Initialize the SEV-SNP VM
    #[cfg(feature = "sev_snp")]
    fn sev_snp_init(&self) -> vm::Result<()> {
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_STATE,
                hv_partition_isolation_state_HV_PARTITION_ISOLATION_SECURE as u64,
            )
            .map_err(|e| vm::HypervisorVmError::InitializeSevSnp(e.into()))
    }

    ///
    /// Imports isolated pages; these pages will be used
    /// for the PSP (Platform Security Processor) measurement.
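    ///
    /// A minimal usage sketch (marked `ignore`, so it is not compiled as a
    /// doctest); it assumes `vm` is an SEV-SNP enabled `MshvVm`, and `page_type`
    /// and `pfns` are placeholders for values produced by the IGVM loader.
    ///
    /// ```ignore
    /// vm.import_isolated_pages(page_type, hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB, &pfns)
    ///     .unwrap();
    /// ```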
    #[cfg(feature = "sev_snp")]
    fn import_isolated_pages(
        &self,
        page_type: u32,
        page_size: u32,
        pages: &[u64],
    ) -> vm::Result<()> {
        debug_assert!(page_size == hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB);
        if pages.is_empty() {
            return Ok(());
        }

        let mut isolated_pages =
            vec_with_array_field::<mshv_import_isolated_pages, u64>(pages.len());
        isolated_pages[0].page_type = page_type as u8;
        isolated_pages[0].page_count = pages.len() as u64;
        // SAFETY: isolated_pages initialized with pages.len() and now it is being turned into
        // pages_slice with pages.len() again. It is guaranteed to be large enough to hold
        // everything from pages.
        unsafe {
            let pages_slice: &mut [u64] = isolated_pages[0].guest_pfns.as_mut_slice(pages.len());
            pages_slice.copy_from_slice(pages);
        }
        self.fd
            .import_isolated_pages(&isolated_pages[0])
            .map_err(|e| vm::HypervisorVmError::ImportIsolatedPages(e.into()))
    }

    ///
    /// Complete isolated import, telling the hypervisor that
    /// importing the pages to guest memory is complete.
    ///
    #[cfg(feature = "sev_snp")]
    fn complete_isolated_import(
        &self,
        snp_id_block: IGVM_VHS_SNP_ID_BLOCK,
        host_data: [u8; 32],
        id_block_enabled: u8,
    ) -> vm::Result<()> {
        let mut auth_info = hv_snp_id_auth_info {
            id_key_algorithm: snp_id_block.id_key_algorithm,
            auth_key_algorithm: snp_id_block.author_key_algorithm,
            ..Default::default()
        };
        // Each of the r and s components is 576 bits long.
        auth_info.id_block_signature[..SIG_R_COMPONENT_SIZE_IN_BYTES]
            .copy_from_slice(snp_id_block.id_key_signature.r_comp.as_ref());
        auth_info.id_block_signature
            [SIG_R_COMPONENT_SIZE_IN_BYTES..SIG_R_AND_S_COMPONENT_SIZE_IN_BYTES]
            .copy_from_slice(snp_id_block.id_key_signature.s_comp.as_ref());
        auth_info.id_key[..ECDSA_CURVE_ID_SIZE_IN_BYTES]
            .copy_from_slice(snp_id_block.id_public_key.curve.to_le_bytes().as_ref());
        auth_info.id_key[ECDSA_SIG_X_COMPONENT_START..ECDSA_SIG_X_COMPONENT_END]
            .copy_from_slice(snp_id_block.id_public_key.qx.as_ref());
        auth_info.id_key[ECDSA_SIG_Y_COMPONENT_START..ECDSA_SIG_Y_COMPONENT_END]
            .copy_from_slice(snp_id_block.id_public_key.qy.as_ref());

        let data = mshv_complete_isolated_import {
            import_data: hv_partition_complete_isolated_import_data {
                psp_parameters: hv_psp_launch_finish_data {
                    id_block: hv_snp_id_block {
                        launch_digest: snp_id_block.ld,
                        family_id: snp_id_block.family_id,
                        image_id: snp_id_block.image_id,
                        version: snp_id_block.version,
                        guest_svn: snp_id_block.guest_svn,
                        policy: get_default_snp_guest_policy(),
                    },
                    id_auth_info: auth_info,
                    host_data,
                    id_block_enabled,
                    author_key_enabled: 0,
                },
            },
        };
        self.fd
            .complete_isolated_import(&data)
            .map_err(|e| vm::HypervisorVmError::CompleteIsolatedImport(e.into()))
    }

    #[cfg(target_arch = "aarch64")]
    fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
        unimplemented!()
    }

    #[cfg(target_arch = "aarch64")]
    fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
        unimplemented!()
    }

    /// Pause the VM
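    ///
    /// A minimal usage sketch (marked `ignore`, so it is not compiled as a
    /// doctest); it assumes `vm` is an existing `MshvVm`.
    ///
    /// ```ignore
    /// vm.pause().unwrap();
    /// // ... snapshot or migrate guest state here ...
    /// vm.resume().unwrap();
    /// ```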
    fn pause(&self) -> vm::Result<()> {
        // Freeze the partition
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
                1u64,
            )
            .map_err(|e| {
                vm::HypervisorVmError::SetVmProperty(anyhow!(
                    "Failed to set partition property: {}",
                    e
                ))
            })
    }

    /// Resume the VM
    fn resume(&self) -> vm::Result<()> {
        // Resume the partition by clearing the TIME_FREEZE property
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
                0u64,
            )
            .map_err(|e| {
                vm::HypervisorVmError::SetVmProperty(anyhow!(
                    "Failed to set partition property: {}",
                    e
                ))
            })
    }

    #[cfg(feature = "sev_snp")]
    fn gain_page_access(&self, gpa: u64, size: u32) -> vm::Result<()> {
        use mshv_ioctls::set_bits;
        const ONE_GB: usize = 1024 * 1024 * 1024;

        if !self.sev_snp_enabled {
            return Ok(());
        }

        let start_gpfn: u64 = gpa >> PAGE_SHIFT;
        let end_gpfn: u64 = (gpa + size as u64 - 1) >> PAGE_SHIFT;

        // Enlarge the bitmap if the last PFN falls beyond the current bitmap length
        if end_gpfn >= self.host_access_pages.load().as_ref().len() as u64 {
            self.host_access_pages.rcu(|bitmap| {
                let mut bm = bitmap.as_ref().clone();
                bm.enlarge(ONE_GB);
                bm
            });
        }

        let gpas: Vec<u64> = (start_gpfn..=end_gpfn)
            .filter(|x| {
                !self
                    .host_access_pages
                    .load()
                    .as_ref()
                    .is_bit_set(*x as usize)
            })
            .map(|x| x << PAGE_SHIFT)
            .collect();

        if !gpas.is_empty() {
            let mut gpa_list =
                vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len());
            gpa_list[0].page_count = gpas.len() as u64;
            gpa_list[0].flags = set_bits!(
                u8,
                MSHV_GPA_HOST_ACCESS_BIT_ACQUIRE,
                MSHV_GPA_HOST_ACCESS_BIT_READABLE,
                MSHV_GPA_HOST_ACCESS_BIT_WRITABLE
            );

            // SAFETY: gpa_list initialized with gpas.len() and now it is being turned into
            // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold
            // everything from gpas.
            unsafe {
                let gpas_slice: &mut [u64] = gpa_list[0].guest_pfns.as_mut_slice(gpas.len());
                gpas_slice.copy_from_slice(gpas.as_slice());
            }

            self.fd
                .modify_gpa_host_access(&gpa_list[0])
                .map_err(|e| vm::HypervisorVmError::ModifyGpaHostAccess(e.into()))?;

            // Mark the newly acquired pages in the host-access bitmap.
            for acquired_gpa in gpas {
                self.host_access_pages.rcu(|bitmap| {
                    let bm = bitmap.clone();
                    bm.set_bit((acquired_gpa >> PAGE_SHIFT) as usize);
                    bm
                });
            }
        }

        Ok(())
    }
}