// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//

use std::any::Any;
use std::collections::HashMap;
#[cfg(feature = "sev_snp")]
use std::num::NonZeroUsize;
use std::sync::{Arc, RwLock};

#[cfg(feature = "sev_snp")]
use arc_swap::ArcSwap;
use mshv_bindings::*;
#[cfg(target_arch = "x86_64")]
use mshv_ioctls::{set_registers_64, InterruptRequest};
use mshv_ioctls::{Mshv, NoDatamatch, VcpuFd, VmFd, VmType};
use vfio_ioctls::VfioDeviceFd;
use vm::DataMatch;
#[cfg(feature = "sev_snp")]
use vm_memory::bitmap::AtomicBitmap;

#[cfg(target_arch = "x86_64")]
use crate::arch::emulator::PlatformEmulator;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::emulator::Emulator;
#[cfg(target_arch = "x86_64")]
use crate::mshv::emulator::MshvEmulatorContext;
use crate::vm::{self, InterruptSourceConfig, VmOps};
use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType};
#[cfg(feature = "sev_snp")]
mod snp_constants;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
#[cfg(target_arch = "x86_64")]
use std::fs::File;
use std::os::unix::io::AsRawFd;
#[cfg(target_arch = "aarch64")]
use std::sync::Mutex;

#[cfg(target_arch = "aarch64")]
pub use aarch64::VcpuMshvState;
#[cfg(feature = "sev_snp")]
use igvm_defs::IGVM_VHS_SNP_ID_BLOCK;
#[cfg(feature = "sev_snp")]
use snp_constants::*;
use vmm_sys_util::eventfd::EventFd;
#[cfg(target_arch = "x86_64")]
pub use x86_64::*;
#[cfg(target_arch = "x86_64")]
pub use x86_64::{emulator, VcpuMshvState};
///
/// Export generically-named wrappers of mshv-bindings for Unix-based platforms
///
pub use {
    mshv_bindings::mshv_create_device as CreateDevice,
    mshv_bindings::mshv_device_attr as DeviceAttr, mshv_ioctls, mshv_ioctls::DeviceFd,
};

#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::gic::{Vgic, VgicConfig};
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{CpuIdEntry, FpuState, MsrEntry};
#[cfg(target_arch = "x86_64")]
use crate::ClockData;
use crate::{
    CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
    USER_MEMORY_REGION_ADJUSTABLE, USER_MEMORY_REGION_EXECUTE, USER_MEMORY_REGION_READ,
    USER_MEMORY_REGION_WRITE,
};

pub const PAGE_SHIFT: usize = 12;

impl From<mshv_user_mem_region> for UserMemoryRegion {
    fn from(region: mshv_user_mem_region) -> Self {
        let mut flags: u32 = USER_MEMORY_REGION_READ | USER_MEMORY_REGION_ADJUSTABLE;
        if region.flags & (1 << MSHV_SET_MEM_BIT_WRITABLE) != 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & (1 << MSHV_SET_MEM_BIT_EXECUTABLE) != 0 {
            flags |= USER_MEMORY_REGION_EXECUTE;
        }

        UserMemoryRegion {
            guest_phys_addr: (region.guest_pfn << PAGE_SHIFT as u64)
                + (region.userspace_addr & ((1 << PAGE_SHIFT) - 1)),
            memory_size: region.size,
            userspace_addr: region.userspace_addr,
            flags,
            ..Default::default()
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<MshvClockData> for ClockData {
    fn from(d: MshvClockData) -> Self {
        ClockData::Mshv(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for MshvClockData {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Mshv(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => unreachable!("MSHV clock data is not valid"),
        }
    }
}

impl From<UserMemoryRegion> for mshv_user_mem_region {
    fn from(region: UserMemoryRegion) -> Self {
        let mut flags: u8 = 0;
        if region.flags & USER_MEMORY_REGION_WRITE != 0 {
            flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
        }
        if region.flags & USER_MEMORY_REGION_EXECUTE != 0 {
            flags |= 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
        }

        mshv_user_mem_region {
            guest_pfn: region.guest_phys_addr >> PAGE_SHIFT,
            size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
            ..Default::default()
        }
    }
}

impl From<mshv_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: mshv_ioctls::IoEventAddress) -> Self {
        match a {
            mshv_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            mshv_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for mshv_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuMshvState> for CpuState {
    fn from(s: VcpuMshvState) -> Self {
        CpuState::Mshv(s)
    }
}

impl From<CpuState> for VcpuMshvState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Mshv(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

impl From<mshv_bindings::StandardRegisters> for crate::StandardRegisters {
    fn from(s: mshv_bindings::StandardRegisters) -> Self {
        crate::StandardRegisters::Mshv(s)
    }
}

impl From<crate::StandardRegisters> for mshv_bindings::StandardRegisters {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

impl From<mshv_user_irq_entry> for IrqRoutingEntry {
    fn from(s: mshv_user_irq_entry) -> Self {
        IrqRoutingEntry::Mshv(s)
    }
}

impl From<IrqRoutingEntry> for mshv_user_irq_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

#[cfg(target_arch = "aarch64")]
impl From<mshv_bindings::MshvRegList> for crate::RegList {
    fn from(s: mshv_bindings::MshvRegList) -> Self {
        crate::RegList::Mshv(s)
    }
}

#[cfg(target_arch = "aarch64")]
impl From<crate::RegList> for mshv_bindings::MshvRegList {
    fn from(e: crate::RegList) -> Self {
        match e {
            crate::RegList::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("RegList is not valid"),
        }
    }
}

#[cfg(target_arch = "aarch64")]
impl From<mshv_bindings::MshvVcpuInit> for crate::VcpuInit {
    fn from(s: mshv_bindings::MshvVcpuInit) -> Self {
        crate::VcpuInit::Mshv(s)
    }
}

#[cfg(target_arch = "aarch64")]
impl From<crate::VcpuInit> for mshv_bindings::MshvVcpuInit {
    fn from(e: crate::VcpuInit) -> Self {
        match e {
            crate::VcpuInit::Mshv(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("VcpuInit is not valid"),
        }
    }
}

struct MshvDirtyLogSlot {
    guest_pfn: u64,
    memory_size: u64,
}

/// Wrapper over mshv system ioctls.
pub struct MshvHypervisor {
    mshv: Mshv,
}

impl MshvHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by MSHV.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.mshv
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }

    fn create_vm_with_type_and_memory_int(
        &self,
        vm_type: u64,
        #[cfg(feature = "sev_snp")] _mem_size: Option<u64>,
    ) -> hypervisor::Result<Arc<dyn crate::Vm>> {
        let mshv_vm_type: VmType = match VmType::try_from(vm_type) {
            Ok(vm_type) => vm_type,
            Err(_) => return Err(hypervisor::HypervisorError::UnsupportedVmType()),
        };
        let fd: VmFd;
        loop {
            match self.mshv.create_vm_with_type(mshv_vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        // Set additional partition property for SEV-SNP partition.
        #[cfg(target_arch = "x86_64")]
        if mshv_vm_type == VmType::Snp {
            let snp_policy = snp::get_default_snp_guest_policy();
            let vmgexit_offloads = snp::get_default_vmgexit_offload_features();
            // SAFETY: access union fields
            unsafe {
                debug!(
                    "Setting the partition isolation policy as: 0x{:x}",
                    snp_policy.as_uint64
                );
                fd.set_partition_property(
                    hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_POLICY,
                    snp_policy.as_uint64,
                )
                .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
                debug!(
                    "Setting the partition property to enable VMGEXIT offloads as : 0x{:x}",
                    vmgexit_offloads.as_uint64
                );
                fd.set_partition_property(
                    hv_partition_property_code_HV_PARTITION_PROPERTY_SEV_VMGEXIT_OFFLOADS,
                    vmgexit_offloads.as_uint64,
                )
                .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;
            }
        }

        // Default Microsoft Hypervisor behavior for an unimplemented MSR is to
        // send a fault to the guest if it tries to access it. It is possible
        // to override this behavior with a more suitable option, i.e. ignore
        // writes from the guest and return zero on attempts to read an
        // unimplemented MSR.
        #[cfg(target_arch = "x86_64")]
        fd.set_partition_property(
            hv_partition_property_code_HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION,
            hv_unimplemented_msr_action_HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;

        // Always create a frozen partition
        fd.set_partition_property(
            hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
            1u64,
        )
        .map_err(|e| hypervisor::HypervisorError::SetPartitionProperty(e.into()))?;

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs: Vec<MsrEntry> = vec![
                MsrEntry {
                    ..Default::default()
                };
                num_msrs
            ];
            let indices = msr_list.as_slice();
            for (pos, index) in indices.iter().enumerate() {
                msrs[pos].index = *index;
            }

            Ok(Arc::new(MshvVm {
                fd: vm_fd,
                msrs,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
                #[cfg(feature = "sev_snp")]
                sev_snp_enabled: mshv_vm_type == VmType::Snp,
                #[cfg(feature = "sev_snp")]
                host_access_pages: ArcSwap::new(
                    AtomicBitmap::new(
                        _mem_size.unwrap_or_default() as usize,
                        NonZeroUsize::new(HV_PAGE_SIZE).unwrap(),
                    )
                    .into(),
                ),
            }))
        }

        #[cfg(target_arch = "aarch64")]
        {
            Ok(Arc::new(MshvVm {
                fd: vm_fd,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }
}

impl MshvHypervisor {
    /// Create a hypervisor based on Mshv
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let mshv_obj =
            Mshv::new().map_err(|e| hypervisor::HypervisorError::HypervisorCreate(e.into()))?;
        Ok(Arc::new(MshvHypervisor { mshv: mshv_obj }))
    }
    /// Check if the hypervisor is available
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/mshv") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}

/// Implementation of Hypervisor trait for Mshv
///
/// # Examples
///
/// ```
/// use hypervisor::mshv::MshvHypervisor;
/// use std::sync::Arc;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl hypervisor::Hypervisor for MshvHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Mshv
    }

    ///
    /// Create a Vm of a specific type using the underlying hypervisor, passing memory size
    /// Return a hypervisor-agnostic Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type(0, 512*1024*1024).unwrap();
    /// ```
    fn create_vm_with_type_and_memory(
        &self,
        vm_type: u64,
        #[cfg(feature = "sev_snp")] _mem_size: u64,
    ) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        self.create_vm_with_type_and_memory_int(
            vm_type,
            #[cfg(feature = "sev_snp")]
            Some(_mem_size),
        )
    }

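    /// Create a Vm of a specific type using the underlying hypervisor.
    ///
    /// Thin wrapper over `create_vm_with_type_and_memory_int` that passes no
    /// memory-size hint (the hint parameter only exists with the `sev_snp`
    /// feature, where it sizes the host-access bitmap).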
    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn crate::Vm>> {
        self.create_vm_with_type_and_memory_int(
            vm_type,
            #[cfg(feature = "sev_snp")]
            None,
        )
    }

    /// Create a mshv vm object and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # extern crate hypervisor;
    /// use hypervisor::mshv::MshvHypervisor;
    /// use hypervisor::mshv::MshvVm;
    /// let hypervisor = MshvHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// ```
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let vm_type = 0;
        self.create_vm_with_type(vm_type)
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the supported CpuID
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        let mut cpuid = Vec::new();
        let functions: [u32; 2] = [0x1, 0xb];

        for function in functions {
            cpuid.push(CpuIdEntry {
                function,
                ..Default::default()
            });
        }
        Ok(cpuid)
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        // TODO: Using HV_MAXIMUM_PROCESSORS would be better
        // but the ioctl API is limited to u8
        256
    }

    fn get_guest_debug_hw_bps(&self) -> usize {
        0
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve AArch64 host maximum IPA size supported by MSHV.
    ///
    fn get_host_ipa_limit(&self) -> i32 {
        let host_ipa = self.mshv.get_host_partition_property(
            hv_partition_property_code_HV_PARTITION_PROPERTY_PHYSICAL_ADDRESS_WIDTH as u64,
        );

        match host_ipa {
            Ok(ipa) => ipa,
            Err(e) => {
                panic!("Failed to get host IPA limit: {:?}", e);
            }
        }
    }
}

#[cfg(feature = "sev_snp")]
struct Ghcb(*mut svm_ghcb_base);

#[cfg(feature = "sev_snp")]
// SAFETY: struct is based on GHCB page in the hypervisor,
// safe to Send across threads
unsafe impl Send for Ghcb {}

#[cfg(feature = "sev_snp")]
// SAFETY: struct is based on GHCB page in the hypervisor,
// safe to Sync across threads as this is only required for Vcpu trait
// functionally not used anyway
unsafe impl Sync for Ghcb {}

/// Vcpu struct for Microsoft Hypervisor
pub struct MshvVcpu {
    fd: VcpuFd,
    vp_index: u8,
    #[cfg(target_arch = "x86_64")]
    cpuid: Vec<CpuIdEntry>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    vm_fd: Arc<VmFd>,
    #[cfg(feature = "sev_snp")]
    ghcb: Option<Ghcb>,
    #[cfg(feature = "sev_snp")]
    host_access_pages: ArcSwap<AtomicBitmap>,
}

/// Implementation of Vcpu trait for Microsoft Hypervisor
///
/// # Examples
///
/// ```
/// use hypervisor::mshv::MshvHypervisor;
/// use std::sync::Arc;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// ```
impl cpu::Vcpu for MshvVcpu {
    ///
    /// Returns StandardRegisters with default value set
    ///
    #[cfg(target_arch = "x86_64")]
    fn create_standard_regs(&self) -> crate::StandardRegisters {
        mshv_bindings::StandardRegisters::default().into()
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<crate::StandardRegisters> {
        Ok(self
            .fd
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers.
    ///
    fn set_regs(&self, regs: &crate::StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<crate::arch::x86::SpecialRegisters> {
        Ok(self
            .fd
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers.
    ///
    fn set_sregs(&self, sregs: &crate::arch::x86::SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: mshv_bindings::FloatingPointUnit = (*fpu).clone().into();
        self.fd
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        let succ = self
            .fd
            .get_msrs(&mut mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &mshv_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
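    ///
    /// A minimal usage sketch (not compiled here); assumes `vcpu` implements
    /// `cpu::Vcpu` and the guest has already been created:
    ///
    /// ```ignore
    /// let entries = vcpu.boot_msr_entries();
    /// let written = vcpu.set_msrs(&entries).unwrap();
    /// println!("wrote {} of {} MSRs", written, entries.len());
    /// ```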
    ///
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let mshv_msrs: Vec<msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mshv_msrs = MsrEntries::from_entries(&mshv_msrs).unwrap();
        self.fd
            .set_msrs(&mshv_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        /* We always have SynIC enabled on MSHV */
        Ok(())
    }

    #[allow(non_upper_case_globals)]
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.run() {
            Ok(x) => match x.header.message_type {
                hv_message_type_HVMSG_X64_HALT => {
                    debug!("HALT");
                    Ok(cpu::VmExit::Reset)
                }
                hv_message_type_HVMSG_UNRECOVERABLE_EXCEPTION => {
                    warn!("TRIPLE FAULT");
                    Ok(cpu::VmExit::Shutdown)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_IO_PORT_INTERCEPT => {
                    let info = x.to_ioport_info().unwrap();
                    let access_info = info.access_info;
                    // SAFETY: access_info is valid, otherwise we won't be here
                    let len = unsafe { access_info.__bindgen_anon_1.access_size() } as usize;
                    let is_write = info.header.intercept_access_type == 1;
                    let port = info.port_number;
                    let mut data: [u8; 4] = [0; 4];
                    let mut ret_rax = info.rax;

                    /*
                     * XXX: Ignore QEMU fw_cfg (0x5xx) and debug console (0x402) ports.
                     *
                     * Cloud Hypervisor doesn't support fw_cfg at the moment. It does support
                     * 0x402 under the "fwdebug" feature flag, but that feature is not enabled
                     * by default and is considered legacy.
                     *
                     * OVMF unconditionally pokes these IO ports with string IO.
                     *
                     * Instead of trying to implement string IO support, which would not buy
                     * us much right now, skip those ports explicitly to avoid panicking.
                     *
                     * Proper string IO support can be added once we gain the ability to translate
                     * guest virtual addresses to guest physical addresses on MSHV.
                     */
                    match port {
                        0x402 | 0x510 | 0x511 | 0x514 => {
                            let insn_len = info.header.instruction_length() as u64;

                            /* Advance RIP and update RAX */
                            let arr_reg_name_value = [
                                (
                                    hv_register_name_HV_X64_REGISTER_RIP,
                                    info.header.rip + insn_len,
                                ),
                                (hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
                            ];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                            return Ok(cpu::VmExit::Ignore);
                        }
                        _ => {}
                    }

                    assert!(
                        // SAFETY: access_info is valid, otherwise we won't be here
                        (unsafe { access_info.__bindgen_anon_1.string_op() } != 1),
                        "String IN/OUT not supported"
                    );
                    assert!(
                        // SAFETY: access_info is valid, otherwise we won't be here
                        (unsafe { access_info.__bindgen_anon_1.rep_prefix() } != 1),
                        "Rep IN/OUT not supported"
                    );

                    if is_write {
                        let data = (info.rax as u32).to_le_bytes();
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_write(port.into(), &data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }
                    } else {
                        if let Some(vm_ops) = &self.vm_ops {
                            vm_ops
                                .pio_read(port.into(), &mut data[0..len])
                                .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;
                        }

                        let v = u32::from_le_bytes(data);
                        /* Preserve high bits in EAX but clear out high bits in RAX */
                        let mask = 0xffffffff >> (32 - len * 8);
                        let eax = (info.rax as u32 & !mask) | (v & mask);
                        ret_rax = eax as u64;
                    }

                    let insn_len = info.header.instruction_length() as u64;

                    /* Advance RIP and update RAX */
                    let arr_reg_name_value = [
                        (
                            hv_register_name_HV_X64_REGISTER_RIP,
                            info.header.rip + insn_len,
                        ),
                        (hv_register_name_HV_X64_REGISTER_RAX, ret_rax),
                    ];
                    set_registers_64!(self.fd, arr_reg_name_value)
                        .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                msg_type @ (hv_message_type_HVMSG_UNMAPPED_GPA
                | hv_message_type_HVMSG_GPA_INTERCEPT) => {
                    let info = x.to_memory_info().unwrap();
                    let insn_len = info.instruction_byte_count as usize;
                    let gva = info.guest_virtual_address;
                    let gpa = info.guest_physical_address;

                    debug!("Exit ({:?}) GVA {:x} GPA {:x}", msg_type, gva, gpa);

                    let mut context = MshvEmulatorContext {
                        vcpu: self,
                        map: (gva, gpa),
                    };

                    // Create a new emulator.
                    let mut emul = Emulator::new(&mut context);

                    // Emulate the trapped instruction, and only the first one.
                    let new_state = emul
                        .emulate_first_insn(
                            self.vp_index as usize,
                            &info.instruction_bytes[..insn_len],
                        )
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    // Set CPU state back.
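                    // The emulator worked on a snapshot of the vCPU state; write the
                    // updated state back to the vCPU through the emulator context.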
                    context
                        .set_cpu_state(self.vp_index as usize, new_state)
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(feature = "sev_snp")]
                hv_message_type_HVMSG_GPA_ATTRIBUTE_INTERCEPT => {
                    let info = x.to_gpa_attribute_info().unwrap();
                    let host_vis = info.__bindgen_anon_1.host_visibility();
                    if host_vis >= HV_MAP_GPA_READABLE | HV_MAP_GPA_WRITABLE {
                        warn!("Ignored attribute intercept with full host visibility");
                        return Ok(cpu::VmExit::Ignore);
                    }

                    let num_ranges = info.__bindgen_anon_1.range_count();
                    assert!(num_ranges >= 1);
                    if num_ranges > 1 {
                        return Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unhandled VCPU exit(GPA_ATTRIBUTE_INTERCEPT): Expected num_ranges to be 1 but found num_ranges {:?}",
                            num_ranges
                        )));
                    }

                    // TODO: we could also deny the request with HvCallCompleteIntercept
                    let mut gpas = Vec::new();
                    let ranges = info.ranges;
                    let (gfn_start, gfn_count) = snp::parse_gpa_range(ranges[0]).unwrap();
                    debug!(
                        "Releasing pages: gfn_start: {:x?}, gfn_count: {:?}",
                        gfn_start, gfn_count
                    );
                    let gpa_start = gfn_start * HV_PAGE_SIZE as u64;
                    for i in 0..gfn_count {
                        gpas.push(gpa_start + i * HV_PAGE_SIZE as u64);
                    }

                    let mut gpa_list =
                        vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len());
                    gpa_list[0].page_count = gpas.len() as u64;
                    gpa_list[0].flags = 0;
                    if host_vis & HV_MAP_GPA_READABLE != 0 {
                        gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_READABLE;
                    }
                    if host_vis & HV_MAP_GPA_WRITABLE != 0 {
                        gpa_list[0].flags |= 1 << MSHV_GPA_HOST_ACCESS_BIT_WRITABLE;
                    }

                    // SAFETY: gpa_list initialized with gpas.len() and now it is being turned into
                    // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold
                    // everything from gpas.
                    unsafe {
                        let gpas_slice: &mut [u64] =
                            gpa_list[0].guest_pfns.as_mut_slice(gpas.len());
                        gpas_slice.copy_from_slice(gpas.as_slice());
                    }

                    self.vm_fd
                        .modify_gpa_host_access(&gpa_list[0])
                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unhandled VCPU exit: attribute intercept - couldn't modify host access {}", e
                        )))?;
                    // Guest is revoking the shared access, so we need to update the bitmap
                    self.host_access_pages.rcu(|_bitmap| {
                        let bm = self.host_access_pages.load().as_ref().clone();
                        bm.reset_addr_range(gpa_start as usize, gfn_count as usize);
                        bm
                    });
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_UNACCEPTED_GPA => {
                    let info = x.to_memory_info().unwrap();
                    let gva = info.guest_virtual_address;
                    let gpa = info.guest_physical_address;

                    Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                        "Unhandled VCPU exit: Unaccepted GPA({:x}) found at GVA({:x})",
                        gpa,
                        gva,
                    )))
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_CPUID_INTERCEPT => {
                    let info = x.to_cpuid_info().unwrap();
                    debug!("cpuid eax: {:x}", { info.rax });
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_MSR_INTERCEPT => {
                    let info = x.to_msr_info().unwrap();
                    if info.header.intercept_access_type == 0 {
                        debug!("msr read: {:x}", { info.msr_number });
                    } else {
                        debug!("msr write: {:x}", { info.msr_number });
                    }
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT => {
                    //TODO: Handler for VMCALL here.
                    let info = x.to_exception_info().unwrap();
                    debug!("Exception Info {:?}", { info.exception_vector });
                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                hv_message_type_HVMSG_X64_APIC_EOI => {
                    let info = x.to_apic_eoi_info().unwrap();
                    // The kernel should dispatch the EOI to the correct thread.
                    // Check the VP index is the same as the one we have.
                    assert!(info.vp_index == self.vp_index as u32);
                    // The interrupt vector in info is u32, but x86 only supports 256 vectors.
                    // There is no good way to recover from this if the hypervisor messes around.
                    // Just unwrap.
                    Ok(cpu::VmExit::IoapicEoi(
                        info.interrupt_vector.try_into().unwrap(),
                    ))
                }
                #[cfg(feature = "sev_snp")]
                hv_message_type_HVMSG_X64_SEV_VMGEXIT_INTERCEPT => {
                    let info = x.to_vmg_intercept_info().unwrap();
                    let ghcb_data = info.ghcb_msr >> GHCB_INFO_BIT_WIDTH;
                    let ghcb_msr = svm_ghcb_msr {
                        as_uint64: info.ghcb_msr,
                    };
                    // Safe to use unwrap, for sev_snp guest we already have the
                    // GHCB pointer wrapped in the option, otherwise this place is not reached.
                    let ghcb = self.ghcb.as_ref().unwrap().0;

                    // SAFETY: Accessing a union element from bindgen generated bindings.
                    let ghcb_op = unsafe { ghcb_msr.__bindgen_anon_2.ghcb_info() as u32 };
                    // Sanity check on the header fields before handling other operations.
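                    // A VMGEXIT is delivered as an execute intercept; the assert below
                    // rejects any other access type as a malformed message.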
                    assert!(info.header.intercept_access_type == HV_INTERCEPT_ACCESS_EXECUTE as u8);

                    match ghcb_op {
                        GHCB_INFO_HYP_FEATURE_REQUEST => {
                            // Pre-condition: GHCB data must be zero
                            assert!(ghcb_data == 0);
                            let mut ghcb_response = GHCB_INFO_HYP_FEATURE_RESPONSE as u64;
                            // Indicate support for basic SEV-SNP features
                            ghcb_response |=
                                (GHCB_HYP_FEATURE_SEV_SNP << GHCB_INFO_BIT_WIDTH) as u64;
                            // Indicate support for SEV-SNP AP creation
                            ghcb_response |= (GHCB_HYP_FEATURE_SEV_SNP_AP_CREATION
                                << GHCB_INFO_BIT_WIDTH)
                                as u64;
                            debug!(
                                "GHCB_INFO_HYP_FEATURE_REQUEST: Supported features: {:0x}",
                                ghcb_response
                            );
                            let arr_reg_name_value =
                                [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                        }
                        GHCB_INFO_REGISTER_REQUEST => {
                            let mut ghcb_gpa = hv_x64_register_sev_ghcb::default();

                            // Disable the previously used GHCB page.
                            self.disable_prev_ghcb_page()?;

                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            unsafe {
                                ghcb_gpa.__bindgen_anon_1.set_enabled(1);
                                ghcb_gpa
                                    .__bindgen_anon_1
                                    .set_page_number(ghcb_msr.__bindgen_anon_2.gpa_page_number());
                            }
                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            let reg_name_value = unsafe {
                                [(
                                    hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
                                    ghcb_gpa.as_uint64,
                                )]
                            };

                            set_registers_64!(self.fd, reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;

                            let mut resp_ghcb_msr = svm_ghcb_msr::default();
                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            unsafe {
                                resp_ghcb_msr
                                    .__bindgen_anon_2
                                    .set_ghcb_info(GHCB_INFO_REGISTER_RESPONSE as u64);
                                resp_ghcb_msr.__bindgen_anon_2.set_gpa_page_number(
                                    ghcb_msr.__bindgen_anon_2.gpa_page_number(),
                                );
                                debug!("GHCB GPA is {:x}", ghcb_gpa.as_uint64);
                            }
                            // SAFETY: Accessing a union element from bindgen generated bindings.
                            let reg_name_value = unsafe {
                                [(
                                    hv_register_name_HV_X64_REGISTER_GHCB,
                                    resp_ghcb_msr.as_uint64,
                                )]
                            };

                            set_registers_64!(self.fd, reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                        }
                        GHCB_INFO_SEV_INFO_REQUEST => {
                            let sev_cpuid_function = 0x8000_001F;
                            let cpu_leaf = self
                                .fd
                                .get_cpuid_values(sev_cpuid_function, 0, 0, 0)
                                .unwrap();
                            let ebx = cpu_leaf[1];
                            // The first 6 bits of EBX hold the page table encryption bit number
                            let pbit_encryption = (ebx & 0x3f) as u8;
                            let mut ghcb_response = GHCB_INFO_SEV_INFO_RESPONSE as u64;

                            // GHCBData[63:48] specifies the maximum GHCB protocol version supported
                            ghcb_response |= (GHCB_PROTOCOL_VERSION_MAX as u64) << 48;
                            // GHCBData[47:32] specifies the minimum GHCB protocol version supported
                            ghcb_response |= (GHCB_PROTOCOL_VERSION_MIN as u64) << 32;
                            // GHCBData[31:24] specifies the SEV page table encryption bit number.
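                            // Putting it together (assuming the usual 12-bit GHCBInfo field in
                            // the low bits), the response MSR value is:
                            // [63:48] max version | [47:32] min version |
                            // [31:24] C-bit position | [11:0] SEV_INFO_RESPONSE.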
                            ghcb_response |= (pbit_encryption as u64) << 24;

                            let arr_reg_name_value =
                                [(hv_register_name_HV_X64_REGISTER_GHCB, ghcb_response)];
                            set_registers_64!(self.fd, arr_reg_name_value)
                                .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;
                        }
                        GHCB_INFO_NORMAL => {
                            let exit_code =
                                info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_code as u32;

                            match exit_code {
                                SVM_EXITCODE_HV_DOORBELL_PAGE => {
                                    let exit_info1 =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32;
                                    match exit_info1 {
                                        SVM_NAE_HV_DOORBELL_PAGE_GET_PREFERRED => {
                                            // Hypervisor does not have any preference for doorbell GPA.
                                            let preferred_doorbell_gpa: u64 = 0xFFFFFFFFFFFFFFFF;
                                            set_svm_field_u64_ptr!(
                                                ghcb,
                                                exit_info2,
                                                preferred_doorbell_gpa
                                            );
                                        }
                                        SVM_NAE_HV_DOORBELL_PAGE_SET => {
                                            let exit_info2 = info
                                                .__bindgen_anon_2
                                                .__bindgen_anon_1
                                                .sw_exit_info2;
                                            let mut ghcb_doorbell_gpa =
                                                hv_x64_register_sev_hv_doorbell::default();
                                            // SAFETY: Accessing a union element from bindgen generated bindings.
                                            unsafe {
                                                ghcb_doorbell_gpa.__bindgen_anon_1.set_enabled(1);
                                                ghcb_doorbell_gpa
                                                    .__bindgen_anon_1
                                                    .set_page_number(exit_info2 >> PAGE_SHIFT);
                                            }
                                            // SAFETY: Accessing a union element from bindgen generated bindings.
                                            let reg_names = unsafe {
                                                [(
                                                    hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA,
                                                    ghcb_doorbell_gpa.as_uint64,
                                                )]
                                            };
                                            set_registers_64!(self.fd, reg_names).map_err(|e| {
                                                cpu::HypervisorCpuError::SetRegister(e.into())
                                            })?;

                                            set_svm_field_u64_ptr!(ghcb, exit_info2, exit_info2);

                                            // Clear the SW_EXIT_INFO1 register to indicate no error
                                            self.clear_swexit_info1()?;
                                        }
                                        SVM_NAE_HV_DOORBELL_PAGE_QUERY => {
                                            let mut reg_assocs = [hv_register_assoc {
                                                name: hv_register_name_HV_X64_REGISTER_SEV_DOORBELL_GPA,
                                                ..Default::default()
                                            }];
                                            self.fd.get_reg(&mut reg_assocs).unwrap();
                                            // SAFETY: Accessing a union element from bindgen generated bindings.
                                            let doorbell_gpa = unsafe { reg_assocs[0].value.reg64 };

                                            set_svm_field_u64_ptr!(ghcb, exit_info2, doorbell_gpa);

                                            // Clear the SW_EXIT_INFO1 register to indicate no error
                                            self.clear_swexit_info1()?;
                                        }
                                        SVM_NAE_HV_DOORBELL_PAGE_CLEAR => {
                                            set_svm_field_u64_ptr!(ghcb, exit_info2, 0);
                                        }
                                        _ => {
                                            panic!(
                                                "SVM_EXITCODE_HV_DOORBELL_PAGE: Unhandled exit code: {:0x}",
                                                exit_info1
                                            );
                                        }
                                    }
                                }
                                SVM_EXITCODE_IOIO_PROT => {
                                    let exit_info1 =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 as u32;
                                    let port_info = hv_sev_vmgexit_port_info {
                                        as_uint32: exit_info1,
                                    };

                                    let port =
                                        // SAFETY: Accessing a union element from bindgen generated bindings.
                                        unsafe { port_info.__bindgen_anon_1.intercepted_port() };
                                    let mut len = 4;
                                    // SAFETY: Accessing a union element from bindgen generated bindings.
                                    unsafe {
                                        if port_info.__bindgen_anon_1.operand_size_16bit() == 1 {
                                            len = 2;
                                        } else if port_info.__bindgen_anon_1.operand_size_8bit()
                                            == 1
                                        {
                                            len = 1;
                                        }
                                    }
                                    let is_write =
                                        // SAFETY: Accessing a union element from bindgen generated bindings.
                                        unsafe { port_info.__bindgen_anon_1.access_type() == 0 };
                                    // SAFETY: Accessing the field from a mapped address
                                    let mut data = unsafe { (*ghcb).rax.to_le_bytes() };

                                    if is_write {
                                        if let Some(vm_ops) = &self.vm_ops {
                                            vm_ops.pio_write(port.into(), &data[..len]).map_err(
                                                |e| cpu::HypervisorCpuError::RunVcpu(e.into()),
                                            )?;
                                        }
                                    } else {
                                        if let Some(vm_ops) = &self.vm_ops {
                                            vm_ops
                                                .pio_read(port.into(), &mut data[..len])
                                                .map_err(|e| {
                                                    cpu::HypervisorCpuError::RunVcpu(e.into())
                                                })?;
                                        }
                                        set_svm_field_u64_ptr!(ghcb, rax, u64::from_le_bytes(data));
                                    }

                                    // Clear the SW_EXIT_INFO1 register to indicate no error
                                    self.clear_swexit_info1()?;
                                }
                                SVM_EXITCODE_MMIO_READ => {
                                    let src_gpa =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
                                    let data_len =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2
                                            as usize;
                                    // Sanity check to make sure data len is within supported range.
                                    assert!(data_len <= 0x8);

                                    let mut data: Vec<u8> = vec![0; data_len];
                                    if let Some(vm_ops) = &self.vm_ops {
                                        vm_ops.mmio_read(src_gpa, &mut data).map_err(|e| {
                                            cpu::HypervisorCpuError::RunVcpu(e.into())
                                        })?;
                                    }
                                    // Copy the data to the shared buffer of the GHCB page
                                    let mut buffer_data = [0; 8];
                                    buffer_data[..data_len].copy_from_slice(&data[..data_len]);
                                    // SAFETY: Updating the value of mapped area
                                    unsafe { (*ghcb).shared[0] = u64::from_le_bytes(buffer_data) };

                                    // Clear the SW_EXIT_INFO1 register to indicate no error
                                    self.clear_swexit_info1()?;
                                }
                                SVM_EXITCODE_MMIO_WRITE => {
                                    let dst_gpa =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
                                    let data_len =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2
                                            as usize;
                                    // Sanity check to make sure data len is within supported range.
                                    assert!(data_len <= 0x8);

                                    let mut data = vec![0; data_len];
                                    // SAFETY: Accessing data from a mapped address
                                    let bytes_shared_ghcb =
                                        unsafe { (*ghcb).shared[0].to_le_bytes() };
                                    data.copy_from_slice(&bytes_shared_ghcb[..data_len]);

                                    if let Some(vm_ops) = &self.vm_ops {
                                        vm_ops.mmio_write(dst_gpa, &data).map_err(|e| {
                                            cpu::HypervisorCpuError::RunVcpu(e.into())
                                        })?;
                                    }

                                    // Clear the SW_EXIT_INFO1 register to indicate no error
                                    self.clear_swexit_info1()?;
                                }
                                SVM_EXITCODE_SNP_GUEST_REQUEST
                                | SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST => {
                                    if exit_code == SVM_EXITCODE_SNP_EXTENDED_GUEST_REQUEST {
                                        info!("Fetching extended guest request is not supported");
                                        // We don't support extended guest request, so we just write empty data.
                                        // This matches the behavior of KVM in Linux 6.11.

                                        // Read the data GPA (RAX) and page count (RBX) from the GHCB.
                                        // SAFETY: Accessing data from a mapped address
                                        let data_gpa = unsafe { (*ghcb).rax };
                                        // SAFETY: Accessing data from a mapped address
                                        let data_npages = unsafe { (*ghcb).rbx };

                                        if data_npages > 0 {
                                            // The certificates are terminated by 24 zero bytes.
                                            // TODO: Need to check if data_gpa is the address of the shared buffer in the GHCB page
                                            // in that case we should clear the shared buffer(24 bytes)
                                            self.gpa_write(data_gpa, &[0; 24])?;
                                        }
                                    }

                                    let req_gpa =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1;
                                    let rsp_gpa =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2;

                                    let mshv_psp_req =
                                        mshv_issue_psp_guest_request { req_gpa, rsp_gpa };
                                    self.vm_fd
                                        .psp_issue_guest_request(&mshv_psp_req)
                                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                                    debug!(
                                        "SNP guest request: req_gpa {:0x} rsp_gpa {:0x}",
                                        req_gpa, rsp_gpa
                                    );

                                    set_svm_field_u64_ptr!(ghcb, exit_info2, 0);
                                }
                                SVM_EXITCODE_SNP_AP_CREATION => {
                                    let vmsa_gpa =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info2;
                                    let apic_id =
                                        info.__bindgen_anon_2.__bindgen_anon_1.sw_exit_info1 >> 32;
                                    debug!(
                                        "SNP AP CREATE REQUEST with VMSA GPA {:0x}, and APIC ID {:?}",
                                        vmsa_gpa, apic_id
                                    );

                                    let mshv_ap_create_req = mshv_sev_snp_ap_create {
                                        vp_id: apic_id,
                                        vmsa_gpa,
                                    };
                                    self.vm_fd
                                        .sev_snp_ap_create(&mshv_ap_create_req)
                                        .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()))?;

                                    // Clear the SW_EXIT_INFO1 register to indicate no error
                                    self.clear_swexit_info1()?;
                                }
                                _ => panic!(
                                    "GHCB_INFO_NORMAL: Unhandled exit code: {:0x}",
                                    exit_code
                                ),
                            }
                        }
                        _ => panic!("Unsupported VMGEXIT operation: {:0x}", ghcb_op),
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                exit => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unhandled VCPU exit {:?}",
                    exit
                ))),
            },

            Err(e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }

    #[cfg(target_arch = "aarch64")]
    fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
        unimplemented!()
    }

    #[cfg(target_arch = "aarch64")]
    fn has_pmu_support(&self) -> bool {
        unimplemented!()
    }

    #[cfg(target_arch = "aarch64")]
    fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
        unimplemented!()
    }

    #[cfg(target_arch = "aarch64")]
    fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
        unimplemented!()
    }

    #[cfg(target_arch = "aarch64")]
    fn get_reg_list(&self, _reg_list: &mut crate::RegList) -> cpu::Result<()> {
        unimplemented!()
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_init(&self, _kvi: &crate::VcpuInit) -> cpu::Result<()> {
        unimplemented!()
    }

    #[cfg(target_arch = "aarch64")]
    fn set_regs(&self, _regs: &crate::StandardRegisters) -> cpu::Result<()> {
        unimplemented!()
    }

    #[cfg(target_arch = "aarch64")]
    fn get_regs(&self) -> cpu::Result<crate::StandardRegisters> {
        unimplemented!()
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_finalize(&self, _feature: i32) -> cpu::Result<()> {
        unimplemented!()
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_get_finalized_features(&self) -> i32 {
        unimplemented!()
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_set_processor_features(
        &self,
        _vm: &Arc<dyn crate::Vm>,
        _kvi: &mut crate::VcpuInit,
        _id: u8,
    ) -> cpu::Result<()> {
        unimplemented!()
    }

    #[cfg(target_arch = "aarch64")]
    fn create_vcpu_init(&self) -> crate::VcpuInit {
        unimplemented!();
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
        let cpuid: Vec<mshv_bindings::hv_cpuid_entry> = cpuid.iter().map(|e| (*e).into()).collect();
        let mshv_cpuid = <CpuId>::from_entries(&cpuid)
            .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;

        self.fd
            .register_intercept_result_cpuid(&mshv_cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    fn get_cpuid2(&self, _num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
        Ok(self.cpuid.clone())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to retrieve cpuid leaf
    ///
    fn get_cpuid_values(
        &self,
        function: u32,
        index: u32,
        xfem: u64,
        xss: u64,
    ) -> cpu::Result<[u32; 4]> {
        self.fd
            .get_cpuid_values(function, index, xfem, xss)
            .map_err(|e| cpu::HypervisorCpuError::GetCpuidVales(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<crate::arch::x86::LapicState> {
        Ok(self
            .fd
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, lapic: &crate::arch::x86::LapicState) -> cpu::Result<()> {
        let lapic: mshv_bindings::LapicState = (*lapic).clone().into();
        self.fd
            .set_lapic(&lapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }

    ///
    /// Returns the vcpu's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        Ok(MpState::Mshv)
    }

    ///
    /// Sets the vcpu's current "multiprocessing state".
    ///
    fn set_mp_state(&self, _mp_state: MpState) -> cpu::Result<()> {
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set CPU state for x86_64 guest.
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let mut state: VcpuMshvState = state.clone().into();
        self.set_msrs(&state.msrs)?;
        self.set_vcpu_events(&state.vcpu_events)?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_fpu(&state.fpu)?;
        self.set_xcrs(&state.xcrs)?;
        // These registers are global and need to be set only for the first vCPU,
        // as the Microsoft Hypervisor allows setting them for only one vCPU.
        if self.vp_index == 0 {
            self.fd
                .set_misc_regs(&state.misc)
                .map_err(|e| cpu::HypervisorCpuError::SetMiscRegs(e.into()))?
        }
        self.fd
            .set_debug_regs(&state.dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))?;
        self.fd
            .set_all_vp_state_components(&mut state.vp_states)
            .map_err(|e| cpu::HypervisorCpuError::SetAllVpStateComponents(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Set CPU state for aarch64 guest.
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        unimplemented!()
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get CPU State for x86_64 guest
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xcrs = self.get_xcrs()?;
        let fpu = self.get_fpu()?;
        let vcpu_events = self.get_vcpu_events()?;
        let mut msrs = self.msrs.clone();
        self.get_msrs(&mut msrs)?;
        let misc = self
            .fd
            .get_misc_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetMiscRegs(e.into()))?;
        let dbg = self
            .fd
            .get_debug_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetDebugRegs(e.into()))?;
        let vp_states = self
            .fd
            .get_all_vp_state_components()
            .map_err(|e| cpu::HypervisorCpuError::GetAllVpStateComponents(e.into()))?;

        Ok(VcpuMshvState {
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            xcrs,
            dbg,
            misc,
            vp_states,
        }
        .into())
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Get CPU state for aarch64 guest.
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        unimplemented!()
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Translate guest virtual address to guest physical address
    ///
    fn translate_gva(&self, gva: u64, flags: u64) -> cpu::Result<(u64, u32)> {
        let r = self
            .fd
            .translate_gva(gva, flags)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;

        let gpa = r.0;
        // SAFETY: r is valid, otherwise this function will have returned
        let result_code = unsafe { r.1.__bindgen_anon_1.result_code };

        Ok((gpa, result_code))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU
    ///
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }

    ///
    /// Sets the AMD specific vcpu's sev control register.
    ///
    #[cfg(feature = "sev_snp")]
    fn set_sev_control_register(&self, vmsa_pfn: u64) -> cpu::Result<()> {
        let sev_control_reg = snp::get_sev_control_register(vmsa_pfn);

        self.fd
            .set_sev_control_register(sev_control_reg)
            .map_err(|e| cpu::HypervisorCpuError::SetSevControlRegister(e.into()))
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Trigger NMI interrupt
    ///
    fn nmi(&self) -> cpu::Result<()> {
        let cfg = InterruptRequest {
            interrupt_type: hv_interrupt_type_HV_X64_INTERRUPT_TYPE_NMI,
            apic_id: self.vp_index as u64,
            level_triggered: false,
            vector: 0,
            logical_destination_mode: false,
            long_mode: false,
        };
        self.vm_fd
            .request_virtual_interrupt(&cfg)
            .map_err(|e| cpu::HypervisorCpuError::Nmi(e.into()))
    }
}

impl MshvVcpu {
    ///
    /// Deactivate previously used GHCB page.
    ///
    #[cfg(feature = "sev_snp")]
    fn disable_prev_ghcb_page(&self) -> cpu::Result<()> {
        let mut reg_assocs = [hv_register_assoc {
            name: hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
            ..Default::default()
        }];
        self.fd.get_reg(&mut reg_assocs).unwrap();
        // SAFETY: Accessing a union element from bindgen generated bindings.
        let prev_ghcb_gpa = unsafe { reg_assocs[0].value.reg64 };

        debug!("Prev GHCB GPA is {:x}", prev_ghcb_gpa);

        let mut ghcb_gpa = hv_x64_register_sev_ghcb::default();

        // SAFETY: Accessing a union element from bindgen generated bindings.
        unsafe {
            ghcb_gpa.__bindgen_anon_1.set_enabled(0);
            ghcb_gpa.__bindgen_anon_1.set_page_number(prev_ghcb_gpa);
        }

        // SAFETY: Accessing a union element from bindgen generated bindings.
        let reg_name_value = unsafe {
            [(
                hv_register_name_HV_X64_REGISTER_SEV_GHCB_GPA,
                ghcb_gpa.as_uint64,
            )]
        };

        set_registers_64!(self.fd, reg_name_value)
            .map_err(|e| cpu::HypervisorCpuError::SetRegister(e.into()))?;

        Ok(())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }

    ///
    /// Clear SW_EXIT_INFO1 register for SEV-SNP guests.
    ///
    #[cfg(feature = "sev_snp")]
    fn clear_swexit_info1(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        // Clear the SW_EXIT_INFO1 register to indicate no error
        // Safe to use unwrap, for sev_snp guest we already have the
        // GHCB pointer wrapped in the option, otherwise this place is not reached.
        let ghcb = self.ghcb.as_ref().unwrap().0;
        set_svm_field_u64_ptr!(ghcb, exit_info1, 0);

        Ok(cpu::VmExit::Ignore)
    }

    #[cfg(feature = "sev_snp")]
    fn gpa_write(&self, gpa: u64, data: &[u8]) -> cpu::Result<()> {
        for (gpa, chunk) in (gpa..)
            .step_by(HV_READ_WRITE_GPA_MAX_SIZE as usize)
            .zip(data.chunks(HV_READ_WRITE_GPA_MAX_SIZE as usize))
        {
            let mut data = [0; HV_READ_WRITE_GPA_MAX_SIZE as usize];
            data[..chunk.len()].copy_from_slice(chunk);

            let mut rw_gpa_arg = mshv_bindings::mshv_read_write_gpa {
                base_gpa: gpa,
                byte_count: chunk.len() as u32,
                data,
                ..Default::default()
            };
            self.fd
                .gpa_write(&mut rw_gpa_arg)
                .map_err(|e| cpu::HypervisorCpuError::GpaWrite(e.into()))?;
        }

        Ok(())
    }
}

/// Wrapper over Mshv VM ioctls.
pub struct MshvVm {
    fd: Arc<VmFd>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    dirty_log_slots: Arc<RwLock<HashMap<u64, MshvDirtyLogSlot>>>,
    #[cfg(feature = "sev_snp")]
    sev_snp_enabled: bool,
    #[cfg(feature = "sev_snp")]
    host_access_pages: ArcSwap<AtomicBitmap>,
}

impl MshvVm {
    ///
    /// Creates an in-kernel device.
    ///
    /// See the documentation for `MSHV_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<VfioDeviceFd> {
        let device_fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        Ok(VfioDeviceFd::new_from_mshv(device_fd))
    }
}

///
/// Implementation of Vm trait for Mshv
///
/// # Examples
///
/// ```
/// extern crate hypervisor;
/// use hypervisor::mshv::MshvHypervisor;
/// use std::sync::Arc;
/// let mshv = MshvHypervisor::new().unwrap();
/// let hypervisor = Arc::new(mshv);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl vm::Vm for MshvVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    fn set_identity_map_address(&self, _address: u64) -> vm::Result<()> {
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, _offset: usize) -> vm::Result<()> {
        Ok(())
    }

    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        Ok(())
    }

    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("register_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))?;

        Ok(())
    }

    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
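    ///
    /// A minimal sketch (not compiled here); assumes the same `EventFd` was
    /// previously passed to `register_irqfd` for this `gsi`:
    ///
    /// ```ignore
    /// use vmm_sys_util::eventfd::EventFd;
    /// let fd = EventFd::new(0).unwrap();
    /// vm.register_irqfd(&fd, 30).unwrap();
    /// vm.unregister_irqfd(&fd, 30).unwrap();
    /// ```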
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        debug!("unregister_irqfd fd {} gsi {}", fd.as_raw_fd(), gsi);

        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))?;

        Ok(())
    }

    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let vcpu_fd = self
            .fd
            .create_vcpu(id)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;

        /* Map the GHCB page into the VMM (root) address space.
         * The mapping is available after vCPU creation. This address maps to
         * the overlay GHCB page of the Microsoft Hypervisor, so we don't have
         * to worry about the scenario where the guest changes its GHCB mapping.
         */
        #[cfg(feature = "sev_snp")]
        let ghcb = if self.sev_snp_enabled {
            // SAFETY: Safe to call as VCPU has this map already available upon creation
            let addr = unsafe {
                libc::mmap(
                    std::ptr::null_mut(),
                    HV_PAGE_SIZE,
                    libc::PROT_READ | libc::PROT_WRITE,
                    libc::MAP_SHARED,
                    vcpu_fd.as_raw_fd(),
                    MSHV_VP_MMAP_OFFSET_GHCB as i64 * libc::sysconf(libc::_SC_PAGE_SIZE),
                )
            };
            if addr == libc::MAP_FAILED {
                // No point of continuing, without this mmap VMGEXIT will fail anyway
                // Return error
                return Err(vm::HypervisorVmError::MmapToRoot);
            }
            Some(Ghcb(addr as *mut svm_ghcb_base))
        } else {
            None
        };
        let vcpu = MshvVcpu {
            fd: vcpu_fd,
            vp_index: id,
            #[cfg(target_arch = "x86_64")]
            cpuid: Vec::new(),
            #[cfg(target_arch = "x86_64")]
            msrs: self.msrs.clone(),
            vm_ops,
            vm_fd: self.fd.clone(),
            #[cfg(feature = "sev_snp")]
            ghcb,
            #[cfg(feature = "sev_snp")]
            host_access_pages: ArcSwap::new(self.host_access_pages.load().clone()),
        };
        Ok(Arc::new(vcpu))
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, _file: File) -> vm::Result<()> {
        Ok(())
    }

    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<DataMatch>,
    ) -> vm::Result<()> {
        #[cfg(feature = "sev_snp")]
        if self.sev_snp_enabled {
            return Ok(());
        }

        let addr = &mshv_ioctls::IoEventAddress::from(*addr);
        debug!(
            "register_ioevent fd {} addr {:x?} datamatch {:?}",
            fd.as_raw_fd(),
            addr,
            datamatch
        );
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(mshv_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(mshv_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, mshv_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }

    /// Unregister an event from a certain address it has been previously registered to.
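    ///
    /// A minimal sketch (not compiled here); assumes the event was registered
    /// at the same `IoEventAddress` beforehand:
    ///
    /// ```ignore
    /// use hypervisor::IoEventAddress;
    /// use vmm_sys_util::eventfd::EventFd;
    /// let fd = EventFd::new(0).unwrap();
    /// let addr = IoEventAddress::Mmio(0xd000_0000);
    /// vm.register_ioevent(&fd, &addr, None).unwrap();
    /// vm.unregister_ioevent(&fd, &addr).unwrap();
    /// ```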

    /// Unregisters an event from the address it has previously been registered to.
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        #[cfg(feature = "sev_snp")]
        if self.sev_snp_enabled {
            return Ok(());
        }

        let addr = &mshv_ioctls::IoEventAddress::from(*addr);
        debug!("unregister_ioevent fd {} addr {:x?}", fd.as_raw_fd(), addr);

        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    /// Creates a guest physical memory region.
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // Keep track of the slot whether or not the region is read-only.
        // For a read-only region the hypervisor can still enable dirty-bit
        // tracking, but a VM exit happens before the dirty bits are set.
        self.dirty_log_slots.write().unwrap().insert(
            user_memory_region.guest_pfn,
            MshvDirtyLogSlot {
                guest_pfn: user_memory_region.guest_pfn,
                memory_size: user_memory_region.size,
            },
        );

        self.fd
            .map_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))?;
        Ok(())
    }

    /// Removes a guest physical memory region.
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let user_memory_region: mshv_user_mem_region = user_memory_region.into();
        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots
            .write()
            .unwrap()
            .remove(&user_memory_region.guest_pfn);

        self.fd
            .unmap_user_memory(user_memory_region)
            .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))?;
        Ok(())
    }

    fn make_user_memory_region(
        &self,
        _slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        _log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        let mut flags = 1 << MSHV_SET_MEM_BIT_EXECUTABLE;
        if !readonly {
            flags |= 1 << MSHV_SET_MEM_BIT_WRITABLE;
        }

        mshv_user_mem_region {
            flags,
            guest_pfn: guest_phys_addr >> PAGE_SHIFT,
            size: memory_size,
            userspace_addr,
            ..Default::default()
        }
        .into()
    }

    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = mshv_create_device {
            type_: MSHV_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match config {
            InterruptSourceConfig::MsiIrq(cfg) => mshv_user_irq_entry {
                gsi,
                address_lo: cfg.low_addr,
                address_hi: cfg.high_addr,
                data: cfg.data,
            }
            .into(),
            _ => {
                unreachable!()
            }
        }
    }
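
    // Illustrative sketch (not part of the upstream file): MSI routing entries are
    // built with make_routing_entry() above and committed with set_gsi_routing(),
    // shown immediately below. The `vm` handle, the `gsi` value and the `msi_cfg`
    // variable (an InterruptSourceConfig::MsiIrq assembled elsewhere by the
    // interrupt manager) are assumptions for illustration.
    //
    //     let entry = vm.make_routing_entry(gsi, &msi_cfg);
    //     vm.set_gsi_routing(&[entry])?;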

    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut msi_routing =
            vec_with_array_field::<mshv_user_irq_table, mshv_user_irq_entry>(entries.len());
        msi_routing[0].nr = entries.len() as u32;

        let entries: Vec<mshv_user_irq_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Mshv(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: msi_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [mshv_user_irq_entry] =
                msi_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_msi_routing(&msi_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }

    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        self.fd
            .enable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))
    }

    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        // Before disabling the dirty page tracking we need
        // to set the dirty bits in the Hypervisor.
        // This is a requirement from Microsoft Hypervisor.
        for (_, s) in dirty_log_slots.iter() {
            self.fd
                .get_dirty_log(
                    s.guest_pfn,
                    s.memory_size as usize,
                    MSHV_GPAP_ACCESS_OP_SET as u8,
                )
                .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        }
        self.fd
            .disable_dirty_page_tracking()
            .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
        Ok(())
    }

    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, _slot: u32, base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(
                base_gpa >> PAGE_SHIFT,
                memory_size as usize,
                MSHV_GPAP_ACCESS_OP_CLEAR as u8,
            )
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }
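
    // Illustrative sketch (not part of the upstream file): during live migration
    // the dirty-page-tracking methods above are typically used in a loop like the
    // commented code below. The `vm` handle and the `regions` list of
    // (base_gpa, size) pairs are assumptions for illustration.
    //
    //     vm.start_dirty_log()?;
    //     loop {
    //         for (base_gpa, size) in &regions {
    //             // One bit per 4 KiB page; the CLEAR access op resets the
    //             // dirty state as it is read.
    //             let bitmap: Vec<u64> = vm.get_dirty_log(0, *base_gpa, *size)?;
    //             // ... copy the pages whose bits are set ...
    //         }
    //         // ... stop once the remaining dirty set is small enough ...
    //         break;
    //     }
    //     vm.stop_dirty_log()?;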

    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        let val = self
            .fd
            .get_partition_property(hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME)
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?;
        Ok(MshvClockData { ref_time: val }.into())
    }

    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        let data: MshvClockData = (*data).into();
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_REFERENCE_TIME,
                data.ref_time,
            )
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }

    /// Downcast to the underlying MshvVm type
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Initialize the SEV-SNP VM
    #[cfg(feature = "sev_snp")]
    fn sev_snp_init(&self) -> vm::Result<()> {
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_ISOLATION_STATE,
                hv_partition_isolation_state_HV_PARTITION_ISOLATION_SECURE as u64,
            )
            .map_err(|e| vm::HypervisorVmError::InitializeSevSnp(e.into()))
    }

    ///
    /// Import isolated pages; these pages will be used
    /// for the PSP (Platform Security Processor) measurement.
    ///
    #[cfg(feature = "sev_snp")]
    fn import_isolated_pages(
        &self,
        page_type: u32,
        page_size: u32,
        pages: &[u64],
    ) -> vm::Result<()> {
        debug_assert!(page_size == hv_isolated_page_size_HV_ISOLATED_PAGE_SIZE_4KB);
        if pages.is_empty() {
            return Ok(());
        }

        let mut isolated_pages =
            vec_with_array_field::<mshv_import_isolated_pages, u64>(pages.len());
        isolated_pages[0].page_type = page_type as u8;
        isolated_pages[0].page_count = pages.len() as u64;
        // SAFETY: isolated_pages initialized with pages.len() and now it is being turned into
        // pages_slice with pages.len() again. It is guaranteed to be large enough to hold
        // everything from pages.
        unsafe {
            let pages_slice: &mut [u64] = isolated_pages[0].guest_pfns.as_mut_slice(pages.len());
            pages_slice.copy_from_slice(pages);
        }
        self.fd
            .import_isolated_pages(&isolated_pages[0])
            .map_err(|e| vm::HypervisorVmError::ImportIsolatedPages(e.into()))
    }

    ///
    /// Complete the isolated import, telling the hypervisor that
    /// importing the pages to guest memory is complete.
    ///
    #[cfg(feature = "sev_snp")]
    fn complete_isolated_import(
        &self,
        snp_id_block: IGVM_VHS_SNP_ID_BLOCK,
        host_data: [u8; 32],
        id_block_enabled: u8,
    ) -> vm::Result<()> {
        let mut auth_info = hv_snp_id_auth_info {
            id_key_algorithm: snp_id_block.id_key_algorithm,
            auth_key_algorithm: snp_id_block.author_key_algorithm,
            ..Default::default()
        };
        // Each of the r and s signature components is 576 bits long.
        auth_info.id_block_signature[..SIG_R_COMPONENT_SIZE_IN_BYTES]
            .copy_from_slice(snp_id_block.id_key_signature.r_comp.as_ref());
        auth_info.id_block_signature
            [SIG_R_COMPONENT_SIZE_IN_BYTES..SIG_R_AND_S_COMPONENT_SIZE_IN_BYTES]
            .copy_from_slice(snp_id_block.id_key_signature.s_comp.as_ref());
        auth_info.id_key[..ECDSA_CURVE_ID_SIZE_IN_BYTES]
            .copy_from_slice(snp_id_block.id_public_key.curve.to_le_bytes().as_ref());
        auth_info.id_key[ECDSA_SIG_X_COMPONENT_START..ECDSA_SIG_X_COMPONENT_END]
            .copy_from_slice(snp_id_block.id_public_key.qx.as_ref());
        auth_info.id_key[ECDSA_SIG_Y_COMPONENT_START..ECDSA_SIG_Y_COMPONENT_END]
            .copy_from_slice(snp_id_block.id_public_key.qy.as_ref());

        let data = mshv_complete_isolated_import {
            import_data: hv_partition_complete_isolated_import_data {
                psp_parameters: hv_psp_launch_finish_data {
                    id_block: hv_snp_id_block {
                        launch_digest: snp_id_block.ld,
                        family_id: snp_id_block.family_id,
                        image_id: snp_id_block.image_id,
                        version: snp_id_block.version,
                        guest_svn: snp_id_block.guest_svn,
                        policy: get_default_snp_guest_policy(),
                    },
                    id_auth_info: auth_info,
                    host_data,
                    id_block_enabled,
                    author_key_enabled: 0,
                },
            },
        };
        self.fd
            .complete_isolated_import(&data)
            .map_err(|e| vm::HypervisorVmError::CompleteIsolatedImport(e.into()))
    }

    #[cfg(target_arch = "aarch64")]
    fn create_vgic(&self, _config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
        unimplemented!()
    }

    #[cfg(target_arch = "aarch64")]
    fn get_preferred_target(&self, _kvi: &mut crate::VcpuInit) -> vm::Result<()> {
        unimplemented!()
    }
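
    // Illustrative sketch (not part of the upstream file): the SEV-SNP launch flow
    // ties the SEV-SNP methods above together roughly as in the commented code
    // below. The `vm` handle and the `page_type`, `page_size_4k`, `pfns`,
    // `id_block` and `host_data` inputs (normally produced by IGVM file parsing)
    // are assumptions for illustration.
    //
    //     #[cfg(feature = "sev_snp")]
    //     {
    //         vm.sev_snp_init()?;
    //         // Register guest memory, then measure/import the initial pages.
    //         vm.import_isolated_pages(page_type, page_size_4k, &pfns)?;
    //         // Finalize the launch; the PSP validates the ID block if enabled.
    //         vm.complete_isolated_import(id_block, host_data, 1)?;
    //     }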

    /// Pause the VM
    fn pause(&self) -> vm::Result<()> {
        // Freeze the partition
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
                1u64,
            )
            .map_err(|e| {
                vm::HypervisorVmError::SetVmProperty(anyhow!(
                    "Failed to set partition property: {}",
                    e
                ))
            })
    }

    /// Resume the VM
    fn resume(&self) -> vm::Result<()> {
        // Unfreeze the partition by clearing the TIME_FREEZE property
        self.fd
            .set_partition_property(
                hv_partition_property_code_HV_PARTITION_PROPERTY_TIME_FREEZE,
                0u64,
            )
            .map_err(|e| {
                vm::HypervisorVmError::SetVmProperty(anyhow!(
                    "Failed to set partition property: {}",
                    e
                ))
            })
    }

    #[cfg(feature = "sev_snp")]
    fn gain_page_access(&self, gpa: u64, size: u32) -> vm::Result<()> {
        use mshv_ioctls::set_bits;
        const ONE_GB: usize = 1024 * 1024 * 1024;

        if !self.sev_snp_enabled {
            return Ok(());
        }

        let start_gpfn: u64 = gpa >> PAGE_SHIFT;
        let end_gpfn: u64 = (gpa + size as u64 - 1) >> PAGE_SHIFT;

        // Enlarge the bitmap if the last PFN is beyond the current bitmap length
        if end_gpfn >= self.host_access_pages.load().as_ref().len() as u64 {
            self.host_access_pages.rcu(|bitmap| {
                let mut bm = bitmap.as_ref().clone();
                bm.enlarge(ONE_GB);
                bm
            });
        }

        let gpas: Vec<u64> = (start_gpfn..=end_gpfn)
            .filter(|x| {
                !self
                    .host_access_pages
                    .load()
                    .as_ref()
                    .is_bit_set(*x as usize)
            })
            .map(|x| x << PAGE_SHIFT)
            .collect();

        if !gpas.is_empty() {
            let mut gpa_list = vec_with_array_field::<mshv_modify_gpa_host_access, u64>(gpas.len());
            gpa_list[0].page_count = gpas.len() as u64;
            gpa_list[0].flags = set_bits!(
                u8,
                MSHV_GPA_HOST_ACCESS_BIT_ACQUIRE,
                MSHV_GPA_HOST_ACCESS_BIT_READABLE,
                MSHV_GPA_HOST_ACCESS_BIT_WRITABLE
            );

            // SAFETY: gpa_list initialized with gpas.len() and now it is being turned into
            // gpas_slice with gpas.len() again. It is guaranteed to be large enough to hold
            // everything from gpas.
            unsafe {
                let gpas_slice: &mut [u64] = gpa_list[0].guest_pfns.as_mut_slice(gpas.len());
                gpas_slice.copy_from_slice(gpas.as_slice());
            }

            self.fd
                .modify_gpa_host_access(&gpa_list[0])
                .map_err(|e| vm::HypervisorVmError::ModifyGpaHostAccess(e.into()))?;

            for acquired_gpa in gpas {
                self.host_access_pages.rcu(|bitmap| {
                    let bm = bitmap.clone();
                    bm.set_bit((acquired_gpa >> PAGE_SHIFT) as usize);
                    bm
                });
            }
        }

        Ok(())
    }
}
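
// Illustrative sketch (not part of the upstream file): pausing and resuming the
// partition toggles the TIME_FREEZE partition property, so a snapshot/restore
// path would bracket its work roughly as in the commented code below. The `vm`
// handle is an assumption for illustration.
//
//     vm.pause()?; // sets HV_PARTITION_PROPERTY_TIME_FREEZE to 1
//     // ... save device and vCPU state while guest time is frozen ...
//     vm.resume()?; // sets HV_PARTITION_PROPERTY_TIME_FREEZE back to 0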