// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//
// Copyright 2018-2019 CrowdStrike, Inc.
//
//

#[cfg(target_arch = "aarch64")]
use crate::aarch64::gic::KvmGicV3Its;
#[cfg(target_arch = "aarch64")]
pub use crate::aarch64::{
    check_required_kvm_extensions, gic::Gicv3ItsState as GicState, is_system_register, VcpuInit,
    VcpuKvmState,
};
#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::gic::{Vgic, VgicConfig};
use crate::cpu;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, InterruptSourceConfig, VmOps};
use crate::HypervisorType;
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset_of};
use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
use std::any::Any;
use std::collections::HashMap;
#[cfg(target_arch = "x86_64")]
use std::fs::File;
#[cfg(target_arch = "x86_64")]
use std::os::unix::io::AsRawFd;
#[cfg(feature = "tdx")]
use std::os::unix::io::RawFd;
use std::result;
#[cfg(target_arch = "x86_64")]
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Mutex;
use std::sync::{Arc, RwLock};
use vmm_sys_util::eventfd::EventFd;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{
    CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, StandardRegisters, XsaveState,
    NUM_IOAPIC_PINS,
};
#[cfg(target_arch = "x86_64")]
use crate::ClockData;
use crate::{
    CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
    USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
};
#[cfg(target_arch = "aarch64")]
use aarch64::{RegList, Register, StandardRegisters};
#[cfg(target_arch = "x86_64")]
use kvm_bindings::{
    kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
    KVM_GUESTDBG_USE_HW_BP,
};
#[cfg(target_arch = "x86_64")]
use x86_64::check_required_kvm_extensions;
#[cfg(target_arch = "x86_64")]
pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState};
// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
pub use kvm_bindings;
pub use kvm_bindings::{
    kvm_clock_data, kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug,
    kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_userspace_memory_region,
    KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
    KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
};
#[cfg(target_arch = "aarch64")]
use kvm_bindings::{
    kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64,
    KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK,
    KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK,
    KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
};
#[cfg(feature = "tdx")]
use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO};
pub use kvm_ioctls;
pub use kvm_ioctls::{Cap, Kvm};
#[cfg(target_arch = "aarch64")]
use std::mem;
use thiserror::Error;
use vfio_ioctls::VfioDeviceFd;
#[cfg(feature = "tdx")]
use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr};
///
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
///
pub use {
    kvm_bindings::kvm_create_device as CreateDevice, kvm_bindings::kvm_device_attr as DeviceAttr,
    kvm_bindings::kvm_run, kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::VcpuExit,
};

#[cfg(target_arch = "x86_64")]
const KVM_CAP_SGX_ATTRIBUTE: u32 = 196;

#[cfg(target_arch = "x86_64")]
use vmm_sys_util::ioctl_io_nr;

#[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))]
use vmm_sys_util::ioctl_ioc_nr;

#[cfg(target_arch = "x86_64")]
ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a);

#[cfg(feature = "tdx")]
const KVM_EXIT_TDX: u32 = 50;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SUCCESS: u64 = 0;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000;

#[cfg(feature = "tdx")]
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);

#[cfg(feature = "tdx")]
#[repr(u32)]
enum TdxCommand {
    Capabilities = 0,
    InitVm,
    InitVcpu,
    InitMemRegion,
    Finalize,
}

#[cfg(feature = "tdx")]
pub enum TdxExitDetails {
    GetQuote,
    SetupEventNotifyInterrupt,
}

#[cfg(feature = "tdx")]
pub enum TdxExitStatus {
    Success,
    InvalidOperand,
}

#[cfg(feature = "tdx")]
const TDX_MAX_NR_CPUID_CONFIGS: usize = 6;

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCpuidConfig {
    pub leaf: u32,
    pub sub_leaf: u32,
    pub eax: u32,
    pub ebx: u32,
    pub ecx: u32,
    pub edx: u32,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCapabilities {
    pub attrs_fixed0: u64,
    pub attrs_fixed1: u64,
    pub xfam_fixed0: u64,
    pub xfam_fixed1: u64,
    pub nr_cpuid_configs: u32,
    pub padding: u32,
    pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS],
}

#[cfg(feature = "tdx")]
#[derive(Copy, Clone)]
pub struct KvmTdxExit {
    pub type_: u32,
    pub pad: u32,
    pub u: KvmTdxExitU,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Copy, Clone)]
pub union KvmTdxExitU {
    pub vmcall: KvmTdxExitVmcall,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct KvmTdxExitVmcall {
    pub type_: u64,
    pub subfunction: u64,
    pub reg_mask: u64,
    pub in_r12: u64,
    pub in_r13: u64,
    pub in_r14: u64,
    pub in_r15: u64,
    pub in_rbx: u64,
    pub in_rdi: u64,
    pub in_rsi: u64,
    pub in_r8: u64,
    pub in_r9: u64,
    pub in_rdx: u64,
    pub status_code: u64,
    pub out_r11: u64,
    pub out_r12: u64,
    pub out_r13: u64,
    pub out_r14: u64,
    pub out_r15: u64,
    pub out_rbx: u64,
    pub out_rdi: u64,
    pub out_rsi: u64,
    pub out_r8: u64,
    pub out_r9: u64,
    pub out_rdx: u64,
}

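// The `From` conversions below translate between the hypervisor-agnostic
// `UserMemoryRegion` flags and the KVM-specific `kvm_userspace_memory_region`
// flags. As a minimal sketch of the mapping (not exhaustive, assuming the flag
// constants imported at the top of this module):
//
//   READ | WRITE | LOG_DIRTY  ->  flags = KVM_MEM_LOG_DIRTY_PAGES
//   READ only                 ->  flags = KVM_MEM_READONLY
//   READ | WRITE              ->  flags = 0 (KVM mapped memory is always readable)
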
impl From<kvm_userspace_memory_region> for UserMemoryRegion {
    fn from(region: kvm_userspace_memory_region) -> Self {
        let mut flags = USER_MEMORY_REGION_READ;
        if region.flags & KVM_MEM_READONLY == 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 {
            flags |= USER_MEMORY_REGION_LOG_DIRTY;
        }

        UserMemoryRegion {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<UserMemoryRegion> for kvm_userspace_memory_region {
    fn from(region: UserMemoryRegion) -> Self {
        assert!(
            region.flags & USER_MEMORY_REGION_READ != 0,
            "KVM mapped memory is always readable"
        );

        let mut flags = 0;
        if region.flags & USER_MEMORY_REGION_WRITE == 0 {
            flags |= KVM_MEM_READONLY;
        }
        if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
            flags |= KVM_MEM_LOG_DIRTY_PAGES;
        }

        kvm_userspace_memory_region {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<kvm_mp_state> for MpState {
    fn from(s: kvm_mp_state) -> Self {
        MpState::Kvm(s)
    }
}

impl From<MpState> for kvm_mp_state {
    fn from(ms: MpState) -> Self {
        match ms {
            MpState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("MpState is not valid"),
        }
    }
}

impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: kvm_ioctls::IoEventAddress) -> Self {
        match a {
            kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuKvmState> for CpuState {
    fn from(s: VcpuKvmState) -> Self {
        CpuState::Kvm(s)
    }
}

impl From<CpuState> for VcpuKvmState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<kvm_clock_data> for ClockData {
    fn from(d: kvm_clock_data) -> Self {
        ClockData::Kvm(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for kvm_clock_data {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("ClockData is not valid"),
        }
    }
}

impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
    fn from(s: kvm_irq_routing_entry) -> Self {
        IrqRoutingEntry::Kvm(s)
    }
}

impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

struct KvmDirtyLogSlot {
    slot: u32,
    guest_phys_addr: u64,
    memory_size: u64,
    userspace_addr: u64,
}

/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    fd: Arc<VmFd>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>,
}

impl KvmVm {
    ///
    /// Creates an emulated device in the kernel.
    ///
    /// See the documentation for `KVM_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> {
        let device_fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        Ok(VfioDeviceFd::new_from_kvm(device_fd))
    }
    /// Checks if a particular `Cap` is available.
    pub fn check_extension(&self, c: Cap) -> bool {
        self.fd.check_extension(c)
    }
}

/// Implementation of Vm trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl vm::Vm for KvmVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    fn set_identity_map_address(&self, address: u64) -> vm::Result<()> {
        self.fd
            .set_identity_map_address(address)
            .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
        self.fd
            .set_tss_address(offset)
            .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
    }

    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        self.fd
            .create_irq_chip()
            .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
    }

    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
    }

    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
    }

    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let fd = self
            .fd
            .create_vcpu(id as u64)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = KvmVcpu {
            fd: Arc::new(Mutex::new(fd)),
            #[cfg(target_arch = "x86_64")]
            msrs: self.msrs.clone(),
            vm_ops,
            #[cfg(target_arch = "x86_64")]
            hyperv_synic: AtomicBool::new(false),
        };
        Ok(Arc::new(vcpu))
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Creates a virtual GIC device.
    ///
    fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
        let gic_device = KvmGicV3Its::new(self, config)
            .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?;
        Ok(Arc::new(Mutex::new(gic_device)))
    }

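    // A minimal usage sketch for the ioeventfd registration below (the MMIO
    // address is an illustrative placeholder and `vm` is assumed to be an
    // `Arc<dyn vm::Vm>`):
    //
    //   let evt = EventFd::new(libc::EFD_NONBLOCK).unwrap();
    //   vm.register_ioevent(&evt, &IoEventAddress::Mmio(0xd000_0000), None)?;
    //   // ... guest writes to 0xd000_0000 now signal `evt` ...
    //   vm.unregister_ioevent(&evt, &IoEventAddress::Mmio(0xd000_0000))?;
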
    ///
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<vm::DataMatch>,
    ) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(kvm_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(kvm_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }

    ///
    /// Unregisters an event from a certain address it has been previously registered to.
    ///
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match &config {
            InterruptSourceConfig::MsiIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_MSI,
                    ..Default::default()
                };

                kvm_route.u.msi.address_lo = cfg.low_addr;
                kvm_route.u.msi.address_hi = cfg.high_addr;
                kvm_route.u.msi.data = cfg.data;

                if self.check_extension(crate::kvm::Cap::MsiDevid) {
                    // On AArch64, there is a limitation on the range of the 'devid':
                    // it cannot be greater than 65535 (the maximum of a u16).
                    //
                    // The BDF cannot be used directly, because the 'segment' sits in
                    // the high 16 bits. The layout of the u32 BDF is:
                    // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --|
                    // |     segment     |     bus    |   device   |  function  |
                    //
                    // Given that we support only 1 bus per segment, we can build a
                    // 'devid' by replacing the 'bus' bits with the low 8 bits of
                    // the 'segment' data.
                    // This way we can resolve the range-checking problem and give a
                    // different `devid` to all the devices. The limitation is that
                    // at most 256 segments can be supported.
                    //
                    let modified_devid = (cfg.devid & 0x00ff_0000) >> 8 | cfg.devid & 0xff;

                    kvm_route.flags = KVM_MSI_VALID_DEVID;
                    kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid;
                }
                kvm_route.into()
            }
            InterruptSourceConfig::LegacyIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_IRQCHIP,
                    ..Default::default()
                };
                kvm_route.u.irqchip.irqchip = cfg.irqchip;
                kvm_route.u.irqchip.pin = cfg.pin;

                kvm_route.into()
            }
        }
    }

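    // Worked example for the `devid` packing in `make_routing_entry` above
    // (illustrative values): for segment 0x0002, bus 0x00, device 0x03,
    // function 0x1 the BDF-style devid is 0x0002_0019, and
    //   (0x0002_0019 & 0x00ff_0000) >> 8 | (0x0002_0019 & 0xff) = 0x0219,
    // i.e. the low byte of the segment takes the place of the (always zero)
    // bus byte, keeping the result within the 16-bit range KVM accepts.
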
    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
    ///
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
        irq_routing[0].nr = entries.len() as u32;
        irq_routing[0].flags = 0;
        let entries: Vec<kvm_irq_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Kvm(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: irq_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [kvm_irq_routing_entry] =
                irq_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_gsi_routing(&irq_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }

    ///
    /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
    ///
    fn make_user_memory_region(
        &self,
        slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        kvm_userspace_memory_region {
            slot,
            guest_phys_addr,
            memory_size,
            userspace_addr,
            flags: if readonly { KVM_MEM_READONLY } else { 0 }
                | if log_dirty_pages {
                    KVM_MEM_LOG_DIRTY_PAGES
                } else {
                    0
                },
        }
        .into()
    }

    ///
    /// Creates a guest physical memory region.
    ///
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 {
            if (region.flags & KVM_MEM_READONLY) != 0 {
                return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!(
                    "Error creating regions with both 'dirty-pages-log' and 'read-only'."
                )));
            }

            // Keep track of the regions that need dirty pages log
            self.dirty_log_slots.write().unwrap().insert(
                region.slot,
                KvmDirtyLogSlot {
                    slot: region.slot,
                    guest_phys_addr: region.guest_phys_addr,
                    memory_size: region.memory_size,
                    userspace_addr: region.userspace_addr,
                },
            );

            // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`.
            // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`.
            region.flags = 0;
        }

        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
        }
    }

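    // A minimal sketch of how the two methods above are meant to be combined
    // (assuming `vm` is an `Arc<dyn vm::Vm>` and `host_addr` points at a
    // valid, non-overlapping host mapping; all numbers are illustrative):
    //
    //   let region = vm.make_user_memory_region(
    //       0,             // slot
    //       0x1_0000_0000, // guest_phys_addr
    //       0x2000_0000,   // memory_size
    //       host_addr,     // userspace_addr
    //       false,         // readonly
    //       true,          // log_dirty_pages
    //   );
    //   vm.create_user_memory_region(region)?;
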
    ///
    /// Removes a guest physical memory region.
    ///
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots.write().unwrap().remove(&region.slot);

        // Setting the size to 0 means "remove"
        region.memory_size = 0;
        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))
        }
    }

    ///
    /// Returns the preferred CPU target type which can be emulated by KVM on underlying host.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
        self.fd
            .get_preferred_target(kvi)
            .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        // Create split irqchip
        // Only the local APIC is emulated in kernel, both PICs and IOAPIC
        // are not.
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = NUM_IOAPIC_PINS as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SGX_ATTRIBUTE,
            ..Default::default()
        };
        cap.args[0] = file.as_raw_fd() as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?;
        Ok(())
    }

    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        Ok(self
            .fd
            .get_clock()
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?
            .into())
    }

    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        let data = (*data).into();
        self.fd
            .set_clock(&data)
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }

    /// Create a device that is used for passthrough
    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = kvm_create_device {
            type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: KVM_MEM_LOG_DIRTY_PAGES,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

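    // Dirty-page tracking lifecycle (sketch): a live-migration caller is
    // expected to call `start_dirty_log()`, then repeatedly harvest the
    // per-slot bitmaps with `get_dirty_log(slot, base_gpa, size)` below (one
    // bit per 4 KiB page, so dirty pages can be counted with
    // `bitmap.iter().map(|w| w.count_ones()).sum::<u32>()`), and finally call
    // `stop_dirty_log()` once the memory copy has converged.
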
    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: 0,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(slot, memory_size as usize)
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }

    ///
    /// Initialize TDX for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> {
        const TDX_ATTR_SEPT_VE_DISABLE: usize = 28;

        let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default());

        #[repr(C)]
        struct TdxInitVm {
            attributes: u64,
            max_vcpus: u32,
            padding: u32,
            mrconfigid: [u64; 6],
            mrowner: [u64; 6],
            mrownerconfig: [u64; 6],
            cpuid_nent: u32,
            cpuid_padding: u32,
            cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256],
        }
        let data = TdxInitVm {
            attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE,
            max_vcpus,
            padding: 0,
            mrconfigid: [0; 6],
            mrowner: [0; 6],
            mrownerconfig: [0; 6],
            cpuid_nent: cpuid.len() as u32,
            cpuid_padding: 0,
            cpuid_entries: cpuid.as_slice().try_into().unwrap(),
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitVm,
            0,
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitializeTdx)
    }

    ///
    /// Finalize the TDX setup for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_finalize(&self) -> vm::Result<()> {
        tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
            .map_err(vm::HypervisorVmError::FinalizeTdx)
    }

    ///
    /// Initialize memory regions for the TDX VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init_memory_region(
        &self,
        host_address: u64,
        guest_address: u64,
        size: u64,
        measure: bool,
    ) -> vm::Result<()> {
        #[repr(C)]
        struct TdxInitMemRegion {
            host_address: u64,
            guest_address: u64,
            pages: u64,
        }
        let data = TdxInitMemRegion {
            host_address,
            guest_address,
            pages: size / 4096,
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitMemRegion,
            u32::from(measure),
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitMemRegionTdx)
    }

    /// Downcast to the underlying KvmVm type
    fn as_any(&self) -> &dyn Any {
        self
    }
}

#[cfg(feature = "tdx")]
fn tdx_command(
    fd: &RawFd,
    command: TdxCommand,
    flags: u32,
    data: u64,
) -> std::result::Result<(), std::io::Error> {
    #[repr(C)]
    struct TdxIoctlCmd {
        command: TdxCommand,
        flags: u32,
        data: u64,
        error: u64,
        unused: u64,
    }
    let cmd = TdxIoctlCmd {
        command,
        flags,
        data,
        error: 0,
        unused: 0,
    };
    // SAFETY: FFI call. All input parameters are valid.
    let ret = unsafe {
        ioctl_with_val(
            fd,
            KVM_MEMORY_ENCRYPT_OP(),
            &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong,
        )
    };

    if ret < 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}

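// A rough sketch of the order in which the TDX hooks above are driven when the
// "tdx" feature is enabled (the surrounding VMM is responsible for the actual
// firmware/payload layout, which is not shown here):
//
//   vm.tdx_init(&cpuid, max_vcpus)?;                      // TdxCommand::InitVm
//   vm.tdx_init_memory_region(host, guest, size, true)?;  // measured regions
//   vm.tdx_finalize()?;                                   // TdxCommand::Finalize
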
/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    kvm: Kvm,
}

impl KvmHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by the hypervisor.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

/// Enum for KVM related error
#[derive(Debug, Error)]
pub enum KvmError {
    #[error("Capability missing: {0:?}")]
    CapabilityMissing(Cap),
}

pub type KvmResult<T> = result::Result<T, KvmError>;

impl KvmHypervisor {
    /// Create a hypervisor based on Kvm
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
        let api_version = kvm_obj.get_api_version();

        if api_version != kvm_bindings::KVM_API_VERSION as i32 {
            return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
        }

        Ok(Arc::new(KvmHypervisor { kvm: kvm_obj }))
    }

    /// Check if the hypervisor is available
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/kvm") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}

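// A minimal sketch of probing for KVM before committing to it (assuming the
// caller can fall back to another hypervisor when `false` is returned):
//
//   if KvmHypervisor::is_available()? {
//       let hypervisor = KvmHypervisor::new()?;
//       // ... create VMs through the Hypervisor trait below ...
//   }
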
/// Implementation of Hypervisor trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl hypervisor::Hypervisor for KvmHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Kvm
    }

    /// Create a KVM vm object of a specific VM type and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type(0).unwrap();
    /// ```
    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm_with_type(vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs: Vec<MsrEntry> = vec![
                MsrEntry {
                    ..Default::default()
                };
                num_msrs
            ];
            let indices = msr_list.as_slice();
            for (pos, index) in indices.iter().enumerate() {
                msrs[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }

        #[cfg(target_arch = "aarch64")]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }

    /// Create a KVM vm object and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// ```
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        #[allow(unused_mut)]
        let mut vm_type: u64 = 0; // Create with default platform type

        // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA
        // size from the host and use that when creating the VM, which may
        // avoid unnecessary VM creation failures.
        #[cfg(target_arch = "aarch64")]
        if self.kvm.check_extension(Cap::ArmVmIPASize) {
            vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap();
        }

        self.create_vm_with_type(vm_type)
    }

    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        check_required_kvm_extensions(&self.kvm)
            .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve AArch64 host maximum IPA size supported by KVM.
    ///
    fn get_host_ipa_limit(&self) -> i32 {
        self.kvm.get_host_ipa_limit()
    }

    ///
    /// Retrieve TDX capabilities
    ///
    #[cfg(feature = "tdx")]
    fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> {
        let data = TdxCapabilities {
            nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32,
            ..Default::default()
        };

        tdx_command(
            &self.kvm.as_raw_fd(),
            TdxCommand::Capabilities,
            0,
            &data as *const _ as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?;

        Ok(data)
    }

    ///
    /// Get the number of supported hardware breakpoints
    ///
    fn get_guest_debug_hw_bps(&self) -> usize {
        #[cfg(target_arch = "x86_64")]
        {
            4
        }
        #[cfg(target_arch = "aarch64")]
        {
            self.kvm.get_guest_debug_hw_bps() as usize
        }
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32
    }
}

/// Vcpu struct for KVM
pub struct KvmVcpu {
    fd: Arc<Mutex<VcpuFd>>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    #[cfg(target_arch = "x86_64")]
    hyperv_synic: AtomicBool,
}

/// Implementation of Vcpu trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// ```
impl cpu::Vcpu for KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }

    ///
    /// Returns the vCPU general purpose registers.
    /// The `KVM_GET_REGS` ioctl is not available on AArch64, `KVM_GET_ONE_REG`
    /// is used to get registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        let mut state: StandardRegisters = kvm_regs::default();
        let mut off = offset_of!(user_pt_regs, regs);
        // There are 31 user_pt_regs:
        // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
        // These actually are the general-purpose registers of the Armv8-a
        // architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register).
        for i in 0..31 {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            state.regs.regs[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // We are now entering the "Other register" section of the ARMv8-a architecture.
        // First one, stack pointer.
        let off = offset_of!(user_pt_regs, sp);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.regs.sp = u64::from_le_bytes(bytes);

        // Second one, the program counter.
        let off = offset_of!(user_pt_regs, pc);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.regs.pc = u64::from_le_bytes(bytes);

        // Next is the processor state.
        let off = offset_of!(user_pt_regs, pstate);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.regs.pstate = u64::from_le_bytes(bytes);

        // The stack pointer associated with EL1
        let off = offset_of!(kvm_regs, sp_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.sp_el1 = u64::from_le_bytes(bytes);

        // Exception Link Register for EL1, when taking an exception to EL1, this register
        // holds the address to which to return afterwards.
        let off = offset_of!(kvm_regs, elr_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.elr_el1 = u64::from_le_bytes(bytes);

        // Saved Program Status Registers, there are 5 of them used in the kernel.
        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            state.spsr[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel:
        // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            let mut bytes = [0_u8; 16];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            state.fp_regs.vregs[i] = u128::from_le_bytes(bytes);
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        let mut bytes = [0_u8; 4];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.fp_regs.fpsr = u32::from_le_bytes(bytes);

        // Floating-point Control Register
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        let mut bytes = [0_u8; 4];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.fp_regs.fpcr = u32::from_le_bytes(bytes);
        Ok(state)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .lock()
            .unwrap()
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }

    ///
    /// Sets the vCPU general purpose registers.
    /// The `KVM_SET_REGS` ioctl is not available on AArch64, `KVM_SET_ONE_REG`
    /// is used to set registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // This function sets the registers in the exact same order as `get_regs`.
        // Look there for additional info on each register.
        let mut off = offset_of!(user_pt_regs, regs);
        for i in 0..31 {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &state.regs.regs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let off = offset_of!(user_pt_regs, sp);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.regs.sp.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pc);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.regs.pc.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pstate);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.regs.pstate.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, sp_el1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.sp_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, elr_el1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.elr_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &state.spsr[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
                    &state.fp_regs.vregs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &state.fp_regs.fpsr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &state.fp_regs.fpcr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .lock()
            .unwrap()
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
        let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        let kvm_cpuid = <CpuId>::from_entries(&cpuid)
            .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;

        self.fd
            .lock()
            .unwrap()
            .set_cpuid2(&kvm_cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        // Update the information about Hyper-V SynIC being enabled and
        // emulated as it will influence later which MSRs should be saved.
        self.hyperv_synic.store(true, Ordering::Release);

        let cap = kvm_enable_cap {
            cap: KVM_CAP_HYPERV_SYNIC,
            ..Default::default()
        };
        self.fd
            .lock()
            .unwrap()
            .enable_cap(&cap)
            .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into()))
    }

    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    #[cfg(target_arch = "x86_64")]
    fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .fd
            .lock()
            .unwrap()
            .get_cpuid2(num_entries)
            .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
        let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_lapic(&klapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
        let succ = self
            .fd
            .lock()
            .unwrap()
            .get_msrs(&mut kvm_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &kvm_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
        self.fd
            .lock()
            .unwrap()
            .set_msrs(&kvm_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    ///
    /// Returns the vcpu's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_mp_state()
            .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))?
            .into())
    }

    ///
    /// Sets the vcpu's current "multiprocessing state".
    ///
    fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_mp_state(mp_state.into())
            .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Translates guest virtual address to guest physical address using the `KVM_TRANSLATE` ioctl.
    ///
    fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> {
        let tr = self
            .fd
            .lock()
            .unwrap()
            .translate_gva(gva)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
        // tr.valid is set if the GVA is mapped to valid GPA.
        match tr.valid {
            0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!(
                "Invalid GVA: {:#x}",
                gva
            ))),
            _ => Ok((tr.physical_address, 0)),
        }
    }

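    // `get_msrs()` above only fills in values for the indices the caller
    // prepopulates, so a typical read looks like this sketch (the MSR index is
    // illustrative):
    //
    //   let mut msrs = vec![MsrEntry { index: 0x174, ..Default::default() }]; // IA32_SYSENTER_CS
    //   let read = vcpu.get_msrs(&mut msrs)?;
    //   assert_eq!(read, 1);
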
    ///
    /// Triggers the running of the current virtual CPU returning an exit reason.
    ///
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.lock().unwrap().run() {
            Ok(run) => match run {
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoIn(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .pio_read(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoOut(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .pio_write(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
                #[cfg(target_arch = "x86_64")]
                VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),

                #[cfg(target_arch = "aarch64")]
                VcpuExit::SystemEvent(event_type, flags) => {
                    use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
                    // On AArch64, when the VM is shut down, run() returns
                    // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN
                    if event_type == KVM_SYSTEM_EVENT_RESET {
                        Ok(cpu::VmExit::Reset)
                    } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
                        Ok(cpu::VmExit::Shutdown)
                    } else {
                        Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unexpected system event with type 0x{:x}, flags 0x{:x?}",
                            event_type,
                            flags
                        )))
                    }
                }

                VcpuExit::MmioRead(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .mmio_read(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                VcpuExit::MmioWrite(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .mmio_write(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),
                #[cfg(feature = "tdx")]
                VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx),
                VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug),

                r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unexpected exit reason on vcpu run: {:?}",
                    r
                ))),
            },

            Err(ref e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Let the guest know that it has been paused, which prevents potential
    /// soft lockups when it is resumed.
    ///
    fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
        if let Err(e) = self.fd.lock().unwrap().kvmclock_ctrl() {
            // The Linux kernel returns -EINVAL if the PV clock isn't yet initialised,
            // which could be because we're still in firmware or the guest doesn't
            // use the KVM clock.
            if e.errno() != libc::EINVAL {
                return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()));
            }
        }

        Ok(())
    }

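    // A minimal vCPU loop built on `run()` above (sketch; a real VMM also
    // handles pause/resume signalling and the remaining `VmExit` variants):
    //
    //   loop {
    //       match vcpu.run()? {
    //           cpu::VmExit::Ignore => continue,
    //           cpu::VmExit::Reset => break,    // reboot requested
    //           cpu::VmExit::Shutdown => break, // power off
    //           other => todo!("handle {:?}", other),
    //       }
    //   }
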
    ///
    /// Sets debug registers to set hardware breakpoints and/or enable single step.
    ///
    fn set_guest_debug(
        &self,
        addrs: &[vm_memory::GuestAddress],
        singlestep: bool,
    ) -> cpu::Result<()> {
        let mut dbg = kvm_guest_debug {
            #[cfg(target_arch = "x86_64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
            #[cfg(target_arch = "aarch64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW,
            ..Default::default()
        };
        if singlestep {
            dbg.control |= KVM_GUESTDBG_SINGLESTEP;
        }

        // Set the debug registers.
        // Here we assume that the number of addresses does not exceed what
        // `Hypervisor::get_guest_debug_hw_bps()` specifies.
        #[cfg(target_arch = "x86_64")]
        {
            // Set bits 9 and 10.
            // bit 9: GE (global exact breakpoint enable) flag.
            // bit 10: always 1.
            dbg.arch.debugreg[7] = 0x0600;

            for (i, addr) in addrs.iter().enumerate() {
                dbg.arch.debugreg[i] = addr.0;
                // Set global breakpoint enable flag
                dbg.arch.debugreg[7] |= 2 << (i * 2);
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            for (i, addr) in addrs.iter().enumerate() {
                // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2):
                // bit 0: 1 (Enabled)
                // bit 1~2: 0b11 (PMC = EL1/EL0)
                // bit 5~8: 0b1111 (BAS = AArch64)
                // others: 0
                dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64;
                // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3):
                // bit 2~52: VA[2:52]
                dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0;
            }
        }
        self.fd
            .lock()
            .unwrap()
            .set_guest_debug(&dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))
    }

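    // Worked example for the x86_64 branch of `set_guest_debug` above: with two
    // breakpoint addresses, DR7 becomes
    //   0x0600 | (2 << 0) | (2 << 2) = 0x060a,
    // i.e. GE and bit 10 set plus the G0 and G1 global-enable bits, while DR0
    // and DR1 hold the two guest addresses.
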
    #[cfg(target_arch = "aarch64")]
    fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .vcpu_init(kvi)
            .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
    }

    ///
    /// Gets a list of the guest registers that are supported for the
    /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
    }

    ///
    /// Gets the value of a system register
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
        //
        // The Arm Architecture Reference Manual defines the encoding of
        // AArch64 system registers, see
        // https://developer.arm.com/documentation/ddi0487 (chapter D12),
        // while KVM defines its own ID for each AArch64 system register,
        // which is used when calling `KVM_G/SET_ONE_REG` to access a system
        // register of a guest.
        // A mapping exists between the Arm standard encoding and the KVM ID.
        // This function takes the standard u32 ID as input parameter, converts
        // it to the corresponding KVM ID, and calls the `KVM_GET_ONE_REG` API
        // to get the value of the system register.
        //
        let id: u64 = KVM_REG_ARM64
            | KVM_REG_SIZE_U64
            | KVM_REG_ARM64_SYSREG as u64
            | ((((sys_reg) >> 5)
                & (KVM_REG_ARM64_SYSREG_OP0_MASK
                    | KVM_REG_ARM64_SYSREG_OP1_MASK
                    | KVM_REG_ARM64_SYSREG_CRN_MASK
                    | KVM_REG_ARM64_SYSREG_CRM_MASK
                    | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(id, &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
        Ok(u64::from_le_bytes(bytes))
    }

    ///
    /// Configure core registers for a given CPU.
    ///
    #[cfg(target_arch = "aarch64")]
    fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
        #[allow(non_upper_case_globals)]
        // PSR (Processor State Register) bits.
        // Taken from arch/arm64/include/uapi/asm/ptrace.h.
        const PSR_MODE_EL1h: u64 = 0x0000_0005;
        const PSR_F_BIT: u64 = 0x0000_0040;
        const PSR_I_BIT: u64 = 0x0000_0080;
        const PSR_A_BIT: u64 = 0x0000_0100;
        const PSR_D_BIT: u64 = 0x0000_0200;
        // Taken from arch/arm64/kvm/inject_fault.c.
        const PSTATE_FAULT_BITS_64: u64 =
            PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;

        let kreg_off = offset_of!(kvm_regs, regs);

        // Get the register index of the PSTATE (Processor State) register.
        let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
                &PSTATE_FAULT_BITS_64.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        // Other vCPUs are powered off initially awaiting PSCI wakeup.
        if cpu_id == 0 {
            // Setting the PC (Program Counter) to the current program address (kernel address).
            let pc = offset_of!(user_pt_regs, pc) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
                    &boot_ip.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

            // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
            // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
            // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
            // We are choosing to place it at the end of DRAM. See `get_fdt_addr`.
            let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
                    &fdt_start.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
        }
        Ok(())
    }

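    // In other words, after `setup_regs(0, kernel_entry, fdt_start)` the boot
    // vCPU is left with (sketch of the resulting register file):
    //   PSTATE = PSTATE_FAULT_BITS_64 (EL1h, D/A/I/F masked)
    //   PC     = kernel_entry
    //   X0     = fdt_start
    // Secondary vCPUs only get PSTATE set here and wait to be brought up via PSCI.
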

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before most everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change state of LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save. The code looks as
    /// if it might as well be affected by internal state modifications of the
    /// GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a prepopulated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?.into();
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fall back to a slower method by getting MSRs
        // by chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
                0x400000b5, 0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr.into());
            }
        }

        let expected_num_msrs = msr_entries.len();
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;
        let tsc_khz = self.tsc_khz()?;

        Ok(VcpuKvmState {
            cpuid,
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
            tsc_khz,
        }
        .into())
    }
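
    // Illustrative note (not part of the original code): the chunked fallback above
    // can be pictured on a small list. GET_MSRS reports how many leading entries it
    // filled, so the first unfilled entry is the faulty one; it is kept out of the
    // result and the remainder of the list is retried:
    //
    //     entries:      [ok, ok, BAD, ok, ok]
    //     first call  -> returns 2: keep [0..2), skip index 2, retry from index 3
    //     second call -> returns 2 == remaining len: done, 4 MSRs saved in total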

    ///
    /// Get the current AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = VcpuKvmState {
            mp_state: self.get_mp_state()?.into(),
            ..Default::default()
        };
        // Get core registers
        state.core_regs = self.get_regs()?;

        // Get system registers
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For Armv8 there are around 500 registers.
        let mut sys_regs: Vec<Register> = Vec::new();
        let mut reg_list = RegList::new(500).unwrap();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain: core registers and system
        // registers.
        // The register list contains the number of registers and their ids. We
        // will need to call KVM_GET_ONE_REG on each id in order to save all of
        // them. We carve out from the list the core registers, which are
        // represented in the kernel by the kvm_regs structure and for which we
        // can calculate the id based on the offset in the structure.
        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we simply call KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(*index, &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
            sys_regs.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: u64::from_le_bytes(bytes),
            });
        }

        state.sys_regs = sys_regs;

        Ok(state.into())
    }
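
    // Illustrative sketch (not part of the original code): each saved system register
    // is a kvm_one_reg entry whose `id` is the KVM register id and whose `addr` field
    // carries the 64-bit value. Restoring is the mirror image of the loop above, which
    // is what the AArch64 `set_state()` below does:
    //
    //     for reg in &state.sys_regs {
    //         vcpu_fd.set_one_reg(reg.id, &reg.addr.to_le_bytes())?;
    //     }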

    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state.into())?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        if let Some(freq) = state.tsc_khz {
            self.set_tsc_khz(freq)?;
        }

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fall back to a slower method by setting MSRs
        // by chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.len();
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let sub_msr_entries = state.msrs[start_pos..].to_vec();

                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }

    ///
    /// Restore the previously saved AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        // Set core registers
        self.set_regs(&state.core_regs)?;
        // Set system registers
        for reg in &state.sys_regs {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(reg.id, &reg.addr.to_le_bytes())
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }

        self.set_mp_state(state.mp_state.into())?;

        Ok(())
    }

    ///
    /// Initialize TDX for this CPU
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
        tdx_command(
            &self.fd.lock().unwrap().as_raw_fd(),
            TdxCommand::InitVcpu,
            0,
            hob_address,
        )
        .map_err(cpu::HypervisorCpuError::InitializeTdx)
    }

    ///
    /// Set the "immediate_exit" state
    ///
    fn set_immediate_exit(&self, exit: bool) {
        self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into());
    }
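
    // Illustrative note (assumption, not part of the original code): "immediate_exit"
    // maps to the immediate_exit field of the shared kvm_run structure. While it is
    // non-zero, KVM_RUN returns to userspace right away instead of entering the
    // guest, which is how a VMM typically interrupts a vCPU from another thread:
    //
    //     vcpu.set_immediate_exit(true);
    //     // ... signal the vCPU thread so any in-progress KVM_RUN exits ...
    //     vcpu.set_immediate_exit(false);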

    ///
    /// Returns the details about TDX exit reason
    ///
    #[cfg(feature = "tdx")]
    fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
        // SAFETY: accessing a union field in a valid structure
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;

        if tdx_vmcall.type_ != 0 {
            return Err(cpu::HypervisorCpuError::UnknownTdxVmCall);
        }

        match tdx_vmcall.subfunction {
            TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
            TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
                Ok(TdxExitDetails::SetupEventNotifyInterrupt)
            }
            _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
        }
    }

    ///
    /// Set the status code for TDX exit
    ///
    #[cfg(feature = "tdx")]
    fn set_tdx_status(&mut self, status: TdxExitStatus) {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
        // SAFETY: accessing a union field in a valid structure
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = match status {
            TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
            TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
        };
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU
    ///
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr!(msr_index::MSR_IA32_TSC),
            msr_data!(
                msr_index::MSR_IA32_MISC_ENABLE,
                msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
            ),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }

    #[cfg(target_arch = "aarch64")]
    fn has_pmu_support(&self) -> bool {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        self.fd.lock().unwrap().has_device_attr(&cpu_attr).is_ok()
    }

    #[cfg(target_arch = "aarch64")]
    fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        let cpu_attr_irq = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
            addr: &irq as *const u32 as u64,
            flags: 0,
        };
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr_irq)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
    }
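
    // Usage sketch (illustrative, not part of the original code): the two device
    // attributes above are applied IRQ first, then INIT, matching the usual KVM
    // requirement that the PMU overflow interrupt is configured before
    // KVM_ARM_VCPU_PMU_V3_INIT is issued. A caller would typically do:
    //
    //     if vcpu.has_pmu_support() {
    //         // `pmu_irq` is whatever PPI the VMM reserved for the virtual PMU.
    //         vcpu.init_pmu(pmu_irq)?;
    //     }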

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the frequency of the TSC if available
    ///
    fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
        match self.fd.lock().unwrap().get_tsc_khz() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(None)
                } else {
                    Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
                }
            }
            Ok(v) => Ok(Some(v)),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the frequency of the TSC if available
    ///
    fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
        match self.fd.lock().unwrap().set_tsc_khz(freq) {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Trigger NMI interrupt
    ///
    fn nmi(&self) -> cpu::Result<()> {
        match self.fd.lock().unwrap().nmi() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::Nmi(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }
}

impl KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<XsaveState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
        let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_xsave(&xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .lock()
            .unwrap()
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .lock()
            .unwrap()
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
}