// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//
// Copyright 2018-2019 CrowdStrike, Inc.
//
//

#[cfg(target_arch = "aarch64")]
use crate::aarch64::gic::KvmGicV3Its;
#[cfg(target_arch = "aarch64")]
pub use crate::aarch64::{
    check_required_kvm_extensions, gic::Gicv3ItsState as GicState, is_system_register, VcpuInit,
    VcpuKvmState,
};
#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::gic::{Vgic, VgicConfig};
use crate::cpu;
use crate::hypervisor;
use crate::vec_with_array_field;
use crate::vm::{self, InterruptSourceConfig, VmOps};
use crate::HypervisorType;
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset_of};
use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
use std::any::Any;
use std::collections::HashMap;
#[cfg(target_arch = "x86_64")]
use std::fs::File;
#[cfg(target_arch = "x86_64")]
use std::os::unix::io::AsRawFd;
#[cfg(feature = "tdx")]
use std::os::unix::io::RawFd;
use std::result;
#[cfg(target_arch = "x86_64")]
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Mutex;
use std::sync::{Arc, RwLock};
use vmm_sys_util::eventfd::EventFd;
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{
    CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, XsaveState, NUM_IOAPIC_PINS,
};
#[cfg(target_arch = "x86_64")]
use crate::ClockData;
#[cfg(target_arch = "x86_64")]
use crate::StandardRegisters;
use crate::{
    CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion,
    USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
};
#[cfg(target_arch = "aarch64")]
use aarch64::{RegList, Register, StandardRegisters};
#[cfg(target_arch = "x86_64")]
use kvm_bindings::{
    kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
    KVM_GUESTDBG_USE_HW_BP,
};
#[cfg(target_arch = "x86_64")]
use x86_64::check_required_kvm_extensions;
#[cfg(target_arch = "x86_64")]
pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState};
// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
pub use kvm_bindings;
pub use kvm_bindings::{
    kvm_clock_data, kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug,
    kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_userspace_memory_region,
    KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
    KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
};
#[cfg(target_arch = "aarch64")]
use kvm_bindings::{
    kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64,
    KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK,
    KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK,
    KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
};
#[cfg(feature = "tdx")]
use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO};
pub use kvm_ioctls;
pub use kvm_ioctls::{Cap, Kvm};
#[cfg(target_arch = "aarch64")]
use std::mem;
use thiserror::Error;
use vfio_ioctls::VfioDeviceFd;
#[cfg(feature = "tdx")]
use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr};
///
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
///
pub use {
    kvm_bindings::kvm_create_device as CreateDevice, kvm_bindings::kvm_device_attr as DeviceAttr,
    kvm_bindings::kvm_run, kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::VcpuExit,
};

#[cfg(target_arch = "x86_64")]
const KVM_CAP_SGX_ATTRIBUTE: u32 = 196;

#[cfg(target_arch = "x86_64")]
use vmm_sys_util::ioctl_io_nr;

#[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))]
use vmm_sys_util::ioctl_ioc_nr;

#[cfg(target_arch = "x86_64")]
ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a);

#[cfg(feature = "tdx")]
const KVM_EXIT_TDX: u32 = 50;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SUCCESS: u64 = 0;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000;

#[cfg(feature = "tdx")]
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);

#[cfg(feature = "tdx")]
#[repr(u32)]
enum TdxCommand {
    Capabilities = 0,
    InitVm,
    InitVcpu,
    InitMemRegion,
    Finalize,
}

#[cfg(feature = "tdx")]
pub enum TdxExitDetails {
    GetQuote,
    SetupEventNotifyInterrupt,
}

#[cfg(feature = "tdx")]
pub enum TdxExitStatus {
    Success,
    InvalidOperand,
}

#[cfg(feature = "tdx")]
const TDX_MAX_NR_CPUID_CONFIGS: usize = 6;

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCpuidConfig {
    pub leaf: u32,
    pub sub_leaf: u32,
    pub eax: u32,
    pub ebx: u32,
    pub ecx: u32,
    pub edx: u32,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCapabilities {
    pub attrs_fixed0: u64,
    pub attrs_fixed1: u64,
    pub xfam_fixed0: u64,
    pub xfam_fixed1: u64,
    pub nr_cpuid_configs: u32,
    pub padding: u32,
    pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS],
}

#[cfg(feature = "tdx")]
#[derive(Copy, Clone)]
pub struct KvmTdxExit {
    pub type_: u32,
    pub pad: u32,
    pub u: KvmTdxExitU,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Copy, Clone)]
pub union KvmTdxExitU {
    pub vmcall: KvmTdxExitVmcall,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct KvmTdxExitVmcall {
    pub type_: u64,
    pub subfunction: u64,
    pub reg_mask: u64,
    pub in_r12: u64,
    pub in_r13: u64,
    pub in_r14: u64,
    pub in_r15: u64,
    pub in_rbx: u64,
    pub in_rdi: u64,
    pub in_rsi: u64,
    pub in_r8: u64,
    pub in_r9: u64,
    pub in_rdx: u64,
    pub status_code: u64,
    pub out_r11: u64,
    pub out_r12: u64,
    pub out_r13: u64,
    pub out_r14: u64,
    pub out_r15: u64,
    pub out_rbx: u64,
    pub out_rdi: u64,
    pub out_rsi: u64,
    pub out_r8: u64,
    pub out_r9: u64,
    pub out_rdx: u64,
}

impl From<kvm_userspace_memory_region> for UserMemoryRegion {
    fn from(region: kvm_userspace_memory_region) -> Self {
        let mut flags = USER_MEMORY_REGION_READ;
        if region.flags & KVM_MEM_READONLY == 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 {
            flags |= USER_MEMORY_REGION_LOG_DIRTY;
        }

        UserMemoryRegion {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<UserMemoryRegion> for kvm_userspace_memory_region {
    fn from(region: UserMemoryRegion) -> Self {
        assert!(
            region.flags & USER_MEMORY_REGION_READ != 0,
            "KVM mapped memory is always readable"
        );

        let mut flags = 0;
        if region.flags & USER_MEMORY_REGION_WRITE == 0 {
            flags |= KVM_MEM_READONLY;
        }
        if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
            flags |= KVM_MEM_LOG_DIRTY_PAGES;
        }

        kvm_userspace_memory_region {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<kvm_mp_state> for MpState {
    fn from(s: kvm_mp_state) -> Self {
        MpState::Kvm(s)
    }
}

impl From<MpState> for kvm_mp_state {
    fn from(ms: MpState) -> Self {
        match ms {
            MpState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("MpState is not valid"),
        }
    }
}

impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: kvm_ioctls::IoEventAddress) -> Self {
        match a {
            kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuKvmState> for CpuState {
    fn from(s: VcpuKvmState) -> Self {
        CpuState::Kvm(s)
    }
}

impl From<CpuState> for VcpuKvmState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<kvm_clock_data> for ClockData {
    fn from(d: kvm_clock_data) -> Self {
        ClockData::Kvm(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for kvm_clock_data {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("ClockData is not valid"),
        }
    }
}

impl From<kvm_bindings::kvm_regs> for crate::StandardRegisters {
    fn from(s: kvm_bindings::kvm_regs) -> Self {
        crate::StandardRegisters::Kvm(s)
    }
}

impl From<crate::StandardRegisters> for kvm_bindings::kvm_regs {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
    fn from(s: kvm_irq_routing_entry) -> Self {
        IrqRoutingEntry::Kvm(s)
    }
}

impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
not valid"), 371 } 372 } 373 } 374 375 struct KvmDirtyLogSlot { 376 slot: u32, 377 guest_phys_addr: u64, 378 memory_size: u64, 379 userspace_addr: u64, 380 } 381 382 /// Wrapper over KVM VM ioctls. 383 pub struct KvmVm { 384 fd: Arc<VmFd>, 385 #[cfg(target_arch = "x86_64")] 386 msrs: Vec<MsrEntry>, 387 dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>, 388 } 389 390 impl KvmVm { 391 /// 392 /// Creates an emulated device in the kernel. 393 /// 394 /// See the documentation for `KVM_CREATE_DEVICE`. 395 fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> { 396 let device_fd = self 397 .fd 398 .create_device(device) 399 .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?; 400 Ok(VfioDeviceFd::new_from_kvm(device_fd)) 401 } 402 /// Checks if a particular `Cap` is available. 403 pub fn check_extension(&self, c: Cap) -> bool { 404 self.fd.check_extension(c) 405 } 406 } 407 408 /// Implementation of Vm trait for KVM 409 /// 410 /// # Examples 411 /// 412 /// ``` 413 /// # use hypervisor::kvm::KvmHypervisor; 414 /// # use std::sync::Arc; 415 /// let kvm = KvmHypervisor::new().unwrap(); 416 /// let hypervisor = Arc::new(kvm); 417 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 418 /// ``` 419 impl vm::Vm for KvmVm { 420 #[cfg(target_arch = "x86_64")] 421 /// 422 /// Sets the address of the one-page region in the VM's address space. 423 /// 424 fn set_identity_map_address(&self, address: u64) -> vm::Result<()> { 425 self.fd 426 .set_identity_map_address(address) 427 .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into())) 428 } 429 430 #[cfg(target_arch = "x86_64")] 431 /// 432 /// Sets the address of the three-page region in the VM's address space. 433 /// 434 fn set_tss_address(&self, offset: usize) -> vm::Result<()> { 435 self.fd 436 .set_tss_address(offset) 437 .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into())) 438 } 439 440 /// 441 /// Creates an in-kernel interrupt controller. 442 /// 443 fn create_irq_chip(&self) -> vm::Result<()> { 444 self.fd 445 .create_irq_chip() 446 .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into())) 447 } 448 449 /// 450 /// Registers an event that will, when signaled, trigger the `gsi` IRQ. 451 /// 452 fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 453 self.fd 454 .register_irqfd(fd, gsi) 455 .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into())) 456 } 457 458 /// 459 /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ. 460 /// 461 fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 462 self.fd 463 .unregister_irqfd(fd, gsi) 464 .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into())) 465 } 466 467 /// 468 /// Creates a VcpuFd object from a vcpu RawFd. 469 /// 470 fn create_vcpu( 471 &self, 472 id: u8, 473 vm_ops: Option<Arc<dyn VmOps>>, 474 ) -> vm::Result<Arc<dyn cpu::Vcpu>> { 475 let fd = self 476 .fd 477 .create_vcpu(id as u64) 478 .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?; 479 let vcpu = KvmVcpu { 480 fd: Arc::new(Mutex::new(fd)), 481 #[cfg(target_arch = "x86_64")] 482 msrs: self.msrs.clone(), 483 vm_ops, 484 #[cfg(target_arch = "x86_64")] 485 hyperv_synic: AtomicBool::new(false), 486 }; 487 Ok(Arc::new(vcpu)) 488 } 489 490 #[cfg(target_arch = "aarch64")] 491 /// 492 /// Creates a virtual GIC device. 
    ///
    fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
        let gic_device = KvmGicV3Its::new(self, config)
            .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?;
        Ok(Arc::new(Mutex::new(gic_device)))
    }

    ///
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<vm::DataMatch>,
    ) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(kvm_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(kvm_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }

    ///
    /// Unregisters an event from a certain address it has been previously registered to.
    ///
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match &config {
            InterruptSourceConfig::MsiIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_MSI,
                    ..Default::default()
                };

                kvm_route.u.msi.address_lo = cfg.low_addr;
                kvm_route.u.msi.address_hi = cfg.high_addr;
                kvm_route.u.msi.data = cfg.data;

                if self.check_extension(crate::kvm::Cap::MsiDevid) {
                    // On AArch64, there is a limitation on the range of the 'devid':
                    // it must fit in a u16.
                    //
                    // BDF cannot be used directly, because 'segment' is in the high
                    // 16 bits. The layout of the u32 BDF is:
                    // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --|
                    // |     segment     |     bus    |   device   |  function  |
                    //
                    // Now that we support 1 bus only in a segment, we can build a
                    // 'devid' by replacing the 'bus' bits with the low 8 bits of
                    // 'segment' data.
                    // This way we can resolve the range checking problem and give
                    // different `devid` to all the devices. The limitation is that at
                    // most 256 segments can be supported.
                    //
                    let modified_devid = (cfg.devid & 0x00ff_0000) >> 8 | cfg.devid & 0xff;

                    kvm_route.flags = KVM_MSI_VALID_DEVID;
                    kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid;
                }
                kvm_route.into()
            }
            InterruptSourceConfig::LegacyIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_IRQCHIP,
                    ..Default::default()
                };
                kvm_route.u.irqchip.irqchip = cfg.irqchip;
                kvm_route.u.irqchip.pin = cfg.pin;

                kvm_route.into()
            }
        }
    }

    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
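    ///
    /// # Example
    ///
    /// Illustrative sketch (not taken from the original sources); it assumes a
    /// `vm` handle implementing `vm::Vm` and an already-built `msi_cfg` value of
    /// type `MsiIrqSourceConfig`:
    ///
    /// ```ignore
    /// let entry = vm.make_routing_entry(4, &InterruptSourceConfig::MsiIrq(msi_cfg));
    /// vm.set_gsi_routing(&[entry]).unwrap();
    /// ```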
    ///
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
        irq_routing[0].nr = entries.len() as u32;
        irq_routing[0].flags = 0;
        let entries: Vec<kvm_irq_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Kvm(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: irq_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [kvm_irq_routing_entry] =
                irq_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_gsi_routing(&irq_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }

    ///
    /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
    ///
    fn make_user_memory_region(
        &self,
        slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        kvm_userspace_memory_region {
            slot,
            guest_phys_addr,
            memory_size,
            userspace_addr,
            flags: if readonly { KVM_MEM_READONLY } else { 0 }
                | if log_dirty_pages {
                    KVM_MEM_LOG_DIRTY_PAGES
                } else {
                    0
                },
        }
        .into()
    }

    ///
    /// Creates a guest physical memory region.
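    ///
    /// # Example
    ///
    /// Illustrative sketch (not taken from the original sources); it assumes a
    /// `vm` handle implementing `vm::Vm` and a page-aligned `host_addr` backed by
    /// an mmap'ed buffer that outlives the region:
    ///
    /// ```ignore
    /// let region = vm.make_user_memory_region(0, 0x1_0000, 0x1000, host_addr, false, false);
    /// vm.create_user_memory_region(region).unwrap();
    /// ```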
    ///
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 {
            if (region.flags & KVM_MEM_READONLY) != 0 {
                return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!(
                    "Error creating regions with both 'dirty-pages-log' and 'read-only'."
                )));
            }

            // Keep track of the regions that need dirty pages log
            self.dirty_log_slots.write().unwrap().insert(
                region.slot,
                KvmDirtyLogSlot {
                    slot: region.slot,
                    guest_phys_addr: region.guest_phys_addr,
                    memory_size: region.memory_size,
                    userspace_addr: region.userspace_addr,
                },
            );

            // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`.
            // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`.
            region.flags = 0;
        }

        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
        }
    }

    ///
    /// Removes a guest physical memory region.
    ///
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots.write().unwrap().remove(&region.slot);

        // Setting the size to 0 means "remove"
        region.memory_size = 0;
        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))
        }
    }

    ///
    /// Returns the preferred CPU target type which can be emulated by KVM on underlying host.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> {
        self.fd
            .get_preferred_target(kvi)
            .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        // Create split irqchip
        // Only the local APIC is emulated in kernel, both PICs and IOAPIC
        // are not.
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = NUM_IOAPIC_PINS as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SGX_ATTRIBUTE,
            ..Default::default()
        };
        cap.args[0] = file.as_raw_fd() as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?;
        Ok(())
    }

    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        Ok(self
            .fd
            .get_clock()
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?
            .into())
    }

    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        let data = (*data).into();
        self.fd
            .set_clock(&data)
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }

    /// Create a device that is used for passthrough
    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = kvm_create_device {
            type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: KVM_MEM_LOG_DIRTY_PAGES,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: 0,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

    ///
    /// Get dirty pages bitmap (one bit per page)
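    ///
    /// # Example
    ///
    /// Illustrative sketch (not taken from the original sources); it assumes slot 0
    /// was created with `log_dirty_pages` set and covers 64 KiB of guest memory:
    ///
    /// ```ignore
    /// vm.start_dirty_log().unwrap();
    /// let bitmap = vm.get_dirty_log(0, 0, 64 << 10).unwrap();
    /// vm.stop_dirty_log().unwrap();
    /// ```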
    ///
    fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(slot, memory_size as usize)
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }

    ///
    /// Initialize TDX for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> {
        const TDX_ATTR_SEPT_VE_DISABLE: usize = 28;

        let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default());

        #[repr(C)]
        struct TdxInitVm {
            attributes: u64,
            max_vcpus: u32,
            padding: u32,
            mrconfigid: [u64; 6],
            mrowner: [u64; 6],
            mrownerconfig: [u64; 6],
            cpuid_nent: u32,
            cpuid_padding: u32,
            cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256],
        }
        let data = TdxInitVm {
            attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE,
            max_vcpus,
            padding: 0,
            mrconfigid: [0; 6],
            mrowner: [0; 6],
            mrownerconfig: [0; 6],
            cpuid_nent: cpuid.len() as u32,
            cpuid_padding: 0,
            cpuid_entries: cpuid.as_slice().try_into().unwrap(),
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitVm,
            0,
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitializeTdx)
    }

    ///
    /// Finalize the TDX setup for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_finalize(&self) -> vm::Result<()> {
        tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
            .map_err(vm::HypervisorVmError::FinalizeTdx)
    }

    ///
    /// Initialize memory regions for the TDX VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init_memory_region(
        &self,
        host_address: u64,
        guest_address: u64,
        size: u64,
        measure: bool,
    ) -> vm::Result<()> {
        #[repr(C)]
        struct TdxInitMemRegion {
            host_address: u64,
            guest_address: u64,
            pages: u64,
        }
        let data = TdxInitMemRegion {
            host_address,
            guest_address,
            pages: size / 4096,
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitMemRegion,
            u32::from(measure),
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitMemRegionTdx)
    }

    /// Downcast to the underlying KvmVm type
    fn as_any(&self) -> &dyn Any {
        self
    }
}

#[cfg(feature = "tdx")]
fn tdx_command(
    fd: &RawFd,
    command: TdxCommand,
    flags: u32,
    data: u64,
) -> std::result::Result<(), std::io::Error> {
    #[repr(C)]
    struct TdxIoctlCmd {
        command: TdxCommand,
        flags: u32,
        data: u64,
        error: u64,
        unused: u64,
    }
    let cmd = TdxIoctlCmd {
        command,
        flags,
        data,
        error: 0,
        unused: 0,
    };
    // SAFETY: FFI call. All input parameters are valid.
    let ret = unsafe {
        ioctl_with_val(
            fd,
            KVM_MEMORY_ENCRYPT_OP(),
            &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong,
        )
    };

    if ret < 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}

/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    kvm: Kvm,
}

impl KvmHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by the hypervisor.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

/// Enum for KVM related error
#[derive(Debug, Error)]
pub enum KvmError {
    #[error("Capability missing: {0:?}")]
    CapabilityMissing(Cap),
}

pub type KvmResult<T> = result::Result<T, KvmError>;

impl KvmHypervisor {
    /// Create a hypervisor based on Kvm
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
        let api_version = kvm_obj.get_api_version();

        if api_version != kvm_bindings::KVM_API_VERSION as i32 {
            return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
        }

        Ok(Arc::new(KvmHypervisor { kvm: kvm_obj }))
    }

    /// Check if the hypervisor is available
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/kvm") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}

/// Implementation of Hypervisor trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl hypervisor::Hypervisor for KvmHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Kvm
    }

    /// Create a KVM vm object of a specific VM type and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type(0).unwrap();
    /// ```
    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm_with_type(vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs: Vec<MsrEntry> = vec![
                MsrEntry {
                    ..Default::default()
                };
                num_msrs
            ];
            let indices = msr_list.as_slice();
            for (pos, index) in indices.iter().enumerate() {
                msrs[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }

        #[cfg(target_arch = "aarch64")]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }

    /// Create a KVM vm object and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// ```
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        #[allow(unused_mut)]
        let mut vm_type: u64 = 0; // Create with default platform type

        // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA
        // size from the host and use that when creating the VM, which may
        // avoid unnecessary VM creation failures.
        #[cfg(target_arch = "aarch64")]
        if self.kvm.check_extension(Cap::ArmVmIPASize) {
            vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap();
        }

        self.create_vm_with_type(vm_type)
    }

    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        check_required_kvm_extensions(&self.kvm)
            .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve AArch64 host maximum IPA size supported by KVM.
    ///
    fn get_host_ipa_limit(&self) -> i32 {
        self.kvm.get_host_ipa_limit()
    }

    ///
    /// Retrieve TDX capabilities
    ///
    #[cfg(feature = "tdx")]
    fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> {
        let data = TdxCapabilities {
            nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32,
            ..Default::default()
        };

        tdx_command(
            &self.kvm.as_raw_fd(),
            TdxCommand::Capabilities,
            0,
            &data as *const _ as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?;

        Ok(data)
    }

    ///
    /// Get the number of supported hardware breakpoints
    ///
    fn get_guest_debug_hw_bps(&self) -> usize {
        #[cfg(target_arch = "x86_64")]
        {
            4
        }
        #[cfg(target_arch = "aarch64")]
        {
            self.kvm.get_guest_debug_hw_bps() as usize
        }
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32
    }
}

/// Vcpu struct for KVM
pub struct KvmVcpu {
    fd: Arc<Mutex<VcpuFd>>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    #[cfg(target_arch = "x86_64")]
    hyperv_synic: AtomicBool,
}

/// Implementation of Vcpu trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// ```
impl cpu::Vcpu for KvmVcpu {
    ///
    /// Returns StandardRegisters with default value set
    ///
    #[cfg(target_arch = "x86_64")]
    fn create_standard_regs(&self) -> StandardRegisters {
        kvm_bindings::kvm_regs::default().into()
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }

    ///
    /// Returns the vCPU general purpose registers.
    /// The `KVM_GET_REGS` ioctl is not available on AArch64, `KVM_GET_ONE_REG`
    /// is used to get registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        let mut state: StandardRegisters = kvm_regs::default();
        let mut off = offset_of!(user_pt_regs, regs);
        // There are 31 user_pt_regs:
        // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
        // These actually are the general-purpose registers of the Armv8-a
        // architecture (i.e. x0-x30 when used as 64-bit registers, or w0-w30 when used as 32-bit registers).
        for i in 0..31 {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            state.regs.regs[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // We are now entering the "Other register" section of the ARMv8-a architecture.
        // First one, stack pointer.
        let off = offset_of!(user_pt_regs, sp);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.regs.sp = u64::from_le_bytes(bytes);

        // Second one, the program counter.
        let off = offset_of!(user_pt_regs, pc);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.regs.pc = u64::from_le_bytes(bytes);

        // Next is the processor state.
        let off = offset_of!(user_pt_regs, pstate);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.regs.pstate = u64::from_le_bytes(bytes);

        // The stack pointer associated with EL1
        let off = offset_of!(kvm_regs, sp_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.sp_el1 = u64::from_le_bytes(bytes);

        // Exception Link Register for EL1, when taking an exception to EL1, this register
        // holds the address to which to return afterwards.
        let off = offset_of!(kvm_regs, elr_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.elr_el1 = u64::from_le_bytes(bytes);

        // Saved Program Status Registers, there are 5 of them used in the kernel.
        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            state.spsr[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel:
        // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            let mut bytes = [0_u8; 16];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
            state.fp_regs.vregs[i] = u128::from_le_bytes(bytes);
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        let mut bytes = [0_u8; 4];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.fp_regs.fpsr = u32::from_le_bytes(bytes);

        // Floating-point Control Register
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        let mut bytes = [0_u8; 4];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?;
        state.fp_regs.fpcr = u32::from_le_bytes(bytes);
        Ok(state)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .lock()
            .unwrap()
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }

    ///
    /// Sets the vCPU general purpose registers.
    /// The `KVM_SET_REGS` ioctl is not available on AArch64, `KVM_SET_ONE_REG`
    /// is used to set registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // The function follows the exact same order as the fields in `state`. Look there
        // for some additional info on registers.
        let mut off = offset_of!(user_pt_regs, regs);
        for i in 0..31 {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &state.regs.regs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let off = offset_of!(user_pt_regs, sp);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.regs.sp.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pc);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.regs.pc.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pstate);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.regs.pstate.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, sp_el1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.sp_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, elr_el1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &state.elr_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &state.spsr[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
                    &state.fp_regs.vregs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &state.fp_regs.fpsr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &state.fp_regs.fpcr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .lock()
            .unwrap()
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
        let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        let kvm_cpuid = <CpuId>::from_entries(&cpuid)
            .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;

        self.fd
            .lock()
            .unwrap()
            .set_cpuid2(&kvm_cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        // Update the information about Hyper-V SynIC being enabled and
        // emulated as it will influence later which MSRs should be saved.
        self.hyperv_synic.store(true, Ordering::Release);

        let cap = kvm_enable_cap {
            cap: KVM_CAP_HYPERV_SYNIC,
            ..Default::default()
        };
        self.fd
            .lock()
            .unwrap()
            .enable_cap(&cap)
            .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into()))
    }

    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    #[cfg(target_arch = "x86_64")]
    fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .fd
            .lock()
            .unwrap()
            .get_cpuid2(num_entries)
            .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
        let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_lapic(&klapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
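    ///
    /// # Example
    ///
    /// Illustrative sketch (not taken from the original sources); `KVM_GET_MSRS`
    /// only fills entries whose `index` has been prepopulated by the caller:
    ///
    /// ```ignore
    /// // MSR 0x10 is assumed here to be the time-stamp counter.
    /// let mut msrs = vec![MsrEntry { index: 0x10, ..Default::default() }];
    /// let read = vcpu.get_msrs(&mut msrs).unwrap();
    /// assert_eq!(read, 1);
    /// ```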
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
        let succ = self
            .fd
            .lock()
            .unwrap()
            .get_msrs(&mut kvm_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &kvm_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Setup the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
        self.fd
            .lock()
            .unwrap()
            .set_msrs(&kvm_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    ///
    /// Returns the vcpu's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_mp_state()
            .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))?
            .into())
    }

    ///
    /// Sets the vcpu's current "multiprocessing state".
    ///
    fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_mp_state(mp_state.into())
            .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Translates guest virtual address to guest physical address using the `KVM_TRANSLATE` ioctl.
    ///
    fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> {
        let tr = self
            .fd
            .lock()
            .unwrap()
            .translate_gva(gva)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
        // tr.valid is set if the GVA is mapped to a valid GPA.
        match tr.valid {
            0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!(
                "Invalid GVA: {:#x}",
                gva
            ))),
            _ => Ok((tr.physical_address, 0)),
        }
    }

    ///
    /// Triggers the running of the current virtual CPU returning an exit reason.
    ///
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.lock().unwrap().run() {
            Ok(run) => match run {
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoIn(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .pio_read(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoOut(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .pio_write(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
                #[cfg(target_arch = "x86_64")]
                VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),

                #[cfg(target_arch = "aarch64")]
                VcpuExit::SystemEvent(event_type, flags) => {
                    use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
                    // On AArch64, when the VM is shut down, run() returns
                    // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN
                    if event_type == KVM_SYSTEM_EVENT_RESET {
                        Ok(cpu::VmExit::Reset)
                    } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
                        Ok(cpu::VmExit::Shutdown)
                    } else {
                        Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unexpected system event with type 0x{:x}, flags 0x{:x?}",
                            event_type,
                            flags
                        )))
                    }
                }

                VcpuExit::MmioRead(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .mmio_read(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                VcpuExit::MmioWrite(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .mmio_write(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),
                #[cfg(feature = "tdx")]
                VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx),
                VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug),

                r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unexpected exit reason on vcpu run: {:?}",
                    r
                ))),
            },

            Err(ref e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Let the guest know that it has been paused, which prevents potential
    /// soft lockups when it is resumed.
    ///
    fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
        if let Err(e) = self.fd.lock().unwrap().kvmclock_ctrl() {
            // Linux kernel returns -EINVAL if the PV clock isn't yet initialised
            // which could be because we're still in firmware or the guest doesn't
            // use KVM clock.
            if e.errno() != libc::EINVAL {
                return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()));
            }
        }

        Ok(())
    }

    ///
    /// Sets debug registers to set hardware breakpoints and/or enable single step.
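    ///
    /// # Example
    ///
    /// Illustrative sketch (not taken from the original sources); it sets a single
    /// hardware breakpoint at an arbitrary guest address and leaves single-stepping
    /// disabled:
    ///
    /// ```ignore
    /// vcpu.set_guest_debug(&[vm_memory::GuestAddress(0x10_0000)], false).unwrap();
    /// ```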
    ///
    fn set_guest_debug(
        &self,
        addrs: &[vm_memory::GuestAddress],
        singlestep: bool,
    ) -> cpu::Result<()> {
        let mut dbg = kvm_guest_debug {
            #[cfg(target_arch = "x86_64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
            #[cfg(target_arch = "aarch64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW,
            ..Default::default()
        };
        if singlestep {
            dbg.control |= KVM_GUESTDBG_SINGLESTEP;
        }

        // Set the debug registers.
        // Here we assume that the number of addresses does not exceed what
        // `Hypervisor::get_guest_debug_hw_bps()` specifies.
        #[cfg(target_arch = "x86_64")]
        {
            // Set bits 9 and 10.
            // bit 9: GE (global exact breakpoint enable) flag.
            // bit 10: always 1.
            dbg.arch.debugreg[7] = 0x0600;

            for (i, addr) in addrs.iter().enumerate() {
                dbg.arch.debugreg[i] = addr.0;
                // Set global breakpoint enable flag
                dbg.arch.debugreg[7] |= 2 << (i * 2);
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            for (i, addr) in addrs.iter().enumerate() {
                // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2):
                // bit 0: 1 (Enabled)
                // bit 1~2: 0b11 (PMC = EL1/EL0)
                // bit 5~8: 0b1111 (BAS = AArch64)
                // others: 0
                dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64;
                // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3):
                // bit 2~52: VA[2:52]
                dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0;
            }
        }
        self.fd
            .lock()
            .unwrap()
            .set_guest_debug(&dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .vcpu_init(kvi)
            .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
    }

    ///
    /// Gets a list of the guest registers that are supported for the
    /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
    }

    ///
    /// Gets the value of a system register
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
        //
        // The Arm Architecture Reference Manual defines the encoding of
        // AArch64 system registers, see
        // https://developer.arm.com/documentation/ddi0487 (chapter D12).
        // KVM defines another ID for each AArch64 system register, which is
        // used in calling `KVM_G/SET_ONE_REG` to access a system register of
        // a guest.
        // A mapping exists between the Arm standard encoding and the KVM ID.
        // This function takes the standard u32 ID as input parameter, converts
        // it to the corresponding KVM ID, and calls the `KVM_GET_ONE_REG` API to
        // get the value of the system register.
    #[cfg(target_arch = "aarch64")]
    fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .vcpu_init(kvi)
            .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
    }

    ///
    /// Gets a list of the guest registers that are supported for the
    /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
    }

    ///
    /// Gets the value of a system register
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
        //
        // The Arm Architecture Reference Manual defines the encoding of
        // AArch64 system registers, see
        // https://developer.arm.com/documentation/ddi0487 (chapter D12).
        // KVM defines its own ID for each AArch64 system register, which is
        // used when calling `KVM_GET_ONE_REG`/`KVM_SET_ONE_REG` to access a
        // system register of a guest.
        // A mapping exists between the Arm standard encoding and the KVM ID.
        // This function takes the standard u32 encoding as its input parameter,
        // converts it to the corresponding KVM ID, and calls the
        // `KVM_GET_ONE_REG` API to get the value of the system register.
        //
        let id: u64 = KVM_REG_ARM64
            | KVM_REG_SIZE_U64
            | KVM_REG_ARM64_SYSREG as u64
            | ((((sys_reg) >> 5)
                & (KVM_REG_ARM64_SYSREG_OP0_MASK
                    | KVM_REG_ARM64_SYSREG_OP1_MASK
                    | KVM_REG_ARM64_SYSREG_CRN_MASK
                    | KVM_REG_ARM64_SYSREG_CRM_MASK
                    | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(id, &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
        Ok(u64::from_le_bytes(bytes))
    }
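
    // Worked example (illustrative only, values not used by this file): for
    // MPIDR_EL1 (op0=3, op1=0, CRn=0, CRm=0, op2=5) the standard encoding with
    // op2 at bits [7:5] and op0 at bits [20:19] is (3 << 19) | (5 << 5) = 0x1800a0.
    // Shifting right by 5 and OR-ing in KVM_REG_ARM64 | KVM_REG_SIZE_U64 |
    // KVM_REG_ARM64_SYSREG yields the KVM register ID 0x6030_0000_0013_c005,
    // which is the value `get_one_reg()` above would be called with.
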
    ///
    /// Configure core registers for a given CPU.
    ///
    #[cfg(target_arch = "aarch64")]
    fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
        #[allow(non_upper_case_globals)]
        // PSR (Processor State Register) bits.
        // Taken from arch/arm64/include/uapi/asm/ptrace.h.
        const PSR_MODE_EL1h: u64 = 0x0000_0005;
        const PSR_F_BIT: u64 = 0x0000_0040;
        const PSR_I_BIT: u64 = 0x0000_0080;
        const PSR_A_BIT: u64 = 0x0000_0100;
        const PSR_D_BIT: u64 = 0x0000_0200;
        // Taken from arch/arm64/kvm/inject_fault.c.
        const PSTATE_FAULT_BITS_64: u64 =
            PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;

        let kreg_off = offset_of!(kvm_regs, regs);

        // Get the register index of the PSTATE (Processor State) register.
        let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
                &PSTATE_FAULT_BITS_64.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        // Other vCPUs are powered off initially, awaiting a PSCI wakeup.
        if cpu_id == 0 {
            // Set the PC (Program Counter) to the current program address (kernel address).
            let pc = offset_of!(user_pt_regs, pc) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
                    &boot_ip.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

            // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
            // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
            // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
            // We are choosing to place it at the end of DRAM. See `get_fdt_addr`.
            let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
                    &fdt_start.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
        }
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before most everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change state of LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save. The code looks as
    /// it might as well be affected by internal state modifications of the
    /// GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a prepopulated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?.into();
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fall back on a slower method, getting the MSRs
        // in chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
                0x400000b5, 0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr.into());
            }
        }

        let expected_num_msrs = msr_entries.len();
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;
        let tsc_khz = self.tsc_khz()?;

        Ok(VcpuKvmState {
            cpuid,
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
            tsc_khz,
        }
        .into())
    }
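
    // Illustrative trace (not code) of the chunked MSR fallback used in `state()`
    // above, assuming 10 requested MSRs where entries 4 and 7 are unsupported:
    //
    //     GET_MSRS(0..10) -> returns 4   => MSR[4] is faulty, keep MSRs 0..4
    //     GET_MSRS(5..10) -> returns 2   => MSR[7] is faulty, keep MSRs 5..7
    //     GET_MSRS(8..10) -> returns 2   => full chunk read, loop terminates
    //
    // The same skip-one-and-retry strategy is reused by `set_state()` below when
    // SET_MSRS reports fewer MSRs written than expected.
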
    ///
    /// Get the current AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = VcpuKvmState {
            mp_state: self.get_mp_state()?.into(),
            ..Default::default()
        };
        // Get core registers
        state.core_regs = self.get_regs()?;

        // Get system registers
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For Armv8 there are around 500 registers.
        let mut sys_regs: Vec<Register> = Vec::new();
        let mut reg_list = RegList::new(500).unwrap();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain: core registers and system
        // registers.
        // The register list contains the number of registers and their IDs. We
        // will need to call KVM_GET_ONE_REG on each ID in order to save all of
        // them. We carve out from the list the core registers, which are
        // represented in the kernel by the kvm_regs structure and for which we
        // can calculate the ID based on the offset in the structure.
        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we simply call KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(*index, &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
            sys_regs.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: u64::from_le_bytes(bytes),
            });
        }

        state.sys_regs = sys_regs;

        Ok(state.into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the APIC base MSR.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully when the LAPIC is correctly configured.
    ///
    /// # Arguments
    ///
    /// * `state` - The previously saved `CpuState` to restore.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state.into())?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        if let Some(freq) = state.tsc_khz {
            self.set_tsc_khz(freq)?;
        }

        // Try to set all MSRs previously stored.
        // If the number of MSRs set by SET_MSRS is different from the
        // expected amount, we fall back on a slower method, setting the MSRs
        // in chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.len();
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let sub_msr_entries = state.msrs[start_pos..].to_vec();

                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }

    ///
    /// Restore the previously saved AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        // Set core registers
        self.set_regs(&state.core_regs)?;
        // Set system registers
        for reg in &state.sys_regs {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(reg.id, &reg.addr.to_le_bytes())
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }

        self.set_mp_state(state.mp_state.into())?;

        Ok(())
    }

    ///
    /// Initialize TDX for this CPU
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
        tdx_command(
            &self.fd.lock().unwrap().as_raw_fd(),
            TdxCommand::InitVcpu,
            0,
            hob_address,
        )
        .map_err(cpu::HypervisorCpuError::InitializeTdx)
    }

    ///
    /// Set the "immediate_exit" state
    ///
    fn set_immediate_exit(&self, exit: bool) {
        self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into());
    }

    ///
    /// Returns the details about the TDX exit reason
    ///
    #[cfg(feature = "tdx")]
    fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
        // SAFETY: accessing a union field in a valid structure
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;

        if tdx_vmcall.type_ != 0 {
            return Err(cpu::HypervisorCpuError::UnknownTdxVmCall);
        }

        match tdx_vmcall.subfunction {
            TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
            TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
                Ok(TdxExitDetails::SetupEventNotifyInterrupt)
            }
            _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
        }
    }

    ///
    /// Set the status code for TDX exit
    ///
    #[cfg(feature = "tdx")]
    fn set_tdx_status(&mut self, status: TdxExitStatus) {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
        // SAFETY: accessing a union field in a valid structure
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = match status {
            TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
            TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
        };
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU
    ///
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr!(msr_index::MSR_IA32_TSC),
            msr_data!(
                msr_index::MSR_IA32_MISC_ENABLE,
                msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
            ),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }

    #[cfg(target_arch = "aarch64")]
    fn has_pmu_support(&self) -> bool {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        self.fd.lock().unwrap().has_device_attr(&cpu_attr).is_ok()
    }

    #[cfg(target_arch = "aarch64")]
    fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        let cpu_attr_irq = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
            addr: &irq as *const u32 as u64,
            flags: 0,
        };
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr_irq)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
    }
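
    // A minimal sketch (comment only, names are hypothetical) of how a VMM might
    // use the two PMU helpers above when building an AArch64 vCPU. Note that
    // `init_pmu()` programs the overflow IRQ attribute before the init attribute,
    // matching the ordering KVM expects for KVM_ARM_VCPU_PMU_V3_CTRL:
    //
    //     if vcpu.has_pmu_support() {
    //         vcpu.init_pmu(pmu_irq)?;
    //     } else {
    //         warn!("In-kernel PMUv3 not supported, skipping PMU setup");
    //     }
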
    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the frequency of the TSC if available
    ///
    fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
        match self.fd.lock().unwrap().get_tsc_khz() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(None)
                } else {
                    Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
                }
            }
            Ok(v) => Ok(Some(v)),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the frequency of the TSC if available
    ///
    fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
        match self.fd.lock().unwrap().set_tsc_khz(freq) {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Trigger NMI interrupt
    ///
    fn nmi(&self) -> cpu::Result<()> {
        match self.fd.lock().unwrap().nmi() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::Nmi(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }
}

impl KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<XsaveState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
        let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_xsave(&xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .lock()
            .unwrap()
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .lock()
            .unwrap()
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
}
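
// A rough sketch (comment only, not compiled) of how the vCPU save/restore
// methods in this file fit together during a pause/snapshot/resume cycle. The
// `vcpus` collection and the error handling are hypothetical; the method names
// are the trait methods implemented above:
//
//     // Pause: tell each guest vCPU its clock stopped, then capture its state.
//     for vcpu in &vcpus {
//         vcpu.notify_guest_clock_paused()?; // x86_64 only
//     }
//     let snapshots: Vec<_> = vcpus.iter().map(|v| v.state()).collect();
//
//     // Resume/restore: push the saved state back before running again.
//     for (vcpu, state) in vcpus.iter().zip(snapshots.iter()) {
//         vcpu.set_state(state.as_ref().unwrap())?;
//     }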