1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause 4 // 5 // Copyright © 2020, Microsoft Corporation 6 // 7 // Copyright 2018-2019 CrowdStrike, Inc. 8 // 9 // 10 11 use std::any::Any; 12 use std::collections::HashMap; 13 #[cfg(target_arch = "x86_64")] 14 use std::fs::File; 15 #[cfg(target_arch = "x86_64")] 16 use std::os::unix::io::AsRawFd; 17 #[cfg(feature = "tdx")] 18 use std::os::unix::io::RawFd; 19 use std::result; 20 #[cfg(target_arch = "x86_64")] 21 use std::sync::atomic::{AtomicBool, Ordering}; 22 use std::sync::Mutex; 23 use std::sync::{Arc, RwLock}; 24 25 use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd}; 26 use vmm_sys_util::eventfd::EventFd; 27 28 #[cfg(target_arch = "aarch64")] 29 use crate::aarch64::gic::KvmGicV3Its; 30 #[cfg(target_arch = "aarch64")] 31 pub use crate::aarch64::{ 32 check_required_kvm_extensions, gic::Gicv3ItsState as GicState, is_system_register, VcpuInit, 33 VcpuKvmState, 34 }; 35 #[cfg(target_arch = "aarch64")] 36 use crate::arch::aarch64::gic::{Vgic, VgicConfig}; 37 use crate::cpu; 38 use crate::hypervisor; 39 use crate::vec_with_array_field; 40 use crate::vm::{self, InterruptSourceConfig, VmOps}; 41 use crate::HypervisorType; 42 #[cfg(target_arch = "aarch64")] 43 use crate::{arm64_core_reg_id, offset_of}; 44 // x86_64 dependencies 45 #[cfg(target_arch = "x86_64")] 46 pub mod x86_64; 47 #[cfg(target_arch = "aarch64")] 48 use aarch64::{RegList, Register}; 49 #[cfg(target_arch = "x86_64")] 50 use kvm_bindings::{ 51 kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP, 52 KVM_GUESTDBG_USE_HW_BP, 53 }; 54 #[cfg(target_arch = "x86_64")] 55 use x86_64::check_required_kvm_extensions; 56 #[cfg(target_arch = "x86_64")] 57 pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState}; 58 59 #[cfg(target_arch = "x86_64")] 60 use crate::arch::x86::{ 61 CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, XsaveState, NUM_IOAPIC_PINS, 62 }; 63 #[cfg(target_arch = "x86_64")] 64 use crate::ClockData; 65 use crate::StandardRegisters; 66 use crate::{ 67 CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion, 68 USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE, 69 }; 70 // aarch64 dependencies 71 #[cfg(target_arch = "aarch64")] 72 pub mod aarch64; 73 #[cfg(target_arch = "aarch64")] 74 use std::mem; 75 76 pub use kvm_bindings; 77 pub use kvm_bindings::{ 78 kvm_clock_data, kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug, 79 kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_userspace_memory_region, 80 KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, 81 KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID, 82 }; 83 #[cfg(target_arch = "aarch64")] 84 use kvm_bindings::{ 85 kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64, 86 KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK, 87 KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK, 88 KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64, 89 }; 90 #[cfg(feature = "tdx")] 91 use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO}; 92 pub use kvm_ioctls; 93 pub use kvm_ioctls::{Cap, Kvm}; 94 use thiserror::Error; 95 use vfio_ioctls::VfioDeviceFd; 96 #[cfg(feature = "tdx")] 97 use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr}; 98 /// 99 /// Export generically-named 
wrappers of kvm-bindings for Unix-based platforms 100 /// 101 pub use { 102 kvm_bindings::kvm_create_device as CreateDevice, kvm_bindings::kvm_device_attr as DeviceAttr, 103 kvm_bindings::kvm_run, kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::VcpuExit, 104 }; 105 106 #[cfg(target_arch = "x86_64")] 107 const KVM_CAP_SGX_ATTRIBUTE: u32 = 196; 108 109 #[cfg(target_arch = "x86_64")] 110 use vmm_sys_util::ioctl_io_nr; 111 #[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))] 112 use vmm_sys_util::ioctl_ioc_nr; 113 114 #[cfg(target_arch = "x86_64")] 115 ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a); 116 117 #[cfg(feature = "tdx")] 118 const KVM_EXIT_TDX: u32 = 50; 119 #[cfg(feature = "tdx")] 120 const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002; 121 #[cfg(feature = "tdx")] 122 const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004; 123 #[cfg(feature = "tdx")] 124 const TDG_VP_VMCALL_SUCCESS: u64 = 0; 125 #[cfg(feature = "tdx")] 126 const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000; 127 128 #[cfg(feature = "tdx")] 129 ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong); 130 131 #[cfg(feature = "tdx")] 132 #[repr(u32)] 133 enum TdxCommand { 134 Capabilities = 0, 135 InitVm, 136 InitVcpu, 137 InitMemRegion, 138 Finalize, 139 } 140 141 #[cfg(feature = "tdx")] 142 pub enum TdxExitDetails { 143 GetQuote, 144 SetupEventNotifyInterrupt, 145 } 146 147 #[cfg(feature = "tdx")] 148 pub enum TdxExitStatus { 149 Success, 150 InvalidOperand, 151 } 152 153 #[cfg(feature = "tdx")] 154 const TDX_MAX_NR_CPUID_CONFIGS: usize = 6; 155 156 #[cfg(feature = "tdx")] 157 #[repr(C)] 158 #[derive(Debug, Default)] 159 pub struct TdxCpuidConfig { 160 pub leaf: u32, 161 pub sub_leaf: u32, 162 pub eax: u32, 163 pub ebx: u32, 164 pub ecx: u32, 165 pub edx: u32, 166 } 167 168 #[cfg(feature = "tdx")] 169 #[repr(C)] 170 #[derive(Debug, Default)] 171 pub struct TdxCapabilities { 172 pub attrs_fixed0: u64, 173 pub attrs_fixed1: u64, 174 pub xfam_fixed0: u64, 175 pub xfam_fixed1: u64, 176 pub nr_cpuid_configs: u32, 177 pub padding: u32, 178 pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS], 179 } 180 181 #[cfg(feature = "tdx")] 182 #[derive(Copy, Clone)] 183 pub struct KvmTdxExit { 184 pub type_: u32, 185 pub pad: u32, 186 pub u: KvmTdxExitU, 187 } 188 189 #[cfg(feature = "tdx")] 190 #[repr(C)] 191 #[derive(Copy, Clone)] 192 pub union KvmTdxExitU { 193 pub vmcall: KvmTdxExitVmcall, 194 } 195 196 #[cfg(feature = "tdx")] 197 #[repr(C)] 198 #[derive(Debug, Default, Copy, Clone, PartialEq)] 199 pub struct KvmTdxExitVmcall { 200 pub type_: u64, 201 pub subfunction: u64, 202 pub reg_mask: u64, 203 pub in_r12: u64, 204 pub in_r13: u64, 205 pub in_r14: u64, 206 pub in_r15: u64, 207 pub in_rbx: u64, 208 pub in_rdi: u64, 209 pub in_rsi: u64, 210 pub in_r8: u64, 211 pub in_r9: u64, 212 pub in_rdx: u64, 213 pub status_code: u64, 214 pub out_r11: u64, 215 pub out_r12: u64, 216 pub out_r13: u64, 217 pub out_r14: u64, 218 pub out_r15: u64, 219 pub out_rbx: u64, 220 pub out_rdi: u64, 221 pub out_rsi: u64, 222 pub out_r8: u64, 223 pub out_r9: u64, 224 pub out_rdx: u64, 225 } 226 227 impl From<kvm_userspace_memory_region> for UserMemoryRegion { 228 fn from(region: kvm_userspace_memory_region) -> Self { 229 let mut flags = USER_MEMORY_REGION_READ; 230 if region.flags & KVM_MEM_READONLY == 0 { 231 flags |= USER_MEMORY_REGION_WRITE; 232 } 233 if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 { 234 flags |= USER_MEMORY_REGION_LOG_DIRTY; 235 } 236 237 UserMemoryRegion { 238 slot: region.slot, 
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<UserMemoryRegion> for kvm_userspace_memory_region {
    fn from(region: UserMemoryRegion) -> Self {
        assert!(
            region.flags & USER_MEMORY_REGION_READ != 0,
            "KVM mapped memory is always readable"
        );

        let mut flags = 0;
        if region.flags & USER_MEMORY_REGION_WRITE == 0 {
            flags |= KVM_MEM_READONLY;
        }
        if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
            flags |= KVM_MEM_LOG_DIRTY_PAGES;
        }

        kvm_userspace_memory_region {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<kvm_mp_state> for MpState {
    fn from(s: kvm_mp_state) -> Self {
        MpState::Kvm(s)
    }
}

impl From<MpState> for kvm_mp_state {
    fn from(ms: MpState) -> Self {
        match ms {
            MpState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("MpState is not valid"),
        }
    }
}

impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: kvm_ioctls::IoEventAddress) -> Self {
        match a {
            kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuKvmState> for CpuState {
    fn from(s: VcpuKvmState) -> Self {
        CpuState::Kvm(s)
    }
}

impl From<CpuState> for VcpuKvmState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<kvm_clock_data> for ClockData {
    fn from(d: kvm_clock_data) -> Self {
        ClockData::Kvm(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for kvm_clock_data {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("ClockData is not valid"),
        }
    }
}

impl From<kvm_bindings::kvm_regs> for crate::StandardRegisters {
    fn from(s: kvm_bindings::kvm_regs) -> Self {
        crate::StandardRegisters::Kvm(s)
    }
}

impl From<crate::StandardRegisters> for kvm_bindings::kvm_regs {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
    fn from(s: kvm_irq_routing_entry) -> Self {
        IrqRoutingEntry::Kvm(s)
    }
}

impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}
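
// A small self-contained check of the flag translation performed by the
// `From` conversions above: a region with dirty-page logging enabled
// round-trips through `UserMemoryRegion` and back to the original KVM flags.
#[cfg(test)]
mod user_memory_region_conversion_tests {
    use super::*;

    #[test]
    fn dirty_log_region_round_trip() {
        let kvm_region = kvm_userspace_memory_region {
            slot: 0,
            guest_phys_addr: 0x1_0000,
            memory_size: 0x2000,
            userspace_addr: 0xdead_0000,
            flags: KVM_MEM_LOG_DIRTY_PAGES,
        };

        // KVM regions are always readable; this one is also writable and
        // has dirty-page logging requested.
        let region: UserMemoryRegion = kvm_region.into();
        assert_ne!(region.flags & USER_MEMORY_REGION_READ, 0);
        assert_ne!(region.flags & USER_MEMORY_REGION_WRITE, 0);
        assert_ne!(region.flags & USER_MEMORY_REGION_LOG_DIRTY, 0);

        // Converting back restores the original KVM flags.
        let back: kvm_userspace_memory_region = region.into();
        assert_eq!(back.flags, KVM_MEM_LOG_DIRTY_PAGES);
    }
}
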
struct KvmDirtyLogSlot { 378 slot: u32, 379 guest_phys_addr: u64, 380 memory_size: u64, 381 userspace_addr: u64, 382 } 383 384 /// Wrapper over KVM VM ioctls. 385 pub struct KvmVm { 386 fd: Arc<VmFd>, 387 #[cfg(target_arch = "x86_64")] 388 msrs: Vec<MsrEntry>, 389 dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>, 390 } 391 392 impl KvmVm { 393 /// 394 /// Creates an emulated device in the kernel. 395 /// 396 /// See the documentation for `KVM_CREATE_DEVICE`. 397 fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> { 398 let device_fd = self 399 .fd 400 .create_device(device) 401 .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?; 402 Ok(VfioDeviceFd::new_from_kvm(device_fd)) 403 } 404 /// Checks if a particular `Cap` is available. 405 pub fn check_extension(&self, c: Cap) -> bool { 406 self.fd.check_extension(c) 407 } 408 } 409 410 /// Implementation of Vm trait for KVM 411 /// 412 /// # Examples 413 /// 414 /// ``` 415 /// # use hypervisor::kvm::KvmHypervisor; 416 /// # use std::sync::Arc; 417 /// let kvm = KvmHypervisor::new().unwrap(); 418 /// let hypervisor = Arc::new(kvm); 419 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 420 /// ``` 421 impl vm::Vm for KvmVm { 422 #[cfg(target_arch = "x86_64")] 423 /// 424 /// Sets the address of the one-page region in the VM's address space. 425 /// 426 fn set_identity_map_address(&self, address: u64) -> vm::Result<()> { 427 self.fd 428 .set_identity_map_address(address) 429 .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into())) 430 } 431 432 #[cfg(target_arch = "x86_64")] 433 /// 434 /// Sets the address of the three-page region in the VM's address space. 435 /// 436 fn set_tss_address(&self, offset: usize) -> vm::Result<()> { 437 self.fd 438 .set_tss_address(offset) 439 .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into())) 440 } 441 442 /// 443 /// Creates an in-kernel interrupt controller. 444 /// 445 fn create_irq_chip(&self) -> vm::Result<()> { 446 self.fd 447 .create_irq_chip() 448 .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into())) 449 } 450 451 /// 452 /// Registers an event that will, when signaled, trigger the `gsi` IRQ. 453 /// 454 fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 455 self.fd 456 .register_irqfd(fd, gsi) 457 .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into())) 458 } 459 460 /// 461 /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ. 462 /// 463 fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 464 self.fd 465 .unregister_irqfd(fd, gsi) 466 .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into())) 467 } 468 469 /// 470 /// Creates a VcpuFd object from a vcpu RawFd. 471 /// 472 fn create_vcpu( 473 &self, 474 id: u8, 475 vm_ops: Option<Arc<dyn VmOps>>, 476 ) -> vm::Result<Arc<dyn cpu::Vcpu>> { 477 let fd = self 478 .fd 479 .create_vcpu(id as u64) 480 .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?; 481 let vcpu = KvmVcpu { 482 fd: Arc::new(Mutex::new(fd)), 483 #[cfg(target_arch = "x86_64")] 484 msrs: self.msrs.clone(), 485 vm_ops, 486 #[cfg(target_arch = "x86_64")] 487 hyperv_synic: AtomicBool::new(false), 488 }; 489 Ok(Arc::new(vcpu)) 490 } 491 492 #[cfg(target_arch = "aarch64")] 493 /// 494 /// Creates a virtual GIC device. 
    ///
    fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
        let gic_device = KvmGicV3Its::new(self, config)
            .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?;
        Ok(Arc::new(Mutex::new(gic_device)))
    }

    ///
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<vm::DataMatch>,
    ) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(kvm_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(kvm_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }

    ///
    /// Unregisters an event from a certain address it has been previously registered to.
    ///
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match &config {
            InterruptSourceConfig::MsiIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_MSI,
                    ..Default::default()
                };

                kvm_route.u.msi.address_lo = cfg.low_addr;
                kvm_route.u.msi.address_hi = cfg.high_addr;
                kvm_route.u.msi.data = cfg.data;

                if self.check_extension(crate::kvm::Cap::MsiDevid) {
                    // On AArch64, there is a limitation on the range of the 'devid':
                    // it must fit in 16 bits, i.e. it cannot exceed 65535.
                    //
                    // The BDF cannot be used directly, because the 'segment' sits in
                    // the high 16 bits. The layout of the u32 BDF is:
                    // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --|
                    // |     segment     |    bus     |   device   |  function  |
                    //
                    // Since we only support a single bus per segment, we can build a
                    // 'devid' by replacing the 'bus' bits with the low 8 bits of the
                    // 'segment' data.
                    // This way we resolve the range checking problem and give a
                    // different 'devid' to every device. The limitation is that at
                    // most 256 segments can be supported.
                    //
                    let modified_devid = (cfg.devid & 0x00ff_0000) >> 8 | cfg.devid & 0xff;

                    kvm_route.flags = KVM_MSI_VALID_DEVID;
                    kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid;
                }
                kvm_route.into()
            }
            InterruptSourceConfig::LegacyIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_IRQCHIP,
                    ..Default::default()
                };
                kvm_route.u.irqchip.irqchip = cfg.irqchip;
                kvm_route.u.irqchip.pin = cfg.pin;

                kvm_route.into()
            }
        }
    }

    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
596 /// 597 fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> { 598 let mut irq_routing = 599 vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len()); 600 irq_routing[0].nr = entries.len() as u32; 601 irq_routing[0].flags = 0; 602 let entries: Vec<kvm_irq_routing_entry> = entries 603 .iter() 604 .map(|entry| match entry { 605 IrqRoutingEntry::Kvm(e) => *e, 606 #[allow(unreachable_patterns)] 607 _ => panic!("IrqRoutingEntry type is wrong"), 608 }) 609 .collect(); 610 611 // SAFETY: irq_routing initialized with entries.len() and now it is being turned into 612 // entries_slice with entries.len() again. It is guaranteed to be large enough to hold 613 // everything from entries. 614 unsafe { 615 let entries_slice: &mut [kvm_irq_routing_entry] = 616 irq_routing[0].entries.as_mut_slice(entries.len()); 617 entries_slice.copy_from_slice(&entries); 618 } 619 620 self.fd 621 .set_gsi_routing(&irq_routing[0]) 622 .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into())) 623 } 624 625 /// 626 /// Creates a memory region structure that can be used with {create/remove}_user_memory_region 627 /// 628 fn make_user_memory_region( 629 &self, 630 slot: u32, 631 guest_phys_addr: u64, 632 memory_size: u64, 633 userspace_addr: u64, 634 readonly: bool, 635 log_dirty_pages: bool, 636 ) -> UserMemoryRegion { 637 kvm_userspace_memory_region { 638 slot, 639 guest_phys_addr, 640 memory_size, 641 userspace_addr, 642 flags: if readonly { KVM_MEM_READONLY } else { 0 } 643 | if log_dirty_pages { 644 KVM_MEM_LOG_DIRTY_PAGES 645 } else { 646 0 647 }, 648 } 649 .into() 650 } 651 652 /// 653 /// Creates a guest physical memory region. 654 /// 655 fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> { 656 let mut region: kvm_userspace_memory_region = user_memory_region.into(); 657 658 if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 { 659 if (region.flags & KVM_MEM_READONLY) != 0 { 660 return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!( 661 "Error creating regions with both 'dirty-pages-log' and 'read-only'." 662 ))); 663 } 664 665 // Keep track of the regions that need dirty pages log 666 self.dirty_log_slots.write().unwrap().insert( 667 region.slot, 668 KvmDirtyLogSlot { 669 slot: region.slot, 670 guest_phys_addr: region.guest_phys_addr, 671 memory_size: region.memory_size, 672 userspace_addr: region.userspace_addr, 673 }, 674 ); 675 676 // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`. 677 // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`. 678 region.flags = 0; 679 } 680 681 // SAFETY: Safe because guest regions are guaranteed not to overlap. 682 unsafe { 683 self.fd 684 .set_user_memory_region(region) 685 .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into())) 686 } 687 } 688 689 /// 690 /// Removes a guest physical memory region. 691 /// 692 fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> { 693 let mut region: kvm_userspace_memory_region = user_memory_region.into(); 694 695 // Remove the corresponding entry from "self.dirty_log_slots" if needed 696 self.dirty_log_slots.write().unwrap().remove(®ion.slot); 697 698 // Setting the size to 0 means "remove" 699 region.memory_size = 0; 700 // SAFETY: Safe because guest regions are guaranteed not to overlap. 
701 unsafe { 702 self.fd 703 .set_user_memory_region(region) 704 .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into())) 705 } 706 } 707 708 /// 709 /// Returns the preferred CPU target type which can be emulated by KVM on underlying host. 710 /// 711 #[cfg(target_arch = "aarch64")] 712 fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> { 713 self.fd 714 .get_preferred_target(kvi) 715 .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into())) 716 } 717 718 #[cfg(target_arch = "x86_64")] 719 fn enable_split_irq(&self) -> vm::Result<()> { 720 // Create split irqchip 721 // Only the local APIC is emulated in kernel, both PICs and IOAPIC 722 // are not. 723 let mut cap = kvm_enable_cap { 724 cap: KVM_CAP_SPLIT_IRQCHIP, 725 ..Default::default() 726 }; 727 cap.args[0] = NUM_IOAPIC_PINS as u64; 728 self.fd 729 .enable_cap(&cap) 730 .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?; 731 Ok(()) 732 } 733 734 #[cfg(target_arch = "x86_64")] 735 fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> { 736 let mut cap = kvm_enable_cap { 737 cap: KVM_CAP_SGX_ATTRIBUTE, 738 ..Default::default() 739 }; 740 cap.args[0] = file.as_raw_fd() as u64; 741 self.fd 742 .enable_cap(&cap) 743 .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?; 744 Ok(()) 745 } 746 747 /// Retrieve guest clock. 748 #[cfg(target_arch = "x86_64")] 749 fn get_clock(&self) -> vm::Result<ClockData> { 750 Ok(self 751 .fd 752 .get_clock() 753 .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))? 754 .into()) 755 } 756 757 /// Set guest clock. 758 #[cfg(target_arch = "x86_64")] 759 fn set_clock(&self, data: &ClockData) -> vm::Result<()> { 760 let data = (*data).into(); 761 self.fd 762 .set_clock(&data) 763 .map_err(|e| vm::HypervisorVmError::SetClock(e.into())) 764 } 765 766 /// Create a device that is used for passthrough 767 fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> { 768 let mut vfio_dev = kvm_create_device { 769 type_: kvm_device_type_KVM_DEV_TYPE_VFIO, 770 fd: 0, 771 flags: 0, 772 }; 773 774 self.create_device(&mut vfio_dev) 775 .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into())) 776 } 777 778 /// 779 /// Start logging dirty pages 780 /// 781 fn start_dirty_log(&self) -> vm::Result<()> { 782 let dirty_log_slots = self.dirty_log_slots.read().unwrap(); 783 for (_, s) in dirty_log_slots.iter() { 784 let region = kvm_userspace_memory_region { 785 slot: s.slot, 786 guest_phys_addr: s.guest_phys_addr, 787 memory_size: s.memory_size, 788 userspace_addr: s.userspace_addr, 789 flags: KVM_MEM_LOG_DIRTY_PAGES, 790 }; 791 // SAFETY: Safe because guest regions are guaranteed not to overlap. 792 unsafe { 793 self.fd 794 .set_user_memory_region(region) 795 .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?; 796 } 797 } 798 799 Ok(()) 800 } 801 802 /// 803 /// Stop logging dirty pages 804 /// 805 fn stop_dirty_log(&self) -> vm::Result<()> { 806 let dirty_log_slots = self.dirty_log_slots.read().unwrap(); 807 for (_, s) in dirty_log_slots.iter() { 808 let region = kvm_userspace_memory_region { 809 slot: s.slot, 810 guest_phys_addr: s.guest_phys_addr, 811 memory_size: s.memory_size, 812 userspace_addr: s.userspace_addr, 813 flags: 0, 814 }; 815 // SAFETY: Safe because guest regions are guaranteed not to overlap. 
816 unsafe { 817 self.fd 818 .set_user_memory_region(region) 819 .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?; 820 } 821 } 822 823 Ok(()) 824 } 825 826 /// 827 /// Get dirty pages bitmap (one bit per page) 828 /// 829 fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> { 830 self.fd 831 .get_dirty_log(slot, memory_size as usize) 832 .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into())) 833 } 834 835 /// 836 /// Initialize TDX for this VM 837 /// 838 #[cfg(feature = "tdx")] 839 fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> { 840 const TDX_ATTR_SEPT_VE_DISABLE: usize = 28; 841 842 let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> = 843 cpuid.iter().map(|e| (*e).into()).collect(); 844 cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default()); 845 846 #[repr(C)] 847 struct TdxInitVm { 848 attributes: u64, 849 max_vcpus: u32, 850 padding: u32, 851 mrconfigid: [u64; 6], 852 mrowner: [u64; 6], 853 mrownerconfig: [u64; 6], 854 cpuid_nent: u32, 855 cpuid_padding: u32, 856 cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256], 857 } 858 let data = TdxInitVm { 859 attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE, 860 max_vcpus, 861 padding: 0, 862 mrconfigid: [0; 6], 863 mrowner: [0; 6], 864 mrownerconfig: [0; 6], 865 cpuid_nent: cpuid.len() as u32, 866 cpuid_padding: 0, 867 cpuid_entries: cpuid.as_slice().try_into().unwrap(), 868 }; 869 870 tdx_command( 871 &self.fd.as_raw_fd(), 872 TdxCommand::InitVm, 873 0, 874 &data as *const _ as u64, 875 ) 876 .map_err(vm::HypervisorVmError::InitializeTdx) 877 } 878 879 /// 880 /// Finalize the TDX setup for this VM 881 /// 882 #[cfg(feature = "tdx")] 883 fn tdx_finalize(&self) -> vm::Result<()> { 884 tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0) 885 .map_err(vm::HypervisorVmError::FinalizeTdx) 886 } 887 888 /// 889 /// Initialize memory regions for the TDX VM 890 /// 891 #[cfg(feature = "tdx")] 892 fn tdx_init_memory_region( 893 &self, 894 host_address: u64, 895 guest_address: u64, 896 size: u64, 897 measure: bool, 898 ) -> vm::Result<()> { 899 #[repr(C)] 900 struct TdxInitMemRegion { 901 host_address: u64, 902 guest_address: u64, 903 pages: u64, 904 } 905 let data = TdxInitMemRegion { 906 host_address, 907 guest_address, 908 pages: size / 4096, 909 }; 910 911 tdx_command( 912 &self.fd.as_raw_fd(), 913 TdxCommand::InitMemRegion, 914 u32::from(measure), 915 &data as *const _ as u64, 916 ) 917 .map_err(vm::HypervisorVmError::InitMemRegionTdx) 918 } 919 920 /// Downcast to the underlying KvmVm type 921 fn as_any(&self) -> &dyn Any { 922 self 923 } 924 } 925 926 #[cfg(feature = "tdx")] 927 fn tdx_command( 928 fd: &RawFd, 929 command: TdxCommand, 930 flags: u32, 931 data: u64, 932 ) -> std::result::Result<(), std::io::Error> { 933 #[repr(C)] 934 struct TdxIoctlCmd { 935 command: TdxCommand, 936 flags: u32, 937 data: u64, 938 error: u64, 939 unused: u64, 940 } 941 let cmd = TdxIoctlCmd { 942 command, 943 flags, 944 data, 945 error: 0, 946 unused: 0, 947 }; 948 // SAFETY: FFI call. All input parameters are valid. 949 let ret = unsafe { 950 ioctl_with_val( 951 fd, 952 KVM_MEMORY_ENCRYPT_OP(), 953 &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong, 954 ) 955 }; 956 957 if ret < 0 { 958 return Err(std::io::Error::last_os_error()); 959 } 960 Ok(()) 961 } 962 963 /// Wrapper over KVM system ioctls. 
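///
/// # Examples
///
/// A minimal construction sketch; like the other examples in this module it
/// assumes the standard `/dev/kvm` node is present and accessible.
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// if KvmHypervisor::is_available().unwrap() {
///     let _hypervisor = KvmHypervisor::new().unwrap();
/// }
/// ```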
964 pub struct KvmHypervisor { 965 kvm: Kvm, 966 } 967 968 impl KvmHypervisor { 969 #[cfg(target_arch = "x86_64")] 970 /// 971 /// Retrieve the list of MSRs supported by the hypervisor. 972 /// 973 fn get_msr_list(&self) -> hypervisor::Result<MsrList> { 974 self.kvm 975 .get_msr_index_list() 976 .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into())) 977 } 978 } 979 980 /// Enum for KVM related error 981 #[derive(Debug, Error)] 982 pub enum KvmError { 983 #[error("Capability missing: {0:?}")] 984 CapabilityMissing(Cap), 985 } 986 987 pub type KvmResult<T> = result::Result<T, KvmError>; 988 989 impl KvmHypervisor { 990 /// Create a hypervisor based on Kvm 991 #[allow(clippy::new_ret_no_self)] 992 pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> { 993 let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?; 994 let api_version = kvm_obj.get_api_version(); 995 996 if api_version != kvm_bindings::KVM_API_VERSION as i32 { 997 return Err(hypervisor::HypervisorError::IncompatibleApiVersion); 998 } 999 1000 Ok(Arc::new(KvmHypervisor { kvm: kvm_obj })) 1001 } 1002 1003 /// Check if the hypervisor is available 1004 pub fn is_available() -> hypervisor::Result<bool> { 1005 match std::fs::metadata("/dev/kvm") { 1006 Ok(_) => Ok(true), 1007 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false), 1008 Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck( 1009 err.into(), 1010 )), 1011 } 1012 } 1013 } 1014 1015 /// Implementation of Hypervisor trait for KVM 1016 /// 1017 /// # Examples 1018 /// 1019 /// ``` 1020 /// # use hypervisor::kvm::KvmHypervisor; 1021 /// # use std::sync::Arc; 1022 /// let kvm = KvmHypervisor::new().unwrap(); 1023 /// let hypervisor = Arc::new(kvm); 1024 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 1025 /// ``` 1026 impl hypervisor::Hypervisor for KvmHypervisor { 1027 /// 1028 /// Returns the type of the hypervisor 1029 /// 1030 fn hypervisor_type(&self) -> HypervisorType { 1031 HypervisorType::Kvm 1032 } 1033 1034 /// Create a KVM vm object of a specific VM type and return the object as Vm trait object 1035 /// 1036 /// # Examples 1037 /// 1038 /// ``` 1039 /// # use hypervisor::kvm::KvmHypervisor; 1040 /// use hypervisor::kvm::KvmVm; 1041 /// let hypervisor = KvmHypervisor::new().unwrap(); 1042 /// let vm = hypervisor.create_vm_with_type(0).unwrap(); 1043 /// ``` 1044 fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> { 1045 let fd: VmFd; 1046 loop { 1047 match self.kvm.create_vm_with_type(vm_type) { 1048 Ok(res) => fd = res, 1049 Err(e) => { 1050 if e.errno() == libc::EINTR { 1051 // If the error returned is EINTR, which means the 1052 // ioctl has been interrupted, we have to retry as 1053 // this can't be considered as a regular error. 
1054 continue; 1055 } else { 1056 return Err(hypervisor::HypervisorError::VmCreate(e.into())); 1057 } 1058 } 1059 } 1060 break; 1061 } 1062 1063 let vm_fd = Arc::new(fd); 1064 1065 #[cfg(target_arch = "x86_64")] 1066 { 1067 let msr_list = self.get_msr_list()?; 1068 let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize; 1069 let mut msrs: Vec<MsrEntry> = vec![ 1070 MsrEntry { 1071 ..Default::default() 1072 }; 1073 num_msrs 1074 ]; 1075 let indices = msr_list.as_slice(); 1076 for (pos, index) in indices.iter().enumerate() { 1077 msrs[pos].index = *index; 1078 } 1079 1080 Ok(Arc::new(KvmVm { 1081 fd: vm_fd, 1082 msrs, 1083 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())), 1084 })) 1085 } 1086 1087 #[cfg(target_arch = "aarch64")] 1088 { 1089 Ok(Arc::new(KvmVm { 1090 fd: vm_fd, 1091 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())), 1092 })) 1093 } 1094 } 1095 1096 /// Create a KVM vm object and return the object as Vm trait object 1097 /// 1098 /// # Examples 1099 /// 1100 /// ``` 1101 /// # use hypervisor::kvm::KvmHypervisor; 1102 /// use hypervisor::kvm::KvmVm; 1103 /// let hypervisor = KvmHypervisor::new().unwrap(); 1104 /// let vm = hypervisor.create_vm().unwrap(); 1105 /// ``` 1106 fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> { 1107 #[allow(unused_mut)] 1108 let mut vm_type: u64 = 0; // Create with default platform type 1109 1110 // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA 1111 // size from the host and use that when creating the VM, which may 1112 // avoid unnecessary VM creation failures. 1113 #[cfg(target_arch = "aarch64")] 1114 if self.kvm.check_extension(Cap::ArmVmIPASize) { 1115 vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap(); 1116 } 1117 1118 self.create_vm_with_type(vm_type) 1119 } 1120 1121 fn check_required_extensions(&self) -> hypervisor::Result<()> { 1122 check_required_kvm_extensions(&self.kvm) 1123 .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into())) 1124 } 1125 1126 #[cfg(target_arch = "x86_64")] 1127 /// 1128 /// X86 specific call to get the system supported CPUID values. 1129 /// 1130 fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> { 1131 let kvm_cpuid = self 1132 .kvm 1133 .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES) 1134 .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?; 1135 1136 let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect(); 1137 1138 Ok(v) 1139 } 1140 1141 #[cfg(target_arch = "aarch64")] 1142 /// 1143 /// Retrieve AArch64 host maximum IPA size supported by KVM. 
1144 /// 1145 fn get_host_ipa_limit(&self) -> i32 { 1146 self.kvm.get_host_ipa_limit() 1147 } 1148 1149 /// 1150 /// Retrieve TDX capabilities 1151 /// 1152 #[cfg(feature = "tdx")] 1153 fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> { 1154 let data = TdxCapabilities { 1155 nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32, 1156 ..Default::default() 1157 }; 1158 1159 tdx_command( 1160 &self.kvm.as_raw_fd(), 1161 TdxCommand::Capabilities, 1162 0, 1163 &data as *const _ as u64, 1164 ) 1165 .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?; 1166 1167 Ok(data) 1168 } 1169 1170 /// 1171 /// Get the number of supported hardware breakpoints 1172 /// 1173 fn get_guest_debug_hw_bps(&self) -> usize { 1174 #[cfg(target_arch = "x86_64")] 1175 { 1176 4 1177 } 1178 #[cfg(target_arch = "aarch64")] 1179 { 1180 self.kvm.get_guest_debug_hw_bps() as usize 1181 } 1182 } 1183 1184 /// Get maximum number of vCPUs 1185 fn get_max_vcpus(&self) -> u32 { 1186 self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32 1187 } 1188 } 1189 1190 /// Vcpu struct for KVM 1191 pub struct KvmVcpu { 1192 fd: Arc<Mutex<VcpuFd>>, 1193 #[cfg(target_arch = "x86_64")] 1194 msrs: Vec<MsrEntry>, 1195 vm_ops: Option<Arc<dyn vm::VmOps>>, 1196 #[cfg(target_arch = "x86_64")] 1197 hyperv_synic: AtomicBool, 1198 } 1199 1200 /// Implementation of Vcpu trait for KVM 1201 /// 1202 /// # Examples 1203 /// 1204 /// ``` 1205 /// # use hypervisor::kvm::KvmHypervisor; 1206 /// # use std::sync::Arc; 1207 /// let kvm = KvmHypervisor::new().unwrap(); 1208 /// let hypervisor = Arc::new(kvm); 1209 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 1210 /// let vcpu = vm.create_vcpu(0, None).unwrap(); 1211 /// ``` 1212 impl cpu::Vcpu for KvmVcpu { 1213 /// 1214 /// Returns StandardRegisters with default value set 1215 /// 1216 #[cfg(target_arch = "x86_64")] 1217 fn create_standard_regs(&self) -> StandardRegisters { 1218 kvm_bindings::kvm_regs::default().into() 1219 } 1220 #[cfg(target_arch = "x86_64")] 1221 /// 1222 /// Returns the vCPU general purpose registers. 1223 /// 1224 fn get_regs(&self) -> cpu::Result<StandardRegisters> { 1225 Ok(self 1226 .fd 1227 .lock() 1228 .unwrap() 1229 .get_regs() 1230 .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))? 1231 .into()) 1232 } 1233 1234 /// 1235 /// Returns the vCPU general purpose registers. 1236 /// The `KVM_GET_REGS` ioctl is not available on AArch64, `KVM_GET_ONE_REG` 1237 /// is used to get registers one by one. 1238 /// 1239 #[cfg(target_arch = "aarch64")] 1240 fn get_regs(&self) -> cpu::Result<StandardRegisters> { 1241 let mut state = kvm_regs::default(); 1242 let mut off = offset_of!(user_pt_regs, regs); 1243 // There are 31 user_pt_regs: 1244 // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72 1245 // These actually are the general-purpose registers of the Armv8-a 1246 // architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register). 1247 for i in 0..31 { 1248 let mut bytes = [0_u8; 8]; 1249 self.fd 1250 .lock() 1251 .unwrap() 1252 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1253 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1254 state.regs.regs[i] = u64::from_le_bytes(bytes); 1255 off += std::mem::size_of::<u64>(); 1256 } 1257 1258 // We are now entering the "Other register" section of the ARMv8-a architecture. 1259 // First one, stack pointer. 
1260 let off = offset_of!(user_pt_regs, sp); 1261 let mut bytes = [0_u8; 8]; 1262 self.fd 1263 .lock() 1264 .unwrap() 1265 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1266 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1267 state.regs.sp = u64::from_le_bytes(bytes); 1268 1269 // Second one, the program counter. 1270 let off = offset_of!(user_pt_regs, pc); 1271 let mut bytes = [0_u8; 8]; 1272 self.fd 1273 .lock() 1274 .unwrap() 1275 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1276 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1277 state.regs.pc = u64::from_le_bytes(bytes); 1278 1279 // Next is the processor state. 1280 let off = offset_of!(user_pt_regs, pstate); 1281 let mut bytes = [0_u8; 8]; 1282 self.fd 1283 .lock() 1284 .unwrap() 1285 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1286 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1287 state.regs.pstate = u64::from_le_bytes(bytes); 1288 1289 // The stack pointer associated with EL1 1290 let off = offset_of!(kvm_regs, sp_el1); 1291 let mut bytes = [0_u8; 8]; 1292 self.fd 1293 .lock() 1294 .unwrap() 1295 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1296 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1297 state.sp_el1 = u64::from_le_bytes(bytes); 1298 1299 // Exception Link Register for EL1, when taking an exception to EL1, this register 1300 // holds the address to which to return afterwards. 1301 let off = offset_of!(kvm_regs, elr_el1); 1302 let mut bytes = [0_u8; 8]; 1303 self.fd 1304 .lock() 1305 .unwrap() 1306 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1307 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1308 state.elr_el1 = u64::from_le_bytes(bytes); 1309 1310 // Saved Program Status Registers, there are 5 of them used in the kernel. 
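        // (KVM exposes these as kvm_regs.spsr[0..5]: SPSR_EL1 plus the banked
        // SPSR_abt, SPSR_und, SPSR_irq and SPSR_fiq values.)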
1311 let mut off = offset_of!(kvm_regs, spsr); 1312 for i in 0..KVM_NR_SPSR as usize { 1313 let mut bytes = [0_u8; 8]; 1314 self.fd 1315 .lock() 1316 .unwrap() 1317 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1318 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1319 state.spsr[i] = u64::from_le_bytes(bytes); 1320 off += std::mem::size_of::<u64>(); 1321 } 1322 1323 // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel: 1324 // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53 1325 let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs); 1326 for i in 0..32 { 1327 let mut bytes = [0_u8; 16]; 1328 self.fd 1329 .lock() 1330 .unwrap() 1331 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes) 1332 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1333 state.fp_regs.vregs[i] = u128::from_le_bytes(bytes); 1334 off += mem::size_of::<u128>(); 1335 } 1336 1337 // Floating-point Status Register 1338 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr); 1339 let mut bytes = [0_u8; 4]; 1340 self.fd 1341 .lock() 1342 .unwrap() 1343 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes) 1344 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1345 state.fp_regs.fpsr = u32::from_le_bytes(bytes); 1346 1347 // Floating-point Control Register 1348 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr); 1349 let mut bytes = [0_u8; 4]; 1350 self.fd 1351 .lock() 1352 .unwrap() 1353 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes) 1354 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1355 state.fp_regs.fpcr = u32::from_le_bytes(bytes); 1356 Ok(state.into()) 1357 } 1358 1359 #[cfg(target_arch = "x86_64")] 1360 /// 1361 /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl. 1362 /// 1363 fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> { 1364 let regs = (*regs).into(); 1365 self.fd 1366 .lock() 1367 .unwrap() 1368 .set_regs(®s) 1369 .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into())) 1370 } 1371 1372 /// 1373 /// Sets the vCPU general purpose registers. 1374 /// The `KVM_SET_REGS` ioctl is not available on AArch64, `KVM_SET_ONE_REG` 1375 /// is used to set registers one by one. 1376 /// 1377 #[cfg(target_arch = "aarch64")] 1378 fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> { 1379 // The function follows the exact identical order from `state`. Look there 1380 // for some additional info on registers. 
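        // As with `get_regs`, each core register is written individually:
        // `arm64_core_reg_id!` builds the KVM_SET_ONE_REG id from the register
        // size and its byte offset within the kernel's `kvm_regs` layout.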
1381 let kvm_regs_state: kvm_regs = (*state).into(); 1382 let mut off = offset_of!(user_pt_regs, regs); 1383 for i in 0..31 { 1384 self.fd 1385 .lock() 1386 .unwrap() 1387 .set_one_reg( 1388 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1389 &kvm_regs_state.regs.regs[i].to_le_bytes(), 1390 ) 1391 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1392 off += std::mem::size_of::<u64>(); 1393 } 1394 1395 let off = offset_of!(user_pt_regs, sp); 1396 self.fd 1397 .lock() 1398 .unwrap() 1399 .set_one_reg( 1400 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1401 &kvm_regs_state.regs.sp.to_le_bytes(), 1402 ) 1403 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1404 1405 let off = offset_of!(user_pt_regs, pc); 1406 self.fd 1407 .lock() 1408 .unwrap() 1409 .set_one_reg( 1410 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1411 &kvm_regs_state.regs.pc.to_le_bytes(), 1412 ) 1413 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1414 1415 let off = offset_of!(user_pt_regs, pstate); 1416 self.fd 1417 .lock() 1418 .unwrap() 1419 .set_one_reg( 1420 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1421 &kvm_regs_state.regs.pstate.to_le_bytes(), 1422 ) 1423 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1424 1425 let off = offset_of!(kvm_regs, sp_el1); 1426 self.fd 1427 .lock() 1428 .unwrap() 1429 .set_one_reg( 1430 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1431 &kvm_regs_state.sp_el1.to_le_bytes(), 1432 ) 1433 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1434 1435 let off = offset_of!(kvm_regs, elr_el1); 1436 self.fd 1437 .lock() 1438 .unwrap() 1439 .set_one_reg( 1440 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1441 &kvm_regs_state.elr_el1.to_le_bytes(), 1442 ) 1443 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1444 1445 let mut off = offset_of!(kvm_regs, spsr); 1446 for i in 0..KVM_NR_SPSR as usize { 1447 self.fd 1448 .lock() 1449 .unwrap() 1450 .set_one_reg( 1451 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1452 &kvm_regs_state.spsr[i].to_le_bytes(), 1453 ) 1454 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1455 off += std::mem::size_of::<u64>(); 1456 } 1457 1458 let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs); 1459 for i in 0..32 { 1460 self.fd 1461 .lock() 1462 .unwrap() 1463 .set_one_reg( 1464 arm64_core_reg_id!(KVM_REG_SIZE_U128, off), 1465 &kvm_regs_state.fp_regs.vregs[i].to_le_bytes(), 1466 ) 1467 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1468 off += mem::size_of::<u128>(); 1469 } 1470 1471 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr); 1472 self.fd 1473 .lock() 1474 .unwrap() 1475 .set_one_reg( 1476 arm64_core_reg_id!(KVM_REG_SIZE_U32, off), 1477 &kvm_regs_state.fp_regs.fpsr.to_le_bytes(), 1478 ) 1479 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1480 1481 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr); 1482 self.fd 1483 .lock() 1484 .unwrap() 1485 .set_one_reg( 1486 arm64_core_reg_id!(KVM_REG_SIZE_U32, off), 1487 &kvm_regs_state.fp_regs.fpcr.to_le_bytes(), 1488 ) 1489 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1490 Ok(()) 1491 } 1492 1493 #[cfg(target_arch = "x86_64")] 1494 /// 1495 /// Returns the vCPU special registers. 
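    ///
    /// A minimal usage sketch; like the other examples in this module it
    /// assumes `/dev/kvm` is available on the host running the doc test.
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hypervisor = Arc::new(kvm);
    /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let _sregs = vcpu.get_sregs().unwrap();
    /// ```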
1496 /// 1497 fn get_sregs(&self) -> cpu::Result<SpecialRegisters> { 1498 Ok(self 1499 .fd 1500 .lock() 1501 .unwrap() 1502 .get_sregs() 1503 .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))? 1504 .into()) 1505 } 1506 1507 #[cfg(target_arch = "x86_64")] 1508 /// 1509 /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl. 1510 /// 1511 fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> { 1512 let sregs = (*sregs).into(); 1513 self.fd 1514 .lock() 1515 .unwrap() 1516 .set_sregs(&sregs) 1517 .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into())) 1518 } 1519 1520 #[cfg(target_arch = "x86_64")] 1521 /// 1522 /// Returns the floating point state (FPU) from the vCPU. 1523 /// 1524 fn get_fpu(&self) -> cpu::Result<FpuState> { 1525 Ok(self 1526 .fd 1527 .lock() 1528 .unwrap() 1529 .get_fpu() 1530 .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))? 1531 .into()) 1532 } 1533 1534 #[cfg(target_arch = "x86_64")] 1535 /// 1536 /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl. 1537 /// 1538 fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> { 1539 let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into(); 1540 self.fd 1541 .lock() 1542 .unwrap() 1543 .set_fpu(&fpu) 1544 .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into())) 1545 } 1546 1547 #[cfg(target_arch = "x86_64")] 1548 /// 1549 /// X86 specific call to setup the CPUID registers. 1550 /// 1551 fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> { 1552 let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> = 1553 cpuid.iter().map(|e| (*e).into()).collect(); 1554 let kvm_cpuid = <CpuId>::from_entries(&cpuid) 1555 .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?; 1556 1557 self.fd 1558 .lock() 1559 .unwrap() 1560 .set_cpuid2(&kvm_cpuid) 1561 .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into())) 1562 } 1563 1564 #[cfg(target_arch = "x86_64")] 1565 /// 1566 /// X86 specific call to enable HyperV SynIC 1567 /// 1568 fn enable_hyperv_synic(&self) -> cpu::Result<()> { 1569 // Update the information about Hyper-V SynIC being enabled and 1570 // emulated as it will influence later which MSRs should be saved. 1571 self.hyperv_synic.store(true, Ordering::Release); 1572 1573 let cap = kvm_enable_cap { 1574 cap: KVM_CAP_HYPERV_SYNIC, 1575 ..Default::default() 1576 }; 1577 self.fd 1578 .lock() 1579 .unwrap() 1580 .enable_cap(&cap) 1581 .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into())) 1582 } 1583 1584 /// 1585 /// X86 specific call to retrieve the CPUID registers. 1586 /// 1587 #[cfg(target_arch = "x86_64")] 1588 fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> { 1589 let kvm_cpuid = self 1590 .fd 1591 .lock() 1592 .unwrap() 1593 .get_cpuid2(num_entries) 1594 .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?; 1595 1596 let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect(); 1597 1598 Ok(v) 1599 } 1600 1601 #[cfg(target_arch = "x86_64")] 1602 /// 1603 /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 1604 /// 1605 fn get_lapic(&self) -> cpu::Result<LapicState> { 1606 Ok(self 1607 .fd 1608 .lock() 1609 .unwrap() 1610 .get_lapic() 1611 .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))? 1612 .into()) 1613 } 1614 1615 #[cfg(target_arch = "x86_64")] 1616 /// 1617 /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 
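    ///
    /// A short get/set round-trip sketch; it assumes KVM is available and
    /// relies on the split irqchip keeping the LAPIC in the kernel so that it
    /// can be read back.
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hypervisor = Arc::new(kvm);
    /// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let lapic = vcpu.get_lapic().unwrap();
    /// vcpu.set_lapic(&lapic).unwrap();
    /// ```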
1618 /// 1619 fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> { 1620 let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into(); 1621 self.fd 1622 .lock() 1623 .unwrap() 1624 .set_lapic(&klapic) 1625 .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into())) 1626 } 1627 1628 #[cfg(target_arch = "x86_64")] 1629 /// 1630 /// Returns the model-specific registers (MSR) for this vCPU. 1631 /// 1632 fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> { 1633 let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect(); 1634 let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap(); 1635 let succ = self 1636 .fd 1637 .lock() 1638 .unwrap() 1639 .get_msrs(&mut kvm_msrs) 1640 .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?; 1641 1642 msrs[..succ].copy_from_slice( 1643 &kvm_msrs.as_slice()[..succ] 1644 .iter() 1645 .map(|e| (*e).into()) 1646 .collect::<Vec<MsrEntry>>(), 1647 ); 1648 1649 Ok(succ) 1650 } 1651 1652 #[cfg(target_arch = "x86_64")] 1653 /// 1654 /// Setup the model-specific registers (MSR) for this vCPU. 1655 /// Returns the number of MSR entries actually written. 1656 /// 1657 fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> { 1658 let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect(); 1659 let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap(); 1660 self.fd 1661 .lock() 1662 .unwrap() 1663 .set_msrs(&kvm_msrs) 1664 .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into())) 1665 } 1666 1667 /// 1668 /// Returns the vcpu's current "multiprocessing state". 1669 /// 1670 fn get_mp_state(&self) -> cpu::Result<MpState> { 1671 Ok(self 1672 .fd 1673 .lock() 1674 .unwrap() 1675 .get_mp_state() 1676 .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))? 1677 .into()) 1678 } 1679 1680 /// 1681 /// Sets the vcpu's current "multiprocessing state". 1682 /// 1683 fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> { 1684 self.fd 1685 .lock() 1686 .unwrap() 1687 .set_mp_state(mp_state.into()) 1688 .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into())) 1689 } 1690 1691 #[cfg(target_arch = "x86_64")] 1692 /// 1693 /// Translates guest virtual address to guest physical address using the `KVM_TRANSLATE` ioctl. 1694 /// 1695 fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> { 1696 let tr = self 1697 .fd 1698 .lock() 1699 .unwrap() 1700 .translate_gva(gva) 1701 .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?; 1702 // tr.valid is set if the GVA is mapped to valid GPA. 1703 match tr.valid { 1704 0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!( 1705 "Invalid GVA: {:#x}", 1706 gva 1707 ))), 1708 _ => Ok((tr.physical_address, 0)), 1709 } 1710 } 1711 1712 /// 1713 /// Triggers the running of the current virtual CPU returning an exit reason. 
1714 /// 1715 fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> { 1716 match self.fd.lock().unwrap().run() { 1717 Ok(run) => match run { 1718 #[cfg(target_arch = "x86_64")] 1719 VcpuExit::IoIn(addr, data) => { 1720 if let Some(vm_ops) = &self.vm_ops { 1721 return vm_ops 1722 .pio_read(addr.into(), data) 1723 .map(|_| cpu::VmExit::Ignore) 1724 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1725 } 1726 1727 Ok(cpu::VmExit::Ignore) 1728 } 1729 #[cfg(target_arch = "x86_64")] 1730 VcpuExit::IoOut(addr, data) => { 1731 if let Some(vm_ops) = &self.vm_ops { 1732 return vm_ops 1733 .pio_write(addr.into(), data) 1734 .map(|_| cpu::VmExit::Ignore) 1735 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1736 } 1737 1738 Ok(cpu::VmExit::Ignore) 1739 } 1740 #[cfg(target_arch = "x86_64")] 1741 VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)), 1742 #[cfg(target_arch = "x86_64")] 1743 VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset), 1744 1745 #[cfg(target_arch = "aarch64")] 1746 VcpuExit::SystemEvent(event_type, flags) => { 1747 use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN}; 1748 // On Aarch64, when the VM is shutdown, run() returns 1749 // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN 1750 if event_type == KVM_SYSTEM_EVENT_RESET { 1751 Ok(cpu::VmExit::Reset) 1752 } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN { 1753 Ok(cpu::VmExit::Shutdown) 1754 } else { 1755 Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1756 "Unexpected system event with type 0x{:x}, flags 0x{:x?}", 1757 event_type, 1758 flags 1759 ))) 1760 } 1761 } 1762 1763 VcpuExit::MmioRead(addr, data) => { 1764 if let Some(vm_ops) = &self.vm_ops { 1765 return vm_ops 1766 .mmio_read(addr, data) 1767 .map(|_| cpu::VmExit::Ignore) 1768 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1769 } 1770 1771 Ok(cpu::VmExit::Ignore) 1772 } 1773 VcpuExit::MmioWrite(addr, data) => { 1774 if let Some(vm_ops) = &self.vm_ops { 1775 return vm_ops 1776 .mmio_write(addr, data) 1777 .map(|_| cpu::VmExit::Ignore) 1778 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1779 } 1780 1781 Ok(cpu::VmExit::Ignore) 1782 } 1783 VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv), 1784 #[cfg(feature = "tdx")] 1785 VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx), 1786 VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug), 1787 1788 r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1789 "Unexpected exit reason on vcpu run: {:?}", 1790 r 1791 ))), 1792 }, 1793 1794 Err(ref e) => match e.errno() { 1795 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore), 1796 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1797 "VCPU error {:?}", 1798 e 1799 ))), 1800 }, 1801 } 1802 } 1803 1804 #[cfg(target_arch = "x86_64")] 1805 /// 1806 /// Let the guest know that it has been paused, which prevents from 1807 /// potential soft lockups when being resumed. 1808 /// 1809 fn notify_guest_clock_paused(&self) -> cpu::Result<()> { 1810 if let Err(e) = self.fd.lock().unwrap().kvmclock_ctrl() { 1811 // Linux kernel returns -EINVAL if the PV clock isn't yet initialised 1812 // which could be because we're still in firmware or the guest doesn't 1813 // use KVM clock. 1814 if e.errno() != libc::EINVAL { 1815 return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into())); 1816 } 1817 } 1818 1819 Ok(()) 1820 } 1821 1822 /// 1823 /// Sets debug registers to set hardware breakpoints and/or enable single step. 
    ///
    fn set_guest_debug(
        &self,
        addrs: &[vm_memory::GuestAddress],
        singlestep: bool,
    ) -> cpu::Result<()> {
        let mut dbg = kvm_guest_debug {
            #[cfg(target_arch = "x86_64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
            #[cfg(target_arch = "aarch64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW,
            ..Default::default()
        };
        if singlestep {
            dbg.control |= KVM_GUESTDBG_SINGLESTEP;
        }

        // Set the debug registers.
        // Here we assume that the number of addresses does not exceed what
        // `Hypervisor::get_guest_debug_hw_bps()` specifies.
        #[cfg(target_arch = "x86_64")]
        {
            // Set bits 9 and 10.
            // bit 9: GE (global exact breakpoint enable) flag.
            // bit 10: always 1.
            dbg.arch.debugreg[7] = 0x0600;

            for (i, addr) in addrs.iter().enumerate() {
                dbg.arch.debugreg[i] = addr.0;
                // Set global breakpoint enable flag
                dbg.arch.debugreg[7] |= 2 << (i * 2);
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            for (i, addr) in addrs.iter().enumerate() {
                // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2):
                // bit 0: 1 (Enabled)
                // bit 1~2: 0b11 (PMC = EL1/EL0)
                // bit 5~8: 0b1111 (BAS = AArch64)
                // others: 0
                dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64;
                // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3):
                // bit 2~52: VA[2:52]
                dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0;
            }
        }
        self.fd
            .lock()
            .unwrap()
            .set_guest_debug(&dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .vcpu_init(kvi)
            .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_finalize(&self, feature: i32) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .vcpu_finalize(&feature)
            .map_err(|e| cpu::HypervisorCpuError::VcpuFinalize(e.into()))
    }

    ///
    /// Gets a list of the guest registers that are supported for the
    /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))
    }

    ///
    /// Gets the value of a system register
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
        //
        // The Arm Architecture Reference Manual defines the encoding of
        // AArch64 system registers, see
        // https://developer.arm.com/documentation/ddi0487 (chapter D12).
        // KVM defines its own ID for each AArch64 system register, which
        // is used when calling `KVM_GET/SET_ONE_REG` to access a system
        // register of a guest.
        // A mapping exists between the Arm standard encoding and the KVM ID.
        // This function takes the standard u32 ID as input parameter, converts
        // it to the corresponding KVM ID, and calls the `KVM_GET_ONE_REG` API
        // to get the value of the system register.
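        // Note: the standard u32 ID packs op0/op1/CRn/CRm/op2 the way the
        // MRS/MSR instruction encodes them (bits [20:5]), so shifting it
        // right by 5 below aligns those fields with the
        // KVM_REG_ARM64_SYSREG_* masks.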
        //
        let id: u64 = KVM_REG_ARM64
            | KVM_REG_SIZE_U64
            | KVM_REG_ARM64_SYSREG as u64
            | ((((sys_reg) >> 5)
                & (KVM_REG_ARM64_SYSREG_OP0_MASK
                    | KVM_REG_ARM64_SYSREG_OP1_MASK
                    | KVM_REG_ARM64_SYSREG_CRN_MASK
                    | KVM_REG_ARM64_SYSREG_CRM_MASK
                    | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(id, &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
        Ok(u64::from_le_bytes(bytes))
    }

    ///
    /// Configure core registers for a given CPU.
    ///
    #[cfg(target_arch = "aarch64")]
    fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
        #[allow(non_upper_case_globals)]
        // PSR (Processor State Register) bits.
        // Taken from arch/arm64/include/uapi/asm/ptrace.h.
        const PSR_MODE_EL1h: u64 = 0x0000_0005;
        const PSR_F_BIT: u64 = 0x0000_0040;
        const PSR_I_BIT: u64 = 0x0000_0080;
        const PSR_A_BIT: u64 = 0x0000_0100;
        const PSR_D_BIT: u64 = 0x0000_0200;
        // Taken from arch/arm64/kvm/inject_fault.c.
        const PSTATE_FAULT_BITS_64: u64 =
            PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;

        let kreg_off = offset_of!(kvm_regs, regs);

        // Get the register index of the PSTATE (Processor State) register.
        let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
                &PSTATE_FAULT_BITS_64.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        // Other vCPUs are powered off initially, awaiting PSCI wakeup.
        if cpu_id == 0 {
            // Set the PC (Program Counter) to the current program address (kernel address).
            let pc = offset_of!(user_pt_regs, pc) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
                    &boot_ip.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

            // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
            // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
            // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
            // We are choosing to place it at the end of DRAM. See `get_fdt_addr`.
            let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
                    &fdt_start.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
        }
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before almost everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change the state of the LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be saved last, as it may be affected
    /// by internal state modifications of the preceding GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a prepopulated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?.into();
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fall back to a slower method by getting MSRs
        // by chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
                0x400000b5, 0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr.into());
            }
        }

        let expected_num_msrs = msr_entries.len();
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;
        let tsc_khz = self.tsc_khz()?;

        Ok(VcpuKvmState {
            cpuid,
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
            tsc_khz,
        }
        .into())
    }

    ///
    /// Get the current AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = VcpuKvmState {
            mp_state: self.get_mp_state()?.into(),
            ..Default::default()
        };
        // Get core registers
        state.core_regs = self.get_regs()?.into();

        // Get system registers
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For Armv8 there are around 500 registers.
        let mut sys_regs: Vec<Register> = Vec::new();
        let mut reg_list = RegList::new(500).unwrap();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain: core registers and system
        // registers.
        // The register list contains the number of registers and their ids. We
        // will need to call KVM_GET_ONE_REG on each id in order to save
        // all of them. We carve out from the list the core registers, which are
        // represented in the kernel by the kvm_regs structure and for which we
        // can calculate the id based on the offset in the structure.
        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we are simply calling KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(*index, &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
            sys_regs.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: u64::from_le_bytes(bytes),
            });
        }

        state.sys_regs = sys_regs;

        Ok(state.into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully when the LAPIC is correctly configured.
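    ///
    /// SET_MSRS reports how many entries it actually set; when that count is
    /// short of the number requested, the loop in the function body skips the
    /// entry it stopped at and retries the remainder, so unsupported MSRs do
    /// not abort the whole restore.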
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state.into())?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        if let Some(freq) = state.tsc_khz {
            self.set_tsc_khz(freq)?;
        }

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fall back to a slower method by setting MSRs
        // by chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.len();
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let sub_msr_entries = state.msrs[start_pos..].to_vec();

                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }

    ///
    /// Restore the previously saved AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        // Set core registers
        self.set_regs(&state.core_regs.into())?;
        // Set system registers
        for reg in &state.sys_regs {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(reg.id, &reg.addr.to_le_bytes())
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }

        self.set_mp_state(state.mp_state.into())?;

        Ok(())
    }

    ///
    /// Initialize TDX for this CPU
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
        tdx_command(
            &self.fd.lock().unwrap().as_raw_fd(),
            TdxCommand::InitVcpu,
            0,
            hob_address,
        )
        .map_err(cpu::HypervisorCpuError::InitializeTdx)
    }

    ///
    /// Set the "immediate_exit" state
    ///
    fn set_immediate_exit(&self, exit: bool) {
        self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into());
    }

    ///
    /// Returns the details about TDX exit reason
    ///
    #[cfg(feature = "tdx")]
    fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
        // SAFETY: accessing a union field in a valid structure
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;

        if tdx_vmcall.type_ != 0 {
            return Err(cpu::HypervisorCpuError::UnknownTdxVmCall);
        }

        match tdx_vmcall.subfunction {
            TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
            TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
                Ok(TdxExitDetails::SetupEventNotifyInterrupt)
            }
            _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
        }
    }

    ///
    /// Set the status code for TDX exit
    ///
    #[cfg(feature = "tdx")]
    fn set_tdx_status(&mut self, status: TdxExitStatus) {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
        // SAFETY: accessing a union field in a valid structure
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = match status {
            TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
            TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
        };
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU
    ///
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr!(msr_index::MSR_IA32_TSC),
            msr_data!(
                msr_index::MSR_IA32_MISC_ENABLE,
                msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
            ),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }

    #[cfg(target_arch = "aarch64")]
    fn has_pmu_support(&self) -> bool {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        self.fd.lock().unwrap().has_device_attr(&cpu_attr).is_ok()
    }

    #[cfg(target_arch = "aarch64")]
    fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        let cpu_attr_irq = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
            addr: &irq as *const u32 as u64,
            flags: 0,
        };
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr_irq)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the frequency of the TSC if available
    ///
    fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
        match self.fd.lock().unwrap().get_tsc_khz() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(None)
                } else {
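                    // Anything other than EIO means the TSC frequency really
                    // could not be retrieved, so surface the error to the caller.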
                    Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
                }
            }
            Ok(v) => Ok(Some(v)),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the frequency of the TSC if available
    ///
    fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
        match self.fd.lock().unwrap().set_tsc_khz(freq) {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Trigger NMI interrupt
    ///
    fn nmi(&self) -> cpu::Result<()> {
        match self.fd.lock().unwrap().nmi() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::Nmi(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }
}

impl KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<XsaveState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
        let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_xsave(&xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .lock()
            .unwrap()
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .lock()
            .unwrap()
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
}
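
// Illustrative sketch (not part of the crate's API): the MSR fault-skipping
// loops in `state()` and `set_state()` above reduce to the pattern below, with
// a plain closure standing in for the partial-success semantics of
// KVM_GET_MSRS/KVM_SET_MSRS. All names in this module are hypothetical and
// exist only for this example.
#[cfg(test)]
mod msr_fallback_sketch {
    /// `get` returns how many leading entries of the slice it could process,
    /// mimicking how GET_MSRS/SET_MSRS stop at the first unsupported MSR.
    fn collect_supported<T: Clone>(entries: &[T], get: impl Fn(&[T]) -> usize) -> Vec<T> {
        let mut out = Vec::new();
        let mut start = 0;
        loop {
            let done = get(&entries[start..]);
            out.extend_from_slice(&entries[start..start + done]);
            if start + done == entries.len() {
                break;
            }
            // Skip the entry the call stopped at and retry the remainder.
            start += done + 1;
        }
        out
    }

    #[test]
    fn skips_faulty_entries() {
        let entries: Vec<u32> = (0..10).collect();
        // Pretend indices 3 and 7 are unsupported MSRs.
        let supported = collect_supported(&entries, |chunk| {
            chunk
                .iter()
                .position(|e| *e == 3 || *e == 7)
                .unwrap_or(chunk.len())
        });
        assert_eq!(supported, vec![0, 1, 2, 4, 5, 6, 8, 9]);
    }
}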