1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause 4 // 5 // Copyright © 2020, Microsoft Corporation 6 // 7 // Copyright 2018-2019 CrowdStrike, Inc. 8 // 9 // 10 11 #[cfg(target_arch = "aarch64")] 12 use crate::aarch64::gic::KvmGicV3Its; 13 #[cfg(target_arch = "aarch64")] 14 pub use crate::aarch64::{ 15 check_required_kvm_extensions, gic::Gicv3ItsState as GicState, is_system_register, VcpuInit, 16 VcpuKvmState, 17 }; 18 #[cfg(target_arch = "aarch64")] 19 use crate::arch::aarch64::gic::{Vgic, VgicConfig}; 20 use crate::cpu; 21 use crate::hypervisor; 22 use crate::vec_with_array_field; 23 use crate::vm::{self, InterruptSourceConfig, VmOps}; 24 use crate::HypervisorType; 25 #[cfg(target_arch = "aarch64")] 26 use crate::{arm64_core_reg_id, offset_of}; 27 use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd}; 28 use std::any::Any; 29 use std::collections::HashMap; 30 #[cfg(target_arch = "x86_64")] 31 use std::fs::File; 32 #[cfg(target_arch = "x86_64")] 33 use std::os::unix::io::AsRawFd; 34 #[cfg(feature = "tdx")] 35 use std::os::unix::io::RawFd; 36 use std::result; 37 #[cfg(target_arch = "x86_64")] 38 use std::sync::atomic::{AtomicBool, Ordering}; 39 use std::sync::Mutex; 40 use std::sync::{Arc, RwLock}; 41 use vmm_sys_util::eventfd::EventFd; 42 // x86_64 dependencies 43 #[cfg(target_arch = "x86_64")] 44 pub mod x86_64; 45 #[cfg(target_arch = "x86_64")] 46 use crate::arch::x86::{ 47 CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, XsaveState, NUM_IOAPIC_PINS, 48 }; 49 #[cfg(target_arch = "x86_64")] 50 use crate::ClockData; 51 use crate::StandardRegisters; 52 use crate::{ 53 CpuState, IoEventAddress, IrqRoutingEntry, MpState, UserMemoryRegion, 54 USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE, 55 }; 56 #[cfg(target_arch = "aarch64")] 57 use aarch64::{RegList, Register}; 58 #[cfg(target_arch = "x86_64")] 59 use kvm_bindings::{ 60 kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP, 61 KVM_GUESTDBG_USE_HW_BP, 62 }; 63 #[cfg(target_arch = "x86_64")] 64 use x86_64::check_required_kvm_extensions; 65 #[cfg(target_arch = "x86_64")] 66 pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState}; 67 // aarch64 dependencies 68 #[cfg(target_arch = "aarch64")] 69 pub mod aarch64; 70 pub use kvm_bindings; 71 pub use kvm_bindings::{ 72 kvm_clock_data, kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug, 73 kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_userspace_memory_region, 74 KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, 75 KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID, 76 }; 77 #[cfg(target_arch = "aarch64")] 78 use kvm_bindings::{ 79 kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64, 80 KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK, 81 KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK, 82 KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64, 83 }; 84 #[cfg(feature = "tdx")] 85 use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO}; 86 pub use kvm_ioctls; 87 pub use kvm_ioctls::{Cap, Kvm}; 88 #[cfg(target_arch = "aarch64")] 89 use std::mem; 90 use thiserror::Error; 91 use vfio_ioctls::VfioDeviceFd; 92 #[cfg(feature = "tdx")] 93 use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr}; 94 /// 95 /// Export generically-named wrappers of 
kvm-bindings for Unix-based platforms 96 /// 97 pub use { 98 kvm_bindings::kvm_create_device as CreateDevice, kvm_bindings::kvm_device_attr as DeviceAttr, 99 kvm_bindings::kvm_run, kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::VcpuExit, 100 }; 101 102 #[cfg(target_arch = "x86_64")] 103 const KVM_CAP_SGX_ATTRIBUTE: u32 = 196; 104 105 #[cfg(target_arch = "x86_64")] 106 use vmm_sys_util::ioctl_io_nr; 107 108 #[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))] 109 use vmm_sys_util::ioctl_ioc_nr; 110 111 #[cfg(target_arch = "x86_64")] 112 ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a); 113 114 #[cfg(feature = "tdx")] 115 const KVM_EXIT_TDX: u32 = 50; 116 #[cfg(feature = "tdx")] 117 const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002; 118 #[cfg(feature = "tdx")] 119 const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004; 120 #[cfg(feature = "tdx")] 121 const TDG_VP_VMCALL_SUCCESS: u64 = 0; 122 #[cfg(feature = "tdx")] 123 const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000; 124 125 #[cfg(feature = "tdx")] 126 ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong); 127 128 #[cfg(feature = "tdx")] 129 #[repr(u32)] 130 enum TdxCommand { 131 Capabilities = 0, 132 InitVm, 133 InitVcpu, 134 InitMemRegion, 135 Finalize, 136 } 137 138 #[cfg(feature = "tdx")] 139 pub enum TdxExitDetails { 140 GetQuote, 141 SetupEventNotifyInterrupt, 142 } 143 144 #[cfg(feature = "tdx")] 145 pub enum TdxExitStatus { 146 Success, 147 InvalidOperand, 148 } 149 150 #[cfg(feature = "tdx")] 151 const TDX_MAX_NR_CPUID_CONFIGS: usize = 6; 152 153 #[cfg(feature = "tdx")] 154 #[repr(C)] 155 #[derive(Debug, Default)] 156 pub struct TdxCpuidConfig { 157 pub leaf: u32, 158 pub sub_leaf: u32, 159 pub eax: u32, 160 pub ebx: u32, 161 pub ecx: u32, 162 pub edx: u32, 163 } 164 165 #[cfg(feature = "tdx")] 166 #[repr(C)] 167 #[derive(Debug, Default)] 168 pub struct TdxCapabilities { 169 pub attrs_fixed0: u64, 170 pub attrs_fixed1: u64, 171 pub xfam_fixed0: u64, 172 pub xfam_fixed1: u64, 173 pub nr_cpuid_configs: u32, 174 pub padding: u32, 175 pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS], 176 } 177 178 #[cfg(feature = "tdx")] 179 #[derive(Copy, Clone)] 180 pub struct KvmTdxExit { 181 pub type_: u32, 182 pub pad: u32, 183 pub u: KvmTdxExitU, 184 } 185 186 #[cfg(feature = "tdx")] 187 #[repr(C)] 188 #[derive(Copy, Clone)] 189 pub union KvmTdxExitU { 190 pub vmcall: KvmTdxExitVmcall, 191 } 192 193 #[cfg(feature = "tdx")] 194 #[repr(C)] 195 #[derive(Debug, Default, Copy, Clone, PartialEq)] 196 pub struct KvmTdxExitVmcall { 197 pub type_: u64, 198 pub subfunction: u64, 199 pub reg_mask: u64, 200 pub in_r12: u64, 201 pub in_r13: u64, 202 pub in_r14: u64, 203 pub in_r15: u64, 204 pub in_rbx: u64, 205 pub in_rdi: u64, 206 pub in_rsi: u64, 207 pub in_r8: u64, 208 pub in_r9: u64, 209 pub in_rdx: u64, 210 pub status_code: u64, 211 pub out_r11: u64, 212 pub out_r12: u64, 213 pub out_r13: u64, 214 pub out_r14: u64, 215 pub out_r15: u64, 216 pub out_rbx: u64, 217 pub out_rdi: u64, 218 pub out_rsi: u64, 219 pub out_r8: u64, 220 pub out_r9: u64, 221 pub out_rdx: u64, 222 } 223 224 impl From<kvm_userspace_memory_region> for UserMemoryRegion { 225 fn from(region: kvm_userspace_memory_region) -> Self { 226 let mut flags = USER_MEMORY_REGION_READ; 227 if region.flags & KVM_MEM_READONLY == 0 { 228 flags |= USER_MEMORY_REGION_WRITE; 229 } 230 if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 { 231 flags |= USER_MEMORY_REGION_LOG_DIRTY; 232 } 233 234 UserMemoryRegion { 235 slot: region.slot, 236 
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<UserMemoryRegion> for kvm_userspace_memory_region {
    fn from(region: UserMemoryRegion) -> Self {
        assert!(
            region.flags & USER_MEMORY_REGION_READ != 0,
            "KVM mapped memory is always readable"
        );

        let mut flags = 0;
        if region.flags & USER_MEMORY_REGION_WRITE == 0 {
            flags |= KVM_MEM_READONLY;
        }
        if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
            flags |= KVM_MEM_LOG_DIRTY_PAGES;
        }

        kvm_userspace_memory_region {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<kvm_mp_state> for MpState {
    fn from(s: kvm_mp_state) -> Self {
        MpState::Kvm(s)
    }
}

impl From<MpState> for kvm_mp_state {
    fn from(ms: MpState) -> Self {
        match ms {
            MpState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("MpState is not valid"),
        }
    }
}

impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: kvm_ioctls::IoEventAddress) -> Self {
        match a {
            kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuKvmState> for CpuState {
    fn from(s: VcpuKvmState) -> Self {
        CpuState::Kvm(s)
    }
}

impl From<CpuState> for VcpuKvmState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<kvm_clock_data> for ClockData {
    fn from(d: kvm_clock_data) -> Self {
        ClockData::Kvm(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for kvm_clock_data {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("ClockData is not valid"),
        }
    }
}

impl From<kvm_bindings::kvm_regs> for crate::StandardRegisters {
    fn from(s: kvm_bindings::kvm_regs) -> Self {
        crate::StandardRegisters::Kvm(s)
    }
}

impl From<crate::StandardRegisters> for kvm_bindings::kvm_regs {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
    fn from(s: kvm_irq_routing_entry) -> Self {
        IrqRoutingEntry::Kvm(s)
    }
}

impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

struct
KvmDirtyLogSlot { 375 slot: u32, 376 guest_phys_addr: u64, 377 memory_size: u64, 378 userspace_addr: u64, 379 } 380 381 /// Wrapper over KVM VM ioctls. 382 pub struct KvmVm { 383 fd: Arc<VmFd>, 384 #[cfg(target_arch = "x86_64")] 385 msrs: Vec<MsrEntry>, 386 dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>, 387 } 388 389 impl KvmVm { 390 /// 391 /// Creates an emulated device in the kernel. 392 /// 393 /// See the documentation for `KVM_CREATE_DEVICE`. 394 fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> { 395 let device_fd = self 396 .fd 397 .create_device(device) 398 .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?; 399 Ok(VfioDeviceFd::new_from_kvm(device_fd)) 400 } 401 /// Checks if a particular `Cap` is available. 402 pub fn check_extension(&self, c: Cap) -> bool { 403 self.fd.check_extension(c) 404 } 405 } 406 407 /// Implementation of Vm trait for KVM 408 /// 409 /// # Examples 410 /// 411 /// ``` 412 /// # use hypervisor::kvm::KvmHypervisor; 413 /// # use std::sync::Arc; 414 /// let kvm = KvmHypervisor::new().unwrap(); 415 /// let hypervisor = Arc::new(kvm); 416 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 417 /// ``` 418 impl vm::Vm for KvmVm { 419 #[cfg(target_arch = "x86_64")] 420 /// 421 /// Sets the address of the one-page region in the VM's address space. 422 /// 423 fn set_identity_map_address(&self, address: u64) -> vm::Result<()> { 424 self.fd 425 .set_identity_map_address(address) 426 .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into())) 427 } 428 429 #[cfg(target_arch = "x86_64")] 430 /// 431 /// Sets the address of the three-page region in the VM's address space. 432 /// 433 fn set_tss_address(&self, offset: usize) -> vm::Result<()> { 434 self.fd 435 .set_tss_address(offset) 436 .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into())) 437 } 438 439 /// 440 /// Creates an in-kernel interrupt controller. 441 /// 442 fn create_irq_chip(&self) -> vm::Result<()> { 443 self.fd 444 .create_irq_chip() 445 .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into())) 446 } 447 448 /// 449 /// Registers an event that will, when signaled, trigger the `gsi` IRQ. 450 /// 451 fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 452 self.fd 453 .register_irqfd(fd, gsi) 454 .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into())) 455 } 456 457 /// 458 /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ. 459 /// 460 fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 461 self.fd 462 .unregister_irqfd(fd, gsi) 463 .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into())) 464 } 465 466 /// 467 /// Creates a VcpuFd object from a vcpu RawFd. 468 /// 469 fn create_vcpu( 470 &self, 471 id: u8, 472 vm_ops: Option<Arc<dyn VmOps>>, 473 ) -> vm::Result<Arc<dyn cpu::Vcpu>> { 474 let fd = self 475 .fd 476 .create_vcpu(id as u64) 477 .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?; 478 let vcpu = KvmVcpu { 479 fd: Arc::new(Mutex::new(fd)), 480 #[cfg(target_arch = "x86_64")] 481 msrs: self.msrs.clone(), 482 vm_ops, 483 #[cfg(target_arch = "x86_64")] 484 hyperv_synic: AtomicBool::new(false), 485 }; 486 Ok(Arc::new(vcpu)) 487 } 488 489 #[cfg(target_arch = "aarch64")] 490 /// 491 /// Creates a virtual GIC device. 
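    /// A usage sketch (illustrative; `vgic_config` stands for a previously
    /// built `VgicConfig` describing the distributor/redistributor layout):
    ///
    /// ```ignore
    /// let vgic = vm.create_vgic(vgic_config)?;
    /// ```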
492 /// 493 fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> { 494 let gic_device = KvmGicV3Its::new(self, config) 495 .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?; 496 Ok(Arc::new(Mutex::new(gic_device))) 497 } 498 499 /// 500 /// Registers an event to be signaled whenever a certain address is written to. 501 /// 502 fn register_ioevent( 503 &self, 504 fd: &EventFd, 505 addr: &IoEventAddress, 506 datamatch: Option<vm::DataMatch>, 507 ) -> vm::Result<()> { 508 let addr = &kvm_ioctls::IoEventAddress::from(*addr); 509 if let Some(dm) = datamatch { 510 match dm { 511 vm::DataMatch::DataMatch32(kvm_dm32) => self 512 .fd 513 .register_ioevent(fd, addr, kvm_dm32) 514 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 515 vm::DataMatch::DataMatch64(kvm_dm64) => self 516 .fd 517 .register_ioevent(fd, addr, kvm_dm64) 518 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 519 } 520 } else { 521 self.fd 522 .register_ioevent(fd, addr, NoDatamatch) 523 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())) 524 } 525 } 526 527 /// 528 /// Unregisters an event from a certain address it has been previously registered to. 529 /// 530 fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> { 531 let addr = &kvm_ioctls::IoEventAddress::from(*addr); 532 self.fd 533 .unregister_ioevent(fd, addr, NoDatamatch) 534 .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into())) 535 } 536 537 /// 538 /// Constructs a routing entry 539 /// 540 fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry { 541 match &config { 542 InterruptSourceConfig::MsiIrq(cfg) => { 543 let mut kvm_route = kvm_irq_routing_entry { 544 gsi, 545 type_: KVM_IRQ_ROUTING_MSI, 546 ..Default::default() 547 }; 548 549 kvm_route.u.msi.address_lo = cfg.low_addr; 550 kvm_route.u.msi.address_hi = cfg.high_addr; 551 kvm_route.u.msi.data = cfg.data; 552 553 if self.check_extension(crate::kvm::Cap::MsiDevid) { 554 // On AArch64, there is limitation on the range of the 'devid', 555 // it cannot be greater than 65536 (the max of u16). 556 // 557 // BDF cannot be used directly, because 'segment' is in high 558 // 16 bits. The layout of the u32 BDF is: 559 // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --| 560 // | segment | bus | device | function | 561 // 562 // Now that we support 1 bus only in a segment, we can build a 563 // 'devid' by replacing the 'bus' bits with the low 8 bits of 564 // 'segment' data. 565 // This way we can resolve the range checking problem and give 566 // different `devid` to all the devices. Limitation is that at 567 // most 256 segments can be supported. 568 // 569 let modified_devid = (cfg.devid & 0x00ff_0000) >> 8 | cfg.devid & 0xff; 570 571 kvm_route.flags = KVM_MSI_VALID_DEVID; 572 kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid; 573 } 574 kvm_route.into() 575 } 576 InterruptSourceConfig::LegacyIrq(cfg) => { 577 let mut kvm_route = kvm_irq_routing_entry { 578 gsi, 579 type_: KVM_IRQ_ROUTING_IRQCHIP, 580 ..Default::default() 581 }; 582 kvm_route.u.irqchip.irqchip = cfg.irqchip; 583 kvm_route.u.irqchip.pin = cfg.pin; 584 585 kvm_route.into() 586 } 587 } 588 } 589 590 /// 591 /// Sets the GSI routing table entries, overwriting any previously set 592 /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl. 
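    /// A minimal usage sketch (not a doctest; the `MsiIrqSourceConfig` name and
    /// its field names are assumptions taken from the surrounding crate):
    ///
    /// ```ignore
    /// // Build one MSI route for GSI 32 and install the whole table at once.
    /// let entry = vm.make_routing_entry(
    ///     32,
    ///     &InterruptSourceConfig::MsiIrq(MsiIrqSourceConfig {
    ///         high_addr: 0,
    ///         low_addr: 0xfee0_0000,
    ///         data: 0,
    ///         devid: 0,
    ///     }),
    /// );
    /// vm.set_gsi_routing(&[entry])?;
    /// ```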
593 /// 594 fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> { 595 let mut irq_routing = 596 vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len()); 597 irq_routing[0].nr = entries.len() as u32; 598 irq_routing[0].flags = 0; 599 let entries: Vec<kvm_irq_routing_entry> = entries 600 .iter() 601 .map(|entry| match entry { 602 IrqRoutingEntry::Kvm(e) => *e, 603 #[allow(unreachable_patterns)] 604 _ => panic!("IrqRoutingEntry type is wrong"), 605 }) 606 .collect(); 607 608 // SAFETY: irq_routing initialized with entries.len() and now it is being turned into 609 // entries_slice with entries.len() again. It is guaranteed to be large enough to hold 610 // everything from entries. 611 unsafe { 612 let entries_slice: &mut [kvm_irq_routing_entry] = 613 irq_routing[0].entries.as_mut_slice(entries.len()); 614 entries_slice.copy_from_slice(&entries); 615 } 616 617 self.fd 618 .set_gsi_routing(&irq_routing[0]) 619 .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into())) 620 } 621 622 /// 623 /// Creates a memory region structure that can be used with {create/remove}_user_memory_region 624 /// 625 fn make_user_memory_region( 626 &self, 627 slot: u32, 628 guest_phys_addr: u64, 629 memory_size: u64, 630 userspace_addr: u64, 631 readonly: bool, 632 log_dirty_pages: bool, 633 ) -> UserMemoryRegion { 634 kvm_userspace_memory_region { 635 slot, 636 guest_phys_addr, 637 memory_size, 638 userspace_addr, 639 flags: if readonly { KVM_MEM_READONLY } else { 0 } 640 | if log_dirty_pages { 641 KVM_MEM_LOG_DIRTY_PAGES 642 } else { 643 0 644 }, 645 } 646 .into() 647 } 648 649 /// 650 /// Creates a guest physical memory region. 651 /// 652 fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> { 653 let mut region: kvm_userspace_memory_region = user_memory_region.into(); 654 655 if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 { 656 if (region.flags & KVM_MEM_READONLY) != 0 { 657 return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!( 658 "Error creating regions with both 'dirty-pages-log' and 'read-only'." 659 ))); 660 } 661 662 // Keep track of the regions that need dirty pages log 663 self.dirty_log_slots.write().unwrap().insert( 664 region.slot, 665 KvmDirtyLogSlot { 666 slot: region.slot, 667 guest_phys_addr: region.guest_phys_addr, 668 memory_size: region.memory_size, 669 userspace_addr: region.userspace_addr, 670 }, 671 ); 672 673 // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`. 674 // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`. 675 region.flags = 0; 676 } 677 678 // SAFETY: Safe because guest regions are guaranteed not to overlap. 679 unsafe { 680 self.fd 681 .set_user_memory_region(region) 682 .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into())) 683 } 684 } 685 686 /// 687 /// Removes a guest physical memory region. 688 /// 689 fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> { 690 let mut region: kvm_userspace_memory_region = user_memory_region.into(); 691 692 // Remove the corresponding entry from "self.dirty_log_slots" if needed 693 self.dirty_log_slots.write().unwrap().remove(®ion.slot); 694 695 // Setting the size to 0 means "remove" 696 region.memory_size = 0; 697 // SAFETY: Safe because guest regions are guaranteed not to overlap. 
698 unsafe { 699 self.fd 700 .set_user_memory_region(region) 701 .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into())) 702 } 703 } 704 705 /// 706 /// Returns the preferred CPU target type which can be emulated by KVM on underlying host. 707 /// 708 #[cfg(target_arch = "aarch64")] 709 fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> { 710 self.fd 711 .get_preferred_target(kvi) 712 .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into())) 713 } 714 715 #[cfg(target_arch = "x86_64")] 716 fn enable_split_irq(&self) -> vm::Result<()> { 717 // Create split irqchip 718 // Only the local APIC is emulated in kernel, both PICs and IOAPIC 719 // are not. 720 let mut cap = kvm_enable_cap { 721 cap: KVM_CAP_SPLIT_IRQCHIP, 722 ..Default::default() 723 }; 724 cap.args[0] = NUM_IOAPIC_PINS as u64; 725 self.fd 726 .enable_cap(&cap) 727 .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?; 728 Ok(()) 729 } 730 731 #[cfg(target_arch = "x86_64")] 732 fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> { 733 let mut cap = kvm_enable_cap { 734 cap: KVM_CAP_SGX_ATTRIBUTE, 735 ..Default::default() 736 }; 737 cap.args[0] = file.as_raw_fd() as u64; 738 self.fd 739 .enable_cap(&cap) 740 .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?; 741 Ok(()) 742 } 743 744 /// Retrieve guest clock. 745 #[cfg(target_arch = "x86_64")] 746 fn get_clock(&self) -> vm::Result<ClockData> { 747 Ok(self 748 .fd 749 .get_clock() 750 .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))? 751 .into()) 752 } 753 754 /// Set guest clock. 755 #[cfg(target_arch = "x86_64")] 756 fn set_clock(&self, data: &ClockData) -> vm::Result<()> { 757 let data = (*data).into(); 758 self.fd 759 .set_clock(&data) 760 .map_err(|e| vm::HypervisorVmError::SetClock(e.into())) 761 } 762 763 /// Create a device that is used for passthrough 764 fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> { 765 let mut vfio_dev = kvm_create_device { 766 type_: kvm_device_type_KVM_DEV_TYPE_VFIO, 767 fd: 0, 768 flags: 0, 769 }; 770 771 self.create_device(&mut vfio_dev) 772 .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into())) 773 } 774 775 /// 776 /// Start logging dirty pages 777 /// 778 fn start_dirty_log(&self) -> vm::Result<()> { 779 let dirty_log_slots = self.dirty_log_slots.read().unwrap(); 780 for (_, s) in dirty_log_slots.iter() { 781 let region = kvm_userspace_memory_region { 782 slot: s.slot, 783 guest_phys_addr: s.guest_phys_addr, 784 memory_size: s.memory_size, 785 userspace_addr: s.userspace_addr, 786 flags: KVM_MEM_LOG_DIRTY_PAGES, 787 }; 788 // SAFETY: Safe because guest regions are guaranteed not to overlap. 789 unsafe { 790 self.fd 791 .set_user_memory_region(region) 792 .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?; 793 } 794 } 795 796 Ok(()) 797 } 798 799 /// 800 /// Stop logging dirty pages 801 /// 802 fn stop_dirty_log(&self) -> vm::Result<()> { 803 let dirty_log_slots = self.dirty_log_slots.read().unwrap(); 804 for (_, s) in dirty_log_slots.iter() { 805 let region = kvm_userspace_memory_region { 806 slot: s.slot, 807 guest_phys_addr: s.guest_phys_addr, 808 memory_size: s.memory_size, 809 userspace_addr: s.userspace_addr, 810 flags: 0, 811 }; 812 // SAFETY: Safe because guest regions are guaranteed not to overlap. 
813 unsafe { 814 self.fd 815 .set_user_memory_region(region) 816 .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?; 817 } 818 } 819 820 Ok(()) 821 } 822 823 /// 824 /// Get dirty pages bitmap (one bit per page) 825 /// 826 fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> { 827 self.fd 828 .get_dirty_log(slot, memory_size as usize) 829 .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into())) 830 } 831 832 /// 833 /// Initialize TDX for this VM 834 /// 835 #[cfg(feature = "tdx")] 836 fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> { 837 const TDX_ATTR_SEPT_VE_DISABLE: usize = 28; 838 839 let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> = 840 cpuid.iter().map(|e| (*e).into()).collect(); 841 cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default()); 842 843 #[repr(C)] 844 struct TdxInitVm { 845 attributes: u64, 846 max_vcpus: u32, 847 padding: u32, 848 mrconfigid: [u64; 6], 849 mrowner: [u64; 6], 850 mrownerconfig: [u64; 6], 851 cpuid_nent: u32, 852 cpuid_padding: u32, 853 cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256], 854 } 855 let data = TdxInitVm { 856 attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE, 857 max_vcpus, 858 padding: 0, 859 mrconfigid: [0; 6], 860 mrowner: [0; 6], 861 mrownerconfig: [0; 6], 862 cpuid_nent: cpuid.len() as u32, 863 cpuid_padding: 0, 864 cpuid_entries: cpuid.as_slice().try_into().unwrap(), 865 }; 866 867 tdx_command( 868 &self.fd.as_raw_fd(), 869 TdxCommand::InitVm, 870 0, 871 &data as *const _ as u64, 872 ) 873 .map_err(vm::HypervisorVmError::InitializeTdx) 874 } 875 876 /// 877 /// Finalize the TDX setup for this VM 878 /// 879 #[cfg(feature = "tdx")] 880 fn tdx_finalize(&self) -> vm::Result<()> { 881 tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0) 882 .map_err(vm::HypervisorVmError::FinalizeTdx) 883 } 884 885 /// 886 /// Initialize memory regions for the TDX VM 887 /// 888 #[cfg(feature = "tdx")] 889 fn tdx_init_memory_region( 890 &self, 891 host_address: u64, 892 guest_address: u64, 893 size: u64, 894 measure: bool, 895 ) -> vm::Result<()> { 896 #[repr(C)] 897 struct TdxInitMemRegion { 898 host_address: u64, 899 guest_address: u64, 900 pages: u64, 901 } 902 let data = TdxInitMemRegion { 903 host_address, 904 guest_address, 905 pages: size / 4096, 906 }; 907 908 tdx_command( 909 &self.fd.as_raw_fd(), 910 TdxCommand::InitMemRegion, 911 u32::from(measure), 912 &data as *const _ as u64, 913 ) 914 .map_err(vm::HypervisorVmError::InitMemRegionTdx) 915 } 916 917 /// Downcast to the underlying KvmVm type 918 fn as_any(&self) -> &dyn Any { 919 self 920 } 921 } 922 923 #[cfg(feature = "tdx")] 924 fn tdx_command( 925 fd: &RawFd, 926 command: TdxCommand, 927 flags: u32, 928 data: u64, 929 ) -> std::result::Result<(), std::io::Error> { 930 #[repr(C)] 931 struct TdxIoctlCmd { 932 command: TdxCommand, 933 flags: u32, 934 data: u64, 935 error: u64, 936 unused: u64, 937 } 938 let cmd = TdxIoctlCmd { 939 command, 940 flags, 941 data, 942 error: 0, 943 unused: 0, 944 }; 945 // SAFETY: FFI call. All input parameters are valid. 946 let ret = unsafe { 947 ioctl_with_val( 948 fd, 949 KVM_MEMORY_ENCRYPT_OP(), 950 &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong, 951 ) 952 }; 953 954 if ret < 0 { 955 return Err(std::io::Error::last_os_error()); 956 } 957 Ok(()) 958 } 959 960 /// Wrapper over KVM system ioctls. 
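///
/// A typical construction sequence (illustrative; both calls are defined later
/// in this module):
///
/// ```ignore
/// use hypervisor::kvm::KvmHypervisor;
/// // Probe /dev/kvm first, then create the hypervisor handle.
/// if KvmHypervisor::is_available().unwrap() {
///     let hv = KvmHypervisor::new().unwrap();
/// }
/// ```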
961 pub struct KvmHypervisor { 962 kvm: Kvm, 963 } 964 965 impl KvmHypervisor { 966 #[cfg(target_arch = "x86_64")] 967 /// 968 /// Retrieve the list of MSRs supported by the hypervisor. 969 /// 970 fn get_msr_list(&self) -> hypervisor::Result<MsrList> { 971 self.kvm 972 .get_msr_index_list() 973 .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into())) 974 } 975 } 976 977 /// Enum for KVM related error 978 #[derive(Debug, Error)] 979 pub enum KvmError { 980 #[error("Capability missing: {0:?}")] 981 CapabilityMissing(Cap), 982 } 983 984 pub type KvmResult<T> = result::Result<T, KvmError>; 985 986 impl KvmHypervisor { 987 /// Create a hypervisor based on Kvm 988 #[allow(clippy::new_ret_no_self)] 989 pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> { 990 let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?; 991 let api_version = kvm_obj.get_api_version(); 992 993 if api_version != kvm_bindings::KVM_API_VERSION as i32 { 994 return Err(hypervisor::HypervisorError::IncompatibleApiVersion); 995 } 996 997 Ok(Arc::new(KvmHypervisor { kvm: kvm_obj })) 998 } 999 1000 /// Check if the hypervisor is available 1001 pub fn is_available() -> hypervisor::Result<bool> { 1002 match std::fs::metadata("/dev/kvm") { 1003 Ok(_) => Ok(true), 1004 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false), 1005 Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck( 1006 err.into(), 1007 )), 1008 } 1009 } 1010 } 1011 1012 /// Implementation of Hypervisor trait for KVM 1013 /// 1014 /// # Examples 1015 /// 1016 /// ``` 1017 /// # use hypervisor::kvm::KvmHypervisor; 1018 /// # use std::sync::Arc; 1019 /// let kvm = KvmHypervisor::new().unwrap(); 1020 /// let hypervisor = Arc::new(kvm); 1021 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 1022 /// ``` 1023 impl hypervisor::Hypervisor for KvmHypervisor { 1024 /// 1025 /// Returns the type of the hypervisor 1026 /// 1027 fn hypervisor_type(&self) -> HypervisorType { 1028 HypervisorType::Kvm 1029 } 1030 1031 /// Create a KVM vm object of a specific VM type and return the object as Vm trait object 1032 /// 1033 /// # Examples 1034 /// 1035 /// ``` 1036 /// # use hypervisor::kvm::KvmHypervisor; 1037 /// use hypervisor::kvm::KvmVm; 1038 /// let hypervisor = KvmHypervisor::new().unwrap(); 1039 /// let vm = hypervisor.create_vm_with_type(0).unwrap(); 1040 /// ``` 1041 fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> { 1042 let fd: VmFd; 1043 loop { 1044 match self.kvm.create_vm_with_type(vm_type) { 1045 Ok(res) => fd = res, 1046 Err(e) => { 1047 if e.errno() == libc::EINTR { 1048 // If the error returned is EINTR, which means the 1049 // ioctl has been interrupted, we have to retry as 1050 // this can't be considered as a regular error. 
1051 continue; 1052 } else { 1053 return Err(hypervisor::HypervisorError::VmCreate(e.into())); 1054 } 1055 } 1056 } 1057 break; 1058 } 1059 1060 let vm_fd = Arc::new(fd); 1061 1062 #[cfg(target_arch = "x86_64")] 1063 { 1064 let msr_list = self.get_msr_list()?; 1065 let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize; 1066 let mut msrs: Vec<MsrEntry> = vec![ 1067 MsrEntry { 1068 ..Default::default() 1069 }; 1070 num_msrs 1071 ]; 1072 let indices = msr_list.as_slice(); 1073 for (pos, index) in indices.iter().enumerate() { 1074 msrs[pos].index = *index; 1075 } 1076 1077 Ok(Arc::new(KvmVm { 1078 fd: vm_fd, 1079 msrs, 1080 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())), 1081 })) 1082 } 1083 1084 #[cfg(target_arch = "aarch64")] 1085 { 1086 Ok(Arc::new(KvmVm { 1087 fd: vm_fd, 1088 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())), 1089 })) 1090 } 1091 } 1092 1093 /// Create a KVM vm object and return the object as Vm trait object 1094 /// 1095 /// # Examples 1096 /// 1097 /// ``` 1098 /// # use hypervisor::kvm::KvmHypervisor; 1099 /// use hypervisor::kvm::KvmVm; 1100 /// let hypervisor = KvmHypervisor::new().unwrap(); 1101 /// let vm = hypervisor.create_vm().unwrap(); 1102 /// ``` 1103 fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> { 1104 #[allow(unused_mut)] 1105 let mut vm_type: u64 = 0; // Create with default platform type 1106 1107 // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA 1108 // size from the host and use that when creating the VM, which may 1109 // avoid unnecessary VM creation failures. 1110 #[cfg(target_arch = "aarch64")] 1111 if self.kvm.check_extension(Cap::ArmVmIPASize) { 1112 vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap(); 1113 } 1114 1115 self.create_vm_with_type(vm_type) 1116 } 1117 1118 fn check_required_extensions(&self) -> hypervisor::Result<()> { 1119 check_required_kvm_extensions(&self.kvm) 1120 .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into())) 1121 } 1122 1123 #[cfg(target_arch = "x86_64")] 1124 /// 1125 /// X86 specific call to get the system supported CPUID values. 1126 /// 1127 fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> { 1128 let kvm_cpuid = self 1129 .kvm 1130 .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES) 1131 .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?; 1132 1133 let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect(); 1134 1135 Ok(v) 1136 } 1137 1138 #[cfg(target_arch = "aarch64")] 1139 /// 1140 /// Retrieve AArch64 host maximum IPA size supported by KVM. 
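    /// The returned value is the host IPA width in bits; when
    /// `Cap::ArmVmIPASize` is available, `create_vm` above passes it back to
    /// KVM as the machine type (illustrative sketch):
    ///
    /// ```ignore
    /// let ipa_bits = hypervisor.get_host_ipa_limit();
    /// ```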
1141 /// 1142 fn get_host_ipa_limit(&self) -> i32 { 1143 self.kvm.get_host_ipa_limit() 1144 } 1145 1146 /// 1147 /// Retrieve TDX capabilities 1148 /// 1149 #[cfg(feature = "tdx")] 1150 fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> { 1151 let data = TdxCapabilities { 1152 nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32, 1153 ..Default::default() 1154 }; 1155 1156 tdx_command( 1157 &self.kvm.as_raw_fd(), 1158 TdxCommand::Capabilities, 1159 0, 1160 &data as *const _ as u64, 1161 ) 1162 .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?; 1163 1164 Ok(data) 1165 } 1166 1167 /// 1168 /// Get the number of supported hardware breakpoints 1169 /// 1170 fn get_guest_debug_hw_bps(&self) -> usize { 1171 #[cfg(target_arch = "x86_64")] 1172 { 1173 4 1174 } 1175 #[cfg(target_arch = "aarch64")] 1176 { 1177 self.kvm.get_guest_debug_hw_bps() as usize 1178 } 1179 } 1180 1181 /// Get maximum number of vCPUs 1182 fn get_max_vcpus(&self) -> u32 { 1183 self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32 1184 } 1185 } 1186 1187 /// Vcpu struct for KVM 1188 pub struct KvmVcpu { 1189 fd: Arc<Mutex<VcpuFd>>, 1190 #[cfg(target_arch = "x86_64")] 1191 msrs: Vec<MsrEntry>, 1192 vm_ops: Option<Arc<dyn vm::VmOps>>, 1193 #[cfg(target_arch = "x86_64")] 1194 hyperv_synic: AtomicBool, 1195 } 1196 1197 /// Implementation of Vcpu trait for KVM 1198 /// 1199 /// # Examples 1200 /// 1201 /// ``` 1202 /// # use hypervisor::kvm::KvmHypervisor; 1203 /// # use std::sync::Arc; 1204 /// let kvm = KvmHypervisor::new().unwrap(); 1205 /// let hypervisor = Arc::new(kvm); 1206 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 1207 /// let vcpu = vm.create_vcpu(0, None).unwrap(); 1208 /// ``` 1209 impl cpu::Vcpu for KvmVcpu { 1210 /// 1211 /// Returns StandardRegisters with default value set 1212 /// 1213 #[cfg(target_arch = "x86_64")] 1214 fn create_standard_regs(&self) -> StandardRegisters { 1215 kvm_bindings::kvm_regs::default().into() 1216 } 1217 #[cfg(target_arch = "x86_64")] 1218 /// 1219 /// Returns the vCPU general purpose registers. 1220 /// 1221 fn get_regs(&self) -> cpu::Result<StandardRegisters> { 1222 Ok(self 1223 .fd 1224 .lock() 1225 .unwrap() 1226 .get_regs() 1227 .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))? 1228 .into()) 1229 } 1230 1231 /// 1232 /// Returns the vCPU general purpose registers. 1233 /// The `KVM_GET_REGS` ioctl is not available on AArch64, `KVM_GET_ONE_REG` 1234 /// is used to get registers one by one. 1235 /// 1236 #[cfg(target_arch = "aarch64")] 1237 fn get_regs(&self) -> cpu::Result<StandardRegisters> { 1238 let mut state = kvm_regs::default(); 1239 let mut off = offset_of!(user_pt_regs, regs); 1240 // There are 31 user_pt_regs: 1241 // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72 1242 // These actually are the general-purpose registers of the Armv8-a 1243 // architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register). 1244 for i in 0..31 { 1245 let mut bytes = [0_u8; 8]; 1246 self.fd 1247 .lock() 1248 .unwrap() 1249 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1250 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 1251 state.regs.regs[i] = u64::from_le_bytes(bytes); 1252 off += std::mem::size_of::<u64>(); 1253 } 1254 1255 // We are now entering the "Other register" section of the ARMv8-a architecture. 1256 // First one, stack pointer. 
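        // Each of these one-off registers is read with KVM_GET_ONE_REG:
        // `arm64_core_reg_id!` packs the access size (U32/U64/U128) and the
        // byte offset inside `kvm_regs` into the register id, and the value
        // comes back as little-endian bytes.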
1257 let off = offset_of!(user_pt_regs, sp); 1258 let mut bytes = [0_u8; 8]; 1259 self.fd 1260 .lock() 1261 .unwrap() 1262 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1263 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 1264 state.regs.sp = u64::from_le_bytes(bytes); 1265 1266 // Second one, the program counter. 1267 let off = offset_of!(user_pt_regs, pc); 1268 let mut bytes = [0_u8; 8]; 1269 self.fd 1270 .lock() 1271 .unwrap() 1272 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1273 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 1274 state.regs.pc = u64::from_le_bytes(bytes); 1275 1276 // Next is the processor state. 1277 let off = offset_of!(user_pt_regs, pstate); 1278 let mut bytes = [0_u8; 8]; 1279 self.fd 1280 .lock() 1281 .unwrap() 1282 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1283 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 1284 state.regs.pstate = u64::from_le_bytes(bytes); 1285 1286 // The stack pointer associated with EL1 1287 let off = offset_of!(kvm_regs, sp_el1); 1288 let mut bytes = [0_u8; 8]; 1289 self.fd 1290 .lock() 1291 .unwrap() 1292 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1293 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 1294 state.sp_el1 = u64::from_le_bytes(bytes); 1295 1296 // Exception Link Register for EL1, when taking an exception to EL1, this register 1297 // holds the address to which to return afterwards. 1298 let off = offset_of!(kvm_regs, elr_el1); 1299 let mut bytes = [0_u8; 8]; 1300 self.fd 1301 .lock() 1302 .unwrap() 1303 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1304 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 1305 state.elr_el1 = u64::from_le_bytes(bytes); 1306 1307 // Saved Program Status Registers, there are 5 of them used in the kernel. 
1308 let mut off = offset_of!(kvm_regs, spsr); 1309 for i in 0..KVM_NR_SPSR as usize { 1310 let mut bytes = [0_u8; 8]; 1311 self.fd 1312 .lock() 1313 .unwrap() 1314 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1315 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 1316 state.spsr[i] = u64::from_le_bytes(bytes); 1317 off += std::mem::size_of::<u64>(); 1318 } 1319 1320 // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel: 1321 // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53 1322 let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs); 1323 for i in 0..32 { 1324 let mut bytes = [0_u8; 16]; 1325 self.fd 1326 .lock() 1327 .unwrap() 1328 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes) 1329 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 1330 state.fp_regs.vregs[i] = u128::from_le_bytes(bytes); 1331 off += mem::size_of::<u128>(); 1332 } 1333 1334 // Floating-point Status Register 1335 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr); 1336 let mut bytes = [0_u8; 4]; 1337 self.fd 1338 .lock() 1339 .unwrap() 1340 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes) 1341 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 1342 state.fp_regs.fpsr = u32::from_le_bytes(bytes); 1343 1344 // Floating-point Control Register 1345 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr); 1346 let mut bytes = [0_u8; 4]; 1347 self.fd 1348 .lock() 1349 .unwrap() 1350 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes) 1351 .map_err(|e| cpu::HypervisorCpuError::GetCoreRegister(e.into()))?; 1352 state.fp_regs.fpcr = u32::from_le_bytes(bytes); 1353 Ok(state.into()) 1354 } 1355 1356 #[cfg(target_arch = "x86_64")] 1357 /// 1358 /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl. 1359 /// 1360 fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> { 1361 let regs = (*regs).into(); 1362 self.fd 1363 .lock() 1364 .unwrap() 1365 .set_regs(®s) 1366 .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into())) 1367 } 1368 1369 /// 1370 /// Sets the vCPU general purpose registers. 1371 /// The `KVM_SET_REGS` ioctl is not available on AArch64, `KVM_SET_ONE_REG` 1372 /// is used to set registers one by one. 1373 /// 1374 #[cfg(target_arch = "aarch64")] 1375 fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> { 1376 // The function follows the exact identical order from `state`. Look there 1377 // for some additional info on registers. 
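        // Each write below mirrors the corresponding read in `get_regs`,
        // using KVM_SET_ONE_REG with the same core-register ids and
        // little-endian byte encoding.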
1378 let kvm_regs_state: kvm_regs = (*state).into(); 1379 let mut off = offset_of!(user_pt_regs, regs); 1380 for i in 0..31 { 1381 self.fd 1382 .lock() 1383 .unwrap() 1384 .set_one_reg( 1385 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1386 &kvm_regs_state.regs.regs[i].to_le_bytes(), 1387 ) 1388 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 1389 off += std::mem::size_of::<u64>(); 1390 } 1391 1392 let off = offset_of!(user_pt_regs, sp); 1393 self.fd 1394 .lock() 1395 .unwrap() 1396 .set_one_reg( 1397 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1398 &kvm_regs_state.regs.sp.to_le_bytes(), 1399 ) 1400 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 1401 1402 let off = offset_of!(user_pt_regs, pc); 1403 self.fd 1404 .lock() 1405 .unwrap() 1406 .set_one_reg( 1407 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1408 &kvm_regs_state.regs.pc.to_le_bytes(), 1409 ) 1410 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 1411 1412 let off = offset_of!(user_pt_regs, pstate); 1413 self.fd 1414 .lock() 1415 .unwrap() 1416 .set_one_reg( 1417 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1418 &kvm_regs_state.regs.pstate.to_le_bytes(), 1419 ) 1420 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 1421 1422 let off = offset_of!(kvm_regs, sp_el1); 1423 self.fd 1424 .lock() 1425 .unwrap() 1426 .set_one_reg( 1427 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1428 &kvm_regs_state.sp_el1.to_le_bytes(), 1429 ) 1430 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 1431 1432 let off = offset_of!(kvm_regs, elr_el1); 1433 self.fd 1434 .lock() 1435 .unwrap() 1436 .set_one_reg( 1437 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1438 &kvm_regs_state.elr_el1.to_le_bytes(), 1439 ) 1440 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 1441 1442 let mut off = offset_of!(kvm_regs, spsr); 1443 for i in 0..KVM_NR_SPSR as usize { 1444 self.fd 1445 .lock() 1446 .unwrap() 1447 .set_one_reg( 1448 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1449 &kvm_regs_state.spsr[i].to_le_bytes(), 1450 ) 1451 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 1452 off += std::mem::size_of::<u64>(); 1453 } 1454 1455 let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs); 1456 for i in 0..32 { 1457 self.fd 1458 .lock() 1459 .unwrap() 1460 .set_one_reg( 1461 arm64_core_reg_id!(KVM_REG_SIZE_U128, off), 1462 &kvm_regs_state.fp_regs.vregs[i].to_le_bytes(), 1463 ) 1464 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 1465 off += mem::size_of::<u128>(); 1466 } 1467 1468 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr); 1469 self.fd 1470 .lock() 1471 .unwrap() 1472 .set_one_reg( 1473 arm64_core_reg_id!(KVM_REG_SIZE_U32, off), 1474 &kvm_regs_state.fp_regs.fpsr.to_le_bytes(), 1475 ) 1476 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 1477 1478 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr); 1479 self.fd 1480 .lock() 1481 .unwrap() 1482 .set_one_reg( 1483 arm64_core_reg_id!(KVM_REG_SIZE_U32, off), 1484 &kvm_regs_state.fp_regs.fpcr.to_le_bytes(), 1485 ) 1486 .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?; 1487 Ok(()) 1488 } 1489 1490 #[cfg(target_arch = "x86_64")] 1491 /// 1492 /// Returns the vCPU special registers. 
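    /// A common read-modify-write pattern (illustrative; assumes the x86
    /// `SpecialRegisters` layout from `crate::arch::x86`):
    ///
    /// ```ignore
    /// let mut sregs = vcpu.get_sregs()?;
    /// sregs.cr4 |= 1 << 5; // e.g. enable CR4.PAE before entering long mode
    /// vcpu.set_sregs(&sregs)?;
    /// ```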
1493 /// 1494 fn get_sregs(&self) -> cpu::Result<SpecialRegisters> { 1495 Ok(self 1496 .fd 1497 .lock() 1498 .unwrap() 1499 .get_sregs() 1500 .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))? 1501 .into()) 1502 } 1503 1504 #[cfg(target_arch = "x86_64")] 1505 /// 1506 /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl. 1507 /// 1508 fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> { 1509 let sregs = (*sregs).into(); 1510 self.fd 1511 .lock() 1512 .unwrap() 1513 .set_sregs(&sregs) 1514 .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into())) 1515 } 1516 1517 #[cfg(target_arch = "x86_64")] 1518 /// 1519 /// Returns the floating point state (FPU) from the vCPU. 1520 /// 1521 fn get_fpu(&self) -> cpu::Result<FpuState> { 1522 Ok(self 1523 .fd 1524 .lock() 1525 .unwrap() 1526 .get_fpu() 1527 .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))? 1528 .into()) 1529 } 1530 1531 #[cfg(target_arch = "x86_64")] 1532 /// 1533 /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl. 1534 /// 1535 fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> { 1536 let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into(); 1537 self.fd 1538 .lock() 1539 .unwrap() 1540 .set_fpu(&fpu) 1541 .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into())) 1542 } 1543 1544 #[cfg(target_arch = "x86_64")] 1545 /// 1546 /// X86 specific call to setup the CPUID registers. 1547 /// 1548 fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> { 1549 let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> = 1550 cpuid.iter().map(|e| (*e).into()).collect(); 1551 let kvm_cpuid = <CpuId>::from_entries(&cpuid) 1552 .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?; 1553 1554 self.fd 1555 .lock() 1556 .unwrap() 1557 .set_cpuid2(&kvm_cpuid) 1558 .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into())) 1559 } 1560 1561 #[cfg(target_arch = "x86_64")] 1562 /// 1563 /// X86 specific call to enable HyperV SynIC 1564 /// 1565 fn enable_hyperv_synic(&self) -> cpu::Result<()> { 1566 // Update the information about Hyper-V SynIC being enabled and 1567 // emulated as it will influence later which MSRs should be saved. 1568 self.hyperv_synic.store(true, Ordering::Release); 1569 1570 let cap = kvm_enable_cap { 1571 cap: KVM_CAP_HYPERV_SYNIC, 1572 ..Default::default() 1573 }; 1574 self.fd 1575 .lock() 1576 .unwrap() 1577 .enable_cap(&cap) 1578 .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into())) 1579 } 1580 1581 /// 1582 /// X86 specific call to retrieve the CPUID registers. 1583 /// 1584 #[cfg(target_arch = "x86_64")] 1585 fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> { 1586 let kvm_cpuid = self 1587 .fd 1588 .lock() 1589 .unwrap() 1590 .get_cpuid2(num_entries) 1591 .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?; 1592 1593 let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect(); 1594 1595 Ok(v) 1596 } 1597 1598 #[cfg(target_arch = "x86_64")] 1599 /// 1600 /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 1601 /// 1602 fn get_lapic(&self) -> cpu::Result<LapicState> { 1603 Ok(self 1604 .fd 1605 .lock() 1606 .unwrap() 1607 .get_lapic() 1608 .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))? 1609 .into()) 1610 } 1611 1612 #[cfg(target_arch = "x86_64")] 1613 /// 1614 /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 
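    /// Typically paired with `get_lapic` when snapshotting and restoring a
    /// vCPU (illustrative sketch):
    ///
    /// ```ignore
    /// let lapic = vcpu.get_lapic()?;
    /// vcpu.set_lapic(&lapic)?;
    /// ```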
1615 /// 1616 fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> { 1617 let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into(); 1618 self.fd 1619 .lock() 1620 .unwrap() 1621 .set_lapic(&klapic) 1622 .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into())) 1623 } 1624 1625 #[cfg(target_arch = "x86_64")] 1626 /// 1627 /// Returns the model-specific registers (MSR) for this vCPU. 1628 /// 1629 fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> { 1630 let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect(); 1631 let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap(); 1632 let succ = self 1633 .fd 1634 .lock() 1635 .unwrap() 1636 .get_msrs(&mut kvm_msrs) 1637 .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?; 1638 1639 msrs[..succ].copy_from_slice( 1640 &kvm_msrs.as_slice()[..succ] 1641 .iter() 1642 .map(|e| (*e).into()) 1643 .collect::<Vec<MsrEntry>>(), 1644 ); 1645 1646 Ok(succ) 1647 } 1648 1649 #[cfg(target_arch = "x86_64")] 1650 /// 1651 /// Setup the model-specific registers (MSR) for this vCPU. 1652 /// Returns the number of MSR entries actually written. 1653 /// 1654 fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> { 1655 let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect(); 1656 let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap(); 1657 self.fd 1658 .lock() 1659 .unwrap() 1660 .set_msrs(&kvm_msrs) 1661 .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into())) 1662 } 1663 1664 /// 1665 /// Returns the vcpu's current "multiprocessing state". 1666 /// 1667 fn get_mp_state(&self) -> cpu::Result<MpState> { 1668 Ok(self 1669 .fd 1670 .lock() 1671 .unwrap() 1672 .get_mp_state() 1673 .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))? 1674 .into()) 1675 } 1676 1677 /// 1678 /// Sets the vcpu's current "multiprocessing state". 1679 /// 1680 fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> { 1681 self.fd 1682 .lock() 1683 .unwrap() 1684 .set_mp_state(mp_state.into()) 1685 .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into())) 1686 } 1687 1688 #[cfg(target_arch = "x86_64")] 1689 /// 1690 /// Translates guest virtual address to guest physical address using the `KVM_TRANSLATE` ioctl. 1691 /// 1692 fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> { 1693 let tr = self 1694 .fd 1695 .lock() 1696 .unwrap() 1697 .translate_gva(gva) 1698 .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?; 1699 // tr.valid is set if the GVA is mapped to valid GPA. 1700 match tr.valid { 1701 0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!( 1702 "Invalid GVA: {:#x}", 1703 gva 1704 ))), 1705 _ => Ok((tr.physical_address, 0)), 1706 } 1707 } 1708 1709 /// 1710 /// Triggers the running of the current virtual CPU returning an exit reason. 
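    /// A minimal dispatch loop (illustrative; only exit reasons produced in
    /// this module are shown):
    ///
    /// ```ignore
    /// loop {
    ///     match vcpu.run()? {
    ///         cpu::VmExit::Ignore => continue, // PIO/MMIO already handled via VmOps
    ///         cpu::VmExit::Reset => break,     // guest asked for a reset
    ///         other => todo!("handle {:?}", other),
    ///     }
    /// }
    /// ```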
1711 /// 1712 fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> { 1713 match self.fd.lock().unwrap().run() { 1714 Ok(run) => match run { 1715 #[cfg(target_arch = "x86_64")] 1716 VcpuExit::IoIn(addr, data) => { 1717 if let Some(vm_ops) = &self.vm_ops { 1718 return vm_ops 1719 .pio_read(addr.into(), data) 1720 .map(|_| cpu::VmExit::Ignore) 1721 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1722 } 1723 1724 Ok(cpu::VmExit::Ignore) 1725 } 1726 #[cfg(target_arch = "x86_64")] 1727 VcpuExit::IoOut(addr, data) => { 1728 if let Some(vm_ops) = &self.vm_ops { 1729 return vm_ops 1730 .pio_write(addr.into(), data) 1731 .map(|_| cpu::VmExit::Ignore) 1732 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1733 } 1734 1735 Ok(cpu::VmExit::Ignore) 1736 } 1737 #[cfg(target_arch = "x86_64")] 1738 VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)), 1739 #[cfg(target_arch = "x86_64")] 1740 VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset), 1741 1742 #[cfg(target_arch = "aarch64")] 1743 VcpuExit::SystemEvent(event_type, flags) => { 1744 use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN}; 1745 // On Aarch64, when the VM is shutdown, run() returns 1746 // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN 1747 if event_type == KVM_SYSTEM_EVENT_RESET { 1748 Ok(cpu::VmExit::Reset) 1749 } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN { 1750 Ok(cpu::VmExit::Shutdown) 1751 } else { 1752 Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1753 "Unexpected system event with type 0x{:x}, flags 0x{:x?}", 1754 event_type, 1755 flags 1756 ))) 1757 } 1758 } 1759 1760 VcpuExit::MmioRead(addr, data) => { 1761 if let Some(vm_ops) = &self.vm_ops { 1762 return vm_ops 1763 .mmio_read(addr, data) 1764 .map(|_| cpu::VmExit::Ignore) 1765 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1766 } 1767 1768 Ok(cpu::VmExit::Ignore) 1769 } 1770 VcpuExit::MmioWrite(addr, data) => { 1771 if let Some(vm_ops) = &self.vm_ops { 1772 return vm_ops 1773 .mmio_write(addr, data) 1774 .map(|_| cpu::VmExit::Ignore) 1775 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1776 } 1777 1778 Ok(cpu::VmExit::Ignore) 1779 } 1780 VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv), 1781 #[cfg(feature = "tdx")] 1782 VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx), 1783 VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug), 1784 1785 r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1786 "Unexpected exit reason on vcpu run: {:?}", 1787 r 1788 ))), 1789 }, 1790 1791 Err(ref e) => match e.errno() { 1792 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore), 1793 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1794 "VCPU error {:?}", 1795 e 1796 ))), 1797 }, 1798 } 1799 } 1800 1801 #[cfg(target_arch = "x86_64")] 1802 /// 1803 /// Let the guest know that it has been paused, which prevents from 1804 /// potential soft lockups when being resumed. 1805 /// 1806 fn notify_guest_clock_paused(&self) -> cpu::Result<()> { 1807 if let Err(e) = self.fd.lock().unwrap().kvmclock_ctrl() { 1808 // Linux kernel returns -EINVAL if the PV clock isn't yet initialised 1809 // which could be because we're still in firmware or the guest doesn't 1810 // use KVM clock. 1811 if e.errno() != libc::EINVAL { 1812 return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into())); 1813 } 1814 } 1815 1816 Ok(()) 1817 } 1818 1819 /// 1820 /// Sets debug registers to set hardware breakpoints and/or enable single step. 
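    /// For example, to plant one hardware breakpoint at a guest address and
    /// enable single-stepping (illustrative sketch):
    ///
    /// ```ignore
    /// vcpu.set_guest_debug(&[vm_memory::GuestAddress(0x10_0000)], true)?;
    /// ```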
1821 /// 1822 fn set_guest_debug( 1823 &self, 1824 addrs: &[vm_memory::GuestAddress], 1825 singlestep: bool, 1826 ) -> cpu::Result<()> { 1827 let mut dbg = kvm_guest_debug { 1828 #[cfg(target_arch = "x86_64")] 1829 control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP, 1830 #[cfg(target_arch = "aarch64")] 1831 control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW, 1832 ..Default::default() 1833 }; 1834 if singlestep { 1835 dbg.control |= KVM_GUESTDBG_SINGLESTEP; 1836 } 1837 1838 // Set the debug registers. 1839 // Here we assume that the number of addresses do not exceed what 1840 // `Hypervisor::get_guest_debug_hw_bps()` specifies. 1841 #[cfg(target_arch = "x86_64")] 1842 { 1843 // Set bits 9 and 10. 1844 // bit 9: GE (global exact breakpoint enable) flag. 1845 // bit 10: always 1. 1846 dbg.arch.debugreg[7] = 0x0600; 1847 1848 for (i, addr) in addrs.iter().enumerate() { 1849 dbg.arch.debugreg[i] = addr.0; 1850 // Set global breakpoint enable flag 1851 dbg.arch.debugreg[7] |= 2 << (i * 2); 1852 } 1853 } 1854 #[cfg(target_arch = "aarch64")] 1855 { 1856 for (i, addr) in addrs.iter().enumerate() { 1857 // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2): 1858 // bit 0: 1 (Enabled) 1859 // bit 1~2: 0b11 (PMC = EL1/EL0) 1860 // bit 5~8: 0b1111 (BAS = AArch64) 1861 // others: 0 1862 dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64; 1863 // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3): 1864 // bit 2~52: VA[2:52] 1865 dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0; 1866 } 1867 } 1868 self.fd 1869 .lock() 1870 .unwrap() 1871 .set_guest_debug(&dbg) 1872 .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into())) 1873 } 1874 1875 #[cfg(target_arch = "aarch64")] 1876 fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> { 1877 self.fd 1878 .lock() 1879 .unwrap() 1880 .vcpu_init(kvi) 1881 .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into())) 1882 } 1883 1884 #[cfg(target_arch = "aarch64")] 1885 fn vcpu_finalize(&self, feature: i32) -> cpu::Result<()> { 1886 self.fd 1887 .lock() 1888 .unwrap() 1889 .vcpu_finalize(&feature) 1890 .map_err(|e| cpu::HypervisorCpuError::VcpuFinalize(e.into())) 1891 } 1892 1893 /// 1894 /// Gets a list of the guest registers that are supported for the 1895 /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. 1896 /// 1897 #[cfg(target_arch = "aarch64")] 1898 fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> { 1899 self.fd 1900 .lock() 1901 .unwrap() 1902 .get_reg_list(reg_list) 1903 .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into())) 1904 } 1905 1906 /// 1907 /// Gets the value of a system register 1908 /// 1909 #[cfg(target_arch = "aarch64")] 1910 fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> { 1911 // 1912 // Arm Architecture Reference Manual defines the encoding of 1913 // AArch64 system registers, see 1914 // https://developer.arm.com/documentation/ddi0487 (chapter D12). 1915 // While KVM defines another ID for each AArch64 system register, 1916 // which is used in calling `KVM_G/SET_ONE_REG` to access a system 1917 // register of a guest. 1918 // A mapping exists between the Arm standard encoding and the KVM ID. 1919 // This function takes the standard u32 ID as input parameter, converts 1920 // it to the corresponding KVM ID, and call `KVM_GET_ONE_REG` API to 1921 // get the value of the system parameter. 
        //
        let id: u64 = KVM_REG_ARM64
            | KVM_REG_SIZE_U64
            | KVM_REG_ARM64_SYSREG as u64
            | ((((sys_reg) >> 5)
                & (KVM_REG_ARM64_SYSREG_OP0_MASK
                    | KVM_REG_ARM64_SYSREG_OP1_MASK
                    | KVM_REG_ARM64_SYSREG_CRN_MASK
                    | KVM_REG_ARM64_SYSREG_CRM_MASK
                    | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(id, &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
        Ok(u64::from_le_bytes(bytes))
    }

    ///
    /// Configure core registers for a given CPU.
    ///
    #[cfg(target_arch = "aarch64")]
    fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
        #[allow(non_upper_case_globals)]
        // PSR (Processor State Register) bits.
        // Taken from arch/arm64/include/uapi/asm/ptrace.h.
        const PSR_MODE_EL1h: u64 = 0x0000_0005;
        const PSR_F_BIT: u64 = 0x0000_0040;
        const PSR_I_BIT: u64 = 0x0000_0080;
        const PSR_A_BIT: u64 = 0x0000_0100;
        const PSR_D_BIT: u64 = 0x0000_0200;
        // Taken from arch/arm64/kvm/inject_fault.c.
        const PSTATE_FAULT_BITS_64: u64 =
            PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;

        let kreg_off = offset_of!(kvm_regs, regs);

        // Get the register index of the PSTATE (Processor State) register.
        let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
                &PSTATE_FAULT_BITS_64.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

        // Other vCPUs are powered off initially, awaiting a PSCI wakeup.
        if cpu_id == 0 {
            // Set the PC (Program Counter) to the current program address (kernel address).
            let pc = offset_of!(user_pt_regs, pc) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
                    &boot_ip.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;

            // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
            // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
            // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
            // We are choosing to place it at the end of DRAM. See `get_fdt_addr`.
            let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
                    &fdt_start.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetCoreRegister(e.into()))?;
        }
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state.
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before most everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change the state of the LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save, as it looks like it
    /// may be affected by internal state modifications of the GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a prepopulated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?.into();
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fall back on a slower method by getting MSRs
        // by chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
                0x400000b5, 0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr.into());
            }
        }

        let expected_num_msrs = msr_entries.len();
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries[faulty_msr_index].index
                );

                // Skip the first bad MSR.
                let start_pos = faulty_msr_index + 1;

                let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;
        let tsc_khz = self.tsc_khz()?;

        Ok(VcpuKvmState {
            cpuid,
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
            tsc_khz,
        }
        .into())
    }

    ///
    /// Get the current AArch64 CPU state.
    ///
    #[cfg(target_arch = "aarch64")]
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = VcpuKvmState {
            mp_state: self.get_mp_state()?.into(),
            ..Default::default()
        };
        // Get core registers.
        state.core_regs = self.get_regs()?.into();

        // Get system registers.
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For ArmV8 there are around 500 registers.
        let mut sys_regs: Vec<Register> = Vec::new();
        let mut reg_list = RegList::new(500).unwrap();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain: core registers and system
        // registers.
        // The register list contains the number of registers and their ids. We
        // will need to call KVM_GET_ONE_REG on each id in order to save
        // all of them. We carve out from the list the core registers, which are
        // represented in the kernel by the kvm_regs structure and for which we
        // can calculate the id based on the offset in the structure.
        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we simply call KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(*index, &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
            sys_regs.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: u64::from_le_bytes(bytes),
            });
        }

        state.sys_regs = sys_regs;

        Ok(state.into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state.
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully when the LAPIC is correctly configured.
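    ///
    /// MSRs are restored with the same chunked fallback used when saving:
    /// if SET_MSRS reports fewer entries than were submitted, the first
    /// unaccepted entry is logged and skipped, and the remainder is retried
    /// as a new chunk until a chunk is applied in full. As a worked example
    /// (numbers are illustrative only): with 10 entries and a return value
    /// of 4, entry 4 is dropped and entries 5..9 are retried.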
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state.into())?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        if let Some(freq) = state.tsc_khz {
            self.set_tsc_khz(freq)?;
        }

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fall back on a slower method by setting MSRs
        // by chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.len();
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs[faulty_msr_index].index
                );

                // Skip the first bad MSR.
                let start_pos = faulty_msr_index + 1;

                let sub_msr_entries = state.msrs[start_pos..].to_vec();

                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }

    ///
    /// Restore the previously saved AArch64 CPU state.
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        // Set core registers.
        self.set_regs(&state.core_regs.into())?;
        // Set system registers.
        for reg in &state.sys_regs {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(reg.id, &reg.addr.to_le_bytes())
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }

        self.set_mp_state(state.mp_state.into())?;

        Ok(())
    }

    ///
    /// Initialize TDX for this CPU.
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
        tdx_command(
            &self.fd.lock().unwrap().as_raw_fd(),
            TdxCommand::InitVcpu,
            0,
            hob_address,
        )
        .map_err(cpu::HypervisorCpuError::InitializeTdx)
    }

    ///
    /// Set the "immediate_exit" state.
    ///
    fn set_immediate_exit(&self, exit: bool) {
        self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into());
    }

    ///
    /// Returns the details about the TDX exit reason.
    ///
    #[cfg(feature = "tdx")]
    fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
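        // Note: the TDX-specific exit layout is not exposed by the upstream
        // kvm-bindings structures, so the exit area of `kvm_run` is
        // reinterpreted below through the locally defined `KvmTdxExit`
        // overlay (see its definition near the top of this file).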
        // SAFETY: accessing a union field in a valid structure
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;

        if tdx_vmcall.type_ != 0 {
            return Err(cpu::HypervisorCpuError::UnknownTdxVmCall);
        }

        match tdx_vmcall.subfunction {
            TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
            TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
                Ok(TdxExitDetails::SetupEventNotifyInterrupt)
            }
            _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
        }
    }

    ///
    /// Set the status code for TDX exit
    ///
    #[cfg(feature = "tdx")]
    fn set_tdx_status(&mut self, status: TdxExitStatus) {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
        // SAFETY: accessing a union field in a valid structure
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = match status {
            TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
            TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
        };
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU
    ///
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr!(msr_index::MSR_IA32_TSC),
            msr_data!(
                msr_index::MSR_IA32_MISC_ENABLE,
                msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
            ),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }

    #[cfg(target_arch = "aarch64")]
    fn has_pmu_support(&self) -> bool {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        self.fd.lock().unwrap().has_device_attr(&cpu_attr).is_ok()
    }

    #[cfg(target_arch = "aarch64")]
    fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        let cpu_attr_irq = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
            addr: &irq as *const u32 as u64,
            flags: 0,
        };
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr_irq)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
    }
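    // Illustrative flow for the two PMU helpers above (a sketch, not called
    // from this file; the PPI number `23` is hypothetical and comes from the
    // VMM's interrupt layout):
    //
    //     if vcpu.has_pmu_support() {
    //         vcpu.init_pmu(23)?;
    //     }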
    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the frequency of the TSC if available
    ///
    fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
        match self.fd.lock().unwrap().get_tsc_khz() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(None)
                } else {
                    Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
                }
            }
            Ok(v) => Ok(Some(v)),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the frequency of the TSC if available
    ///
    fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
        match self.fd.lock().unwrap().set_tsc_khz(freq) {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Trigger NMI interrupt
    ///
    fn nmi(&self) -> cpu::Result<()> {
        match self.fd.lock().unwrap().nmi() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::Nmi(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }
}

impl KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<XsaveState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
        let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_xsave(&xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .lock()
            .unwrap()
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as
    /// related states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .lock()
            .unwrap()
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
}