1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause 4 // 5 // Copyright © 2020, Microsoft Corporation 6 // 7 // Copyright 2018-2019 CrowdStrike, Inc. 8 // 9 // 10 11 use std::any::Any; 12 use std::collections::HashMap; 13 #[cfg(target_arch = "x86_64")] 14 use std::fs::File; 15 #[cfg(target_arch = "x86_64")] 16 use std::os::unix::io::AsRawFd; 17 #[cfg(feature = "tdx")] 18 use std::os::unix::io::RawFd; 19 use std::result; 20 #[cfg(target_arch = "x86_64")] 21 use std::sync::atomic::{AtomicBool, Ordering}; 22 use std::sync::{Arc, Mutex, RwLock}; 23 24 use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd}; 25 use vmm_sys_util::eventfd::EventFd; 26 27 #[cfg(target_arch = "aarch64")] 28 use crate::aarch64::gic::KvmGicV3Its; 29 #[cfg(target_arch = "aarch64")] 30 pub use crate::aarch64::{ 31 check_required_kvm_extensions, gic::Gicv3ItsState as GicState, is_system_register, VcpuInit, 32 VcpuKvmState, 33 }; 34 #[cfg(target_arch = "aarch64")] 35 use crate::arch::aarch64::gic::{Vgic, VgicConfig}; 36 use crate::vm::{self, InterruptSourceConfig, VmOps}; 37 #[cfg(target_arch = "aarch64")] 38 use crate::{arm64_core_reg_id, offset_of}; 39 use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType}; 40 // x86_64 dependencies 41 #[cfg(target_arch = "x86_64")] 42 pub mod x86_64; 43 #[cfg(target_arch = "aarch64")] 44 use aarch64::{RegList, Register}; 45 #[cfg(target_arch = "x86_64")] 46 use kvm_bindings::{ 47 kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP, 48 KVM_GUESTDBG_USE_HW_BP, 49 }; 50 #[cfg(target_arch = "x86_64")] 51 use x86_64::check_required_kvm_extensions; 52 #[cfg(target_arch = "x86_64")] 53 pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState}; 54 55 #[cfg(target_arch = "x86_64")] 56 use crate::arch::x86::{ 57 CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, XsaveState, NUM_IOAPIC_PINS, 58 }; 59 #[cfg(target_arch = "x86_64")] 60 use crate::ClockData; 61 use crate::{ 62 CpuState, IoEventAddress, IrqRoutingEntry, MpState, StandardRegisters, UserMemoryRegion, 63 USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE, 64 }; 65 // aarch64 dependencies 66 #[cfg(target_arch = "aarch64")] 67 pub mod aarch64; 68 #[cfg(target_arch = "aarch64")] 69 use std::mem; 70 71 pub use kvm_bindings::{ 72 kvm_clock_data, kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug, 73 kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_userspace_memory_region, 74 KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, 75 KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID, 76 }; 77 #[cfg(target_arch = "aarch64")] 78 use kvm_bindings::{ 79 kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64, 80 KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK, 81 KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK, 82 KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64, 83 }; 84 #[cfg(feature = "tdx")] 85 use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO}; 86 pub use kvm_ioctls::{Cap, Kvm}; 87 use thiserror::Error; 88 use vfio_ioctls::VfioDeviceFd; 89 #[cfg(feature = "tdx")] 90 use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr}; 91 pub use {kvm_bindings, kvm_ioctls}; 92 /// 93 /// Export generically-named wrappers of kvm-bindings for Unix-based platforms 94 /// 95 pub use { 96 
kvm_bindings::kvm_create_device as CreateDevice, kvm_bindings::kvm_device_attr as DeviceAttr, 97 kvm_bindings::kvm_run, kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::VcpuExit, 98 }; 99 100 #[cfg(target_arch = "x86_64")] 101 const KVM_CAP_SGX_ATTRIBUTE: u32 = 196; 102 103 #[cfg(target_arch = "x86_64")] 104 use vmm_sys_util::ioctl_io_nr; 105 #[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))] 106 use vmm_sys_util::ioctl_ioc_nr; 107 108 #[cfg(target_arch = "x86_64")] 109 ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a); 110 111 #[cfg(feature = "tdx")] 112 const KVM_EXIT_TDX: u32 = 50; 113 #[cfg(feature = "tdx")] 114 const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002; 115 #[cfg(feature = "tdx")] 116 const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004; 117 #[cfg(feature = "tdx")] 118 const TDG_VP_VMCALL_SUCCESS: u64 = 0; 119 #[cfg(feature = "tdx")] 120 const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000; 121 122 #[cfg(feature = "tdx")] 123 ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong); 124 125 #[cfg(feature = "tdx")] 126 #[repr(u32)] 127 enum TdxCommand { 128 Capabilities = 0, 129 InitVm, 130 InitVcpu, 131 InitMemRegion, 132 Finalize, 133 } 134 135 #[cfg(feature = "tdx")] 136 pub enum TdxExitDetails { 137 GetQuote, 138 SetupEventNotifyInterrupt, 139 } 140 141 #[cfg(feature = "tdx")] 142 pub enum TdxExitStatus { 143 Success, 144 InvalidOperand, 145 } 146 147 #[cfg(feature = "tdx")] 148 const TDX_MAX_NR_CPUID_CONFIGS: usize = 6; 149 150 #[cfg(feature = "tdx")] 151 #[repr(C)] 152 #[derive(Debug, Default)] 153 pub struct TdxCpuidConfig { 154 pub leaf: u32, 155 pub sub_leaf: u32, 156 pub eax: u32, 157 pub ebx: u32, 158 pub ecx: u32, 159 pub edx: u32, 160 } 161 162 #[cfg(feature = "tdx")] 163 #[repr(C)] 164 #[derive(Debug, Default)] 165 pub struct TdxCapabilities { 166 pub attrs_fixed0: u64, 167 pub attrs_fixed1: u64, 168 pub xfam_fixed0: u64, 169 pub xfam_fixed1: u64, 170 pub nr_cpuid_configs: u32, 171 pub padding: u32, 172 pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS], 173 } 174 175 #[cfg(feature = "tdx")] 176 #[derive(Copy, Clone)] 177 pub struct KvmTdxExit { 178 pub type_: u32, 179 pub pad: u32, 180 pub u: KvmTdxExitU, 181 } 182 183 #[cfg(feature = "tdx")] 184 #[repr(C)] 185 #[derive(Copy, Clone)] 186 pub union KvmTdxExitU { 187 pub vmcall: KvmTdxExitVmcall, 188 } 189 190 #[cfg(feature = "tdx")] 191 #[repr(C)] 192 #[derive(Debug, Default, Copy, Clone, PartialEq)] 193 pub struct KvmTdxExitVmcall { 194 pub type_: u64, 195 pub subfunction: u64, 196 pub reg_mask: u64, 197 pub in_r12: u64, 198 pub in_r13: u64, 199 pub in_r14: u64, 200 pub in_r15: u64, 201 pub in_rbx: u64, 202 pub in_rdi: u64, 203 pub in_rsi: u64, 204 pub in_r8: u64, 205 pub in_r9: u64, 206 pub in_rdx: u64, 207 pub status_code: u64, 208 pub out_r11: u64, 209 pub out_r12: u64, 210 pub out_r13: u64, 211 pub out_r14: u64, 212 pub out_r15: u64, 213 pub out_rbx: u64, 214 pub out_rdi: u64, 215 pub out_rsi: u64, 216 pub out_r8: u64, 217 pub out_r9: u64, 218 pub out_rdx: u64, 219 } 220 221 impl From<kvm_userspace_memory_region> for UserMemoryRegion { 222 fn from(region: kvm_userspace_memory_region) -> Self { 223 let mut flags = USER_MEMORY_REGION_READ; 224 if region.flags & KVM_MEM_READONLY == 0 { 225 flags |= USER_MEMORY_REGION_WRITE; 226 } 227 if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 { 228 flags |= USER_MEMORY_REGION_LOG_DIRTY; 229 } 230 231 UserMemoryRegion { 232 slot: region.slot, 233 guest_phys_addr: region.guest_phys_addr, 234 memory_size: 
region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<UserMemoryRegion> for kvm_userspace_memory_region {
    fn from(region: UserMemoryRegion) -> Self {
        assert!(
            region.flags & USER_MEMORY_REGION_READ != 0,
            "KVM mapped memory is always readable"
        );

        let mut flags = 0;
        if region.flags & USER_MEMORY_REGION_WRITE == 0 {
            flags |= KVM_MEM_READONLY;
        }
        if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
            flags |= KVM_MEM_LOG_DIRTY_PAGES;
        }

        kvm_userspace_memory_region {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<kvm_mp_state> for MpState {
    fn from(s: kvm_mp_state) -> Self {
        MpState::Kvm(s)
    }
}

impl From<MpState> for kvm_mp_state {
    fn from(ms: MpState) -> Self {
        match ms {
            MpState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("MpState is not valid"),
        }
    }
}

impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: kvm_ioctls::IoEventAddress) -> Self {
        match a {
            kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuKvmState> for CpuState {
    fn from(s: VcpuKvmState) -> Self {
        CpuState::Kvm(s)
    }
}

impl From<CpuState> for VcpuKvmState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<kvm_clock_data> for ClockData {
    fn from(d: kvm_clock_data) -> Self {
        ClockData::Kvm(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for kvm_clock_data {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("ClockData is not valid"),
        }
    }
}

impl From<kvm_bindings::kvm_regs> for crate::StandardRegisters {
    fn from(s: kvm_bindings::kvm_regs) -> Self {
        crate::StandardRegisters::Kvm(s)
    }
}

impl From<crate::StandardRegisters> for kvm_bindings::kvm_regs {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
    fn from(s: kvm_irq_routing_entry) -> Self {
        IrqRoutingEntry::Kvm(s)
    }
}

impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

struct KvmDirtyLogSlot {
    slot: u32,
    guest_phys_addr:
u64, 374 memory_size: u64, 375 userspace_addr: u64, 376 } 377 378 /// Wrapper over KVM VM ioctls. 379 pub struct KvmVm { 380 fd: Arc<VmFd>, 381 #[cfg(target_arch = "x86_64")] 382 msrs: Vec<MsrEntry>, 383 dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>, 384 } 385 386 impl KvmVm { 387 /// 388 /// Creates an emulated device in the kernel. 389 /// 390 /// See the documentation for `KVM_CREATE_DEVICE`. 391 fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> { 392 let device_fd = self 393 .fd 394 .create_device(device) 395 .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?; 396 Ok(VfioDeviceFd::new_from_kvm(device_fd)) 397 } 398 /// Checks if a particular `Cap` is available. 399 pub fn check_extension(&self, c: Cap) -> bool { 400 self.fd.check_extension(c) 401 } 402 } 403 404 /// Implementation of Vm trait for KVM 405 /// 406 /// # Examples 407 /// 408 /// ``` 409 /// # use hypervisor::kvm::KvmHypervisor; 410 /// # use std::sync::Arc; 411 /// let kvm = KvmHypervisor::new().unwrap(); 412 /// let hypervisor = Arc::new(kvm); 413 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 414 /// ``` 415 impl vm::Vm for KvmVm { 416 #[cfg(target_arch = "x86_64")] 417 /// 418 /// Sets the address of the one-page region in the VM's address space. 419 /// 420 fn set_identity_map_address(&self, address: u64) -> vm::Result<()> { 421 self.fd 422 .set_identity_map_address(address) 423 .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into())) 424 } 425 426 #[cfg(target_arch = "x86_64")] 427 /// 428 /// Sets the address of the three-page region in the VM's address space. 429 /// 430 fn set_tss_address(&self, offset: usize) -> vm::Result<()> { 431 self.fd 432 .set_tss_address(offset) 433 .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into())) 434 } 435 436 /// 437 /// Creates an in-kernel interrupt controller. 438 /// 439 fn create_irq_chip(&self) -> vm::Result<()> { 440 self.fd 441 .create_irq_chip() 442 .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into())) 443 } 444 445 /// 446 /// Registers an event that will, when signaled, trigger the `gsi` IRQ. 447 /// 448 fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 449 self.fd 450 .register_irqfd(fd, gsi) 451 .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into())) 452 } 453 454 /// 455 /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ. 456 /// 457 fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 458 self.fd 459 .unregister_irqfd(fd, gsi) 460 .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into())) 461 } 462 463 /// 464 /// Creates a VcpuFd object from a vcpu RawFd. 465 /// 466 fn create_vcpu( 467 &self, 468 id: u8, 469 vm_ops: Option<Arc<dyn VmOps>>, 470 ) -> vm::Result<Arc<dyn cpu::Vcpu>> { 471 let fd = self 472 .fd 473 .create_vcpu(id as u64) 474 .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?; 475 let vcpu = KvmVcpu { 476 fd: Arc::new(Mutex::new(fd)), 477 #[cfg(target_arch = "x86_64")] 478 msrs: self.msrs.clone(), 479 vm_ops, 480 #[cfg(target_arch = "x86_64")] 481 hyperv_synic: AtomicBool::new(false), 482 }; 483 Ok(Arc::new(vcpu)) 484 } 485 486 #[cfg(target_arch = "aarch64")] 487 /// 488 /// Creates a virtual GIC device. 
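    ///
    /// A minimal usage sketch (aarch64 only), assuming the caller has already
    /// built a `config: VgicConfig` describing the vCPU count and GIC region layout:
    /// ```ignore
    /// let vgic = vm.create_vgic(config).expect("vGIC creation failed");
    /// ```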
489 /// 490 fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> { 491 let gic_device = KvmGicV3Its::new(self, config) 492 .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?; 493 Ok(Arc::new(Mutex::new(gic_device))) 494 } 495 496 /// 497 /// Registers an event to be signaled whenever a certain address is written to. 498 /// 499 fn register_ioevent( 500 &self, 501 fd: &EventFd, 502 addr: &IoEventAddress, 503 datamatch: Option<vm::DataMatch>, 504 ) -> vm::Result<()> { 505 let addr = &kvm_ioctls::IoEventAddress::from(*addr); 506 if let Some(dm) = datamatch { 507 match dm { 508 vm::DataMatch::DataMatch32(kvm_dm32) => self 509 .fd 510 .register_ioevent(fd, addr, kvm_dm32) 511 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 512 vm::DataMatch::DataMatch64(kvm_dm64) => self 513 .fd 514 .register_ioevent(fd, addr, kvm_dm64) 515 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 516 } 517 } else { 518 self.fd 519 .register_ioevent(fd, addr, NoDatamatch) 520 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())) 521 } 522 } 523 524 /// 525 /// Unregisters an event from a certain address it has been previously registered to. 526 /// 527 fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> { 528 let addr = &kvm_ioctls::IoEventAddress::from(*addr); 529 self.fd 530 .unregister_ioevent(fd, addr, NoDatamatch) 531 .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into())) 532 } 533 534 /// 535 /// Constructs a routing entry 536 /// 537 fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry { 538 match &config { 539 InterruptSourceConfig::MsiIrq(cfg) => { 540 let mut kvm_route = kvm_irq_routing_entry { 541 gsi, 542 type_: KVM_IRQ_ROUTING_MSI, 543 ..Default::default() 544 }; 545 546 kvm_route.u.msi.address_lo = cfg.low_addr; 547 kvm_route.u.msi.address_hi = cfg.high_addr; 548 kvm_route.u.msi.data = cfg.data; 549 550 if self.check_extension(crate::kvm::Cap::MsiDevid) { 551 // On AArch64, there is limitation on the range of the 'devid', 552 // it cannot be greater than 65536 (the max of u16). 553 // 554 // BDF cannot be used directly, because 'segment' is in high 555 // 16 bits. The layout of the u32 BDF is: 556 // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --| 557 // | segment | bus | device | function | 558 // 559 // Now that we support 1 bus only in a segment, we can build a 560 // 'devid' by replacing the 'bus' bits with the low 8 bits of 561 // 'segment' data. 562 // This way we can resolve the range checking problem and give 563 // different `devid` to all the devices. Limitation is that at 564 // most 256 segments can be supported. 565 // 566 let modified_devid = (cfg.devid & 0x00ff_0000) >> 8 | cfg.devid & 0xff; 567 568 kvm_route.flags = KVM_MSI_VALID_DEVID; 569 kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid; 570 } 571 kvm_route.into() 572 } 573 InterruptSourceConfig::LegacyIrq(cfg) => { 574 let mut kvm_route = kvm_irq_routing_entry { 575 gsi, 576 type_: KVM_IRQ_ROUTING_IRQCHIP, 577 ..Default::default() 578 }; 579 kvm_route.u.irqchip.irqchip = cfg.irqchip; 580 kvm_route.u.irqchip.pin = cfg.pin; 581 582 kvm_route.into() 583 } 584 } 585 } 586 587 /// 588 /// Sets the GSI routing table entries, overwriting any previously set 589 /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl. 
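    ///
    /// A hedged usage sketch, assuming `gsi` and an `InterruptSourceConfig`
    /// named `config` have been prepared by the caller:
    /// ```ignore
    /// let entry = vm.make_routing_entry(gsi, &config);
    /// vm.set_gsi_routing(&[entry]).unwrap();
    /// ```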
    ///
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
        irq_routing[0].nr = entries.len() as u32;
        irq_routing[0].flags = 0;
        let entries: Vec<kvm_irq_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Kvm(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: irq_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [kvm_irq_routing_entry] =
                irq_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_gsi_routing(&irq_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }

    ///
    /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
    ///
    fn make_user_memory_region(
        &self,
        slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        kvm_userspace_memory_region {
            slot,
            guest_phys_addr,
            memory_size,
            userspace_addr,
            flags: if readonly { KVM_MEM_READONLY } else { 0 }
                | if log_dirty_pages {
                    KVM_MEM_LOG_DIRTY_PAGES
                } else {
                    0
                },
        }
        .into()
    }

    ///
    /// Creates a guest physical memory region.
    ///
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 {
            if (region.flags & KVM_MEM_READONLY) != 0 {
                return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!(
                    "Error creating regions with both 'dirty-pages-log' and 'read-only'."
                )));
            }

            // Keep track of the regions that need dirty pages log
            self.dirty_log_slots.write().unwrap().insert(
                region.slot,
                KvmDirtyLogSlot {
                    slot: region.slot,
                    guest_phys_addr: region.guest_phys_addr,
                    memory_size: region.memory_size,
                    userspace_addr: region.userspace_addr,
                },
            );

            // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`.
            // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`.
            region.flags = 0;
        }

        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
        }
    }

    ///
    /// Removes a guest physical memory region.
    ///
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots.write().unwrap().remove(&region.slot);

        // Setting the size to 0 means "remove"
        region.memory_size = 0;
        // SAFETY: Safe because guest regions are guaranteed not to overlap.
695 unsafe { 696 self.fd 697 .set_user_memory_region(region) 698 .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into())) 699 } 700 } 701 702 /// 703 /// Returns the preferred CPU target type which can be emulated by KVM on underlying host. 704 /// 705 #[cfg(target_arch = "aarch64")] 706 fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> { 707 self.fd 708 .get_preferred_target(kvi) 709 .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into())) 710 } 711 712 #[cfg(target_arch = "x86_64")] 713 fn enable_split_irq(&self) -> vm::Result<()> { 714 // Create split irqchip 715 // Only the local APIC is emulated in kernel, both PICs and IOAPIC 716 // are not. 717 let mut cap = kvm_enable_cap { 718 cap: KVM_CAP_SPLIT_IRQCHIP, 719 ..Default::default() 720 }; 721 cap.args[0] = NUM_IOAPIC_PINS as u64; 722 self.fd 723 .enable_cap(&cap) 724 .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?; 725 Ok(()) 726 } 727 728 #[cfg(target_arch = "x86_64")] 729 fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> { 730 let mut cap = kvm_enable_cap { 731 cap: KVM_CAP_SGX_ATTRIBUTE, 732 ..Default::default() 733 }; 734 cap.args[0] = file.as_raw_fd() as u64; 735 self.fd 736 .enable_cap(&cap) 737 .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?; 738 Ok(()) 739 } 740 741 /// Retrieve guest clock. 742 #[cfg(target_arch = "x86_64")] 743 fn get_clock(&self) -> vm::Result<ClockData> { 744 Ok(self 745 .fd 746 .get_clock() 747 .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))? 748 .into()) 749 } 750 751 /// Set guest clock. 752 #[cfg(target_arch = "x86_64")] 753 fn set_clock(&self, data: &ClockData) -> vm::Result<()> { 754 let data = (*data).into(); 755 self.fd 756 .set_clock(&data) 757 .map_err(|e| vm::HypervisorVmError::SetClock(e.into())) 758 } 759 760 /// Create a device that is used for passthrough 761 fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> { 762 let mut vfio_dev = kvm_create_device { 763 type_: kvm_device_type_KVM_DEV_TYPE_VFIO, 764 fd: 0, 765 flags: 0, 766 }; 767 768 self.create_device(&mut vfio_dev) 769 .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into())) 770 } 771 772 /// 773 /// Start logging dirty pages 774 /// 775 fn start_dirty_log(&self) -> vm::Result<()> { 776 let dirty_log_slots = self.dirty_log_slots.read().unwrap(); 777 for (_, s) in dirty_log_slots.iter() { 778 let region = kvm_userspace_memory_region { 779 slot: s.slot, 780 guest_phys_addr: s.guest_phys_addr, 781 memory_size: s.memory_size, 782 userspace_addr: s.userspace_addr, 783 flags: KVM_MEM_LOG_DIRTY_PAGES, 784 }; 785 // SAFETY: Safe because guest regions are guaranteed not to overlap. 786 unsafe { 787 self.fd 788 .set_user_memory_region(region) 789 .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?; 790 } 791 } 792 793 Ok(()) 794 } 795 796 /// 797 /// Stop logging dirty pages 798 /// 799 fn stop_dirty_log(&self) -> vm::Result<()> { 800 let dirty_log_slots = self.dirty_log_slots.read().unwrap(); 801 for (_, s) in dirty_log_slots.iter() { 802 let region = kvm_userspace_memory_region { 803 slot: s.slot, 804 guest_phys_addr: s.guest_phys_addr, 805 memory_size: s.memory_size, 806 userspace_addr: s.userspace_addr, 807 flags: 0, 808 }; 809 // SAFETY: Safe because guest regions are guaranteed not to overlap. 
810 unsafe { 811 self.fd 812 .set_user_memory_region(region) 813 .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?; 814 } 815 } 816 817 Ok(()) 818 } 819 820 /// 821 /// Get dirty pages bitmap (one bit per page) 822 /// 823 fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> { 824 self.fd 825 .get_dirty_log(slot, memory_size as usize) 826 .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into())) 827 } 828 829 /// 830 /// Initialize TDX for this VM 831 /// 832 #[cfg(feature = "tdx")] 833 fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> { 834 const TDX_ATTR_SEPT_VE_DISABLE: usize = 28; 835 836 let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> = 837 cpuid.iter().map(|e| (*e).into()).collect(); 838 cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default()); 839 840 #[repr(C)] 841 struct TdxInitVm { 842 attributes: u64, 843 max_vcpus: u32, 844 padding: u32, 845 mrconfigid: [u64; 6], 846 mrowner: [u64; 6], 847 mrownerconfig: [u64; 6], 848 cpuid_nent: u32, 849 cpuid_padding: u32, 850 cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256], 851 } 852 let data = TdxInitVm { 853 attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE, 854 max_vcpus, 855 padding: 0, 856 mrconfigid: [0; 6], 857 mrowner: [0; 6], 858 mrownerconfig: [0; 6], 859 cpuid_nent: cpuid.len() as u32, 860 cpuid_padding: 0, 861 cpuid_entries: cpuid.as_slice().try_into().unwrap(), 862 }; 863 864 tdx_command( 865 &self.fd.as_raw_fd(), 866 TdxCommand::InitVm, 867 0, 868 &data as *const _ as u64, 869 ) 870 .map_err(vm::HypervisorVmError::InitializeTdx) 871 } 872 873 /// 874 /// Finalize the TDX setup for this VM 875 /// 876 #[cfg(feature = "tdx")] 877 fn tdx_finalize(&self) -> vm::Result<()> { 878 tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0) 879 .map_err(vm::HypervisorVmError::FinalizeTdx) 880 } 881 882 /// 883 /// Initialize memory regions for the TDX VM 884 /// 885 #[cfg(feature = "tdx")] 886 fn tdx_init_memory_region( 887 &self, 888 host_address: u64, 889 guest_address: u64, 890 size: u64, 891 measure: bool, 892 ) -> vm::Result<()> { 893 #[repr(C)] 894 struct TdxInitMemRegion { 895 host_address: u64, 896 guest_address: u64, 897 pages: u64, 898 } 899 let data = TdxInitMemRegion { 900 host_address, 901 guest_address, 902 pages: size / 4096, 903 }; 904 905 tdx_command( 906 &self.fd.as_raw_fd(), 907 TdxCommand::InitMemRegion, 908 u32::from(measure), 909 &data as *const _ as u64, 910 ) 911 .map_err(vm::HypervisorVmError::InitMemRegionTdx) 912 } 913 914 /// Downcast to the underlying KvmVm type 915 fn as_any(&self) -> &dyn Any { 916 self 917 } 918 } 919 920 #[cfg(feature = "tdx")] 921 fn tdx_command( 922 fd: &RawFd, 923 command: TdxCommand, 924 flags: u32, 925 data: u64, 926 ) -> std::result::Result<(), std::io::Error> { 927 #[repr(C)] 928 struct TdxIoctlCmd { 929 command: TdxCommand, 930 flags: u32, 931 data: u64, 932 error: u64, 933 unused: u64, 934 } 935 let cmd = TdxIoctlCmd { 936 command, 937 flags, 938 data, 939 error: 0, 940 unused: 0, 941 }; 942 // SAFETY: FFI call. All input parameters are valid. 943 let ret = unsafe { 944 ioctl_with_val( 945 fd, 946 KVM_MEMORY_ENCRYPT_OP(), 947 &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong, 948 ) 949 }; 950 951 if ret < 0 { 952 return Err(std::io::Error::last_os_error()); 953 } 954 Ok(()) 955 } 956 957 /// Wrapper over KVM system ioctls. 
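///
/// Construction goes through `KvmHypervisor::new()`, which opens `/dev/kvm`
/// and verifies the reported `KVM_API_VERSION`. A short sketch:
/// ```ignore
/// let hv = KvmHypervisor::new().unwrap();
/// ```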
958 pub struct KvmHypervisor { 959 kvm: Kvm, 960 } 961 962 impl KvmHypervisor { 963 #[cfg(target_arch = "x86_64")] 964 /// 965 /// Retrieve the list of MSRs supported by the hypervisor. 966 /// 967 fn get_msr_list(&self) -> hypervisor::Result<MsrList> { 968 self.kvm 969 .get_msr_index_list() 970 .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into())) 971 } 972 } 973 974 /// Enum for KVM related error 975 #[derive(Debug, Error)] 976 pub enum KvmError { 977 #[error("Capability missing: {0:?}")] 978 CapabilityMissing(Cap), 979 } 980 981 pub type KvmResult<T> = result::Result<T, KvmError>; 982 983 impl KvmHypervisor { 984 /// Create a hypervisor based on Kvm 985 #[allow(clippy::new_ret_no_self)] 986 pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> { 987 let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?; 988 let api_version = kvm_obj.get_api_version(); 989 990 if api_version != kvm_bindings::KVM_API_VERSION as i32 { 991 return Err(hypervisor::HypervisorError::IncompatibleApiVersion); 992 } 993 994 Ok(Arc::new(KvmHypervisor { kvm: kvm_obj })) 995 } 996 997 /// Check if the hypervisor is available 998 pub fn is_available() -> hypervisor::Result<bool> { 999 match std::fs::metadata("/dev/kvm") { 1000 Ok(_) => Ok(true), 1001 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false), 1002 Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck( 1003 err.into(), 1004 )), 1005 } 1006 } 1007 } 1008 1009 /// Implementation of Hypervisor trait for KVM 1010 /// 1011 /// # Examples 1012 /// 1013 /// ``` 1014 /// # use hypervisor::kvm::KvmHypervisor; 1015 /// # use std::sync::Arc; 1016 /// let kvm = KvmHypervisor::new().unwrap(); 1017 /// let hypervisor = Arc::new(kvm); 1018 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 1019 /// ``` 1020 impl hypervisor::Hypervisor for KvmHypervisor { 1021 /// 1022 /// Returns the type of the hypervisor 1023 /// 1024 fn hypervisor_type(&self) -> HypervisorType { 1025 HypervisorType::Kvm 1026 } 1027 1028 /// Create a KVM vm object of a specific VM type and return the object as Vm trait object 1029 /// 1030 /// # Examples 1031 /// 1032 /// ``` 1033 /// # use hypervisor::kvm::KvmHypervisor; 1034 /// use hypervisor::kvm::KvmVm; 1035 /// let hypervisor = KvmHypervisor::new().unwrap(); 1036 /// let vm = hypervisor.create_vm_with_type(0).unwrap(); 1037 /// ``` 1038 fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> { 1039 let fd: VmFd; 1040 loop { 1041 match self.kvm.create_vm_with_type(vm_type) { 1042 Ok(res) => fd = res, 1043 Err(e) => { 1044 if e.errno() == libc::EINTR { 1045 // If the error returned is EINTR, which means the 1046 // ioctl has been interrupted, we have to retry as 1047 // this can't be considered as a regular error. 
1048 continue; 1049 } else { 1050 return Err(hypervisor::HypervisorError::VmCreate(e.into())); 1051 } 1052 } 1053 } 1054 break; 1055 } 1056 1057 let vm_fd = Arc::new(fd); 1058 1059 #[cfg(target_arch = "x86_64")] 1060 { 1061 let msr_list = self.get_msr_list()?; 1062 let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize; 1063 let mut msrs: Vec<MsrEntry> = vec![ 1064 MsrEntry { 1065 ..Default::default() 1066 }; 1067 num_msrs 1068 ]; 1069 let indices = msr_list.as_slice(); 1070 for (pos, index) in indices.iter().enumerate() { 1071 msrs[pos].index = *index; 1072 } 1073 1074 Ok(Arc::new(KvmVm { 1075 fd: vm_fd, 1076 msrs, 1077 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())), 1078 })) 1079 } 1080 1081 #[cfg(target_arch = "aarch64")] 1082 { 1083 Ok(Arc::new(KvmVm { 1084 fd: vm_fd, 1085 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())), 1086 })) 1087 } 1088 } 1089 1090 /// Create a KVM vm object and return the object as Vm trait object 1091 /// 1092 /// # Examples 1093 /// 1094 /// ``` 1095 /// # use hypervisor::kvm::KvmHypervisor; 1096 /// use hypervisor::kvm::KvmVm; 1097 /// let hypervisor = KvmHypervisor::new().unwrap(); 1098 /// let vm = hypervisor.create_vm().unwrap(); 1099 /// ``` 1100 fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> { 1101 #[allow(unused_mut)] 1102 let mut vm_type: u64 = 0; // Create with default platform type 1103 1104 // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA 1105 // size from the host and use that when creating the VM, which may 1106 // avoid unnecessary VM creation failures. 1107 #[cfg(target_arch = "aarch64")] 1108 if self.kvm.check_extension(Cap::ArmVmIPASize) { 1109 vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap(); 1110 } 1111 1112 self.create_vm_with_type(vm_type) 1113 } 1114 1115 fn check_required_extensions(&self) -> hypervisor::Result<()> { 1116 check_required_kvm_extensions(&self.kvm) 1117 .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into())) 1118 } 1119 1120 #[cfg(target_arch = "x86_64")] 1121 /// 1122 /// X86 specific call to get the system supported CPUID values. 1123 /// 1124 fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> { 1125 let kvm_cpuid = self 1126 .kvm 1127 .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES) 1128 .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?; 1129 1130 let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect(); 1131 1132 Ok(v) 1133 } 1134 1135 #[cfg(target_arch = "aarch64")] 1136 /// 1137 /// Retrieve AArch64 host maximum IPA size supported by KVM. 
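    ///
    /// The returned limit is what `create_vm` passes back to `KVM_CREATE_VM`
    /// as the VM type when `Cap::ArmVmIPASize` is available.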
1138 /// 1139 fn get_host_ipa_limit(&self) -> i32 { 1140 self.kvm.get_host_ipa_limit() 1141 } 1142 1143 /// 1144 /// Retrieve TDX capabilities 1145 /// 1146 #[cfg(feature = "tdx")] 1147 fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> { 1148 let data = TdxCapabilities { 1149 nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32, 1150 ..Default::default() 1151 }; 1152 1153 tdx_command( 1154 &self.kvm.as_raw_fd(), 1155 TdxCommand::Capabilities, 1156 0, 1157 &data as *const _ as u64, 1158 ) 1159 .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?; 1160 1161 Ok(data) 1162 } 1163 1164 /// 1165 /// Get the number of supported hardware breakpoints 1166 /// 1167 fn get_guest_debug_hw_bps(&self) -> usize { 1168 #[cfg(target_arch = "x86_64")] 1169 { 1170 4 1171 } 1172 #[cfg(target_arch = "aarch64")] 1173 { 1174 self.kvm.get_guest_debug_hw_bps() as usize 1175 } 1176 } 1177 1178 /// Get maximum number of vCPUs 1179 fn get_max_vcpus(&self) -> u32 { 1180 self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32 1181 } 1182 } 1183 1184 /// Vcpu struct for KVM 1185 pub struct KvmVcpu { 1186 fd: Arc<Mutex<VcpuFd>>, 1187 #[cfg(target_arch = "x86_64")] 1188 msrs: Vec<MsrEntry>, 1189 vm_ops: Option<Arc<dyn vm::VmOps>>, 1190 #[cfg(target_arch = "x86_64")] 1191 hyperv_synic: AtomicBool, 1192 } 1193 1194 /// Implementation of Vcpu trait for KVM 1195 /// 1196 /// # Examples 1197 /// 1198 /// ``` 1199 /// # use hypervisor::kvm::KvmHypervisor; 1200 /// # use std::sync::Arc; 1201 /// let kvm = KvmHypervisor::new().unwrap(); 1202 /// let hypervisor = Arc::new(kvm); 1203 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 1204 /// let vcpu = vm.create_vcpu(0, None).unwrap(); 1205 /// ``` 1206 impl cpu::Vcpu for KvmVcpu { 1207 /// 1208 /// Returns StandardRegisters with default value set 1209 /// 1210 fn create_standard_regs(&self) -> StandardRegisters { 1211 kvm_bindings::kvm_regs::default().into() 1212 } 1213 #[cfg(target_arch = "x86_64")] 1214 /// 1215 /// Returns the vCPU general purpose registers. 1216 /// 1217 fn get_regs(&self) -> cpu::Result<StandardRegisters> { 1218 Ok(self 1219 .fd 1220 .lock() 1221 .unwrap() 1222 .get_regs() 1223 .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))? 1224 .into()) 1225 } 1226 1227 /// 1228 /// Returns the vCPU general purpose registers. 1229 /// The `KVM_GET_REGS` ioctl is not available on AArch64, `KVM_GET_ONE_REG` 1230 /// is used to get registers one by one. 1231 /// 1232 #[cfg(target_arch = "aarch64")] 1233 fn get_regs(&self) -> cpu::Result<StandardRegisters> { 1234 let mut state = kvm_regs::default(); 1235 let mut off = offset_of!(user_pt_regs, regs); 1236 // There are 31 user_pt_regs: 1237 // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72 1238 // These actually are the general-purpose registers of the Armv8-a 1239 // architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register). 1240 for i in 0..31 { 1241 let mut bytes = [0_u8; 8]; 1242 self.fd 1243 .lock() 1244 .unwrap() 1245 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1246 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1247 state.regs.regs[i] = u64::from_le_bytes(bytes); 1248 off += std::mem::size_of::<u64>(); 1249 } 1250 1251 // We are now entering the "Other register" section of the ARMv8-a architecture. 1252 // First one, stack pointer. 
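        // Each core register below is read through KVM_GET_ONE_REG with an ID of
        // the form KVM_REG_ARM64 | KVM_REG_ARM_CORE | <size flag> | (byte offset / 4),
        // which is what `arm64_core_reg_id!` assembles from the offsets computed here.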
1253 let off = offset_of!(user_pt_regs, sp); 1254 let mut bytes = [0_u8; 8]; 1255 self.fd 1256 .lock() 1257 .unwrap() 1258 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1259 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1260 state.regs.sp = u64::from_le_bytes(bytes); 1261 1262 // Second one, the program counter. 1263 let off = offset_of!(user_pt_regs, pc); 1264 let mut bytes = [0_u8; 8]; 1265 self.fd 1266 .lock() 1267 .unwrap() 1268 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1269 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1270 state.regs.pc = u64::from_le_bytes(bytes); 1271 1272 // Next is the processor state. 1273 let off = offset_of!(user_pt_regs, pstate); 1274 let mut bytes = [0_u8; 8]; 1275 self.fd 1276 .lock() 1277 .unwrap() 1278 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1279 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1280 state.regs.pstate = u64::from_le_bytes(bytes); 1281 1282 // The stack pointer associated with EL1 1283 let off = offset_of!(kvm_regs, sp_el1); 1284 let mut bytes = [0_u8; 8]; 1285 self.fd 1286 .lock() 1287 .unwrap() 1288 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1289 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1290 state.sp_el1 = u64::from_le_bytes(bytes); 1291 1292 // Exception Link Register for EL1, when taking an exception to EL1, this register 1293 // holds the address to which to return afterwards. 1294 let off = offset_of!(kvm_regs, elr_el1); 1295 let mut bytes = [0_u8; 8]; 1296 self.fd 1297 .lock() 1298 .unwrap() 1299 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1300 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1301 state.elr_el1 = u64::from_le_bytes(bytes); 1302 1303 // Saved Program Status Registers, there are 5 of them used in the kernel. 
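        // (KVM_NR_SPSR is 5: SPSR_EL1 plus the banked SPSR_abt, SPSR_und,
        // SPSR_irq and SPSR_fiq, mirroring `kvm_regs.spsr` in the kernel UAPI.)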
1304 let mut off = offset_of!(kvm_regs, spsr); 1305 for i in 0..KVM_NR_SPSR as usize { 1306 let mut bytes = [0_u8; 8]; 1307 self.fd 1308 .lock() 1309 .unwrap() 1310 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1311 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1312 state.spsr[i] = u64::from_le_bytes(bytes); 1313 off += std::mem::size_of::<u64>(); 1314 } 1315 1316 // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel: 1317 // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53 1318 let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs); 1319 for i in 0..32 { 1320 let mut bytes = [0_u8; 16]; 1321 self.fd 1322 .lock() 1323 .unwrap() 1324 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes) 1325 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1326 state.fp_regs.vregs[i] = u128::from_le_bytes(bytes); 1327 off += mem::size_of::<u128>(); 1328 } 1329 1330 // Floating-point Status Register 1331 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr); 1332 let mut bytes = [0_u8; 4]; 1333 self.fd 1334 .lock() 1335 .unwrap() 1336 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes) 1337 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1338 state.fp_regs.fpsr = u32::from_le_bytes(bytes); 1339 1340 // Floating-point Control Register 1341 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr); 1342 let mut bytes = [0_u8; 4]; 1343 self.fd 1344 .lock() 1345 .unwrap() 1346 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes) 1347 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1348 state.fp_regs.fpcr = u32::from_le_bytes(bytes); 1349 Ok(state.into()) 1350 } 1351 1352 #[cfg(target_arch = "x86_64")] 1353 /// 1354 /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl. 1355 /// 1356 fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> { 1357 let regs = (*regs).into(); 1358 self.fd 1359 .lock() 1360 .unwrap() 1361 .set_regs(®s) 1362 .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into())) 1363 } 1364 1365 /// 1366 /// Sets the vCPU general purpose registers. 1367 /// The `KVM_SET_REGS` ioctl is not available on AArch64, `KVM_SET_ONE_REG` 1368 /// is used to set registers one by one. 1369 /// 1370 #[cfg(target_arch = "aarch64")] 1371 fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> { 1372 // The function follows the exact identical order from `state`. Look there 1373 // for some additional info on registers. 
1374 let kvm_regs_state: kvm_regs = (*state).into(); 1375 let mut off = offset_of!(user_pt_regs, regs); 1376 for i in 0..31 { 1377 self.fd 1378 .lock() 1379 .unwrap() 1380 .set_one_reg( 1381 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1382 &kvm_regs_state.regs.regs[i].to_le_bytes(), 1383 ) 1384 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1385 off += std::mem::size_of::<u64>(); 1386 } 1387 1388 let off = offset_of!(user_pt_regs, sp); 1389 self.fd 1390 .lock() 1391 .unwrap() 1392 .set_one_reg( 1393 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1394 &kvm_regs_state.regs.sp.to_le_bytes(), 1395 ) 1396 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1397 1398 let off = offset_of!(user_pt_regs, pc); 1399 self.fd 1400 .lock() 1401 .unwrap() 1402 .set_one_reg( 1403 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1404 &kvm_regs_state.regs.pc.to_le_bytes(), 1405 ) 1406 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1407 1408 let off = offset_of!(user_pt_regs, pstate); 1409 self.fd 1410 .lock() 1411 .unwrap() 1412 .set_one_reg( 1413 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1414 &kvm_regs_state.regs.pstate.to_le_bytes(), 1415 ) 1416 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1417 1418 let off = offset_of!(kvm_regs, sp_el1); 1419 self.fd 1420 .lock() 1421 .unwrap() 1422 .set_one_reg( 1423 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1424 &kvm_regs_state.sp_el1.to_le_bytes(), 1425 ) 1426 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1427 1428 let off = offset_of!(kvm_regs, elr_el1); 1429 self.fd 1430 .lock() 1431 .unwrap() 1432 .set_one_reg( 1433 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1434 &kvm_regs_state.elr_el1.to_le_bytes(), 1435 ) 1436 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1437 1438 let mut off = offset_of!(kvm_regs, spsr); 1439 for i in 0..KVM_NR_SPSR as usize { 1440 self.fd 1441 .lock() 1442 .unwrap() 1443 .set_one_reg( 1444 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1445 &kvm_regs_state.spsr[i].to_le_bytes(), 1446 ) 1447 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1448 off += std::mem::size_of::<u64>(); 1449 } 1450 1451 let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs); 1452 for i in 0..32 { 1453 self.fd 1454 .lock() 1455 .unwrap() 1456 .set_one_reg( 1457 arm64_core_reg_id!(KVM_REG_SIZE_U128, off), 1458 &kvm_regs_state.fp_regs.vregs[i].to_le_bytes(), 1459 ) 1460 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1461 off += mem::size_of::<u128>(); 1462 } 1463 1464 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr); 1465 self.fd 1466 .lock() 1467 .unwrap() 1468 .set_one_reg( 1469 arm64_core_reg_id!(KVM_REG_SIZE_U32, off), 1470 &kvm_regs_state.fp_regs.fpsr.to_le_bytes(), 1471 ) 1472 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1473 1474 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr); 1475 self.fd 1476 .lock() 1477 .unwrap() 1478 .set_one_reg( 1479 arm64_core_reg_id!(KVM_REG_SIZE_U32, off), 1480 &kvm_regs_state.fp_regs.fpcr.to_le_bytes(), 1481 ) 1482 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1483 Ok(()) 1484 } 1485 1486 #[cfg(target_arch = "x86_64")] 1487 /// 1488 /// Returns the vCPU special registers. 
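    ///
    /// A hedged read-modify-write sketch (the field name follows the KVM
    /// `kvm_sregs` layout and is an assumption here):
    /// ```ignore
    /// let mut sregs = vcpu.get_sregs().unwrap();
    /// sregs.cr4 |= 1 << 5; // e.g. set CR4.PAE
    /// vcpu.set_sregs(&sregs).unwrap();
    /// ```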
1489 /// 1490 fn get_sregs(&self) -> cpu::Result<SpecialRegisters> { 1491 Ok(self 1492 .fd 1493 .lock() 1494 .unwrap() 1495 .get_sregs() 1496 .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))? 1497 .into()) 1498 } 1499 1500 #[cfg(target_arch = "x86_64")] 1501 /// 1502 /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl. 1503 /// 1504 fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> { 1505 let sregs = (*sregs).into(); 1506 self.fd 1507 .lock() 1508 .unwrap() 1509 .set_sregs(&sregs) 1510 .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into())) 1511 } 1512 1513 #[cfg(target_arch = "x86_64")] 1514 /// 1515 /// Returns the floating point state (FPU) from the vCPU. 1516 /// 1517 fn get_fpu(&self) -> cpu::Result<FpuState> { 1518 Ok(self 1519 .fd 1520 .lock() 1521 .unwrap() 1522 .get_fpu() 1523 .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))? 1524 .into()) 1525 } 1526 1527 #[cfg(target_arch = "x86_64")] 1528 /// 1529 /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl. 1530 /// 1531 fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> { 1532 let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into(); 1533 self.fd 1534 .lock() 1535 .unwrap() 1536 .set_fpu(&fpu) 1537 .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into())) 1538 } 1539 1540 #[cfg(target_arch = "x86_64")] 1541 /// 1542 /// X86 specific call to setup the CPUID registers. 1543 /// 1544 fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> { 1545 let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> = 1546 cpuid.iter().map(|e| (*e).into()).collect(); 1547 let kvm_cpuid = <CpuId>::from_entries(&cpuid) 1548 .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?; 1549 1550 self.fd 1551 .lock() 1552 .unwrap() 1553 .set_cpuid2(&kvm_cpuid) 1554 .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into())) 1555 } 1556 1557 #[cfg(target_arch = "x86_64")] 1558 /// 1559 /// X86 specific call to enable HyperV SynIC 1560 /// 1561 fn enable_hyperv_synic(&self) -> cpu::Result<()> { 1562 // Update the information about Hyper-V SynIC being enabled and 1563 // emulated as it will influence later which MSRs should be saved. 1564 self.hyperv_synic.store(true, Ordering::Release); 1565 1566 let cap = kvm_enable_cap { 1567 cap: KVM_CAP_HYPERV_SYNIC, 1568 ..Default::default() 1569 }; 1570 self.fd 1571 .lock() 1572 .unwrap() 1573 .enable_cap(&cap) 1574 .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into())) 1575 } 1576 1577 /// 1578 /// X86 specific call to retrieve the CPUID registers. 1579 /// 1580 #[cfg(target_arch = "x86_64")] 1581 fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> { 1582 let kvm_cpuid = self 1583 .fd 1584 .lock() 1585 .unwrap() 1586 .get_cpuid2(num_entries) 1587 .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?; 1588 1589 let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect(); 1590 1591 Ok(v) 1592 } 1593 1594 #[cfg(target_arch = "x86_64")] 1595 /// 1596 /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 1597 /// 1598 fn get_lapic(&self) -> cpu::Result<LapicState> { 1599 Ok(self 1600 .fd 1601 .lock() 1602 .unwrap() 1603 .get_lapic() 1604 .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))? 1605 .into()) 1606 } 1607 1608 #[cfg(target_arch = "x86_64")] 1609 /// 1610 /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 
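    ///
    /// A minimal sketch of the usual save/restore flow:
    /// ```ignore
    /// let lapic = vcpu.get_lapic().unwrap();
    /// // ... snapshot or tweak the LAPIC state here ...
    /// vcpu.set_lapic(&lapic).unwrap();
    /// ```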
1611 /// 1612 fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> { 1613 let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into(); 1614 self.fd 1615 .lock() 1616 .unwrap() 1617 .set_lapic(&klapic) 1618 .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into())) 1619 } 1620 1621 #[cfg(target_arch = "x86_64")] 1622 /// 1623 /// Returns the model-specific registers (MSR) for this vCPU. 1624 /// 1625 fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> { 1626 let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect(); 1627 let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap(); 1628 let succ = self 1629 .fd 1630 .lock() 1631 .unwrap() 1632 .get_msrs(&mut kvm_msrs) 1633 .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?; 1634 1635 msrs[..succ].copy_from_slice( 1636 &kvm_msrs.as_slice()[..succ] 1637 .iter() 1638 .map(|e| (*e).into()) 1639 .collect::<Vec<MsrEntry>>(), 1640 ); 1641 1642 Ok(succ) 1643 } 1644 1645 #[cfg(target_arch = "x86_64")] 1646 /// 1647 /// Setup the model-specific registers (MSR) for this vCPU. 1648 /// Returns the number of MSR entries actually written. 1649 /// 1650 fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> { 1651 let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect(); 1652 let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap(); 1653 self.fd 1654 .lock() 1655 .unwrap() 1656 .set_msrs(&kvm_msrs) 1657 .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into())) 1658 } 1659 1660 /// 1661 /// Returns the vcpu's current "multiprocessing state". 1662 /// 1663 fn get_mp_state(&self) -> cpu::Result<MpState> { 1664 Ok(self 1665 .fd 1666 .lock() 1667 .unwrap() 1668 .get_mp_state() 1669 .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))? 1670 .into()) 1671 } 1672 1673 /// 1674 /// Sets the vcpu's current "multiprocessing state". 1675 /// 1676 fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> { 1677 self.fd 1678 .lock() 1679 .unwrap() 1680 .set_mp_state(mp_state.into()) 1681 .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into())) 1682 } 1683 1684 #[cfg(target_arch = "x86_64")] 1685 /// 1686 /// Translates guest virtual address to guest physical address using the `KVM_TRANSLATE` ioctl. 1687 /// 1688 fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> { 1689 let tr = self 1690 .fd 1691 .lock() 1692 .unwrap() 1693 .translate_gva(gva) 1694 .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?; 1695 // tr.valid is set if the GVA is mapped to valid GPA. 1696 match tr.valid { 1697 0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!( 1698 "Invalid GVA: {:#x}", 1699 gva 1700 ))), 1701 _ => Ok((tr.physical_address, 0)), 1702 } 1703 } 1704 1705 /// 1706 /// Triggers the running of the current virtual CPU returning an exit reason. 
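    ///
    /// A hedged sketch of a minimal dispatch loop (most exit reasons elided):
    /// ```ignore
    /// loop {
    ///     match vcpu.run().unwrap() {
    ///         cpu::VmExit::Reset | cpu::VmExit::Shutdown => break,
    ///         _ => continue,
    ///     }
    /// }
    /// ```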
1707 /// 1708 fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> { 1709 match self.fd.lock().unwrap().run() { 1710 Ok(run) => match run { 1711 #[cfg(target_arch = "x86_64")] 1712 VcpuExit::IoIn(addr, data) => { 1713 if let Some(vm_ops) = &self.vm_ops { 1714 return vm_ops 1715 .pio_read(addr.into(), data) 1716 .map(|_| cpu::VmExit::Ignore) 1717 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1718 } 1719 1720 Ok(cpu::VmExit::Ignore) 1721 } 1722 #[cfg(target_arch = "x86_64")] 1723 VcpuExit::IoOut(addr, data) => { 1724 if let Some(vm_ops) = &self.vm_ops { 1725 return vm_ops 1726 .pio_write(addr.into(), data) 1727 .map(|_| cpu::VmExit::Ignore) 1728 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1729 } 1730 1731 Ok(cpu::VmExit::Ignore) 1732 } 1733 #[cfg(target_arch = "x86_64")] 1734 VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)), 1735 #[cfg(target_arch = "x86_64")] 1736 VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset), 1737 1738 #[cfg(target_arch = "aarch64")] 1739 VcpuExit::SystemEvent(event_type, flags) => { 1740 use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN}; 1741 // On Aarch64, when the VM is shutdown, run() returns 1742 // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN 1743 if event_type == KVM_SYSTEM_EVENT_RESET { 1744 Ok(cpu::VmExit::Reset) 1745 } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN { 1746 Ok(cpu::VmExit::Shutdown) 1747 } else { 1748 Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1749 "Unexpected system event with type 0x{:x}, flags 0x{:x?}", 1750 event_type, 1751 flags 1752 ))) 1753 } 1754 } 1755 1756 VcpuExit::MmioRead(addr, data) => { 1757 if let Some(vm_ops) = &self.vm_ops { 1758 return vm_ops 1759 .mmio_read(addr, data) 1760 .map(|_| cpu::VmExit::Ignore) 1761 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1762 } 1763 1764 Ok(cpu::VmExit::Ignore) 1765 } 1766 VcpuExit::MmioWrite(addr, data) => { 1767 if let Some(vm_ops) = &self.vm_ops { 1768 return vm_ops 1769 .mmio_write(addr, data) 1770 .map(|_| cpu::VmExit::Ignore) 1771 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1772 } 1773 1774 Ok(cpu::VmExit::Ignore) 1775 } 1776 VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv), 1777 #[cfg(feature = "tdx")] 1778 VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx), 1779 VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug), 1780 1781 r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1782 "Unexpected exit reason on vcpu run: {:?}", 1783 r 1784 ))), 1785 }, 1786 1787 Err(ref e) => match e.errno() { 1788 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore), 1789 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1790 "VCPU error {:?}", 1791 e 1792 ))), 1793 }, 1794 } 1795 } 1796 1797 #[cfg(target_arch = "x86_64")] 1798 /// 1799 /// Let the guest know that it has been paused, which prevents from 1800 /// potential soft lockups when being resumed. 1801 /// 1802 fn notify_guest_clock_paused(&self) -> cpu::Result<()> { 1803 if let Err(e) = self.fd.lock().unwrap().kvmclock_ctrl() { 1804 // Linux kernel returns -EINVAL if the PV clock isn't yet initialised 1805 // which could be because we're still in firmware or the guest doesn't 1806 // use KVM clock. 1807 if e.errno() != libc::EINVAL { 1808 return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into())); 1809 } 1810 } 1811 1812 Ok(()) 1813 } 1814 1815 /// 1816 /// Sets debug registers to set hardware breakpoints and/or enable single step. 
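    ///
    /// A hedged sketch: one hardware breakpoint plus single-stepping
    /// (the guest address is purely illustrative):
    /// ```ignore
    /// use vm_memory::GuestAddress;
    /// vcpu.set_guest_debug(&[GuestAddress(0x10_0000)], true).unwrap();
    /// ```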
1817 /// 1818 fn set_guest_debug( 1819 &self, 1820 addrs: &[vm_memory::GuestAddress], 1821 singlestep: bool, 1822 ) -> cpu::Result<()> { 1823 let mut dbg = kvm_guest_debug { 1824 #[cfg(target_arch = "x86_64")] 1825 control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP, 1826 #[cfg(target_arch = "aarch64")] 1827 control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW, 1828 ..Default::default() 1829 }; 1830 if singlestep { 1831 dbg.control |= KVM_GUESTDBG_SINGLESTEP; 1832 } 1833 1834 // Set the debug registers. 1835 // Here we assume that the number of addresses do not exceed what 1836 // `Hypervisor::get_guest_debug_hw_bps()` specifies. 1837 #[cfg(target_arch = "x86_64")] 1838 { 1839 // Set bits 9 and 10. 1840 // bit 9: GE (global exact breakpoint enable) flag. 1841 // bit 10: always 1. 1842 dbg.arch.debugreg[7] = 0x0600; 1843 1844 for (i, addr) in addrs.iter().enumerate() { 1845 dbg.arch.debugreg[i] = addr.0; 1846 // Set global breakpoint enable flag 1847 dbg.arch.debugreg[7] |= 2 << (i * 2); 1848 } 1849 } 1850 #[cfg(target_arch = "aarch64")] 1851 { 1852 for (i, addr) in addrs.iter().enumerate() { 1853 // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2): 1854 // bit 0: 1 (Enabled) 1855 // bit 1~2: 0b11 (PMC = EL1/EL0) 1856 // bit 5~8: 0b1111 (BAS = AArch64) 1857 // others: 0 1858 dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64; 1859 // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3): 1860 // bit 2~52: VA[2:52] 1861 dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0; 1862 } 1863 } 1864 self.fd 1865 .lock() 1866 .unwrap() 1867 .set_guest_debug(&dbg) 1868 .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into())) 1869 } 1870 1871 #[cfg(target_arch = "aarch64")] 1872 fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> { 1873 self.fd 1874 .lock() 1875 .unwrap() 1876 .vcpu_init(kvi) 1877 .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into())) 1878 } 1879 1880 #[cfg(target_arch = "aarch64")] 1881 fn vcpu_finalize(&self, feature: i32) -> cpu::Result<()> { 1882 self.fd 1883 .lock() 1884 .unwrap() 1885 .vcpu_finalize(&feature) 1886 .map_err(|e| cpu::HypervisorCpuError::VcpuFinalize(e.into())) 1887 } 1888 1889 /// 1890 /// Gets a list of the guest registers that are supported for the 1891 /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. 1892 /// 1893 #[cfg(target_arch = "aarch64")] 1894 fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> { 1895 self.fd 1896 .lock() 1897 .unwrap() 1898 .get_reg_list(reg_list) 1899 .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into())) 1900 } 1901 1902 /// 1903 /// Gets the value of a system register 1904 /// 1905 #[cfg(target_arch = "aarch64")] 1906 fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> { 1907 // 1908 // Arm Architecture Reference Manual defines the encoding of 1909 // AArch64 system registers, see 1910 // https://developer.arm.com/documentation/ddi0487 (chapter D12). 1911 // While KVM defines another ID for each AArch64 system register, 1912 // which is used in calling `KVM_G/SET_ONE_REG` to access a system 1913 // register of a guest. 1914 // A mapping exists between the Arm standard encoding and the KVM ID. 1915 // This function takes the standard u32 ID as input parameter, converts 1916 // it to the corresponding KVM ID, and call `KVM_GET_ONE_REG` API to 1917 // get the value of the system parameter. 
        //
        let id: u64 = KVM_REG_ARM64
            | KVM_REG_SIZE_U64
            | KVM_REG_ARM64_SYSREG as u64
            | ((((sys_reg) >> 5)
                & (KVM_REG_ARM64_SYSREG_OP0_MASK
                    | KVM_REG_ARM64_SYSREG_OP1_MASK
                    | KVM_REG_ARM64_SYSREG_CRN_MASK
                    | KVM_REG_ARM64_SYSREG_CRM_MASK
                    | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(id, &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
        Ok(u64::from_le_bytes(bytes))
    }

    ///
    /// Configure core registers for a given CPU.
    ///
    #[cfg(target_arch = "aarch64")]
    fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
        #[allow(non_upper_case_globals)]
        // PSR (Processor State Register) bits.
        // Taken from arch/arm64/include/uapi/asm/ptrace.h.
        const PSR_MODE_EL1h: u64 = 0x0000_0005;
        const PSR_F_BIT: u64 = 0x0000_0040;
        const PSR_I_BIT: u64 = 0x0000_0080;
        const PSR_A_BIT: u64 = 0x0000_0100;
        const PSR_D_BIT: u64 = 0x0000_0200;
        // Taken from arch/arm64/kvm/inject_fault.c.
        const PSTATE_FAULT_BITS_64: u64 =
            PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;

        let kreg_off = offset_of!(kvm_regs, regs);

        // Get the register index of the PSTATE (Processor State) register.
        let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
                &PSTATE_FAULT_BITS_64.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        // Other vCPUs are powered off initially awaiting PSCI wakeup.
        if cpu_id == 0 {
            // Set the PC (Program Counter) to the current program address (kernel address).
            let pc = offset_of!(user_pt_regs, pc) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
                    &boot_ip.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

            // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
            // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
            // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
            // We are choosing to place it at the end of DRAM. See `get_fdt_addr`.
            let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
                    &fdt_start.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
        }
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before almost everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change state of LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save. The code looks like
    /// it might as well be affected by internal state modifications of the
    /// GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a prepopulated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?.into();
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fall back to a slower method by getting MSRs
        // in chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
                0x400000b5, 0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr.into());
            }
        }

        let expected_num_msrs = msr_entries.len();
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;
        let tsc_khz = self.tsc_khz()?;

        Ok(VcpuKvmState {
            cpuid,
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
            tsc_khz,
        }
        .into())
    }

    ///
    /// Get the current AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = VcpuKvmState {
            mp_state: self.get_mp_state()?.into(),
            ..Default::default()
        };
        // Get core registers
        state.core_regs = self.get_regs()?.into();

        // Get system registers
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For Armv8 there are around 500 registers.
        let mut sys_regs: Vec<Register> = Vec::new();
        let mut reg_list = RegList::new(500).unwrap();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain: core registers and system
        // registers.
        // The register list contains the number of registers and their ids. We
        // will need to call KVM_GET_ONE_REG on each id in order to save all of
        // them. We carve out from the list the core registers, which are
        // represented in the kernel by the kvm_regs structure and for which we
        // can calculate the id based on the offset in the structure.
        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we simply call KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(*index, &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
            sys_regs.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: u64::from_le_bytes(bytes),
            });
        }

        state.sys_regs = sys_regs;

        Ok(state.into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state.into())?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        if let Some(freq) = state.tsc_khz {
            self.set_tsc_khz(freq)?;
        }

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fall back to a slower method by setting MSRs
        // in chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.len();
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let sub_msr_entries = state.msrs[start_pos..].to_vec();

                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }

    ///
    /// Restore the previously saved AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        // Set core registers
        self.set_regs(&state.core_regs.into())?;
        // Set system registers
        for reg in &state.sys_regs {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(reg.id, &reg.addr.to_le_bytes())
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }

        self.set_mp_state(state.mp_state.into())?;

        Ok(())
    }

    ///
    /// Initialize TDX for this CPU
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
        tdx_command(
            &self.fd.lock().unwrap().as_raw_fd(),
            TdxCommand::InitVcpu,
            0,
            hob_address,
        )
        .map_err(cpu::HypervisorCpuError::InitializeTdx)
    }

    ///
    /// Set the "immediate_exit" state
    ///
    fn set_immediate_exit(&self, exit: bool) {
        self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into());
    }

    ///
    /// Returns the details about the TDX exit reason
    ///
    #[cfg(feature = "tdx")]
    fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
        // SAFETY: accessing a union field in a valid structure
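        // Note: kvm-bindings does not expose the TDX exit layout, so the
        // anonymous union inside kvm_run is reinterpreted through the
        // locally-defined KvmTdxExit mirror declared at the top of this module.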
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;

        if tdx_vmcall.type_ != 0 {
            return Err(cpu::HypervisorCpuError::UnknownTdxVmCall);
        }

        match tdx_vmcall.subfunction {
            TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
            TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
                Ok(TdxExitDetails::SetupEventNotifyInterrupt)
            }
            _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
        }
    }

    ///
    /// Set the status code for TDX exit
    ///
    #[cfg(feature = "tdx")]
    fn set_tdx_status(&mut self, status: TdxExitStatus) {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
        // SAFETY: accessing a union field in a valid structure
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = match status {
            TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
            TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
        };
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU
    ///
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr!(msr_index::MSR_IA32_TSC),
            msr_data!(
                msr_index::MSR_IA32_MISC_ENABLE,
                msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
            ),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }

    #[cfg(target_arch = "aarch64")]
    fn has_pmu_support(&self) -> bool {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        self.fd.lock().unwrap().has_device_attr(&cpu_attr).is_ok()
    }

    #[cfg(target_arch = "aarch64")]
    fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        let cpu_attr_irq = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
            addr: &irq as *const u32 as u64,
            flags: 0,
        };
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr_irq)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the frequency of the TSC if available
    ///
    fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
        match self.fd.lock().unwrap().get_tsc_khz() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(None)
                } else {
                    Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
                }
            }
            Ok(v) => Ok(Some(v)),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the frequency of the TSC if available
    ///
    fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
        match self.fd.lock().unwrap().set_tsc_khz(freq) {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Trigger NMI interrupt
    ///
    fn nmi(&self) -> cpu::Result<()> {
        match self.fd.lock().unwrap().nmi() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::Nmi(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }
}

impl KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<XsaveState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
        let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_xsave(&xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .lock()
            .unwrap()
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as
    /// related states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .lock()
            .unwrap()
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
}