1 // Copyright © 2019 Intel Corporation 2 // 3 // SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause 4 // 5 // Copyright © 2020, Microsoft Corporation 6 // 7 // Copyright 2018-2019 CrowdStrike, Inc. 8 // 9 // 10 11 use std::any::Any; 12 use std::collections::HashMap; 13 #[cfg(target_arch = "x86_64")] 14 use std::fs::File; 15 #[cfg(target_arch = "x86_64")] 16 use std::os::unix::io::AsRawFd; 17 #[cfg(feature = "tdx")] 18 use std::os::unix::io::RawFd; 19 use std::result; 20 #[cfg(target_arch = "x86_64")] 21 use std::sync::atomic::{AtomicBool, Ordering}; 22 use std::sync::{Arc, Mutex, RwLock}; 23 24 use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd}; 25 use vmm_sys_util::eventfd::EventFd; 26 27 #[cfg(target_arch = "aarch64")] 28 use crate::aarch64::gic::KvmGicV3Its; 29 #[cfg(target_arch = "aarch64")] 30 pub use crate::aarch64::{ 31 check_required_kvm_extensions, gic::Gicv3ItsState as GicState, is_system_register, VcpuInit, 32 VcpuKvmState, 33 }; 34 #[cfg(target_arch = "aarch64")] 35 use crate::arch::aarch64::gic::{Vgic, VgicConfig}; 36 use crate::vm::{self, InterruptSourceConfig, VmOps}; 37 #[cfg(target_arch = "aarch64")] 38 use crate::{arm64_core_reg_id, offset_of}; 39 use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType}; 40 // x86_64 dependencies 41 #[cfg(target_arch = "x86_64")] 42 pub mod x86_64; 43 #[cfg(target_arch = "aarch64")] 44 use aarch64::{RegList, Register}; 45 #[cfg(target_arch = "x86_64")] 46 use kvm_bindings::{ 47 kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP, 48 KVM_GUESTDBG_USE_HW_BP, 49 }; 50 #[cfg(target_arch = "x86_64")] 51 use x86_64::check_required_kvm_extensions; 52 #[cfg(target_arch = "x86_64")] 53 pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState}; 54 55 #[cfg(target_arch = "x86_64")] 56 use crate::arch::x86::{ 57 CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, XsaveState, NUM_IOAPIC_PINS, 58 }; 59 #[cfg(target_arch = "x86_64")] 60 use crate::ClockData; 61 use crate::{ 62 CpuState, IoEventAddress, IrqRoutingEntry, MpState, StandardRegisters, UserMemoryRegion, 63 USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE, 64 }; 65 // aarch64 dependencies 66 #[cfg(target_arch = "aarch64")] 67 pub mod aarch64; 68 #[cfg(target_arch = "aarch64")] 69 use std::mem; 70 71 pub use kvm_bindings::{ 72 kvm_clock_data, kvm_create_device, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug, 73 kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_userspace_memory_region, 74 KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, 75 KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID, 76 }; 77 #[cfg(target_arch = "aarch64")] 78 use kvm_bindings::{ 79 kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64, 80 KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK, 81 KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK, 82 KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64, 83 }; 84 #[cfg(feature = "tdx")] 85 use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO}; 86 pub use kvm_ioctls::{Cap, Kvm}; 87 use thiserror::Error; 88 use vfio_ioctls::VfioDeviceFd; 89 #[cfg(feature = "tdx")] 90 use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr}; 91 pub use {kvm_bindings, kvm_ioctls}; 92 /// 93 /// Export generically-named wrappers of kvm-bindings for Unix-based platforms 94 /// 95 pub use { 96 
kvm_bindings::kvm_create_device as CreateDevice, kvm_bindings::kvm_device_attr as DeviceAttr, 97 kvm_bindings::kvm_run, kvm_bindings::kvm_vcpu_events as VcpuEvents, kvm_ioctls::VcpuExit, 98 }; 99 100 #[cfg(target_arch = "x86_64")] 101 const KVM_CAP_SGX_ATTRIBUTE: u32 = 196; 102 103 #[cfg(target_arch = "x86_64")] 104 use vmm_sys_util::ioctl_io_nr; 105 #[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))] 106 use vmm_sys_util::ioctl_ioc_nr; 107 108 #[cfg(target_arch = "x86_64")] 109 ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a); 110 111 #[cfg(feature = "tdx")] 112 const KVM_EXIT_TDX: u32 = 50; 113 #[cfg(feature = "tdx")] 114 const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002; 115 #[cfg(feature = "tdx")] 116 const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004; 117 #[cfg(feature = "tdx")] 118 const TDG_VP_VMCALL_SUCCESS: u64 = 0; 119 #[cfg(feature = "tdx")] 120 const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000; 121 122 #[cfg(feature = "tdx")] 123 ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong); 124 125 #[cfg(feature = "tdx")] 126 #[repr(u32)] 127 enum TdxCommand { 128 Capabilities = 0, 129 InitVm, 130 InitVcpu, 131 InitMemRegion, 132 Finalize, 133 } 134 135 #[cfg(feature = "tdx")] 136 pub enum TdxExitDetails { 137 GetQuote, 138 SetupEventNotifyInterrupt, 139 } 140 141 #[cfg(feature = "tdx")] 142 pub enum TdxExitStatus { 143 Success, 144 InvalidOperand, 145 } 146 147 #[cfg(feature = "tdx")] 148 const TDX_MAX_NR_CPUID_CONFIGS: usize = 6; 149 150 #[cfg(feature = "tdx")] 151 #[repr(C)] 152 #[derive(Debug, Default)] 153 pub struct TdxCpuidConfig { 154 pub leaf: u32, 155 pub sub_leaf: u32, 156 pub eax: u32, 157 pub ebx: u32, 158 pub ecx: u32, 159 pub edx: u32, 160 } 161 162 #[cfg(feature = "tdx")] 163 #[repr(C)] 164 #[derive(Debug, Default)] 165 pub struct TdxCapabilities { 166 pub attrs_fixed0: u64, 167 pub attrs_fixed1: u64, 168 pub xfam_fixed0: u64, 169 pub xfam_fixed1: u64, 170 pub nr_cpuid_configs: u32, 171 pub padding: u32, 172 pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS], 173 } 174 175 #[cfg(feature = "tdx")] 176 #[derive(Copy, Clone)] 177 pub struct KvmTdxExit { 178 pub type_: u32, 179 pub pad: u32, 180 pub u: KvmTdxExitU, 181 } 182 183 #[cfg(feature = "tdx")] 184 #[repr(C)] 185 #[derive(Copy, Clone)] 186 pub union KvmTdxExitU { 187 pub vmcall: KvmTdxExitVmcall, 188 } 189 190 #[cfg(feature = "tdx")] 191 #[repr(C)] 192 #[derive(Debug, Default, Copy, Clone, PartialEq)] 193 pub struct KvmTdxExitVmcall { 194 pub type_: u64, 195 pub subfunction: u64, 196 pub reg_mask: u64, 197 pub in_r12: u64, 198 pub in_r13: u64, 199 pub in_r14: u64, 200 pub in_r15: u64, 201 pub in_rbx: u64, 202 pub in_rdi: u64, 203 pub in_rsi: u64, 204 pub in_r8: u64, 205 pub in_r9: u64, 206 pub in_rdx: u64, 207 pub status_code: u64, 208 pub out_r11: u64, 209 pub out_r12: u64, 210 pub out_r13: u64, 211 pub out_r14: u64, 212 pub out_r15: u64, 213 pub out_rbx: u64, 214 pub out_rdi: u64, 215 pub out_rsi: u64, 216 pub out_r8: u64, 217 pub out_r9: u64, 218 pub out_rdx: u64, 219 } 220 221 impl From<kvm_userspace_memory_region> for UserMemoryRegion { 222 fn from(region: kvm_userspace_memory_region) -> Self { 223 let mut flags = USER_MEMORY_REGION_READ; 224 if region.flags & KVM_MEM_READONLY == 0 { 225 flags |= USER_MEMORY_REGION_WRITE; 226 } 227 if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 { 228 flags |= USER_MEMORY_REGION_LOG_DIRTY; 229 } 230 231 UserMemoryRegion { 232 slot: region.slot, 233 guest_phys_addr: region.guest_phys_addr, 234 memory_size: 
region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<UserMemoryRegion> for kvm_userspace_memory_region {
    fn from(region: UserMemoryRegion) -> Self {
        assert!(
            region.flags & USER_MEMORY_REGION_READ != 0,
            "KVM mapped memory is always readable"
        );

        let mut flags = 0;
        if region.flags & USER_MEMORY_REGION_WRITE == 0 {
            flags |= KVM_MEM_READONLY;
        }
        if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
            flags |= KVM_MEM_LOG_DIRTY_PAGES;
        }

        kvm_userspace_memory_region {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<kvm_mp_state> for MpState {
    fn from(s: kvm_mp_state) -> Self {
        MpState::Kvm(s)
    }
}

impl From<MpState> for kvm_mp_state {
    fn from(ms: MpState) -> Self {
        match ms {
            MpState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("MpState is not valid"),
        }
    }
}

impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: kvm_ioctls::IoEventAddress) -> Self {
        match a {
            kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuKvmState> for CpuState {
    fn from(s: VcpuKvmState) -> Self {
        CpuState::Kvm(s)
    }
}

impl From<CpuState> for VcpuKvmState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<kvm_clock_data> for ClockData {
    fn from(d: kvm_clock_data) -> Self {
        ClockData::Kvm(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for kvm_clock_data {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("ClockData is not valid"),
        }
    }
}

impl From<kvm_bindings::kvm_regs> for crate::StandardRegisters {
    fn from(s: kvm_bindings::kvm_regs) -> Self {
        crate::StandardRegisters::Kvm(s)
    }
}

impl From<crate::StandardRegisters> for kvm_bindings::kvm_regs {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
    fn from(s: kvm_irq_routing_entry) -> Self {
        IrqRoutingEntry::Kvm(s)
    }
}

impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

struct KvmDirtyLogSlot {
    slot: u32,
    guest_phys_addr:
u64, 374 memory_size: u64, 375 userspace_addr: u64, 376 } 377 378 /// Wrapper over KVM VM ioctls. 379 pub struct KvmVm { 380 fd: Arc<VmFd>, 381 #[cfg(target_arch = "x86_64")] 382 msrs: Vec<MsrEntry>, 383 dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>, 384 } 385 386 impl KvmVm { 387 /// 388 /// Creates an emulated device in the kernel. 389 /// 390 /// See the documentation for `KVM_CREATE_DEVICE`. 391 fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> { 392 let device_fd = self 393 .fd 394 .create_device(device) 395 .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?; 396 Ok(VfioDeviceFd::new_from_kvm(device_fd)) 397 } 398 /// Checks if a particular `Cap` is available. 399 pub fn check_extension(&self, c: Cap) -> bool { 400 self.fd.check_extension(c) 401 } 402 } 403 404 /// Implementation of Vm trait for KVM 405 /// 406 /// # Examples 407 /// 408 /// ``` 409 /// # use hypervisor::kvm::KvmHypervisor; 410 /// # use std::sync::Arc; 411 /// let kvm = KvmHypervisor::new().unwrap(); 412 /// let hypervisor = Arc::new(kvm); 413 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 414 /// ``` 415 impl vm::Vm for KvmVm { 416 #[cfg(target_arch = "x86_64")] 417 /// 418 /// Sets the address of the one-page region in the VM's address space. 419 /// 420 fn set_identity_map_address(&self, address: u64) -> vm::Result<()> { 421 self.fd 422 .set_identity_map_address(address) 423 .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into())) 424 } 425 426 #[cfg(target_arch = "x86_64")] 427 /// 428 /// Sets the address of the three-page region in the VM's address space. 429 /// 430 fn set_tss_address(&self, offset: usize) -> vm::Result<()> { 431 self.fd 432 .set_tss_address(offset) 433 .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into())) 434 } 435 436 /// 437 /// Creates an in-kernel interrupt controller. 438 /// 439 fn create_irq_chip(&self) -> vm::Result<()> { 440 self.fd 441 .create_irq_chip() 442 .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into())) 443 } 444 445 /// 446 /// Registers an event that will, when signaled, trigger the `gsi` IRQ. 447 /// 448 fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 449 self.fd 450 .register_irqfd(fd, gsi) 451 .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into())) 452 } 453 454 /// 455 /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ. 456 /// 457 fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> { 458 self.fd 459 .unregister_irqfd(fd, gsi) 460 .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into())) 461 } 462 463 /// 464 /// Creates a VcpuFd object from a vcpu RawFd. 465 /// 466 fn create_vcpu( 467 &self, 468 id: u8, 469 vm_ops: Option<Arc<dyn VmOps>>, 470 ) -> vm::Result<Arc<dyn cpu::Vcpu>> { 471 let fd = self 472 .fd 473 .create_vcpu(id as u64) 474 .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?; 475 let vcpu = KvmVcpu { 476 fd: Arc::new(Mutex::new(fd)), 477 #[cfg(target_arch = "x86_64")] 478 msrs: self.msrs.clone(), 479 vm_ops, 480 #[cfg(target_arch = "x86_64")] 481 hyperv_synic: AtomicBool::new(false), 482 }; 483 Ok(Arc::new(vcpu)) 484 } 485 486 #[cfg(target_arch = "aarch64")] 487 /// 488 /// Creates a virtual GIC device. 
489 /// 490 fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> { 491 let gic_device = KvmGicV3Its::new(self, config) 492 .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?; 493 Ok(Arc::new(Mutex::new(gic_device))) 494 } 495 496 /// 497 /// Registers an event to be signaled whenever a certain address is written to. 498 /// 499 fn register_ioevent( 500 &self, 501 fd: &EventFd, 502 addr: &IoEventAddress, 503 datamatch: Option<vm::DataMatch>, 504 ) -> vm::Result<()> { 505 let addr = &kvm_ioctls::IoEventAddress::from(*addr); 506 if let Some(dm) = datamatch { 507 match dm { 508 vm::DataMatch::DataMatch32(kvm_dm32) => self 509 .fd 510 .register_ioevent(fd, addr, kvm_dm32) 511 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 512 vm::DataMatch::DataMatch64(kvm_dm64) => self 513 .fd 514 .register_ioevent(fd, addr, kvm_dm64) 515 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())), 516 } 517 } else { 518 self.fd 519 .register_ioevent(fd, addr, NoDatamatch) 520 .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())) 521 } 522 } 523 524 /// 525 /// Unregisters an event from a certain address it has been previously registered to. 526 /// 527 fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> { 528 let addr = &kvm_ioctls::IoEventAddress::from(*addr); 529 self.fd 530 .unregister_ioevent(fd, addr, NoDatamatch) 531 .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into())) 532 } 533 534 /// 535 /// Constructs a routing entry 536 /// 537 fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry { 538 match &config { 539 InterruptSourceConfig::MsiIrq(cfg) => { 540 let mut kvm_route = kvm_irq_routing_entry { 541 gsi, 542 type_: KVM_IRQ_ROUTING_MSI, 543 ..Default::default() 544 }; 545 546 kvm_route.u.msi.address_lo = cfg.low_addr; 547 kvm_route.u.msi.address_hi = cfg.high_addr; 548 kvm_route.u.msi.data = cfg.data; 549 550 if self.check_extension(crate::kvm::Cap::MsiDevid) { 551 // On AArch64, there is limitation on the range of the 'devid', 552 // it cannot be greater than 65536 (the max of u16). 553 // 554 // BDF cannot be used directly, because 'segment' is in high 555 // 16 bits. The layout of the u32 BDF is: 556 // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --| 557 // | segment | bus | device | function | 558 // 559 // Now that we support 1 bus only in a segment, we can build a 560 // 'devid' by replacing the 'bus' bits with the low 8 bits of 561 // 'segment' data. 562 // This way we can resolve the range checking problem and give 563 // different `devid` to all the devices. Limitation is that at 564 // most 256 segments can be supported. 565 // 566 let modified_devid = (cfg.devid & 0x00ff_0000) >> 8 | cfg.devid & 0xff; 567 568 kvm_route.flags = KVM_MSI_VALID_DEVID; 569 kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid; 570 } 571 kvm_route.into() 572 } 573 InterruptSourceConfig::LegacyIrq(cfg) => { 574 let mut kvm_route = kvm_irq_routing_entry { 575 gsi, 576 type_: KVM_IRQ_ROUTING_IRQCHIP, 577 ..Default::default() 578 }; 579 kvm_route.u.irqchip.irqchip = cfg.irqchip; 580 kvm_route.u.irqchip.pin = cfg.pin; 581 582 kvm_route.into() 583 } 584 } 585 } 586 587 /// 588 /// Sets the GSI routing table entries, overwriting any previously set 589 /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl. 
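    ///
    /// A minimal usage sketch: entries are typically built with
    /// `make_routing_entry()` and applied in a single call, and an empty slice
    /// simply installs an empty routing table (not run as a doctest, since it
    /// assumes a KVM host):
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// # let kvm = KvmHypervisor::new().unwrap();
    /// # let hypervisor = Arc::new(kvm);
    /// # let vm = hypervisor.create_vm().expect("new VM fd creation failed");
    /// // Entries would normally come from vm.make_routing_entry(gsi, &config).
    /// vm.set_gsi_routing(&[]).unwrap();
    /// ```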
590 /// 591 fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> { 592 let mut irq_routing = 593 vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len()); 594 irq_routing[0].nr = entries.len() as u32; 595 irq_routing[0].flags = 0; 596 let entries: Vec<kvm_irq_routing_entry> = entries 597 .iter() 598 .map(|entry| match entry { 599 IrqRoutingEntry::Kvm(e) => *e, 600 #[allow(unreachable_patterns)] 601 _ => panic!("IrqRoutingEntry type is wrong"), 602 }) 603 .collect(); 604 605 // SAFETY: irq_routing initialized with entries.len() and now it is being turned into 606 // entries_slice with entries.len() again. It is guaranteed to be large enough to hold 607 // everything from entries. 608 unsafe { 609 let entries_slice: &mut [kvm_irq_routing_entry] = 610 irq_routing[0].entries.as_mut_slice(entries.len()); 611 entries_slice.copy_from_slice(&entries); 612 } 613 614 self.fd 615 .set_gsi_routing(&irq_routing[0]) 616 .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into())) 617 } 618 619 /// 620 /// Creates a memory region structure that can be used with {create/remove}_user_memory_region 621 /// 622 fn make_user_memory_region( 623 &self, 624 slot: u32, 625 guest_phys_addr: u64, 626 memory_size: u64, 627 userspace_addr: u64, 628 readonly: bool, 629 log_dirty_pages: bool, 630 ) -> UserMemoryRegion { 631 kvm_userspace_memory_region { 632 slot, 633 guest_phys_addr, 634 memory_size, 635 userspace_addr, 636 flags: if readonly { KVM_MEM_READONLY } else { 0 } 637 | if log_dirty_pages { 638 KVM_MEM_LOG_DIRTY_PAGES 639 } else { 640 0 641 }, 642 } 643 .into() 644 } 645 646 /// 647 /// Creates a guest physical memory region. 648 /// 649 fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> { 650 let mut region: kvm_userspace_memory_region = user_memory_region.into(); 651 652 if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 { 653 if (region.flags & KVM_MEM_READONLY) != 0 { 654 return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!( 655 "Error creating regions with both 'dirty-pages-log' and 'read-only'." 656 ))); 657 } 658 659 // Keep track of the regions that need dirty pages log 660 self.dirty_log_slots.write().unwrap().insert( 661 region.slot, 662 KvmDirtyLogSlot { 663 slot: region.slot, 664 guest_phys_addr: region.guest_phys_addr, 665 memory_size: region.memory_size, 666 userspace_addr: region.userspace_addr, 667 }, 668 ); 669 670 // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`. 671 // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`. 672 region.flags = 0; 673 } 674 675 // SAFETY: Safe because guest regions are guaranteed not to overlap. 676 unsafe { 677 self.fd 678 .set_user_memory_region(region) 679 .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into())) 680 } 681 } 682 683 /// 684 /// Removes a guest physical memory region. 685 /// 686 fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> { 687 let mut region: kvm_userspace_memory_region = user_memory_region.into(); 688 689 // Remove the corresponding entry from "self.dirty_log_slots" if needed 690 self.dirty_log_slots.write().unwrap().remove(®ion.slot); 691 692 // Setting the size to 0 means "remove" 693 region.memory_size = 0; 694 // SAFETY: Safe because guest regions are guaranteed not to overlap. 
695 unsafe { 696 self.fd 697 .set_user_memory_region(region) 698 .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into())) 699 } 700 } 701 702 /// 703 /// Returns the preferred CPU target type which can be emulated by KVM on underlying host. 704 /// 705 #[cfg(target_arch = "aarch64")] 706 fn get_preferred_target(&self, kvi: &mut VcpuInit) -> vm::Result<()> { 707 self.fd 708 .get_preferred_target(kvi) 709 .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into())) 710 } 711 712 #[cfg(target_arch = "x86_64")] 713 fn enable_split_irq(&self) -> vm::Result<()> { 714 // Create split irqchip 715 // Only the local APIC is emulated in kernel, both PICs and IOAPIC 716 // are not. 717 let mut cap = kvm_enable_cap { 718 cap: KVM_CAP_SPLIT_IRQCHIP, 719 ..Default::default() 720 }; 721 cap.args[0] = NUM_IOAPIC_PINS as u64; 722 self.fd 723 .enable_cap(&cap) 724 .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?; 725 Ok(()) 726 } 727 728 #[cfg(target_arch = "x86_64")] 729 fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> { 730 let mut cap = kvm_enable_cap { 731 cap: KVM_CAP_SGX_ATTRIBUTE, 732 ..Default::default() 733 }; 734 cap.args[0] = file.as_raw_fd() as u64; 735 self.fd 736 .enable_cap(&cap) 737 .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?; 738 Ok(()) 739 } 740 741 /// Retrieve guest clock. 742 #[cfg(target_arch = "x86_64")] 743 fn get_clock(&self) -> vm::Result<ClockData> { 744 Ok(self 745 .fd 746 .get_clock() 747 .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))? 748 .into()) 749 } 750 751 /// Set guest clock. 752 #[cfg(target_arch = "x86_64")] 753 fn set_clock(&self, data: &ClockData) -> vm::Result<()> { 754 let data = (*data).into(); 755 self.fd 756 .set_clock(&data) 757 .map_err(|e| vm::HypervisorVmError::SetClock(e.into())) 758 } 759 760 /// Create a device that is used for passthrough 761 fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> { 762 let mut vfio_dev = kvm_create_device { 763 type_: kvm_device_type_KVM_DEV_TYPE_VFIO, 764 fd: 0, 765 flags: 0, 766 }; 767 768 self.create_device(&mut vfio_dev) 769 .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into())) 770 } 771 772 /// 773 /// Start logging dirty pages 774 /// 775 fn start_dirty_log(&self) -> vm::Result<()> { 776 let dirty_log_slots = self.dirty_log_slots.read().unwrap(); 777 for (_, s) in dirty_log_slots.iter() { 778 let region = kvm_userspace_memory_region { 779 slot: s.slot, 780 guest_phys_addr: s.guest_phys_addr, 781 memory_size: s.memory_size, 782 userspace_addr: s.userspace_addr, 783 flags: KVM_MEM_LOG_DIRTY_PAGES, 784 }; 785 // SAFETY: Safe because guest regions are guaranteed not to overlap. 786 unsafe { 787 self.fd 788 .set_user_memory_region(region) 789 .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?; 790 } 791 } 792 793 Ok(()) 794 } 795 796 /// 797 /// Stop logging dirty pages 798 /// 799 fn stop_dirty_log(&self) -> vm::Result<()> { 800 let dirty_log_slots = self.dirty_log_slots.read().unwrap(); 801 for (_, s) in dirty_log_slots.iter() { 802 let region = kvm_userspace_memory_region { 803 slot: s.slot, 804 guest_phys_addr: s.guest_phys_addr, 805 memory_size: s.memory_size, 806 userspace_addr: s.userspace_addr, 807 flags: 0, 808 }; 809 // SAFETY: Safe because guest regions are guaranteed not to overlap. 
810 unsafe { 811 self.fd 812 .set_user_memory_region(region) 813 .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?; 814 } 815 } 816 817 Ok(()) 818 } 819 820 /// 821 /// Get dirty pages bitmap (one bit per page) 822 /// 823 fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> { 824 self.fd 825 .get_dirty_log(slot, memory_size as usize) 826 .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into())) 827 } 828 829 /// 830 /// Initialize TDX for this VM 831 /// 832 #[cfg(feature = "tdx")] 833 fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> { 834 const TDX_ATTR_SEPT_VE_DISABLE: usize = 28; 835 836 let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> = 837 cpuid.iter().map(|e| (*e).into()).collect(); 838 cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default()); 839 840 #[repr(C)] 841 struct TdxInitVm { 842 attributes: u64, 843 max_vcpus: u32, 844 padding: u32, 845 mrconfigid: [u64; 6], 846 mrowner: [u64; 6], 847 mrownerconfig: [u64; 6], 848 cpuid_nent: u32, 849 cpuid_padding: u32, 850 cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256], 851 } 852 let data = TdxInitVm { 853 attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE, 854 max_vcpus, 855 padding: 0, 856 mrconfigid: [0; 6], 857 mrowner: [0; 6], 858 mrownerconfig: [0; 6], 859 cpuid_nent: cpuid.len() as u32, 860 cpuid_padding: 0, 861 cpuid_entries: cpuid.as_slice().try_into().unwrap(), 862 }; 863 864 tdx_command( 865 &self.fd.as_raw_fd(), 866 TdxCommand::InitVm, 867 0, 868 &data as *const _ as u64, 869 ) 870 .map_err(vm::HypervisorVmError::InitializeTdx) 871 } 872 873 /// 874 /// Finalize the TDX setup for this VM 875 /// 876 #[cfg(feature = "tdx")] 877 fn tdx_finalize(&self) -> vm::Result<()> { 878 tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0) 879 .map_err(vm::HypervisorVmError::FinalizeTdx) 880 } 881 882 /// 883 /// Initialize memory regions for the TDX VM 884 /// 885 #[cfg(feature = "tdx")] 886 fn tdx_init_memory_region( 887 &self, 888 host_address: u64, 889 guest_address: u64, 890 size: u64, 891 measure: bool, 892 ) -> vm::Result<()> { 893 #[repr(C)] 894 struct TdxInitMemRegion { 895 host_address: u64, 896 guest_address: u64, 897 pages: u64, 898 } 899 let data = TdxInitMemRegion { 900 host_address, 901 guest_address, 902 pages: size / 4096, 903 }; 904 905 tdx_command( 906 &self.fd.as_raw_fd(), 907 TdxCommand::InitMemRegion, 908 u32::from(measure), 909 &data as *const _ as u64, 910 ) 911 .map_err(vm::HypervisorVmError::InitMemRegionTdx) 912 } 913 914 /// Downcast to the underlying KvmVm type 915 fn as_any(&self) -> &dyn Any { 916 self 917 } 918 } 919 920 #[cfg(feature = "tdx")] 921 fn tdx_command( 922 fd: &RawFd, 923 command: TdxCommand, 924 flags: u32, 925 data: u64, 926 ) -> std::result::Result<(), std::io::Error> { 927 #[repr(C)] 928 struct TdxIoctlCmd { 929 command: TdxCommand, 930 flags: u32, 931 data: u64, 932 error: u64, 933 unused: u64, 934 } 935 let cmd = TdxIoctlCmd { 936 command, 937 flags, 938 data, 939 error: 0, 940 unused: 0, 941 }; 942 // SAFETY: FFI call. All input parameters are valid. 943 let ret = unsafe { 944 ioctl_with_val( 945 fd, 946 KVM_MEMORY_ENCRYPT_OP(), 947 &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong, 948 ) 949 }; 950 951 if ret < 0 { 952 return Err(std::io::Error::last_os_error()); 953 } 954 Ok(()) 955 } 956 957 /// Wrapper over KVM system ioctls. 
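///
/// A minimal construction sketch (probes `/dev/kvm` first; mirrors the crate's
/// other doc examples):
///
/// ```
/// use hypervisor::kvm::KvmHypervisor;
/// if KvmHypervisor::is_available().unwrap() {
///     let _hypervisor = KvmHypervisor::new().unwrap();
/// }
/// ```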
958 pub struct KvmHypervisor { 959 kvm: Kvm, 960 } 961 962 impl KvmHypervisor { 963 #[cfg(target_arch = "x86_64")] 964 /// 965 /// Retrieve the list of MSRs supported by the hypervisor. 966 /// 967 fn get_msr_list(&self) -> hypervisor::Result<MsrList> { 968 self.kvm 969 .get_msr_index_list() 970 .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into())) 971 } 972 } 973 974 /// Enum for KVM related error 975 #[derive(Debug, Error)] 976 pub enum KvmError { 977 #[error("Capability missing: {0:?}")] 978 CapabilityMissing(Cap), 979 } 980 981 pub type KvmResult<T> = result::Result<T, KvmError>; 982 983 impl KvmHypervisor { 984 /// Create a hypervisor based on Kvm 985 #[allow(clippy::new_ret_no_self)] 986 pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> { 987 let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?; 988 let api_version = kvm_obj.get_api_version(); 989 990 if api_version != kvm_bindings::KVM_API_VERSION as i32 { 991 return Err(hypervisor::HypervisorError::IncompatibleApiVersion); 992 } 993 994 Ok(Arc::new(KvmHypervisor { kvm: kvm_obj })) 995 } 996 997 /// Check if the hypervisor is available 998 pub fn is_available() -> hypervisor::Result<bool> { 999 match std::fs::metadata("/dev/kvm") { 1000 Ok(_) => Ok(true), 1001 Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false), 1002 Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck( 1003 err.into(), 1004 )), 1005 } 1006 } 1007 } 1008 1009 /// Implementation of Hypervisor trait for KVM 1010 /// 1011 /// # Examples 1012 /// 1013 /// ``` 1014 /// # use hypervisor::kvm::KvmHypervisor; 1015 /// # use std::sync::Arc; 1016 /// let kvm = KvmHypervisor::new().unwrap(); 1017 /// let hypervisor = Arc::new(kvm); 1018 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 1019 /// ``` 1020 impl hypervisor::Hypervisor for KvmHypervisor { 1021 /// 1022 /// Returns the type of the hypervisor 1023 /// 1024 fn hypervisor_type(&self) -> HypervisorType { 1025 HypervisorType::Kvm 1026 } 1027 1028 /// Create a KVM vm object of a specific VM type and return the object as Vm trait object 1029 /// 1030 /// # Examples 1031 /// 1032 /// ``` 1033 /// # use hypervisor::kvm::KvmHypervisor; 1034 /// use hypervisor::kvm::KvmVm; 1035 /// let hypervisor = KvmHypervisor::new().unwrap(); 1036 /// let vm = hypervisor.create_vm_with_type(0).unwrap(); 1037 /// ``` 1038 fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> { 1039 let fd: VmFd; 1040 loop { 1041 match self.kvm.create_vm_with_type(vm_type) { 1042 Ok(res) => fd = res, 1043 Err(e) => { 1044 if e.errno() == libc::EINTR { 1045 // If the error returned is EINTR, which means the 1046 // ioctl has been interrupted, we have to retry as 1047 // this can't be considered as a regular error. 
1048 continue; 1049 } else { 1050 return Err(hypervisor::HypervisorError::VmCreate(e.into())); 1051 } 1052 } 1053 } 1054 break; 1055 } 1056 1057 let vm_fd = Arc::new(fd); 1058 1059 #[cfg(target_arch = "x86_64")] 1060 { 1061 let msr_list = self.get_msr_list()?; 1062 let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize; 1063 let mut msrs: Vec<MsrEntry> = vec![ 1064 MsrEntry { 1065 ..Default::default() 1066 }; 1067 num_msrs 1068 ]; 1069 let indices = msr_list.as_slice(); 1070 for (pos, index) in indices.iter().enumerate() { 1071 msrs[pos].index = *index; 1072 } 1073 1074 Ok(Arc::new(KvmVm { 1075 fd: vm_fd, 1076 msrs, 1077 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())), 1078 })) 1079 } 1080 1081 #[cfg(target_arch = "aarch64")] 1082 { 1083 Ok(Arc::new(KvmVm { 1084 fd: vm_fd, 1085 dirty_log_slots: Arc::new(RwLock::new(HashMap::new())), 1086 })) 1087 } 1088 } 1089 1090 /// Create a KVM vm object and return the object as Vm trait object 1091 /// 1092 /// # Examples 1093 /// 1094 /// ``` 1095 /// # use hypervisor::kvm::KvmHypervisor; 1096 /// use hypervisor::kvm::KvmVm; 1097 /// let hypervisor = KvmHypervisor::new().unwrap(); 1098 /// let vm = hypervisor.create_vm().unwrap(); 1099 /// ``` 1100 fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> { 1101 #[allow(unused_mut)] 1102 let mut vm_type: u64 = 0; // Create with default platform type 1103 1104 // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA 1105 // size from the host and use that when creating the VM, which may 1106 // avoid unnecessary VM creation failures. 1107 #[cfg(target_arch = "aarch64")] 1108 if self.kvm.check_extension(Cap::ArmVmIPASize) { 1109 vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap(); 1110 } 1111 1112 self.create_vm_with_type(vm_type) 1113 } 1114 1115 fn check_required_extensions(&self) -> hypervisor::Result<()> { 1116 check_required_kvm_extensions(&self.kvm) 1117 .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into())) 1118 } 1119 1120 #[cfg(target_arch = "x86_64")] 1121 /// 1122 /// X86 specific call to get the system supported CPUID values. 1123 /// 1124 fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> { 1125 let kvm_cpuid = self 1126 .kvm 1127 .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES) 1128 .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?; 1129 1130 let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect(); 1131 1132 Ok(v) 1133 } 1134 1135 #[cfg(target_arch = "aarch64")] 1136 /// 1137 /// Retrieve AArch64 host maximum IPA size supported by KVM. 
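    ///
    /// A sketch of the intended use (see `create_vm` above): on hosts exposing
    /// `Cap::ArmVmIPASize`, the returned limit is passed as the KVM VM type
    /// (not run as a doctest, since it assumes an AArch64 KVM host):
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// # let kvm = KvmHypervisor::new().unwrap();
    /// # let hypervisor = Arc::new(kvm);
    /// let ipa = hypervisor.get_host_ipa_limit();
    /// let vm = hypervisor.create_vm_with_type(ipa as u64).unwrap();
    /// ```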
1138 /// 1139 fn get_host_ipa_limit(&self) -> i32 { 1140 self.kvm.get_host_ipa_limit() 1141 } 1142 1143 /// 1144 /// Retrieve TDX capabilities 1145 /// 1146 #[cfg(feature = "tdx")] 1147 fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> { 1148 let data = TdxCapabilities { 1149 nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32, 1150 ..Default::default() 1151 }; 1152 1153 tdx_command( 1154 &self.kvm.as_raw_fd(), 1155 TdxCommand::Capabilities, 1156 0, 1157 &data as *const _ as u64, 1158 ) 1159 .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?; 1160 1161 Ok(data) 1162 } 1163 1164 /// 1165 /// Get the number of supported hardware breakpoints 1166 /// 1167 fn get_guest_debug_hw_bps(&self) -> usize { 1168 #[cfg(target_arch = "x86_64")] 1169 { 1170 4 1171 } 1172 #[cfg(target_arch = "aarch64")] 1173 { 1174 self.kvm.get_guest_debug_hw_bps() as usize 1175 } 1176 } 1177 1178 /// Get maximum number of vCPUs 1179 fn get_max_vcpus(&self) -> u32 { 1180 self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32 1181 } 1182 } 1183 1184 /// Vcpu struct for KVM 1185 pub struct KvmVcpu { 1186 fd: Arc<Mutex<VcpuFd>>, 1187 #[cfg(target_arch = "x86_64")] 1188 msrs: Vec<MsrEntry>, 1189 vm_ops: Option<Arc<dyn vm::VmOps>>, 1190 #[cfg(target_arch = "x86_64")] 1191 hyperv_synic: AtomicBool, 1192 } 1193 1194 /// Implementation of Vcpu trait for KVM 1195 /// 1196 /// # Examples 1197 /// 1198 /// ``` 1199 /// # use hypervisor::kvm::KvmHypervisor; 1200 /// # use std::sync::Arc; 1201 /// let kvm = KvmHypervisor::new().unwrap(); 1202 /// let hypervisor = Arc::new(kvm); 1203 /// let vm = hypervisor.create_vm().expect("new VM fd creation failed"); 1204 /// let vcpu = vm.create_vcpu(0, None).unwrap(); 1205 /// ``` 1206 impl cpu::Vcpu for KvmVcpu { 1207 /// 1208 /// Returns StandardRegisters with default value set 1209 /// 1210 #[cfg(target_arch = "x86_64")] 1211 fn create_standard_regs(&self) -> StandardRegisters { 1212 kvm_bindings::kvm_regs::default().into() 1213 } 1214 #[cfg(target_arch = "x86_64")] 1215 /// 1216 /// Returns the vCPU general purpose registers. 1217 /// 1218 fn get_regs(&self) -> cpu::Result<StandardRegisters> { 1219 Ok(self 1220 .fd 1221 .lock() 1222 .unwrap() 1223 .get_regs() 1224 .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))? 1225 .into()) 1226 } 1227 1228 /// 1229 /// Returns the vCPU general purpose registers. 1230 /// The `KVM_GET_REGS` ioctl is not available on AArch64, `KVM_GET_ONE_REG` 1231 /// is used to get registers one by one. 1232 /// 1233 #[cfg(target_arch = "aarch64")] 1234 fn get_regs(&self) -> cpu::Result<StandardRegisters> { 1235 let mut state = kvm_regs::default(); 1236 let mut off = offset_of!(user_pt_regs, regs); 1237 // There are 31 user_pt_regs: 1238 // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72 1239 // These actually are the general-purpose registers of the Armv8-a 1240 // architecture (i.e x0-x30 if used as a 64bit register or w0-30 when used as a 32bit register). 1241 for i in 0..31 { 1242 let mut bytes = [0_u8; 8]; 1243 self.fd 1244 .lock() 1245 .unwrap() 1246 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1247 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1248 state.regs.regs[i] = u64::from_le_bytes(bytes); 1249 off += std::mem::size_of::<u64>(); 1250 } 1251 1252 // We are now entering the "Other register" section of the ARMv8-a architecture. 1253 // First one, stack pointer. 
1254 let off = offset_of!(user_pt_regs, sp); 1255 let mut bytes = [0_u8; 8]; 1256 self.fd 1257 .lock() 1258 .unwrap() 1259 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1260 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1261 state.regs.sp = u64::from_le_bytes(bytes); 1262 1263 // Second one, the program counter. 1264 let off = offset_of!(user_pt_regs, pc); 1265 let mut bytes = [0_u8; 8]; 1266 self.fd 1267 .lock() 1268 .unwrap() 1269 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1270 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1271 state.regs.pc = u64::from_le_bytes(bytes); 1272 1273 // Next is the processor state. 1274 let off = offset_of!(user_pt_regs, pstate); 1275 let mut bytes = [0_u8; 8]; 1276 self.fd 1277 .lock() 1278 .unwrap() 1279 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1280 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1281 state.regs.pstate = u64::from_le_bytes(bytes); 1282 1283 // The stack pointer associated with EL1 1284 let off = offset_of!(kvm_regs, sp_el1); 1285 let mut bytes = [0_u8; 8]; 1286 self.fd 1287 .lock() 1288 .unwrap() 1289 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1290 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1291 state.sp_el1 = u64::from_le_bytes(bytes); 1292 1293 // Exception Link Register for EL1, when taking an exception to EL1, this register 1294 // holds the address to which to return afterwards. 1295 let off = offset_of!(kvm_regs, elr_el1); 1296 let mut bytes = [0_u8; 8]; 1297 self.fd 1298 .lock() 1299 .unwrap() 1300 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1301 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1302 state.elr_el1 = u64::from_le_bytes(bytes); 1303 1304 // Saved Program Status Registers, there are 5 of them used in the kernel. 
1305 let mut off = offset_of!(kvm_regs, spsr); 1306 for i in 0..KVM_NR_SPSR as usize { 1307 let mut bytes = [0_u8; 8]; 1308 self.fd 1309 .lock() 1310 .unwrap() 1311 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes) 1312 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1313 state.spsr[i] = u64::from_le_bytes(bytes); 1314 off += std::mem::size_of::<u64>(); 1315 } 1316 1317 // Now moving on to floating point registers which are stored in the user_fpsimd_state in the kernel: 1318 // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53 1319 let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs); 1320 for i in 0..32 { 1321 let mut bytes = [0_u8; 16]; 1322 self.fd 1323 .lock() 1324 .unwrap() 1325 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes) 1326 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1327 state.fp_regs.vregs[i] = u128::from_le_bytes(bytes); 1328 off += mem::size_of::<u128>(); 1329 } 1330 1331 // Floating-point Status Register 1332 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr); 1333 let mut bytes = [0_u8; 4]; 1334 self.fd 1335 .lock() 1336 .unwrap() 1337 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes) 1338 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1339 state.fp_regs.fpsr = u32::from_le_bytes(bytes); 1340 1341 // Floating-point Control Register 1342 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr); 1343 let mut bytes = [0_u8; 4]; 1344 self.fd 1345 .lock() 1346 .unwrap() 1347 .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes) 1348 .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?; 1349 state.fp_regs.fpcr = u32::from_le_bytes(bytes); 1350 Ok(state.into()) 1351 } 1352 1353 #[cfg(target_arch = "x86_64")] 1354 /// 1355 /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl. 1356 /// 1357 fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> { 1358 let regs = (*regs).into(); 1359 self.fd 1360 .lock() 1361 .unwrap() 1362 .set_regs(®s) 1363 .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into())) 1364 } 1365 1366 /// 1367 /// Sets the vCPU general purpose registers. 1368 /// The `KVM_SET_REGS` ioctl is not available on AArch64, `KVM_SET_ONE_REG` 1369 /// is used to set registers one by one. 1370 /// 1371 #[cfg(target_arch = "aarch64")] 1372 fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> { 1373 // The function follows the exact identical order from `state`. Look there 1374 // for some additional info on registers. 
1375 let kvm_regs_state: kvm_regs = (*state).into(); 1376 let mut off = offset_of!(user_pt_regs, regs); 1377 for i in 0..31 { 1378 self.fd 1379 .lock() 1380 .unwrap() 1381 .set_one_reg( 1382 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1383 &kvm_regs_state.regs.regs[i].to_le_bytes(), 1384 ) 1385 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1386 off += std::mem::size_of::<u64>(); 1387 } 1388 1389 let off = offset_of!(user_pt_regs, sp); 1390 self.fd 1391 .lock() 1392 .unwrap() 1393 .set_one_reg( 1394 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1395 &kvm_regs_state.regs.sp.to_le_bytes(), 1396 ) 1397 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1398 1399 let off = offset_of!(user_pt_regs, pc); 1400 self.fd 1401 .lock() 1402 .unwrap() 1403 .set_one_reg( 1404 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1405 &kvm_regs_state.regs.pc.to_le_bytes(), 1406 ) 1407 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1408 1409 let off = offset_of!(user_pt_regs, pstate); 1410 self.fd 1411 .lock() 1412 .unwrap() 1413 .set_one_reg( 1414 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1415 &kvm_regs_state.regs.pstate.to_le_bytes(), 1416 ) 1417 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1418 1419 let off = offset_of!(kvm_regs, sp_el1); 1420 self.fd 1421 .lock() 1422 .unwrap() 1423 .set_one_reg( 1424 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1425 &kvm_regs_state.sp_el1.to_le_bytes(), 1426 ) 1427 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1428 1429 let off = offset_of!(kvm_regs, elr_el1); 1430 self.fd 1431 .lock() 1432 .unwrap() 1433 .set_one_reg( 1434 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1435 &kvm_regs_state.elr_el1.to_le_bytes(), 1436 ) 1437 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1438 1439 let mut off = offset_of!(kvm_regs, spsr); 1440 for i in 0..KVM_NR_SPSR as usize { 1441 self.fd 1442 .lock() 1443 .unwrap() 1444 .set_one_reg( 1445 arm64_core_reg_id!(KVM_REG_SIZE_U64, off), 1446 &kvm_regs_state.spsr[i].to_le_bytes(), 1447 ) 1448 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1449 off += std::mem::size_of::<u64>(); 1450 } 1451 1452 let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs); 1453 for i in 0..32 { 1454 self.fd 1455 .lock() 1456 .unwrap() 1457 .set_one_reg( 1458 arm64_core_reg_id!(KVM_REG_SIZE_U128, off), 1459 &kvm_regs_state.fp_regs.vregs[i].to_le_bytes(), 1460 ) 1461 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1462 off += mem::size_of::<u128>(); 1463 } 1464 1465 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr); 1466 self.fd 1467 .lock() 1468 .unwrap() 1469 .set_one_reg( 1470 arm64_core_reg_id!(KVM_REG_SIZE_U32, off), 1471 &kvm_regs_state.fp_regs.fpsr.to_le_bytes(), 1472 ) 1473 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1474 1475 let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr); 1476 self.fd 1477 .lock() 1478 .unwrap() 1479 .set_one_reg( 1480 arm64_core_reg_id!(KVM_REG_SIZE_U32, off), 1481 &kvm_regs_state.fp_regs.fpcr.to_le_bytes(), 1482 ) 1483 .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?; 1484 Ok(()) 1485 } 1486 1487 #[cfg(target_arch = "x86_64")] 1488 /// 1489 /// Returns the vCPU special registers. 
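    ///
    /// A minimal usage sketch (mirrors the crate's other doc examples; assumes
    /// `/dev/kvm` is available):
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// # let kvm = KvmHypervisor::new().unwrap();
    /// # let hypervisor = Arc::new(kvm);
    /// # let vm = hypervisor.create_vm().expect("new VM fd creation failed");
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let _sregs = vcpu.get_sregs().unwrap();
    /// ```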
1490 /// 1491 fn get_sregs(&self) -> cpu::Result<SpecialRegisters> { 1492 Ok(self 1493 .fd 1494 .lock() 1495 .unwrap() 1496 .get_sregs() 1497 .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))? 1498 .into()) 1499 } 1500 1501 #[cfg(target_arch = "x86_64")] 1502 /// 1503 /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl. 1504 /// 1505 fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> { 1506 let sregs = (*sregs).into(); 1507 self.fd 1508 .lock() 1509 .unwrap() 1510 .set_sregs(&sregs) 1511 .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into())) 1512 } 1513 1514 #[cfg(target_arch = "x86_64")] 1515 /// 1516 /// Returns the floating point state (FPU) from the vCPU. 1517 /// 1518 fn get_fpu(&self) -> cpu::Result<FpuState> { 1519 Ok(self 1520 .fd 1521 .lock() 1522 .unwrap() 1523 .get_fpu() 1524 .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))? 1525 .into()) 1526 } 1527 1528 #[cfg(target_arch = "x86_64")] 1529 /// 1530 /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl. 1531 /// 1532 fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> { 1533 let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into(); 1534 self.fd 1535 .lock() 1536 .unwrap() 1537 .set_fpu(&fpu) 1538 .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into())) 1539 } 1540 1541 #[cfg(target_arch = "x86_64")] 1542 /// 1543 /// X86 specific call to setup the CPUID registers. 1544 /// 1545 fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> { 1546 let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> = 1547 cpuid.iter().map(|e| (*e).into()).collect(); 1548 let kvm_cpuid = <CpuId>::from_entries(&cpuid) 1549 .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?; 1550 1551 self.fd 1552 .lock() 1553 .unwrap() 1554 .set_cpuid2(&kvm_cpuid) 1555 .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into())) 1556 } 1557 1558 #[cfg(target_arch = "x86_64")] 1559 /// 1560 /// X86 specific call to enable HyperV SynIC 1561 /// 1562 fn enable_hyperv_synic(&self) -> cpu::Result<()> { 1563 // Update the information about Hyper-V SynIC being enabled and 1564 // emulated as it will influence later which MSRs should be saved. 1565 self.hyperv_synic.store(true, Ordering::Release); 1566 1567 let cap = kvm_enable_cap { 1568 cap: KVM_CAP_HYPERV_SYNIC, 1569 ..Default::default() 1570 }; 1571 self.fd 1572 .lock() 1573 .unwrap() 1574 .enable_cap(&cap) 1575 .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into())) 1576 } 1577 1578 /// 1579 /// X86 specific call to retrieve the CPUID registers. 1580 /// 1581 #[cfg(target_arch = "x86_64")] 1582 fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> { 1583 let kvm_cpuid = self 1584 .fd 1585 .lock() 1586 .unwrap() 1587 .get_cpuid2(num_entries) 1588 .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?; 1589 1590 let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect(); 1591 1592 Ok(v) 1593 } 1594 1595 #[cfg(target_arch = "x86_64")] 1596 /// 1597 /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 1598 /// 1599 fn get_lapic(&self) -> cpu::Result<LapicState> { 1600 Ok(self 1601 .fd 1602 .lock() 1603 .unwrap() 1604 .get_lapic() 1605 .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))? 1606 .into()) 1607 } 1608 1609 #[cfg(target_arch = "x86_64")] 1610 /// 1611 /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller). 
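    ///
    /// A minimal round-trip sketch (assumes the split irqchip has been enabled
    /// first, as in the `state()` example below; not run as a doctest):
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// # let kvm = KvmHypervisor::new().unwrap();
    /// # let hypervisor = Arc::new(kvm);
    /// # let vm = hypervisor.create_vm().expect("new VM fd creation failed");
    /// # vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let lapic = vcpu.get_lapic().unwrap();
    /// vcpu.set_lapic(&lapic).unwrap();
    /// ```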
1612 /// 1613 fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> { 1614 let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into(); 1615 self.fd 1616 .lock() 1617 .unwrap() 1618 .set_lapic(&klapic) 1619 .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into())) 1620 } 1621 1622 #[cfg(target_arch = "x86_64")] 1623 /// 1624 /// Returns the model-specific registers (MSR) for this vCPU. 1625 /// 1626 fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> { 1627 let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect(); 1628 let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap(); 1629 let succ = self 1630 .fd 1631 .lock() 1632 .unwrap() 1633 .get_msrs(&mut kvm_msrs) 1634 .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?; 1635 1636 msrs[..succ].copy_from_slice( 1637 &kvm_msrs.as_slice()[..succ] 1638 .iter() 1639 .map(|e| (*e).into()) 1640 .collect::<Vec<MsrEntry>>(), 1641 ); 1642 1643 Ok(succ) 1644 } 1645 1646 #[cfg(target_arch = "x86_64")] 1647 /// 1648 /// Setup the model-specific registers (MSR) for this vCPU. 1649 /// Returns the number of MSR entries actually written. 1650 /// 1651 fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> { 1652 let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect(); 1653 let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap(); 1654 self.fd 1655 .lock() 1656 .unwrap() 1657 .set_msrs(&kvm_msrs) 1658 .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into())) 1659 } 1660 1661 /// 1662 /// Returns the vcpu's current "multiprocessing state". 1663 /// 1664 fn get_mp_state(&self) -> cpu::Result<MpState> { 1665 Ok(self 1666 .fd 1667 .lock() 1668 .unwrap() 1669 .get_mp_state() 1670 .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))? 1671 .into()) 1672 } 1673 1674 /// 1675 /// Sets the vcpu's current "multiprocessing state". 1676 /// 1677 fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> { 1678 self.fd 1679 .lock() 1680 .unwrap() 1681 .set_mp_state(mp_state.into()) 1682 .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into())) 1683 } 1684 1685 #[cfg(target_arch = "x86_64")] 1686 /// 1687 /// Translates guest virtual address to guest physical address using the `KVM_TRANSLATE` ioctl. 1688 /// 1689 fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> { 1690 let tr = self 1691 .fd 1692 .lock() 1693 .unwrap() 1694 .translate_gva(gva) 1695 .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?; 1696 // tr.valid is set if the GVA is mapped to valid GPA. 1697 match tr.valid { 1698 0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!( 1699 "Invalid GVA: {:#x}", 1700 gva 1701 ))), 1702 _ => Ok((tr.physical_address, 0)), 1703 } 1704 } 1705 1706 /// 1707 /// Triggers the running of the current virtual CPU returning an exit reason. 
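    ///
    /// A sketch of the dispatch loop a VMM would build on top of this method
    /// (the vCPU is assumed to be fully configured, so the example is not run
    /// as a doctest):
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// # let kvm = KvmHypervisor::new().unwrap();
    /// # let hypervisor = Arc::new(kvm);
    /// # let vm = hypervisor.create_vm().expect("new VM fd creation failed");
    /// # let vcpu = vm.create_vcpu(0, None).unwrap();
    /// use hypervisor::cpu::VmExit;
    /// loop {
    ///     match vcpu.run() {
    ///         // MMIO and PIO accesses have already been handled through `VmOps`.
    ///         Ok(VmExit::Ignore) => continue,
    ///         // The guest asked for a reset or shutdown.
    ///         Ok(VmExit::Reset) | Ok(VmExit::Shutdown) => break,
    ///         // Remaining exit reasons (Hyper-V, debug, ...) are VMM specific.
    ///         Ok(_) => continue,
    ///         Err(e) => panic!("vCPU run failed: {:?}", e),
    ///     }
    /// }
    /// ```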
1708 /// 1709 fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> { 1710 match self.fd.lock().unwrap().run() { 1711 Ok(run) => match run { 1712 #[cfg(target_arch = "x86_64")] 1713 VcpuExit::IoIn(addr, data) => { 1714 if let Some(vm_ops) = &self.vm_ops { 1715 return vm_ops 1716 .pio_read(addr.into(), data) 1717 .map(|_| cpu::VmExit::Ignore) 1718 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1719 } 1720 1721 Ok(cpu::VmExit::Ignore) 1722 } 1723 #[cfg(target_arch = "x86_64")] 1724 VcpuExit::IoOut(addr, data) => { 1725 if let Some(vm_ops) = &self.vm_ops { 1726 return vm_ops 1727 .pio_write(addr.into(), data) 1728 .map(|_| cpu::VmExit::Ignore) 1729 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1730 } 1731 1732 Ok(cpu::VmExit::Ignore) 1733 } 1734 #[cfg(target_arch = "x86_64")] 1735 VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)), 1736 #[cfg(target_arch = "x86_64")] 1737 VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset), 1738 1739 #[cfg(target_arch = "aarch64")] 1740 VcpuExit::SystemEvent(event_type, flags) => { 1741 use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN}; 1742 // On Aarch64, when the VM is shutdown, run() returns 1743 // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN 1744 if event_type == KVM_SYSTEM_EVENT_RESET { 1745 Ok(cpu::VmExit::Reset) 1746 } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN { 1747 Ok(cpu::VmExit::Shutdown) 1748 } else { 1749 Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1750 "Unexpected system event with type 0x{:x}, flags 0x{:x?}", 1751 event_type, 1752 flags 1753 ))) 1754 } 1755 } 1756 1757 VcpuExit::MmioRead(addr, data) => { 1758 if let Some(vm_ops) = &self.vm_ops { 1759 return vm_ops 1760 .mmio_read(addr, data) 1761 .map(|_| cpu::VmExit::Ignore) 1762 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1763 } 1764 1765 Ok(cpu::VmExit::Ignore) 1766 } 1767 VcpuExit::MmioWrite(addr, data) => { 1768 if let Some(vm_ops) = &self.vm_ops { 1769 return vm_ops 1770 .mmio_write(addr, data) 1771 .map(|_| cpu::VmExit::Ignore) 1772 .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into())); 1773 } 1774 1775 Ok(cpu::VmExit::Ignore) 1776 } 1777 VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv), 1778 #[cfg(feature = "tdx")] 1779 VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx), 1780 VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug), 1781 1782 r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1783 "Unexpected exit reason on vcpu run: {:?}", 1784 r 1785 ))), 1786 }, 1787 1788 Err(ref e) => match e.errno() { 1789 libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore), 1790 _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!( 1791 "VCPU error {:?}", 1792 e 1793 ))), 1794 }, 1795 } 1796 } 1797 1798 #[cfg(target_arch = "x86_64")] 1799 /// 1800 /// Let the guest know that it has been paused, which prevents from 1801 /// potential soft lockups when being resumed. 1802 /// 1803 fn notify_guest_clock_paused(&self) -> cpu::Result<()> { 1804 if let Err(e) = self.fd.lock().unwrap().kvmclock_ctrl() { 1805 // Linux kernel returns -EINVAL if the PV clock isn't yet initialised 1806 // which could be because we're still in firmware or the guest doesn't 1807 // use KVM clock. 1808 if e.errno() != libc::EINVAL { 1809 return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into())); 1810 } 1811 } 1812 1813 Ok(()) 1814 } 1815 1816 /// 1817 /// Sets debug registers to set hardware breakpoints and/or enable single step. 
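    ///
    /// A minimal sketch: an empty address list with `singlestep` set simply
    /// enables single-stepping without any hardware breakpoint (not run as a
    /// doctest, since it assumes a KVM host with guest-debug support):
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// # let kvm = KvmHypervisor::new().unwrap();
    /// # let hypervisor = Arc::new(kvm);
    /// # let vm = hypervisor.create_vm().expect("new VM fd creation failed");
    /// # let vcpu = vm.create_vcpu(0, None).unwrap();
    /// vcpu.set_guest_debug(&[], true).unwrap();
    /// ```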
1818 /// 1819 fn set_guest_debug( 1820 &self, 1821 addrs: &[vm_memory::GuestAddress], 1822 singlestep: bool, 1823 ) -> cpu::Result<()> { 1824 let mut dbg = kvm_guest_debug { 1825 #[cfg(target_arch = "x86_64")] 1826 control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP, 1827 #[cfg(target_arch = "aarch64")] 1828 control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW, 1829 ..Default::default() 1830 }; 1831 if singlestep { 1832 dbg.control |= KVM_GUESTDBG_SINGLESTEP; 1833 } 1834 1835 // Set the debug registers. 1836 // Here we assume that the number of addresses do not exceed what 1837 // `Hypervisor::get_guest_debug_hw_bps()` specifies. 1838 #[cfg(target_arch = "x86_64")] 1839 { 1840 // Set bits 9 and 10. 1841 // bit 9: GE (global exact breakpoint enable) flag. 1842 // bit 10: always 1. 1843 dbg.arch.debugreg[7] = 0x0600; 1844 1845 for (i, addr) in addrs.iter().enumerate() { 1846 dbg.arch.debugreg[i] = addr.0; 1847 // Set global breakpoint enable flag 1848 dbg.arch.debugreg[7] |= 2 << (i * 2); 1849 } 1850 } 1851 #[cfg(target_arch = "aarch64")] 1852 { 1853 for (i, addr) in addrs.iter().enumerate() { 1854 // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2): 1855 // bit 0: 1 (Enabled) 1856 // bit 1~2: 0b11 (PMC = EL1/EL0) 1857 // bit 5~8: 0b1111 (BAS = AArch64) 1858 // others: 0 1859 dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64; 1860 // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3): 1861 // bit 2~52: VA[2:52] 1862 dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0; 1863 } 1864 } 1865 self.fd 1866 .lock() 1867 .unwrap() 1868 .set_guest_debug(&dbg) 1869 .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into())) 1870 } 1871 1872 #[cfg(target_arch = "aarch64")] 1873 fn vcpu_init(&self, kvi: &VcpuInit) -> cpu::Result<()> { 1874 self.fd 1875 .lock() 1876 .unwrap() 1877 .vcpu_init(kvi) 1878 .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into())) 1879 } 1880 1881 #[cfg(target_arch = "aarch64")] 1882 fn vcpu_finalize(&self, feature: i32) -> cpu::Result<()> { 1883 self.fd 1884 .lock() 1885 .unwrap() 1886 .vcpu_finalize(&feature) 1887 .map_err(|e| cpu::HypervisorCpuError::VcpuFinalize(e.into())) 1888 } 1889 1890 /// 1891 /// Gets a list of the guest registers that are supported for the 1892 /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. 1893 /// 1894 #[cfg(target_arch = "aarch64")] 1895 fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> { 1896 self.fd 1897 .lock() 1898 .unwrap() 1899 .get_reg_list(reg_list) 1900 .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into())) 1901 } 1902 1903 /// 1904 /// Gets the value of a system register 1905 /// 1906 #[cfg(target_arch = "aarch64")] 1907 fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> { 1908 // 1909 // Arm Architecture Reference Manual defines the encoding of 1910 // AArch64 system registers, see 1911 // https://developer.arm.com/documentation/ddi0487 (chapter D12). 1912 // While KVM defines another ID for each AArch64 system register, 1913 // which is used in calling `KVM_G/SET_ONE_REG` to access a system 1914 // register of a guest. 1915 // A mapping exists between the Arm standard encoding and the KVM ID. 1916 // This function takes the standard u32 ID as input parameter, converts 1917 // it to the corresponding KVM ID, and call `KVM_GET_ONE_REG` API to 1918 // get the value of the system parameter. 
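        //
        // For illustration only (the constant below is hypothetical and not
        // defined in this crate): the Arm ARM encodes MPIDR_EL1 as op0=3,
        // op1=0, CRn=0, CRm=0, op2=5. Assuming the caller passes the Linux
        // `sys_reg()`-style packing (op0 at bit 19 ... op2 at bit 5), the
        // `>> 5` below lines those fields up with the KVM_REG_ARM64_SYSREG_*
        // bit positions:
        //
        //     let mpidr_el1: u32 = (3 << 19) | (0 << 16) | (0 << 12) | (0 << 8) | (5 << 5);
        //     let value = vcpu.get_sys_reg(mpidr_el1)?;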
        //
        let id: u64 = KVM_REG_ARM64
            | KVM_REG_SIZE_U64
            | KVM_REG_ARM64_SYSREG as u64
            | ((((sys_reg) >> 5)
                & (KVM_REG_ARM64_SYSREG_OP0_MASK
                    | KVM_REG_ARM64_SYSREG_OP1_MASK
                    | KVM_REG_ARM64_SYSREG_CRN_MASK
                    | KVM_REG_ARM64_SYSREG_CRM_MASK
                    | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(id, &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
        Ok(u64::from_le_bytes(bytes))
    }

    ///
    /// Configure core registers for a given CPU.
    ///
    #[cfg(target_arch = "aarch64")]
    fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
        #[allow(non_upper_case_globals)]
        // PSR (Processor State Register) bits.
        // Taken from arch/arm64/include/uapi/asm/ptrace.h.
        const PSR_MODE_EL1h: u64 = 0x0000_0005;
        const PSR_F_BIT: u64 = 0x0000_0040;
        const PSR_I_BIT: u64 = 0x0000_0080;
        const PSR_A_BIT: u64 = 0x0000_0100;
        const PSR_D_BIT: u64 = 0x0000_0200;
        // Taken from arch/arm64/kvm/inject_fault.c.
        const PSTATE_FAULT_BITS_64: u64 =
            PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;

        let kreg_off = offset_of!(kvm_regs, regs);

        // Get the register index of the PSTATE (Processor State) register.
        let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
                &PSTATE_FAULT_BITS_64.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        // Other vCPUs are powered off initially, awaiting a PSCI wakeup.
        if cpu_id == 0 {
            // Set the PC (Program Counter) to the current program address (kernel address).
            let pc = offset_of!(user_pt_regs, pc) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
                    &boot_ip.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

            // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
            // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
            // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
            // We are choosing to place it at the end of DRAM. See `get_fdt_addr`.
            let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
                    &fdt_start.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
        }
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state.
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before most everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change the state of the LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save. The code looks as
    /// if it might as well be affected by internal state modifications of the
    /// GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a prepopulated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?.into();
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fall back to a slower method by getting MSRs
        // by chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
                0x400000b5, 0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr.into());
            }
        }

        let expected_num_msrs = msr_entries.len();
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;
        let tsc_khz = self.tsc_khz()?;

        Ok(VcpuKvmState {
            cpuid,
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
            tsc_khz,
        }
        .into())
    }

    ///
    /// Get the current AArch64 CPU state.
    ///
    #[cfg(target_arch = "aarch64")]
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = VcpuKvmState {
            mp_state: self.get_mp_state()?.into(),
            ..Default::default()
        };
        // Get core registers
        state.core_regs = self.get_regs()?.into();

        // Get system registers.
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For Armv8 there are around 500 registers.
        let mut sys_regs: Vec<Register> = Vec::new();
        let mut reg_list = RegList::new(500).unwrap();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain: core registers and system
        // registers.
        // The register list contains the number of registers and their ids. We
        // need to call KVM_GET_ONE_REG on each id in order to save all of
        // them. We carve out from the list the core registers, which are
        // represented in the kernel by the kvm_regs structure and for which we
        // can calculate the id based on the offset in the structure.
        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we simply call KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(*index, &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
            sys_regs.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: u64::from_le_bytes(bytes),
            });
        }

        state.sys_regs = sys_regs;

        Ok(state.into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state.
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state.into())?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        if let Some(freq) = state.tsc_khz {
            self.set_tsc_khz(freq)?;
        }

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fall back to a slower method by setting MSRs
        // by chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.len();
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let sub_msr_entries = state.msrs[start_pos..].to_vec();

                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }

    ///
    /// Restore the previously saved AArch64 CPU state.
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        // Set core registers
        self.set_regs(&state.core_regs.into())?;
        // Set system registers
        for reg in &state.sys_regs {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(reg.id, &reg.addr.to_le_bytes())
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }

        self.set_mp_state(state.mp_state.into())?;

        Ok(())
    }

    ///
    /// Initialize TDX for this CPU.
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
        tdx_command(
            &self.fd.lock().unwrap().as_raw_fd(),
            TdxCommand::InitVcpu,
            0,
            hob_address,
        )
        .map_err(cpu::HypervisorCpuError::InitializeTdx)
    }

    ///
    /// Set the "immediate_exit" state.
    ///
    fn set_immediate_exit(&self, exit: bool) {
        self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into());
    }

    ///
    /// Returns the details about the TDX exit reason.
    ///
    #[cfg(feature = "tdx")]
    fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
        // SAFETY: accessing a union field in a valid structure
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;

        if tdx_vmcall.type_ != 0 {
            return Err(cpu::HypervisorCpuError::UnknownTdxVmCall);
        }

        match tdx_vmcall.subfunction {
            TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
            TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
                Ok(TdxExitDetails::SetupEventNotifyInterrupt)
            }
            _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
        }
    }

    ///
    /// Set the status code for a TDX exit.
    ///
    #[cfg(feature = "tdx")]
    fn set_tdx_status(&mut self, status: TdxExitStatus) {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
        // SAFETY: accessing a union field in a valid structure
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = match status {
            TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
            TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
        };
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU.
    ///
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr!(msr_index::MSR_IA32_TSC),
            msr_data!(
                msr_index::MSR_IA32_MISC_ENABLE,
                msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
            ),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }

    #[cfg(target_arch = "aarch64")]
    fn has_pmu_support(&self) -> bool {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        self.fd.lock().unwrap().has_device_attr(&cpu_attr).is_ok()
    }

    #[cfg(target_arch = "aarch64")]
    fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        let cpu_attr_irq = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
            addr: &irq as *const u32 as u64,
            flags: 0,
        };
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr_irq)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the frequency of the TSC if available.
    ///
    fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
        match self.fd.lock().unwrap().get_tsc_khz() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(None)
                } else {
                    Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
                }
            }
            Ok(v) => Ok(Some(v)),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the frequency of the TSC if available.
    ///
    fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
        match self.fd.lock().unwrap().set_tsc_khz(freq) {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Trigger an NMI interrupt.
    ///
    fn nmi(&self) -> cpu::Result<()> {
        match self.fd.lock().unwrap().nmi() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::Nmi(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }
}

impl KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<XsaveState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
        let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_xsave(&xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .lock()
            .unwrap()
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .lock()
            .unwrap()
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
}
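
// A minimal sketch (module and test names are illustrative, not part of the
// KVM wrappers above) of the DR7 bit arithmetic assumed by `set_guest_debug`
// on x86_64: bits 9 and 10 are set up front (GE plus the always-one bit), and
// each hardware breakpoint `i` additionally sets its global-enable bit G{i},
// i.e. `2 << (i * 2)`.
#[cfg(test)]
mod dr7_layout_sketch {
    #[test]
    fn dr7_global_enable_bits() {
        let mut dr7: u64 = 0x0600;
        for i in 0..4 {
            // Same expression as used when programming a hardware breakpoint.
            dr7 |= 2 << (i * 2);
        }
        // G0..G3 (bits 1, 3, 5, 7) set on top of bits 9 and 10.
        assert_eq!(dr7, 0x06aa);
    }
}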