// Copyright © 2024 Institute of Software, CAS. All rights reserved.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//
// Copyright 2018-2019 CrowdStrike, Inc.
//
//

use std::any::Any;
use std::collections::HashMap;
#[cfg(target_arch = "x86_64")]
use std::fs::File;
#[cfg(target_arch = "x86_64")]
use std::os::unix::io::AsRawFd;
#[cfg(feature = "tdx")]
use std::os::unix::io::RawFd;
use std::result;
#[cfg(target_arch = "x86_64")]
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex, RwLock};

use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
use vmm_sys_util::eventfd::EventFd;

#[cfg(target_arch = "aarch64")]
use crate::aarch64::gic::KvmGicV3Its;
#[cfg(target_arch = "aarch64")]
pub use crate::aarch64::{check_required_kvm_extensions, is_system_register, VcpuKvmState};
#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::gic::{Vgic, VgicConfig};
#[cfg(target_arch = "riscv64")]
use crate::arch::riscv64::aia::{Vaia, VaiaConfig};
#[cfg(target_arch = "riscv64")]
use crate::riscv64::aia::KvmAiaImsics;
#[cfg(target_arch = "riscv64")]
pub use crate::riscv64::{
    aia::AiaImsicsState as AiaState, check_required_kvm_extensions, is_non_core_register,
    VcpuKvmState,
};
use crate::vm::{self, InterruptSourceConfig, VmOps};
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset_of};
use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType};
#[cfg(target_arch = "riscv64")]
use crate::{offset_of, riscv64_reg_id};
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
#[cfg(target_arch = "x86_64")]
use kvm_bindings::{
    kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
    KVM_GUESTDBG_USE_HW_BP,
};
#[cfg(target_arch = "x86_64")]
use x86_64::check_required_kvm_extensions;
#[cfg(target_arch = "x86_64")]
pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{
    CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, XsaveState, NUM_IOAPIC_PINS,
};
#[cfg(target_arch = "x86_64")]
use crate::ClockData;
use crate::{
    CpuState, IoEventAddress, IrqRoutingEntry, MpState, StandardRegisters, UserMemoryRegion,
    USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
};
// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
// riscv64 dependencies
#[cfg(target_arch = "riscv64")]
pub mod riscv64;
#[cfg(target_arch = "aarch64")]
use std::mem;

///
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
///
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
pub use kvm_bindings::kvm_vcpu_events as VcpuEvents;
pub use kvm_bindings::{
    kvm_clock_data, kvm_create_device, kvm_create_device as CreateDevice,
    kvm_device_attr as DeviceAttr, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug,
    kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_run, kvm_userspace_memory_region,
    KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
    KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
};
#[cfg(target_arch = "aarch64")]
use kvm_bindings::{
    kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64,
    KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK,
    KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK,
    KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
};
#[cfg(target_arch = "riscv64")]
use kvm_bindings::{kvm_riscv_core, user_regs_struct, KVM_REG_RISCV_CORE};
#[cfg(feature = "tdx")]
use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO};
pub use kvm_ioctls::{Cap, Kvm, VcpuExit};
use thiserror::Error;
use vfio_ioctls::VfioDeviceFd;
#[cfg(feature = "tdx")]
use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr};
pub use {kvm_bindings, kvm_ioctls};

#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
use crate::RegList;

#[cfg(target_arch = "x86_64")]
const KVM_CAP_SGX_ATTRIBUTE: u32 = 196;

#[cfg(target_arch = "x86_64")]
use vmm_sys_util::ioctl_io_nr;
#[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))]
use vmm_sys_util::ioctl_ioc_nr;

#[cfg(target_arch = "x86_64")]
ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a);

#[cfg(feature = "tdx")]
const KVM_EXIT_TDX: u32 = 50;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SUCCESS: u64 = 0;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000;

#[cfg(feature = "tdx")]
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);

#[cfg(feature = "tdx")]
#[repr(u32)]
enum TdxCommand {
    Capabilities = 0,
    InitVm,
    InitVcpu,
    InitMemRegion,
    Finalize,
}

#[cfg(feature = "tdx")]
pub enum TdxExitDetails {
    GetQuote,
    SetupEventNotifyInterrupt,
}

#[cfg(feature = "tdx")]
pub enum TdxExitStatus {
    Success,
    InvalidOperand,
}

#[cfg(feature = "tdx")]
const TDX_MAX_NR_CPUID_CONFIGS: usize = 6;

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCpuidConfig {
    pub leaf: u32,
    pub sub_leaf: u32,
    pub eax: u32,
    pub ebx: u32,
    pub ecx: u32,
    pub edx: u32,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCapabilities {
    pub attrs_fixed0: u64,
    pub attrs_fixed1: u64,
    pub xfam_fixed0: u64,
    pub xfam_fixed1: u64,
    pub nr_cpuid_configs: u32,
    pub padding: u32,
    pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS],
}

#[cfg(feature = "tdx")]
#[derive(Copy, Clone)]
pub struct KvmTdxExit {
    pub type_: u32,
    pub pad: u32,
    pub u: KvmTdxExitU,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Copy, Clone)]
pub union KvmTdxExitU {
    pub vmcall: KvmTdxExitVmcall,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct KvmTdxExitVmcall {
    pub type_: u64,
    pub subfunction: u64,
    pub reg_mask: u64,
    pub in_r12: u64,
    pub in_r13: u64,
    pub in_r14: u64,
    pub in_r15: u64,
    pub in_rbx: u64,
    pub in_rdi: u64,
    pub in_rsi: u64,
    pub in_r8: u64,
    pub in_r9: u64,
    pub in_rdx: u64,
    pub status_code: u64,
    pub out_r11: u64,
    pub out_r12: u64,
    pub out_r13: u64,
    pub out_r14: u64,
    pub out_r15: u64,
    pub out_rbx: u64,
    pub out_rdi: u64,
    pub out_rsi: u64,
    pub out_r8: u64,
    pub out_r9: u64,
    pub out_rdx: u64,
}

impl From<kvm_userspace_memory_region> for UserMemoryRegion {
    fn from(region: kvm_userspace_memory_region) -> Self {
        let mut flags = USER_MEMORY_REGION_READ;
        if region.flags & KVM_MEM_READONLY == 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 {
            flags |= USER_MEMORY_REGION_LOG_DIRTY;
        }

        UserMemoryRegion {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<UserMemoryRegion> for kvm_userspace_memory_region {
    fn from(region: UserMemoryRegion) -> Self {
        assert!(
            region.flags & USER_MEMORY_REGION_READ != 0,
            "KVM mapped memory is always readable"
        );

        let mut flags = 0;
        if region.flags & USER_MEMORY_REGION_WRITE == 0 {
            flags |= KVM_MEM_READONLY;
        }
        if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
            flags |= KVM_MEM_LOG_DIRTY_PAGES;
        }

        kvm_userspace_memory_region {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<kvm_mp_state> for MpState {
    fn from(s: kvm_mp_state) -> Self {
        MpState::Kvm(s)
    }
}

impl From<MpState> for kvm_mp_state {
    fn from(ms: MpState) -> Self {
        match ms {
            MpState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("MpState is not valid"),
        }
    }
}

impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: kvm_ioctls::IoEventAddress) -> Self {
        match a {
            kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuKvmState> for CpuState {
    fn from(s: VcpuKvmState) -> Self {
        CpuState::Kvm(s)
    }
}

impl From<CpuState> for VcpuKvmState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<kvm_clock_data> for ClockData {
    fn from(d: kvm_clock_data) -> Self {
        ClockData::Kvm(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for kvm_clock_data {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("ClockData is not valid"),
        }
    }
}

impl From<kvm_bindings::kvm_one_reg> for crate::Register {
    fn from(s: kvm_bindings::kvm_one_reg) -> Self {
        crate::Register::Kvm(s)
    }
}

impl From<crate::Register> for kvm_bindings::kvm_one_reg {
    fn from(e: crate::Register) -> Self {
        match e {
            crate::Register::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("Register is not valid"),
        }
    }
}
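
// All of these conversions follow one pattern: KVM-native structs are wrapped
// into hypervisor-agnostic enum variants (`MpState::Kvm`, `CpuState::Kvm`,
// ...), and unwrapping panics if another backend's variant shows up while
// several hypervisors are compiled in. As a purely illustrative sketch of the
// memory-region flag mapping (hypothetical values), a read-only KVM slot
// round-trips like this:
//
//     let kvm_region = kvm_userspace_memory_region {
//         slot: 0,
//         guest_phys_addr: 0x10000,
//         memory_size: 0x1000,
//         userspace_addr: 0x7f00_0000_0000,
//         flags: KVM_MEM_READONLY,
//     };
//     let generic = UserMemoryRegion::from(kvm_region);
//     assert_eq!(generic.flags & USER_MEMORY_REGION_WRITE, 0);
//     assert_eq!(
//         kvm_userspace_memory_region::from(generic).flags,
//         KVM_MEM_READONLY
//     );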

#[cfg(target_arch = "aarch64")]
impl From<kvm_bindings::kvm_vcpu_init> for crate::VcpuInit {
    fn from(s: kvm_bindings::kvm_vcpu_init) -> Self {
        crate::VcpuInit::Kvm(s)
    }
}

#[cfg(target_arch = "aarch64")]
impl From<crate::VcpuInit> for kvm_bindings::kvm_vcpu_init {
    fn from(e: crate::VcpuInit) -> Self {
        match e {
            crate::VcpuInit::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("VcpuInit is not valid"),
        }
    }
}

#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
impl From<kvm_bindings::RegList> for crate::RegList {
    fn from(s: kvm_bindings::RegList) -> Self {
        crate::RegList::Kvm(s)
    }
}

#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
impl From<crate::RegList> for kvm_bindings::RegList {
    fn from(e: crate::RegList) -> Self {
        match e {
            crate::RegList::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("RegList is not valid"),
        }
    }
}

#[cfg(not(target_arch = "riscv64"))]
impl From<kvm_bindings::kvm_regs> for crate::StandardRegisters {
    fn from(s: kvm_bindings::kvm_regs) -> Self {
        crate::StandardRegisters::Kvm(s)
    }
}

#[cfg(not(target_arch = "riscv64"))]
impl From<crate::StandardRegisters> for kvm_bindings::kvm_regs {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

#[cfg(target_arch = "riscv64")]
impl From<kvm_bindings::kvm_riscv_core> for crate::StandardRegisters {
    fn from(s: kvm_bindings::kvm_riscv_core) -> Self {
        crate::StandardRegisters::Kvm(s)
    }
}

#[cfg(target_arch = "riscv64")]
impl From<crate::StandardRegisters> for kvm_bindings::kvm_riscv_core {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
    fn from(s: kvm_irq_routing_entry) -> Self {
        IrqRoutingEntry::Kvm(s)
    }
}

impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

struct KvmDirtyLogSlot {
    slot: u32,
    guest_phys_addr: u64,
    memory_size: u64,
    userspace_addr: u64,
}

/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    fd: Arc<VmFd>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>,
}
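
// Note on `dirty_log_slots`: regions that request dirty-page logging are
// installed without `KVM_MEM_LOG_DIRTY_PAGES` and remembered here as
// `KvmDirtyLogSlot` entries; `start_dirty_log`/`stop_dirty_log` (further
// below) toggle the flag on each remembered slot, e.g. around live migration.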

impl KvmVm {
    ///
    /// Creates an emulated device in the kernel.
    ///
    /// See the documentation for `KVM_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> {
        let device_fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        Ok(VfioDeviceFd::new_from_kvm(device_fd))
    }
    /// Checks if a particular `Cap` is available.
    pub fn check_extension(&self, c: Cap) -> bool {
        self.fd.check_extension(c)
    }
}

/// Implementation of Vm trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl vm::Vm for KvmVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    fn set_identity_map_address(&self, address: u64) -> vm::Result<()> {
        self.fd
            .set_identity_map_address(address)
            .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
        self.fd
            .set_tss_address(offset)
            .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        self.fd
            .create_irq_chip()
            .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
    }

    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
    }

    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
    }

    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let fd = self
            .fd
            .create_vcpu(id as u64)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = KvmVcpu {
            fd: Arc::new(Mutex::new(fd)),
            #[cfg(target_arch = "x86_64")]
            msrs: self.msrs.clone(),
            vm_ops,
            #[cfg(target_arch = "x86_64")]
            hyperv_synic: AtomicBool::new(false),
        };
        Ok(Arc::new(vcpu))
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Creates a virtual GIC device.
    ///
    fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
        let gic_device = KvmGicV3Its::new(self, config)
            .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?;
        Ok(Arc::new(Mutex::new(gic_device)))
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Creates a virtual AIA device.
    ///
    fn create_vaia(&self, config: VaiaConfig) -> vm::Result<Arc<Mutex<dyn Vaia>>> {
        let aia_device = KvmAiaImsics::new(self, config)
            .map_err(|e| vm::HypervisorVmError::CreateVaia(anyhow!("Vaia error {:?}", e)))?;
        Ok(Arc::new(Mutex::new(aia_device)))
    }

    ///
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<vm::DataMatch>,
    ) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(kvm_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(kvm_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }

    ///
    /// Unregisters an event from a certain address it has been previously registered to.
    ///
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match &config {
            InterruptSourceConfig::MsiIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_MSI,
                    ..Default::default()
                };

                kvm_route.u.msi.address_lo = cfg.low_addr;
                kvm_route.u.msi.address_hi = cfg.high_addr;
                kvm_route.u.msi.data = cfg.data;

                if self.check_extension(crate::kvm::Cap::MsiDevid) {
                    // On AArch64, there is a limitation on the range of the
                    // 'devid': it cannot be greater than 65536 (the maximum
                    // of u16).
                    //
                    // The BDF cannot be used directly, because the 'segment'
                    // sits in its high 16 bits. The layout of the u32 BDF is:
                    // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --|
                    // |     segment     |     bus    |   device   |  function  |
                    //
                    // Since we support only one bus per segment, we can build
                    // a 'devid' by replacing the 'bus' bits with the low 8
                    // bits of the 'segment' data. This resolves the range
                    // checking problem and still gives a distinct `devid` to
                    // every device. The limitation is that at most 256
                    // segments can be supported.
                    //
                    let modified_devid = ((cfg.devid & 0x00ff_0000) >> 8) | cfg.devid & 0xff;

                    kvm_route.flags = KVM_MSI_VALID_DEVID;
                    kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid;
                }
                kvm_route.into()
            }
            InterruptSourceConfig::LegacyIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_IRQCHIP,
                    ..Default::default()
                };
                kvm_route.u.irqchip.irqchip = cfg.irqchip;
                kvm_route.u.irqchip.pin = cfg.pin;

                kvm_route.into()
            }
        }
    }
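
    // Worked example for the `devid` remapping above (hypothetical BDF):
    // segment 0x0001, bus 0x00, device 0x05, function 0x0 encode as
    // devid = 0x0001_0028 (the device/function byte is 0b00101_000 = 0x28).
    // Then ((0x0001_0028 & 0x00ff_0000) >> 8) | (0x0001_0028 & 0xff)
    // = 0x0100 | 0x28 = 0x0128: the low segment byte takes the bus slot,
    // and the result fits comfortably in 16 bits.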

    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
    ///
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
        irq_routing[0].nr = entries.len() as u32;
        irq_routing[0].flags = 0;
        let entries: Vec<kvm_irq_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Kvm(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: irq_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [kvm_irq_routing_entry] =
                irq_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_gsi_routing(&irq_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }

    ///
    /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
    ///
    fn make_user_memory_region(
        &self,
        slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        kvm_userspace_memory_region {
            slot,
            guest_phys_addr,
            memory_size,
            userspace_addr,
            flags: if readonly { KVM_MEM_READONLY } else { 0 }
                | if log_dirty_pages {
                    KVM_MEM_LOG_DIRTY_PAGES
                } else {
                    0
                },
        }
        .into()
    }

    ///
    /// Creates a guest physical memory region.
    ///
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 {
            if (region.flags & KVM_MEM_READONLY) != 0 {
                return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!(
                    "Error creating regions with both 'dirty-pages-log' and 'read-only'."
                )));
            }

            // Keep track of the regions that need dirty pages log
            self.dirty_log_slots.write().unwrap().insert(
                region.slot,
                KvmDirtyLogSlot {
                    slot: region.slot,
                    guest_phys_addr: region.guest_phys_addr,
                    memory_size: region.memory_size,
                    userspace_addr: region.userspace_addr,
                },
            );

            // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`.
            // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`.
            region.flags = 0;
        }

        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
        }
    }
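
    // For illustration (hypothetical addresses): building and installing a
    // writable, dirty-logged slot. The `KVM_MEM_LOG_DIRTY_PAGES` flag is
    // deliberately deferred to `start_dirty_log` by the function above.
    //
    //     let region = vm.make_user_memory_region(
    //         0,          // slot
    //         0x1_0000,   // guest_phys_addr
    //         0x20_0000,  // memory_size
    //         host_addr,  // userspace_addr (hypothetical host mapping)
    //         false,      // readonly
    //         true,       // log_dirty_pages
    //     );
    //     vm.create_user_memory_region(region)?;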

    ///
    /// Removes a guest physical memory region.
    ///
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots.write().unwrap().remove(&region.slot);

        // Setting the size to 0 means "remove"
        region.memory_size = 0;
        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))
        }
    }

    ///
    /// Returns the preferred CPU target type which can be emulated by KVM on underlying host.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_preferred_target(&self, kvi: &mut crate::VcpuInit) -> vm::Result<()> {
        let mut kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();
        self.fd
            .get_preferred_target(&mut kvm_kvi)
            .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))?;
        *kvi = kvm_kvi.into();
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        // Create split irqchip
        // Only the local APIC is emulated in kernel, both PICs and IOAPIC
        // are not.
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = NUM_IOAPIC_PINS as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SGX_ATTRIBUTE,
            ..Default::default()
        };
        cap.args[0] = file.as_raw_fd() as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?;
        Ok(())
    }

    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        Ok(self
            .fd
            .get_clock()
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?
            .into())
    }

    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        let data = (*data).into();
        self.fd
            .set_clock(&data)
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }

    /// Create a device that is used for passthrough
    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = kvm_create_device {
            type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: KVM_MEM_LOG_DIRTY_PAGES,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: 0,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }
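
    // For illustration, the dirty-log lifecycle during e.g. live migration
    // (hypothetical driver code; `get_dirty_log` is defined just below):
    //
    //     vm.start_dirty_log()?;                    // set KVM_MEM_LOG_DIRTY_PAGES
    //     let bitmap = vm.get_dirty_log(slot, base_gpa, mem_size)?;
    //     vm.stop_dirty_log()?;                     // clear the flag again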

    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(slot, memory_size as usize)
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }

    ///
    /// Initialize TDX for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> {
        const TDX_ATTR_SEPT_VE_DISABLE: usize = 28;

        let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default());

        #[repr(C)]
        struct TdxInitVm {
            attributes: u64,
            max_vcpus: u32,
            padding: u32,
            mrconfigid: [u64; 6],
            mrowner: [u64; 6],
            mrownerconfig: [u64; 6],
            cpuid_nent: u32,
            cpuid_padding: u32,
            cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256],
        }
        let data = TdxInitVm {
            attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE,
            max_vcpus,
            padding: 0,
            mrconfigid: [0; 6],
            mrowner: [0; 6],
            mrownerconfig: [0; 6],
            cpuid_nent: cpuid.len() as u32,
            cpuid_padding: 0,
            cpuid_entries: cpuid.as_slice().try_into().unwrap(),
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitVm,
            0,
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitializeTdx)
    }

    ///
    /// Finalize the TDX setup for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_finalize(&self) -> vm::Result<()> {
        tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
            .map_err(vm::HypervisorVmError::FinalizeTdx)
    }

    ///
    /// Initialize memory regions for the TDX VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init_memory_region(
        &self,
        host_address: u64,
        guest_address: u64,
        size: u64,
        measure: bool,
    ) -> vm::Result<()> {
        #[repr(C)]
        struct TdxInitMemRegion {
            host_address: u64,
            guest_address: u64,
            pages: u64,
        }
        let data = TdxInitMemRegion {
            host_address,
            guest_address,
            pages: size / 4096,
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitMemRegion,
            u32::from(measure),
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitMemRegionTdx)
    }

    /// Downcast to the underlying KvmVm type
    fn as_any(&self) -> &dyn Any {
        self
    }
}

#[cfg(feature = "tdx")]
fn tdx_command(
    fd: &RawFd,
    command: TdxCommand,
    flags: u32,
    data: u64,
) -> std::result::Result<(), std::io::Error> {
    #[repr(C)]
    struct TdxIoctlCmd {
        command: TdxCommand,
        flags: u32,
        data: u64,
        error: u64,
        unused: u64,
    }
    let cmd = TdxIoctlCmd {
        command,
        flags,
        data,
        error: 0,
        unused: 0,
    };
    // SAFETY: FFI call. All input parameters are valid.
    let ret = unsafe {
        ioctl_with_val(
            fd,
            KVM_MEMORY_ENCRYPT_OP(),
            &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong,
        )
    };

    if ret < 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}
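
// Note: `tdx_command` above is the single funnel for `KVM_MEMORY_ENCRYPT_OP`.
// Callers in this file pass either the VM fd (`tdx_init`, `tdx_finalize`,
// `tdx_init_memory_region`) or the /dev/kvm fd (`tdx_capabilities`), with
// `data` pointing at a command-specific `#[repr(C)]` payload.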

/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    kvm: Kvm,
}

impl KvmHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by the hypervisor.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

/// Enum for KVM related error
#[derive(Debug, Error)]
pub enum KvmError {
    #[error("Capability missing: {0:?}")]
    CapabilityMissing(Cap),
}

pub type KvmResult<T> = result::Result<T, KvmError>;

impl KvmHypervisor {
    /// Create a hypervisor based on Kvm
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
        let api_version = kvm_obj.get_api_version();

        if api_version != kvm_bindings::KVM_API_VERSION as i32 {
            return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
        }

        Ok(Arc::new(KvmHypervisor { kvm: kvm_obj }))
    }

    /// Check if the hypervisor is available
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/kvm") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}

/// Implementation of Hypervisor trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl hypervisor::Hypervisor for KvmHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Kvm
    }

    ///
    /// Create a Vm of a specific type using the underlying hypervisor, passing memory size
    /// Return a hypervisor-agnostic Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type_and_memory(0).unwrap();
    /// ```
    fn create_vm_with_type_and_memory(
        &self,
        vm_type: u64,
        #[cfg(feature = "sev_snp")] _mem_size: u64,
    ) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        self.create_vm_with_type(vm_type)
    }

    /// Create a KVM vm object of a specific VM type and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type(0).unwrap();
    /// ```
    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm_with_type(vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs: Vec<MsrEntry> = vec![
                MsrEntry {
                    ..Default::default()
                };
                num_msrs
            ];
            let indices = msr_list.as_slice();
            for (pos, index) in indices.iter().enumerate() {
                msrs[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }

        #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }

    /// Create a KVM vm object and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// ```
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        #[allow(unused_mut)]
        let mut vm_type: u64 = 0; // Create with default platform type

        // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA
        // size from the host and use that when creating the VM, which may
        // avoid unnecessary VM creation failures.
        #[cfg(target_arch = "aarch64")]
        if self.kvm.check_extension(Cap::ArmVmIPASize) {
            vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap();
        }

        self.create_vm_with_type(vm_type)
    }

    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        check_required_kvm_extensions(&self.kvm)
            .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }
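
    // For illustration (x86_64 only): querying the host-supported CPUID
    // entries through the `Hypervisor` trait object (hypothetical usage):
    //
    //     let hypervisor = KvmHypervisor::new().unwrap();
    //     let cpuid = hypervisor.get_supported_cpuid().unwrap();
    //     assert!(!cpuid.is_empty());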

    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve AArch64 host maximum IPA size supported by KVM.
    ///
    fn get_host_ipa_limit(&self) -> i32 {
        self.kvm.get_host_ipa_limit()
    }

    ///
    /// Retrieve TDX capabilities
    ///
    #[cfg(feature = "tdx")]
    fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> {
        let data = TdxCapabilities {
            nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32,
            ..Default::default()
        };

        tdx_command(
            &self.kvm.as_raw_fd(),
            TdxCommand::Capabilities,
            0,
            &data as *const _ as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?;

        Ok(data)
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    ///
    /// Get the number of supported hardware breakpoints
    ///
    fn get_guest_debug_hw_bps(&self) -> usize {
        #[cfg(target_arch = "x86_64")]
        {
            4
        }
        #[cfg(target_arch = "aarch64")]
        {
            self.kvm.get_guest_debug_hw_bps() as usize
        }
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32
    }
}

/// Vcpu struct for KVM
pub struct KvmVcpu {
    fd: Arc<Mutex<VcpuFd>>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    #[cfg(target_arch = "x86_64")]
    hyperv_synic: AtomicBool,
}

/// Implementation of Vcpu trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// ```
impl cpu::Vcpu for KvmVcpu {
    ///
    /// Returns StandardRegisters with default value set
    ///
    fn create_standard_regs(&self) -> StandardRegisters {
        #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
        {
            kvm_bindings::kvm_regs::default().into()
        }
        #[cfg(target_arch = "riscv64")]
        {
            kvm_bindings::kvm_riscv_core::default().into()
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }

    ///
    /// Returns the vCPU general purpose registers.
    /// The `KVM_GET_REGS` ioctl is not available on AArch64, `KVM_GET_ONE_REG`
    /// is used to get registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        let mut state = kvm_regs::default();
        let mut off = offset_of!(user_pt_regs, regs);
        // There are 31 user_pt_regs:
        // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
        // These are the general-purpose registers of the Armv8-A architecture
        // (i.e. x0-x30 when used as 64-bit registers, or w0-w30 when used as
        // 32-bit registers).
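        // Each one is fetched with `KVM_GET_ONE_REG`; `arm64_core_reg_id!`
        // (defined in this crate) builds the 64-bit register id from
        // `KVM_REG_ARM64`, `KVM_REG_ARM_CORE`, the access size, and the
        // field's offset inside `kvm_regs`.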
        for i in 0..31 {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
            state.regs.regs[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // We are now entering the "Other register" section of the Armv8-A architecture.
        // First one, the stack pointer.
        let off = offset_of!(user_pt_regs, sp);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.regs.sp = u64::from_le_bytes(bytes);

        // Second one, the program counter.
        let off = offset_of!(user_pt_regs, pc);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.regs.pc = u64::from_le_bytes(bytes);

        // Next is the processor state.
        let off = offset_of!(user_pt_regs, pstate);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.regs.pstate = u64::from_le_bytes(bytes);

        // The stack pointer associated with EL1
        let off = offset_of!(kvm_regs, sp_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.sp_el1 = u64::from_le_bytes(bytes);

        // Exception Link Register for EL1; when taking an exception to EL1,
        // this register holds the address to return to afterwards.
        let off = offset_of!(kvm_regs, elr_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.elr_el1 = u64::from_le_bytes(bytes);

        // Saved Program Status Registers; there are 5 of them used in the kernel.
        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
            state.spsr[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // Now moving on to floating point registers, which are stored in the user_fpsimd_state in the kernel:
        // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            let mut bytes = [0_u8; 16];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
            state.fp_regs.vregs[i] = u128::from_le_bytes(bytes);
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        let mut bytes = [0_u8; 4];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.fp_regs.fpsr = u32::from_le_bytes(bytes);

        // Floating-point Control Register
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        let mut bytes = [0_u8; 4];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.fp_regs.fpcr = u32::from_le_bytes(bytes);
        Ok(state.into())
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Returns the RISC-V vCPU core registers.
    /// The `KVM_GET_REGS` ioctl is not available on RISC-V 64-bit,
    /// `KVM_GET_ONE_REG` is used to get registers one by one.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        let mut state = kvm_riscv_core::default();

        /// Macro used to extract the value of the RISC-V register identified
        /// by `$reg_name` from the KVM vCPU into `state`.
        macro_rules! riscv64_get_one_reg_from_vcpu {
            (mode) => {
                let off = offset_of!(kvm_riscv_core, mode);
                let mut bytes = [0_u8; 8];
                self.fd
                    .lock()
                    .unwrap()
                    .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
                    .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
                state.mode = u64::from_le_bytes(bytes);
            };
            ($reg_name:ident) => {
                let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, $reg_name);
                let mut bytes = [0_u8; 8];
                self.fd
                    .lock()
                    .unwrap()
                    .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
                    .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
                state.regs.$reg_name = u64::from_le_bytes(bytes);
            };
        }

        riscv64_get_one_reg_from_vcpu!(pc);
        riscv64_get_one_reg_from_vcpu!(ra);
        riscv64_get_one_reg_from_vcpu!(sp);
        riscv64_get_one_reg_from_vcpu!(gp);
        riscv64_get_one_reg_from_vcpu!(tp);
        riscv64_get_one_reg_from_vcpu!(t0);
        riscv64_get_one_reg_from_vcpu!(t1);
        riscv64_get_one_reg_from_vcpu!(t2);
        riscv64_get_one_reg_from_vcpu!(s0);
        riscv64_get_one_reg_from_vcpu!(s1);
        riscv64_get_one_reg_from_vcpu!(a0);
        riscv64_get_one_reg_from_vcpu!(a1);
        riscv64_get_one_reg_from_vcpu!(a2);
        riscv64_get_one_reg_from_vcpu!(a3);
        riscv64_get_one_reg_from_vcpu!(a4);
        riscv64_get_one_reg_from_vcpu!(a5);
        riscv64_get_one_reg_from_vcpu!(a6);
        riscv64_get_one_reg_from_vcpu!(a7);
        riscv64_get_one_reg_from_vcpu!(s2);
        riscv64_get_one_reg_from_vcpu!(s3);
        riscv64_get_one_reg_from_vcpu!(s4);
        riscv64_get_one_reg_from_vcpu!(s5);
        riscv64_get_one_reg_from_vcpu!(s6);
        riscv64_get_one_reg_from_vcpu!(s7);
        riscv64_get_one_reg_from_vcpu!(s8);
        riscv64_get_one_reg_from_vcpu!(s9);
        riscv64_get_one_reg_from_vcpu!(s10);
        riscv64_get_one_reg_from_vcpu!(s11);
        riscv64_get_one_reg_from_vcpu!(t3);
        riscv64_get_one_reg_from_vcpu!(t4);
        riscv64_get_one_reg_from_vcpu!(t5);
        riscv64_get_one_reg_from_vcpu!(t6);
        riscv64_get_one_reg_from_vcpu!(mode);

        Ok(state.into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .lock()
            .unwrap()
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }

    ///
    /// Sets the vCPU general purpose registers.
    /// The `KVM_SET_REGS` ioctl is not available on AArch64, `KVM_SET_ONE_REG`
    /// is used to set registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // The function follows the exact same order as `state`. Look there
        // for some additional info on registers.
        let kvm_regs_state: kvm_regs = (*state).into();
        let mut off = offset_of!(user_pt_regs, regs);
        for i in 0..31 {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &kvm_regs_state.regs.regs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let off = offset_of!(user_pt_regs, sp);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &kvm_regs_state.regs.sp.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pc);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &kvm_regs_state.regs.pc.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pstate);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &kvm_regs_state.regs.pstate.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, sp_el1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &kvm_regs_state.sp_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, elr_el1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &kvm_regs_state.elr_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &kvm_regs_state.spsr[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
                    &kvm_regs_state.fp_regs.vregs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &kvm_regs_state.fp_regs.fpsr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &kvm_regs_state.fp_regs.fpcr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
        Ok(())
    }
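
    // For illustration: since `get_regs`/`set_regs` are symmetric on every
    // architecture, saving and restoring the general-purpose state is simply
    // (hypothetical driver code):
    //
    //     let saved = vcpu.get_regs()?;
    //     // ... reset or migrate the vCPU ...
    //     vcpu.set_regs(&saved)?;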

    #[cfg(target_arch = "riscv64")]
    ///
    /// Sets the RISC-V vCPU core registers.
    /// The `KVM_SET_REGS` ioctl is not available on RISC-V 64-bit,
    /// `KVM_SET_ONE_REG` is used to set registers one by one.
    ///
    fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // The function follows the exact same order as `state`. Look there
        // for some additional info on registers.
        let kvm_regs_state: kvm_riscv_core = (*state).into();

        /// Macro used to set the value of the RISC-V register identified by
        /// `$reg_name` in `state` on the KVM vCPU.
        macro_rules! riscv64_set_one_reg_to_vcpu {
            (mode) => {
                let off = offset_of!(kvm_riscv_core, mode);
                self.fd
                    .lock()
                    .unwrap()
                    .set_one_reg(
                        riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
                        &kvm_regs_state.mode.to_le_bytes(),
                    )
                    .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
            };
            ($reg_name:ident) => {
                let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, $reg_name);
                self.fd
                    .lock()
                    .unwrap()
                    .set_one_reg(
                        riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
                        &kvm_regs_state.regs.$reg_name.to_le_bytes(),
                    )
                    .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
            };
        }

        riscv64_set_one_reg_to_vcpu!(pc);
        riscv64_set_one_reg_to_vcpu!(ra);
        riscv64_set_one_reg_to_vcpu!(sp);
        riscv64_set_one_reg_to_vcpu!(gp);
        riscv64_set_one_reg_to_vcpu!(tp);
        riscv64_set_one_reg_to_vcpu!(t0);
        riscv64_set_one_reg_to_vcpu!(t1);
        riscv64_set_one_reg_to_vcpu!(t2);
        riscv64_set_one_reg_to_vcpu!(s0);
        riscv64_set_one_reg_to_vcpu!(s1);
        riscv64_set_one_reg_to_vcpu!(a0);
        riscv64_set_one_reg_to_vcpu!(a1);
        riscv64_set_one_reg_to_vcpu!(a2);
        riscv64_set_one_reg_to_vcpu!(a3);
        riscv64_set_one_reg_to_vcpu!(a4);
        riscv64_set_one_reg_to_vcpu!(a5);
        riscv64_set_one_reg_to_vcpu!(a6);
        riscv64_set_one_reg_to_vcpu!(a7);
        riscv64_set_one_reg_to_vcpu!(s2);
        riscv64_set_one_reg_to_vcpu!(s3);
        riscv64_set_one_reg_to_vcpu!(s4);
        riscv64_set_one_reg_to_vcpu!(s5);
        riscv64_set_one_reg_to_vcpu!(s6);
        riscv64_set_one_reg_to_vcpu!(s7);
        riscv64_set_one_reg_to_vcpu!(s8);
        riscv64_set_one_reg_to_vcpu!(s9);
        riscv64_set_one_reg_to_vcpu!(s10);
        riscv64_set_one_reg_to_vcpu!(s11);
        riscv64_set_one_reg_to_vcpu!(t3);
        riscv64_set_one_reg_to_vcpu!(t4);
        riscv64_set_one_reg_to_vcpu!(t5);
        riscv64_set_one_reg_to_vcpu!(t6);
        riscv64_set_one_reg_to_vcpu!(mode);

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .lock()
            .unwrap()
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
        let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        let kvm_cpuid = <CpuId>::from_entries(&cpuid)
            .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;

        self.fd
            .lock()
            .unwrap()
            .set_cpuid2(&kvm_cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable Hyper-V SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        // Update the information about Hyper-V SynIC being enabled and
        // emulated as it will influence later which MSRs should be saved.
        self.hyperv_synic.store(true, Ordering::Release);

        let cap = kvm_enable_cap {
            cap: KVM_CAP_HYPERV_SYNIC,
            ..Default::default()
        };
        self.fd
            .lock()
            .unwrap()
            .enable_cap(&cap)
            .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into()))
    }

    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    #[cfg(target_arch = "x86_64")]
    fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .fd
            .lock()
            .unwrap()
            .get_cpuid2(num_entries)
            .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
        let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_lapic(&klapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }
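
    // For illustration: the LAPIC accessors above pair naturally when the
    // VMM needs to tweak interrupt state (hypothetical usage):
    //
    //     let mut lapic = vcpu.get_lapic()?;
    //     // ... modify the kvm_lapic_state wrapped inside `lapic` ...
    //     vcpu.set_lapic(&lapic)?;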
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
        let succ = self
            .fd
            .lock()
            .unwrap()
            .get_msrs(&mut kvm_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &kvm_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets up the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
        self.fd
            .lock()
            .unwrap()
            .set_msrs(&kvm_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    ///
    /// Returns the vCPU's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_mp_state()
            .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))?
            .into())
    }

    ///
    /// Sets the vCPU's current "multiprocessing state".
    ///
    fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_mp_state(mp_state.into())
            .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Translates a guest virtual address to a guest physical address using the
    /// `KVM_TRANSLATE` ioctl.
    ///
    fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> {
        let tr = self
            .fd
            .lock()
            .unwrap()
            .translate_gva(gva)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
        // tr.valid is set if the GVA maps to a valid GPA.
        match tr.valid {
            0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!(
                "Invalid GVA: {:#x}",
                gva
            ))),
            _ => Ok((tr.physical_address, 0)),
        }
    }

    ///
    /// Triggers a run of the current virtual CPU, returning an exit reason.
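    ///
    /// Note: `EAGAIN` and `EINTR` from the underlying `KVM_RUN` ioctl are
    /// treated as benign and surface as `VmExit::Ignore`, so callers may
    /// simply retry in a loop.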
    ///
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.lock().unwrap().run() {
            Ok(run) => match run {
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoIn(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .pio_read(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoOut(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .pio_write(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
                #[cfg(target_arch = "x86_64")]
                VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),

                #[cfg(target_arch = "aarch64")]
                VcpuExit::SystemEvent(event_type, flags) => {
                    use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
                    // On AArch64, when the VM is shut down, run() returns
                    // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN.
                    if event_type == KVM_SYSTEM_EVENT_RESET {
                        Ok(cpu::VmExit::Reset)
                    } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
                        Ok(cpu::VmExit::Shutdown)
                    } else {
                        Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unexpected system event with type 0x{:x}, flags 0x{:x?}",
                            event_type,
                            flags
                        )))
                    }
                }

                VcpuExit::MmioRead(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .mmio_read(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                VcpuExit::MmioWrite(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .mmio_write(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),
                #[cfg(feature = "tdx")]
                VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx),
                VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug),

                r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unexpected exit reason on vcpu run: {:?}",
                    r
                ))),
            },

            Err(ref e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Lets the guest know that it has been paused, which prevents potential
    /// soft lockups when it is resumed.
    ///
    fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
        if let Err(e) = self.fd.lock().unwrap().kvmclock_ctrl() {
            // The Linux kernel returns -EINVAL if the PV clock isn't yet
            // initialised, which could be because we're still in firmware or
            // because the guest doesn't use the KVM clock.
            if e.errno() != libc::EINVAL {
                return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()));
            }
        }

        Ok(())
    }

    #[cfg(not(target_arch = "riscv64"))]
    ///
    /// Sets debug registers to install hardware breakpoints and/or enable single step.
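    ///
    /// A minimal sketch of arming one hardware breakpoint without
    /// single-stepping; it assumes a KVM host that supports guest debug, and
    /// the breakpoint address 0x1000 is purely illustrative:
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// # use vm_memory::GuestAddress;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// vcpu.set_guest_debug(&[GuestAddress(0x1000)], false).unwrap();
    /// ```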
    ///
    fn set_guest_debug(
        &self,
        addrs: &[vm_memory::GuestAddress],
        singlestep: bool,
    ) -> cpu::Result<()> {
        let mut dbg = kvm_guest_debug {
            #[cfg(target_arch = "x86_64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
            #[cfg(target_arch = "aarch64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW,
            ..Default::default()
        };
        if singlestep {
            dbg.control |= KVM_GUESTDBG_SINGLESTEP;
        }

        // Set the debug registers.
        // Here we assume that the number of addresses does not exceed what
        // `Hypervisor::get_guest_debug_hw_bps()` specifies.
        #[cfg(target_arch = "x86_64")]
        {
            // Set bits 9 and 10.
            // bit 9: GE (global exact breakpoint enable) flag.
            // bit 10: always 1.
            dbg.arch.debugreg[7] = 0x0600;

            for (i, addr) in addrs.iter().enumerate() {
                dbg.arch.debugreg[i] = addr.0;
                // Set the global breakpoint enable flag for this breakpoint.
                dbg.arch.debugreg[7] |= 2 << (i * 2);
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            for (i, addr) in addrs.iter().enumerate() {
                // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2):
                // bit 0: 1 (Enabled)
                // bit 1~2: 0b11 (PMC = EL1/EL0)
                // bit 5~8: 0b1111 (BAS = AArch64)
                // others: 0
                dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64;
                // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3):
                // bit 2~52: VA[2:52]
                dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0;
            }
        }
        self.fd
            .lock()
            .unwrap()
            .set_guest_debug(&dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_get_finalized_features(&self) -> i32 {
        kvm_bindings::KVM_ARM_VCPU_SVE as i32
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_set_processor_features(
        &self,
        vm: &Arc<dyn crate::Vm>,
        kvi: &mut crate::VcpuInit,
        id: u8,
    ) -> cpu::Result<()> {
        use std::arch::is_aarch64_feature_detected;
        #[allow(clippy::nonminimal_bool)]
        let sve_supported =
            is_aarch64_feature_detected!("sve") || is_aarch64_feature_detected!("sve2");

        let mut kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();

        // We already checked that the capability is supported.
        kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2;
        if vm
            .as_any()
            .downcast_ref::<crate::kvm::KvmVm>()
            .unwrap()
            .check_extension(Cap::ArmPmuV3)
        {
            kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PMU_V3;
        }

        if sve_supported
            && vm
                .as_any()
                .downcast_ref::<crate::kvm::KvmVm>()
                .unwrap()
                .check_extension(Cap::ArmSve)
        {
            kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_SVE;
        }

        // Non-boot CPUs are powered off initially.
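        // The boot CPU (id 0) brings them online later through PSCI CPU_ON.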
        if id > 0 {
            kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_POWER_OFF;
        }

        *kvi = kvm_kvi.into();

        Ok(())
    }

    ///
    /// Returns a `VcpuInit` with default values set.
    ///
    #[cfg(target_arch = "aarch64")]
    fn create_vcpu_init(&self) -> crate::VcpuInit {
        kvm_bindings::kvm_vcpu_init::default().into()
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_init(&self, kvi: &crate::VcpuInit) -> cpu::Result<()> {
        let kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();
        self.fd
            .lock()
            .unwrap()
            .vcpu_init(&kvm_kvi)
            .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_finalize(&self, feature: i32) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .vcpu_finalize(&feature)
            .map_err(|e| cpu::HypervisorCpuError::VcpuFinalize(e.into()))
    }

    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
    ///
    /// Gets a list of the guest registers that are supported for the
    /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
    ///
    fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
        let mut kvm_reg_list: kvm_bindings::RegList = reg_list.clone().into();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut kvm_reg_list)
            .map_err(|e: kvm_ioctls::Error| cpu::HypervisorCpuError::GetRegList(e.into()))?;
        *reg_list = kvm_reg_list.into();
        Ok(())
    }

    ///
    /// Gets the value of a system register.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
        //
        // The Arm Architecture Reference Manual defines the encoding of
        // AArch64 system registers, see
        // https://developer.arm.com/documentation/ddi0487 (chapter D12).
        // KVM defines its own ID for each AArch64 system register, which is
        // used when calling `KVM_GET/SET_ONE_REG` to access a system
        // register of a guest.
        // A mapping exists between the Arm standard encoding and the KVM ID.
        // This function takes the standard u32 ID as an input parameter,
        // converts it to the corresponding KVM ID, and calls the
        // `KVM_GET_ONE_REG` API to get the value of the system register.
        //
        let id: u64 = KVM_REG_ARM64
            | KVM_REG_SIZE_U64
            | KVM_REG_ARM64_SYSREG as u64
            | ((((sys_reg) >> 5)
                & (KVM_REG_ARM64_SYSREG_OP0_MASK
                    | KVM_REG_ARM64_SYSREG_OP1_MASK
                    | KVM_REG_ARM64_SYSREG_CRN_MASK
                    | KVM_REG_ARM64_SYSREG_CRM_MASK
                    | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(id, &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
        Ok(u64::from_le_bytes(bytes))
    }

    ///
    /// Gets the value of a non-core register.
    ///
    #[cfg(target_arch = "riscv64")]
    fn get_non_core_reg(&self, _non_core_reg: u32) -> cpu::Result<u64> {
        unimplemented!()
    }

    ///
    /// Configures core registers for a given CPU.
    ///
    #[cfg(target_arch = "aarch64")]
    fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
        #[allow(non_upper_case_globals)]
        // PSR (Processor State Register) bits.
        // Taken from arch/arm64/include/uapi/asm/ptrace.h.
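        // EL1h selects EL1 with SP_EL1; the D, A, I and F bits mask debug,
        // SError, IRQ and FIQ exceptions respectively until the guest
        // unmasks them.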
        const PSR_MODE_EL1h: u64 = 0x0000_0005;
        const PSR_F_BIT: u64 = 0x0000_0040;
        const PSR_I_BIT: u64 = 0x0000_0080;
        const PSR_A_BIT: u64 = 0x0000_0100;
        const PSR_D_BIT: u64 = 0x0000_0200;
        // Taken from arch/arm64/kvm/inject_fault.c.
        const PSTATE_FAULT_BITS_64: u64 =
            PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;

        let kreg_off = offset_of!(kvm_regs, regs);

        // Get the register offset of the PSTATE (Processor State) register.
        let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
                &PSTATE_FAULT_BITS_64.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        // Other vCPUs are powered off initially, awaiting a PSCI wakeup.
        if cpu_id == 0 {
            // Set the PC (Program Counter) to the current program address
            // (kernel address).
            let pc = offset_of!(user_pt_regs, pc) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
                    &boot_ip.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

            // Last mandatory thing to set -> the address pointing to the FDT
            // (also called DTB).
            // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
            // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
            // We choose to place it at the end of DRAM. See `get_fdt_addr`.
            let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
                    &fdt_start.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
        }
        Ok(())
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Configures registers for a given RISC-V CPU.
    ///
    fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
        // Set A0 to the hartid of this CPU.
        let a0 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a0);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                riscv64_reg_id!(KVM_REG_RISCV_CORE, a0),
                &u64::from(cpu_id).to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;

        // Set the PC (Program Counter) to the current program address
        // (kernel address).
        let pc = offset_of!(kvm_riscv_core, regs, user_regs_struct, pc);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                riscv64_reg_id!(KVM_REG_RISCV_CORE, pc),
                &boot_ip.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;

        // Last mandatory thing to set -> the address pointing to the FDT
        // (also called DTB).
        // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
        // not exceed 64 kilobytes in size." -> https://www.kernel.org/doc/Documentation/arch/riscv/boot.txt.
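        // Per the Linux RISC-V boot protocol, A0 carries the hartid (set
        // above) and A1 carries the physical address of the DTB.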
        let a1 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                riscv64_reg_id!(KVM_REG_RISCV_CORE, a1),
                &fdt_start.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state.
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before almost everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change the state of the LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save: it may well be
    /// affected by internal state modifications of the other GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a prepopulated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?.into();
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS differs from the
        // expected amount, we fall back to a slower method, getting the MSRs
        // in chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
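        // The indices below cover the reference time counters (0x4000002x),
        // the SynIC control pages (SCONTROL, SVERSION, SIEFP, SIMP, EOM),
        // the sixteen SINTx registers and the synthetic timer MSRs.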
        if self.hyperv_synic.load(Ordering::Acquire) {
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
                0x400000b5, 0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr.into());
            }
        }

        let expected_num_msrs = msr_entries.len();
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries[faulty_msr_index].index
                );

                // Skip the first bad MSR.
                let start_pos = faulty_msr_index + 1;

                let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;
        let tsc_khz = self.tsc_khz()?;

        Ok(VcpuKvmState {
            cpuid,
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
            tsc_khz,
        }
        .into())
    }

    ///
    /// Get the current AArch64 CPU state.
    ///
    #[cfg(target_arch = "aarch64")]
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = VcpuKvmState {
            mp_state: self.get_mp_state()?.into(),
            ..Default::default()
        };
        // Get the core registers.
        state.core_regs = self.get_regs()?.into();

        // Get the system registers.
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For Armv8 there are around 500 registers.
        let mut sys_regs: Vec<kvm_bindings::kvm_one_reg> = Vec::new();
        let mut reg_list = kvm_bindings::RegList::new(500).unwrap();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain the core registers and the
        // system registers.
        // The register list contains the number of registers and their ids.
        // We will need to call KVM_GET_ONE_REG on each id in order to save
        // them all. We carve out from the list the core registers, which are
        // represented in the kernel by the kvm_regs structure and for which
        // we can calculate the id based on the offset in the structure.
        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we simply call KVM_GET_ONE_REG.
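        // Each remaining register is read into an 8-byte buffer, i.e. this
        // assumes u64-sized system registers.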
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(*index, &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
            sys_regs.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: u64::from_le_bytes(bytes),
            });
        }

        state.sys_regs = sys_regs;

        Ok(state.into())
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Get the current RISC-V 64-bit CPU state.
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = VcpuKvmState {
            mp_state: self.get_mp_state()?.into(),
            ..Default::default()
        };
        // Get the core registers.
        state.core_regs = self.get_regs()?.into();

        // Get the non-core registers.
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For RISC-V 64-bit there are around 200 registers.
        let mut sys_regs: Vec<kvm_bindings::kvm_one_reg> = Vec::new();
        let mut reg_list = kvm_bindings::RegList::new(200).unwrap();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain:
        // - core registers
        // - config registers
        // - timer registers
        // - control and status registers
        // - AIA control and status registers
        // - smstateen control and status registers
        // - sbi_sta control and status registers.
        //
        // The register list contains the number of registers and their ids.
        // We will need to call KVM_GET_ONE_REG on each id in order to save
        // them all. We carve out from the list the core registers, which are
        // represented in the kernel by the `kvm_riscv_core` structure and
        // for which we can calculate the id based on the offset in the
        // structure.
        reg_list.retain(|regid| is_non_core_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we simply call KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(*index, &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
            sys_regs.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: u64::from_le_bytes(bytes),
            });
        }

        state.non_core_regs = sys_regs;

        Ok(state.into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state.
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores them.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the APIC base MSR.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state.into())?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        if let Some(freq) = state.tsc_khz {
            self.set_tsc_khz(freq)?;
        }

        // Try to set all MSRs previously stored.
        // If the number of MSRs set via SET_MSRS differs from the expected
        // amount, we fall back to a slower method, setting the MSRs in
        // chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.len();
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs[faulty_msr_index].index
                );

                // Skip the first bad MSR.
                let start_pos = faulty_msr_index + 1;

                let sub_msr_entries = state.msrs[start_pos..].to_vec();

                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }

    ///
    /// Restore the previously saved AArch64 CPU state.
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        // Set the core registers.
        self.set_regs(&state.core_regs.into())?;
        // Set the system registers.
        for reg in &state.sys_regs {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(reg.id, &reg.addr.to_le_bytes())
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }

        self.set_mp_state(state.mp_state.into())?;

        Ok(())
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Restore the previously saved RISC-V 64-bit CPU state.
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        // Set the core registers.
        self.set_regs(&state.core_regs.into())?;
        // Set the non-core registers.
        for reg in &state.non_core_regs {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(reg.id, &reg.addr.to_le_bytes())
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }

        self.set_mp_state(state.mp_state.into())?;

        Ok(())
    }

    ///
    /// Initialize TDX for this CPU.
    ///
    #[cfg(feature
= "tdx")] 2765 fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> { 2766 tdx_command( 2767 &self.fd.lock().unwrap().as_raw_fd(), 2768 TdxCommand::InitVcpu, 2769 0, 2770 hob_address, 2771 ) 2772 .map_err(cpu::HypervisorCpuError::InitializeTdx) 2773 } 2774 2775 /// 2776 /// Set the "immediate_exit" state 2777 /// 2778 fn set_immediate_exit(&self, exit: bool) { 2779 self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into()); 2780 } 2781 2782 /// 2783 /// Returns the details about TDX exit reason 2784 /// 2785 #[cfg(feature = "tdx")] 2786 fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> { 2787 let mut fd = self.fd.as_ref().lock().unwrap(); 2788 let kvm_run = fd.get_kvm_run(); 2789 // SAFETY: accessing a union field in a valid structure 2790 let tdx_vmcall = unsafe { 2791 &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1 2792 as *mut KvmTdxExit)) 2793 .u 2794 .vmcall 2795 }; 2796 2797 tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND; 2798 2799 if tdx_vmcall.type_ != 0 { 2800 return Err(cpu::HypervisorCpuError::UnknownTdxVmCall); 2801 } 2802 2803 match tdx_vmcall.subfunction { 2804 TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote), 2805 TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => { 2806 Ok(TdxExitDetails::SetupEventNotifyInterrupt) 2807 } 2808 _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall), 2809 } 2810 } 2811 2812 /// 2813 /// Set the status code for TDX exit 2814 /// 2815 #[cfg(feature = "tdx")] 2816 fn set_tdx_status(&mut self, status: TdxExitStatus) { 2817 let mut fd = self.fd.as_ref().lock().unwrap(); 2818 let kvm_run = fd.get_kvm_run(); 2819 // SAFETY: accessing a union field in a valid structure 2820 let tdx_vmcall = unsafe { 2821 &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1 2822 as *mut KvmTdxExit)) 2823 .u 2824 .vmcall 2825 }; 2826 2827 tdx_vmcall.status_code = match status { 2828 TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS, 2829 TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND, 2830 }; 2831 } 2832 2833 #[cfg(target_arch = "x86_64")] 2834 /// 2835 /// Return the list of initial MSR entries for a VCPU 2836 /// 2837 fn boot_msr_entries(&self) -> Vec<MsrEntry> { 2838 use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB}; 2839 2840 [ 2841 msr!(msr_index::MSR_IA32_SYSENTER_CS), 2842 msr!(msr_index::MSR_IA32_SYSENTER_ESP), 2843 msr!(msr_index::MSR_IA32_SYSENTER_EIP), 2844 msr!(msr_index::MSR_STAR), 2845 msr!(msr_index::MSR_CSTAR), 2846 msr!(msr_index::MSR_LSTAR), 2847 msr!(msr_index::MSR_KERNEL_GS_BASE), 2848 msr!(msr_index::MSR_SYSCALL_MASK), 2849 msr!(msr_index::MSR_IA32_TSC), 2850 msr_data!( 2851 msr_index::MSR_IA32_MISC_ENABLE, 2852 msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64 2853 ), 2854 msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB), 2855 ] 2856 .to_vec() 2857 } 2858 2859 #[cfg(target_arch = "aarch64")] 2860 fn has_pmu_support(&self) -> bool { 2861 let cpu_attr = kvm_bindings::kvm_device_attr { 2862 group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL, 2863 attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT), 2864 addr: 0x0, 2865 flags: 0, 2866 }; 2867 self.fd.lock().unwrap().has_device_attr(&cpu_attr).is_ok() 2868 } 2869 2870 #[cfg(target_arch = "aarch64")] 2871 fn init_pmu(&self, irq: u32) -> cpu::Result<()> { 2872 let cpu_attr = kvm_bindings::kvm_device_attr { 2873 group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL, 2874 attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT), 2875 addr: 0x0, 2876 flags: 0, 2877 }; 2878 let cpu_attr_irq = 
        kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
            addr: &irq as *const u32 as u64,
            flags: 0,
        };
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr_irq)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the frequency of the TSC if available.
    ///
    fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
        match self.fd.lock().unwrap().get_tsc_khz() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(None)
                } else {
                    Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
                }
            }
            Ok(v) => Ok(Some(v)),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the frequency of the TSC if available.
    ///
    fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
        match self.fd.lock().unwrap().set_tsc_khz(freq) {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Trigger an NMI interrupt.
    ///
    fn nmi(&self) -> cpu::Result<()> {
        match self.fd.lock().unwrap().nmi() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::Nmi(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }
}

impl KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86-specific call that returns the vCPU's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<XsaveState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86-specific call that sets the vCPU's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
        let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_xsave(&xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86-specific call that returns the vCPU's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .lock()
            .unwrap()
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86-specific call that sets the vCPU's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as
    /// related states of the vCPU.
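    ///
    /// This wraps the `KVM_GET_VCPU_EVENTS` ioctl; see the ordering notes on
    /// `state()` for when it is safe to call.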
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .lock()
            .unwrap()
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related
    /// states of the vCPU.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
}

#[cfg(test)]
mod tests {
    #[test]
    #[cfg(target_arch = "riscv64")]
    fn test_get_and_set_regs() {
        use super::*;

        let kvm = KvmHypervisor::new().unwrap();
        let hypervisor = Arc::new(kvm);
        let vm = hypervisor.create_vm().expect("new VM fd creation failed");
        let vcpu0 = vm.create_vcpu(0, None).unwrap();

        let core_regs = StandardRegisters::from(kvm_riscv_core {
            regs: user_regs_struct {
                pc: 0x00,
                ra: 0x01,
                sp: 0x02,
                gp: 0x03,
                tp: 0x04,
                t0: 0x05,
                t1: 0x06,
                t2: 0x07,
                s0: 0x08,
                s1: 0x09,
                a0: 0x0a,
                a1: 0x0b,
                a2: 0x0c,
                a3: 0x0d,
                a4: 0x0e,
                a5: 0x0f,
                a6: 0x10,
                a7: 0x11,
                s2: 0x12,
                s3: 0x13,
                s4: 0x14,
                s5: 0x15,
                s6: 0x16,
                s7: 0x17,
                s8: 0x18,
                s9: 0x19,
                s10: 0x1a,
                s11: 0x1b,
                t3: 0x1c,
                t4: 0x1d,
                t5: 0x1e,
                t6: 0x1f,
            },
            mode: 0x00,
        });

        vcpu0.set_regs(&core_regs).unwrap();
        assert_eq!(vcpu0.get_regs().unwrap(), core_regs);
    }
}
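
// A minimal x86_64 smoke test mirroring the RISC-V test above; it assumes a
// KVM-capable x86_64 host and only checks that the boot MSR list is
// non-empty and can be read back from the vCPU.
#[cfg(test)]
#[cfg(target_arch = "x86_64")]
mod x86_64_tests {
    use super::*;

    #[test]
    fn test_boot_msr_entries() {
        let kvm = KvmHypervisor::new().unwrap();
        let hypervisor = Arc::new(kvm);
        let vm = hypervisor.create_vm().expect("new VM fd creation failed");
        let vcpu0 = vm.create_vcpu(0, None).unwrap();

        let mut msrs = vcpu0.boot_msr_entries();
        assert!(!msrs.is_empty());
        // Reading the same MSRs back should never return more entries than
        // were requested.
        let read = vcpu0.get_msrs(&mut msrs).unwrap();
        assert!(read <= msrs.len());
    }
}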