// Copyright © 2024 Institute of Software, CAS. All rights reserved.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//
// Copyright 2018-2019 CrowdStrike, Inc.
//
//

use std::any::Any;
use std::collections::HashMap;
#[cfg(target_arch = "x86_64")]
use std::fs::File;
#[cfg(target_arch = "x86_64")]
use std::os::unix::io::AsRawFd;
#[cfg(feature = "tdx")]
use std::os::unix::io::RawFd;
use std::result;
#[cfg(target_arch = "x86_64")]
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex, RwLock};

use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
use vmm_sys_util::eventfd::EventFd;

#[cfg(target_arch = "aarch64")]
use crate::aarch64::gic::KvmGicV3Its;
#[cfg(target_arch = "aarch64")]
pub use crate::aarch64::{check_required_kvm_extensions, is_system_register, VcpuKvmState};
#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::gic::{Vgic, VgicConfig};
#[cfg(target_arch = "riscv64")]
use crate::arch::riscv64::aia::{Vaia, VaiaConfig};
#[cfg(target_arch = "riscv64")]
use crate::riscv64::aia::KvmAiaImsics;
#[cfg(target_arch = "riscv64")]
pub use crate::riscv64::{
    aia::AiaImsicsState as AiaState, check_required_kvm_extensions, is_non_core_register,
    VcpuKvmState,
};
use crate::vm::{self, InterruptSourceConfig, VmOps};
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset_of};
use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType};
#[cfg(target_arch = "riscv64")]
use crate::{offset_of, riscv64_reg_id};
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
#[cfg(target_arch = "x86_64")]
use kvm_bindings::{
    kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
    KVM_GUESTDBG_USE_HW_BP,
};
#[cfg(target_arch = "x86_64")]
use x86_64::check_required_kvm_extensions;
#[cfg(target_arch = "x86_64")]
pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{
    CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, XsaveState, NUM_IOAPIC_PINS,
};
#[cfg(target_arch = "x86_64")]
use crate::ClockData;
use crate::{
    CpuState, IoEventAddress, IrqRoutingEntry, MpState, StandardRegisters, UserMemoryRegion,
    USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
};
// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
// riscv64 dependencies
#[cfg(target_arch = "riscv64")]
pub mod riscv64;
#[cfg(target_arch = "aarch64")]
use std::mem;

///
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
///
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
pub use kvm_bindings::kvm_vcpu_events as VcpuEvents;
pub use kvm_bindings::{
    kvm_clock_data, kvm_create_device, kvm_create_device as CreateDevice,
    kvm_device_attr as DeviceAttr, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug,
    kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_run, kvm_userspace_memory_region,
    KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
    KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
};
#[cfg(target_arch = "aarch64")]
use kvm_bindings::{
    kvm_regs, user_fpsimd_state, user_pt_regs,
    KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64, KVM_REG_ARM64_SYSREG,
    KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK, KVM_REG_ARM64_SYSREG_OP0_MASK,
    KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK, KVM_REG_ARM_CORE,
    KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
};
#[cfg(target_arch = "riscv64")]
use kvm_bindings::{kvm_riscv_core, user_regs_struct, KVM_REG_RISCV_CORE};
#[cfg(feature = "tdx")]
use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO};
pub use kvm_ioctls::{Cap, Kvm, VcpuExit};
use thiserror::Error;
use vfio_ioctls::VfioDeviceFd;
#[cfg(feature = "tdx")]
use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr};
pub use {kvm_bindings, kvm_ioctls};

#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::regs;
#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
use crate::RegList;

#[cfg(target_arch = "x86_64")]
const KVM_CAP_SGX_ATTRIBUTE: u32 = 196;

#[cfg(target_arch = "x86_64")]
use vmm_sys_util::ioctl_io_nr;
#[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))]
use vmm_sys_util::ioctl_ioc_nr;

#[cfg(target_arch = "x86_64")]
ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a);

#[cfg(feature = "tdx")]
const KVM_EXIT_TDX: u32 = 50;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SUCCESS: u64 = 0;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000;

#[cfg(feature = "tdx")]
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);

#[cfg(feature = "tdx")]
#[repr(u32)]
enum TdxCommand {
    Capabilities = 0,
    InitVm,
    InitVcpu,
    InitMemRegion,
    Finalize,
}

#[cfg(feature = "tdx")]
pub enum TdxExitDetails {
    GetQuote,
    SetupEventNotifyInterrupt,
}

#[cfg(feature = "tdx")]
pub enum TdxExitStatus {
    Success,
    InvalidOperand,
}

#[cfg(feature = "tdx")]
const TDX_MAX_NR_CPUID_CONFIGS: usize = 6;

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCpuidConfig {
    pub leaf: u32,
    pub sub_leaf: u32,
    pub eax: u32,
    pub ebx: u32,
    pub ecx: u32,
    pub edx: u32,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCapabilities {
    pub attrs_fixed0: u64,
    pub attrs_fixed1: u64,
    pub xfam_fixed0: u64,
    pub xfam_fixed1: u64,
    pub nr_cpuid_configs: u32,
    pub padding: u32,
    pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS],
}

#[cfg(feature = "tdx")]
#[derive(Copy, Clone)]
pub struct KvmTdxExit {
    pub type_: u32,
    pub pad: u32,
    pub u: KvmTdxExitU,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Copy, Clone)]
pub union KvmTdxExitU {
    pub vmcall: KvmTdxExitVmcall,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct KvmTdxExitVmcall {
    pub type_: u64,
    pub subfunction: u64,
    pub reg_mask: u64,
    pub in_r12: u64,
    pub in_r13: u64,
    pub in_r14: u64,
    pub in_r15: u64,
    pub in_rbx: u64,
    pub in_rdi: u64,
    pub in_rsi: u64,
    pub in_r8: u64,
    pub in_r9: u64,
    pub in_rdx: u64,
    pub status_code: u64,
    pub out_r11: u64,
    pub out_r12: u64,
    pub out_r13: u64,
    pub out_r14: u64,
    pub out_r15: u64,
    pub out_rbx: u64,
    pub out_rdi: u64,
    pub out_rsi: u64,
    pub out_r8: u64,
    pub out_r9: u64,
    pub out_rdx: u64,
}

impl From<kvm_userspace_memory_region> for UserMemoryRegion {
    fn from(region: kvm_userspace_memory_region) -> Self {
        let mut flags = USER_MEMORY_REGION_READ;
        if region.flags & KVM_MEM_READONLY == 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 {
            flags |= USER_MEMORY_REGION_LOG_DIRTY;
        }

        UserMemoryRegion {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<UserMemoryRegion> for kvm_userspace_memory_region {
    fn from(region: UserMemoryRegion) -> Self {
        assert!(
            region.flags & USER_MEMORY_REGION_READ != 0,
            "KVM mapped memory is always readable"
        );

        let mut flags = 0;
        if region.flags & USER_MEMORY_REGION_WRITE == 0 {
            flags |= KVM_MEM_READONLY;
        }
        if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
            flags |= KVM_MEM_LOG_DIRTY_PAGES;
        }

        kvm_userspace_memory_region {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<kvm_mp_state> for MpState {
    fn from(s: kvm_mp_state) -> Self {
        MpState::Kvm(s)
    }
}

impl From<MpState> for kvm_mp_state {
    fn from(ms: MpState) -> Self {
        match ms {
            MpState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("MpState is not valid"),
        }
    }
}

impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: kvm_ioctls::IoEventAddress) -> Self {
        match a {
            kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuKvmState> for CpuState {
    fn from(s: VcpuKvmState) -> Self {
        CpuState::Kvm(s)
    }
}

impl From<CpuState> for VcpuKvmState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<kvm_clock_data> for ClockData {
    fn from(d: kvm_clock_data) -> Self {
        ClockData::Kvm(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for kvm_clock_data {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("ClockData is not valid"),
        }
    }
}

impl From<kvm_bindings::kvm_one_reg> for crate::Register {
    fn from(s: kvm_bindings::kvm_one_reg) -> Self {
        crate::Register::Kvm(s)
    }
}

impl From<crate::Register> for kvm_bindings::kvm_one_reg {
    fn from(e: crate::Register) -> Self {
        match e {
            crate::Register::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("Register is not valid"),
        }
    }
}

#[cfg(target_arch = "aarch64")]
impl From<kvm_bindings::kvm_vcpu_init> for crate::VcpuInit {
    fn from(s: kvm_bindings::kvm_vcpu_init) -> Self {
        crate::VcpuInit::Kvm(s)
    }
}

#[cfg(target_arch = "aarch64")]
impl From<crate::VcpuInit> for kvm_bindings::kvm_vcpu_init {
    fn from(e: crate::VcpuInit) -> Self {
        match e {
            crate::VcpuInit::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("VcpuInit is not valid"),
        }
    }
}

#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
impl From<kvm_bindings::RegList> for crate::RegList {
    fn from(s: kvm_bindings::RegList) -> Self {
        crate::RegList::Kvm(s)
    }
}

#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
impl From<crate::RegList> for kvm_bindings::RegList {
    fn from(e: crate::RegList) -> Self {
        match e {
            crate::RegList::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("RegList is not valid"),
        }
    }
}

#[cfg(not(target_arch = "riscv64"))]
impl From<kvm_bindings::kvm_regs> for crate::StandardRegisters {
    fn from(s: kvm_bindings::kvm_regs) -> Self {
        crate::StandardRegisters::Kvm(s)
    }
}

#[cfg(not(target_arch = "riscv64"))]
impl From<crate::StandardRegisters> for kvm_bindings::kvm_regs {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

#[cfg(target_arch = "riscv64")]
impl From<kvm_bindings::kvm_riscv_core> for crate::StandardRegisters {
    fn from(s: kvm_bindings::kvm_riscv_core) -> Self {
        crate::StandardRegisters::Kvm(s)
    }
}

#[cfg(target_arch = "riscv64")]
impl From<crate::StandardRegisters> for kvm_bindings::kvm_riscv_core {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
    fn from(s: kvm_irq_routing_entry) -> Self {
        IrqRoutingEntry::Kvm(s)
    }
}

impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

struct KvmDirtyLogSlot {
    slot: u32,
    guest_phys_addr: u64,
    memory_size: u64,
    userspace_addr: u64,
}

/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    fd: Arc<VmFd>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>,
}

impl KvmVm {
    ///
    /// Creates an emulated device in the kernel.
    ///
    /// See the documentation for `KVM_CREATE_DEVICE`.
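    ///
    /// A minimal sketch of the internal usage, mirroring
    /// `create_passthrough_device` below (`kvm_vm` stands for a `KvmVm`):
    ///
    /// ```ignore
    /// let mut dev = kvm_create_device {
    ///     type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
    ///     fd: 0,
    ///     flags: 0,
    /// };
    /// let vfio_fd = kvm_vm.create_device(&mut dev)?;
    /// ```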
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> {
        let device_fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        Ok(VfioDeviceFd::new_from_kvm(device_fd))
    }
    /// Checks if a particular `Cap` is available.
    pub fn check_extension(&self, c: Cap) -> bool {
        self.fd.check_extension(c)
    }
}

/// Implementation of Vm trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl vm::Vm for KvmVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    fn set_identity_map_address(&self, address: u64) -> vm::Result<()> {
        self.fd
            .set_identity_map_address(address)
            .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
        self.fd
            .set_tss_address(offset)
            .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        self.fd
            .create_irq_chip()
            .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
    }

    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
    }

    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
    }

    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let fd = self
            .fd
            .create_vcpu(id as u64)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = KvmVcpu {
            fd: Arc::new(Mutex::new(fd)),
            #[cfg(target_arch = "x86_64")]
            msrs: self.msrs.clone(),
            vm_ops,
            #[cfg(target_arch = "x86_64")]
            hyperv_synic: AtomicBool::new(false),
        };
        Ok(Arc::new(vcpu))
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Creates a virtual GIC device.
    ///
    fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
        let gic_device = KvmGicV3Its::new(self, config)
            .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?;
        Ok(Arc::new(Mutex::new(gic_device)))
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Creates a virtual AIA device.
    ///
    fn create_vaia(&self, config: VaiaConfig) -> vm::Result<Arc<Mutex<dyn Vaia>>> {
        let aia_device = KvmAiaImsics::new(self, config)
            .map_err(|e| vm::HypervisorVmError::CreateVaia(anyhow!("Vaia error {:?}", e)))?;
        Ok(Arc::new(Mutex::new(aia_device)))
    }

    ///
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<vm::DataMatch>,
    ) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(kvm_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(kvm_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }

    ///
    /// Unregisters an event from a certain address it has been previously registered to.
    ///
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match &config {
            InterruptSourceConfig::MsiIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_MSI,
                    ..Default::default()
                };

                kvm_route.u.msi.address_lo = cfg.low_addr;
                kvm_route.u.msi.address_hi = cfg.high_addr;
                kvm_route.u.msi.data = cfg.data;

                if self.check_extension(crate::kvm::Cap::MsiDevid) {
                    // On AArch64, the range of the 'devid' is limited: it
                    // must fit in 16 bits, i.e. it cannot exceed 65535.
                    //
                    // The BDF cannot be used directly, because the 'segment'
                    // occupies the high 16 bits. The layout of the u32 BDF is:
                    // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --|
                    // |     segment     |     bus    |   device   |  function  |
                    //
                    // Since we currently support only one bus per segment, we
                    // can build a 'devid' by replacing the 'bus' bits with the
                    // low 8 bits of the 'segment' data.
                    // This resolves the range-checking problem and gives a
                    // distinct 'devid' to every device. The limitation is that
                    // at most 256 segments can be supported.
                    //
                    let modified_devid = ((cfg.devid & 0x00ff_0000) >> 8) | cfg.devid & 0xff;

                    kvm_route.flags = KVM_MSI_VALID_DEVID;
                    kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid;
                }
                kvm_route.into()
            }
            InterruptSourceConfig::LegacyIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_IRQCHIP,
                    ..Default::default()
                };
                kvm_route.u.irqchip.irqchip = cfg.irqchip;
                kvm_route.u.irqchip.pin = cfg.pin;

                kvm_route.into()
            }
        }
    }

    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
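    ///
    /// A sketch of typical usage, assuming `vm` is this `KvmVm` and `cfg` is
    /// an `InterruptSourceConfig` built by the caller:
    ///
    /// ```ignore
    /// let entry = vm.make_routing_entry(gsi, &cfg);
    /// vm.set_gsi_routing(&[entry])?;
    /// ```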
    ///
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
        irq_routing[0].nr = entries.len() as u32;
        irq_routing[0].flags = 0;
        let entries: Vec<kvm_irq_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Kvm(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: irq_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [kvm_irq_routing_entry] =
                irq_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_gsi_routing(&irq_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }

    ///
    /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
    ///
    fn make_user_memory_region(
        &self,
        slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        kvm_userspace_memory_region {
            slot,
            guest_phys_addr,
            memory_size,
            userspace_addr,
            flags: if readonly { KVM_MEM_READONLY } else { 0 }
                | if log_dirty_pages {
                    KVM_MEM_LOG_DIRTY_PAGES
                } else {
                    0
                },
        }
        .into()
    }

    ///
    /// Creates a guest physical memory region.
    ///
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 {
            if (region.flags & KVM_MEM_READONLY) != 0 {
                return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!(
                    "Error creating regions with both 'dirty-pages-log' and 'read-only'."
                )));
            }

            // Keep track of the regions that need dirty pages log
            self.dirty_log_slots.write().unwrap().insert(
                region.slot,
                KvmDirtyLogSlot {
                    slot: region.slot,
                    guest_phys_addr: region.guest_phys_addr,
                    memory_size: region.memory_size,
                    userspace_addr: region.userspace_addr,
                },
            );

            // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`.
            // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`.
            region.flags = 0;
        }

        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
        }
    }

    ///
    /// Removes a guest physical memory region.
    ///
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots.write().unwrap().remove(&region.slot);

        // Setting the size to 0 means "remove"
        region.memory_size = 0;
        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))
        }
    }

    ///
    /// Returns the preferred CPU target type which can be emulated by KVM on underlying host.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_preferred_target(&self, kvi: &mut crate::VcpuInit) -> vm::Result<()> {
        let mut kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();
        self.fd
            .get_preferred_target(&mut kvm_kvi)
            .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))?;
        *kvi = kvm_kvi.into();
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        // Create split irqchip.
        // Only the local APIC is emulated in-kernel; the PICs and the IOAPIC
        // are not.
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = NUM_IOAPIC_PINS as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SGX_ATTRIBUTE,
            ..Default::default()
        };
        cap.args[0] = file.as_raw_fd() as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?;
        Ok(())
    }

    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        Ok(self
            .fd
            .get_clock()
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?
            .into())
    }

    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        let data = (*data).into();
        self.fd
            .set_clock(&data)
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }

    /// Create a device that is used for passthrough
    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = kvm_create_device {
            type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: KVM_MEM_LOG_DIRTY_PAGES,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: 0,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(slot, memory_size as usize)
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }

    ///
    /// Initialize TDX for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> {
        const TDX_ATTR_SEPT_VE_DISABLE: usize = 28;

        let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default());

        #[repr(C)]
        struct TdxInitVm {
            attributes: u64,
            max_vcpus: u32,
            padding: u32,
            mrconfigid: [u64; 6],
            mrowner: [u64; 6],
            mrownerconfig: [u64; 6],
            cpuid_nent: u32,
            cpuid_padding: u32,
            cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256],
        }
        let data = TdxInitVm {
            attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE,
            max_vcpus,
            padding: 0,
            mrconfigid: [0; 6],
            mrowner: [0; 6],
            mrownerconfig: [0; 6],
            cpuid_nent: cpuid.len() as u32,
            cpuid_padding: 0,
            cpuid_entries: cpuid.as_slice().try_into().unwrap(),
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitVm,
            0,
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitializeTdx)
    }

    ///
    /// Finalize the TDX setup for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_finalize(&self) -> vm::Result<()> {
        tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
            .map_err(vm::HypervisorVmError::FinalizeTdx)
    }

    ///
    /// Initialize memory regions for the TDX VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init_memory_region(
        &self,
        host_address: u64,
        guest_address: u64,
        size: u64,
        measure: bool,
    ) -> vm::Result<()> {
        #[repr(C)]
        struct TdxInitMemRegion {
            host_address: u64,
            guest_address: u64,
            pages: u64,
        }
        let data = TdxInitMemRegion {
            host_address,
            guest_address,
            pages: size / 4096,
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitMemRegion,
            u32::from(measure),
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitMemRegionTdx)
    }

    /// Downcast to the underlying KvmVm type
    fn as_any(&self) -> &dyn Any {
        self
    }
}

#[cfg(feature = "tdx")]
fn tdx_command(
    fd: &RawFd,
    command: TdxCommand,
    flags: u32,
    data: u64,
) -> std::result::Result<(), std::io::Error> {
    #[repr(C)]
    struct TdxIoctlCmd {
        command: TdxCommand,
        flags: u32,
        data: u64,
        error: u64,
        unused: u64,
    }
    let cmd = TdxIoctlCmd {
        command,
        flags,
        data,
        error: 0,
        unused: 0,
    };
    // SAFETY: FFI call. All input parameters are valid.
    let ret = unsafe {
        ioctl_with_val(
            fd,
            KVM_MEMORY_ENCRYPT_OP(),
            &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong,
        )
    };

    if ret < 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}

/// Wrapper over KVM system ioctls.
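///
/// Construction goes through [`KvmHypervisor::new`], mirroring the examples
/// on the trait impls below:
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// let kvm = KvmHypervisor::new().unwrap();
/// ```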
pub struct KvmHypervisor {
    kvm: Kvm,
}

impl KvmHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by the hypervisor.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

/// Enum for KVM-related errors
#[derive(Debug, Error)]
pub enum KvmError {
    #[error("Capability missing: {0:?}")]
    CapabilityMissing(Cap),
}

pub type KvmResult<T> = result::Result<T, KvmError>;

impl KvmHypervisor {
    /// Create a hypervisor based on Kvm
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
        let api_version = kvm_obj.get_api_version();

        if api_version != kvm_bindings::KVM_API_VERSION as i32 {
            return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
        }

        Ok(Arc::new(KvmHypervisor { kvm: kvm_obj }))
    }

    /// Check if the hypervisor is available
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/kvm") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}

/// Implementation of Hypervisor trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl hypervisor::Hypervisor for KvmHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Kvm
    }

    ///
    /// Create a Vm of a specific type using the underlying hypervisor, passing the memory size.
    /// Return a hypervisor-agnostic Vm trait object.
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type_and_memory(0).unwrap();
    /// ```
    fn create_vm_with_type_and_memory(
        &self,
        vm_type: u64,
        #[cfg(feature = "sev_snp")] _mem_size: u64,
    ) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        self.create_vm_with_type(vm_type)
    }

    /// Create a KVM vm object of a specific VM type and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type(0).unwrap();
    /// ```
    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm_with_type(vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, the ioctl has been
                        // interrupted, and we have to retry, as this can't be
                        // considered a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs: Vec<MsrEntry> = vec![
                MsrEntry {
                    ..Default::default()
                };
                num_msrs
            ];
            let indices = msr_list.as_slice();
            for (pos, index) in indices.iter().enumerate() {
                msrs[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }

        #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }

    /// Create a KVM vm object and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// ```
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        #[allow(unused_mut)]
        let mut vm_type: u64 = 0; // Create with default platform type

        // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA
        // size from the host and use that when creating the VM, which may
        // avoid unnecessary VM creation failures.
        #[cfg(target_arch = "aarch64")]
        if self.kvm.check_extension(Cap::ArmVmIPASize) {
            vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap();
        }

        self.create_vm_with_type(vm_type)
    }

    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        check_required_kvm_extensions(&self.kvm)
            .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }

    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve AArch64 host maximum IPA size supported by KVM.
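    ///
    /// A sketch of how `create_vm` above consumes this value when
    /// `Cap::ArmVmIPASize` is supported:
    ///
    /// ```ignore
    /// let vm_type: u64 = hypervisor.get_host_ipa_limit().try_into().unwrap();
    /// ```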
    ///
    fn get_host_ipa_limit(&self) -> i32 {
        self.kvm.get_host_ipa_limit()
    }

    ///
    /// Retrieve TDX capabilities
    ///
    #[cfg(feature = "tdx")]
    fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> {
        let data = TdxCapabilities {
            nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32,
            ..Default::default()
        };

        tdx_command(
            &self.kvm.as_raw_fd(),
            TdxCommand::Capabilities,
            0,
            &data as *const _ as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?;

        Ok(data)
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    ///
    /// Get the number of supported hardware breakpoints
    ///
    fn get_guest_debug_hw_bps(&self) -> usize {
        #[cfg(target_arch = "x86_64")]
        {
            4
        }
        #[cfg(target_arch = "aarch64")]
        {
            self.kvm.get_guest_debug_hw_bps() as usize
        }
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32
    }
}

/// Vcpu struct for KVM
pub struct KvmVcpu {
    fd: Arc<Mutex<VcpuFd>>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    #[cfg(target_arch = "x86_64")]
    hyperv_synic: AtomicBool,
}

/// Implementation of Vcpu trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// ```
impl cpu::Vcpu for KvmVcpu {
    ///
    /// Returns StandardRegisters with default value set
    ///
    fn create_standard_regs(&self) -> StandardRegisters {
        #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
        {
            kvm_bindings::kvm_regs::default().into()
        }
        #[cfg(target_arch = "riscv64")]
        {
            kvm_bindings::kvm_riscv_core::default().into()
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }

    ///
    /// Returns the vCPU general purpose registers.
    /// The `KVM_GET_REGS` ioctl is not available on AArch64; `KVM_GET_ONE_REG`
    /// is used to get registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        let mut state = kvm_regs::default();
        let mut off = offset_of!(user_pt_regs, regs);
        // There are 31 user_pt_regs:
        // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
        // These are the general-purpose registers of the Armv8-A architecture
        // (i.e. x0-x30 when used as 64-bit registers, or w0-w30 when used as
        // 32-bit registers).
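        // A note on the id encoding (a sketch, not the authoritative
        // definition): `arm64_core_reg_id!` builds the KVM register id
        // roughly as
        //   KVM_REG_ARM64 | size | KVM_REG_ARM_CORE | (off / size_of::<u32>())
        // i.e. the core-register index is the offset into `kvm_regs` counted
        // in 32-bit words, matching the kernel's KVM_REG_ARM_CORE_REG().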
        for i in 0..31 {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
            state.regs.regs[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // We are now entering the "Other register" section of the Armv8-A architecture.
        // First one, the stack pointer.
        let off = offset_of!(user_pt_regs, sp);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.regs.sp = u64::from_le_bytes(bytes);

        // Second one, the program counter.
        let off = offset_of!(user_pt_regs, pc);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.regs.pc = u64::from_le_bytes(bytes);

        // Next is the processor state.
        let off = offset_of!(user_pt_regs, pstate);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.regs.pstate = u64::from_le_bytes(bytes);

        // The stack pointer associated with EL1.
        let off = offset_of!(kvm_regs, sp_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.sp_el1 = u64::from_le_bytes(bytes);

        // Exception Link Register for EL1. When taking an exception to EL1,
        // this register holds the address to return to afterwards.
        let off = offset_of!(kvm_regs, elr_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.elr_el1 = u64::from_le_bytes(bytes);

        // Saved Program Status Registers; there are 5 of them used in the kernel.
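        // (KVM_NR_SPSR == 5: SPSR_EL1 plus the AArch32 banked SPSRs for the
        // abort, undefined, IRQ and FIQ modes.)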
        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
            state.spsr[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // Now moving on to the floating-point registers, which are stored in
        // the user_fpsimd_state in the kernel:
        // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            let mut bytes = [0_u8; 16];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
            state.fp_regs.vregs[i] = u128::from_le_bytes(bytes);
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        let mut bytes = [0_u8; 4];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.fp_regs.fpsr = u32::from_le_bytes(bytes);

        // Floating-point Control Register
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        let mut bytes = [0_u8; 4];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.fp_regs.fpcr = u32::from_le_bytes(bytes);
        Ok(state.into())
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Returns the RISC-V vCPU core registers.
    /// The `KVM_GET_REGS` ioctl is not available on RISC-V 64-bit;
    /// `KVM_GET_ONE_REG` is used to get registers one by one.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        let mut state = kvm_riscv_core::default();

        /// Macro used to extract RISC-V register data from KVM Vcpu according
        /// to `$reg_name` provided to `state`.
        macro_rules! riscv64_get_one_reg_from_vcpu {
            (mode) => {
                let off = offset_of!(kvm_riscv_core, mode);
                let mut bytes = [0_u8; 8];
                self.fd
                    .lock()
                    .unwrap()
                    .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
                    .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
                state.mode = u64::from_le_bytes(bytes);
            };
            ($reg_name:ident) => {
                let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, $reg_name);
                let mut bytes = [0_u8; 8];
                self.fd
                    .lock()
                    .unwrap()
                    .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes)
                    .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?;
                state.regs.$reg_name = u64::from_le_bytes(bytes);
            };
        }

        riscv64_get_one_reg_from_vcpu!(pc);
        riscv64_get_one_reg_from_vcpu!(ra);
        riscv64_get_one_reg_from_vcpu!(sp);
        riscv64_get_one_reg_from_vcpu!(gp);
        riscv64_get_one_reg_from_vcpu!(tp);
        riscv64_get_one_reg_from_vcpu!(t0);
        riscv64_get_one_reg_from_vcpu!(t1);
        riscv64_get_one_reg_from_vcpu!(t2);
        riscv64_get_one_reg_from_vcpu!(s0);
        riscv64_get_one_reg_from_vcpu!(s1);
        riscv64_get_one_reg_from_vcpu!(a0);
        riscv64_get_one_reg_from_vcpu!(a1);
        riscv64_get_one_reg_from_vcpu!(a2);
        riscv64_get_one_reg_from_vcpu!(a3);
        riscv64_get_one_reg_from_vcpu!(a4);
        riscv64_get_one_reg_from_vcpu!(a5);
        riscv64_get_one_reg_from_vcpu!(a6);
        riscv64_get_one_reg_from_vcpu!(a7);
        riscv64_get_one_reg_from_vcpu!(s2);
        riscv64_get_one_reg_from_vcpu!(s3);
        riscv64_get_one_reg_from_vcpu!(s4);
        riscv64_get_one_reg_from_vcpu!(s5);
        riscv64_get_one_reg_from_vcpu!(s6);
        riscv64_get_one_reg_from_vcpu!(s7);
        riscv64_get_one_reg_from_vcpu!(s8);
        riscv64_get_one_reg_from_vcpu!(s9);
        riscv64_get_one_reg_from_vcpu!(s10);
        riscv64_get_one_reg_from_vcpu!(s11);
        riscv64_get_one_reg_from_vcpu!(t3);
        riscv64_get_one_reg_from_vcpu!(t4);
        riscv64_get_one_reg_from_vcpu!(t5);
        riscv64_get_one_reg_from_vcpu!(t6);
        riscv64_get_one_reg_from_vcpu!(mode);

        Ok(state.into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .lock()
            .unwrap()
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }

    ///
    /// Sets the vCPU general purpose registers.
    /// The `KVM_SET_REGS` ioctl is not available on AArch64; `KVM_SET_ONE_REG`
    /// is used to set registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // This function mirrors `get_regs` and follows the same register
        // order. See the comments there for details on each register.
        let kvm_regs_state: kvm_regs = (*state).into();
        let mut off = offset_of!(user_pt_regs, regs);
        for i in 0..31 {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &kvm_regs_state.regs.regs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let off = offset_of!(user_pt_regs, sp);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &kvm_regs_state.regs.sp.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pc);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &kvm_regs_state.regs.pc.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pstate);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &kvm_regs_state.regs.pstate.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, sp_el1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &kvm_regs_state.sp_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, elr_el1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &kvm_regs_state.elr_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &kvm_regs_state.spsr[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
                    &kvm_regs_state.fp_regs.vregs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &kvm_regs_state.fp_regs.fpsr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &kvm_regs_state.fp_regs.fpcr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Sets the RISC-V vCPU core registers.
    /// The `KVM_SET_REGS` ioctl is not available on RISC-V 64-bit;
    /// `KVM_SET_ONE_REG` is used to set registers one by one.
    ///
    fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // This function mirrors `get_regs` and follows the same register
        // order. See the comments there for details on each register.
        let kvm_regs_state: kvm_riscv_core = (*state).into();

        /// Macro used to set the value of a specific RISC-V `$reg_name`
        /// stored in `state` on the KVM Vcpu.
        macro_rules! riscv64_set_one_reg_to_vcpu {
            (mode) => {
                let off = offset_of!(kvm_riscv_core, mode);
                self.fd
                    .lock()
                    .unwrap()
                    .set_one_reg(
                        riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
                        &kvm_regs_state.mode.to_le_bytes(),
                    )
                    .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
            };
            ($reg_name:ident) => {
                let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, $reg_name);
                self.fd
                    .lock()
                    .unwrap()
                    .set_one_reg(
                        riscv64_reg_id!(KVM_REG_RISCV_CORE, off),
                        &kvm_regs_state.regs.$reg_name.to_le_bytes(),
                    )
                    .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;
            };
        }

        riscv64_set_one_reg_to_vcpu!(pc);
        riscv64_set_one_reg_to_vcpu!(ra);
        riscv64_set_one_reg_to_vcpu!(sp);
        riscv64_set_one_reg_to_vcpu!(gp);
        riscv64_set_one_reg_to_vcpu!(tp);
        riscv64_set_one_reg_to_vcpu!(t0);
        riscv64_set_one_reg_to_vcpu!(t1);
        riscv64_set_one_reg_to_vcpu!(t2);
        riscv64_set_one_reg_to_vcpu!(s0);
        riscv64_set_one_reg_to_vcpu!(s1);
        riscv64_set_one_reg_to_vcpu!(a0);
        riscv64_set_one_reg_to_vcpu!(a1);
        riscv64_set_one_reg_to_vcpu!(a2);
        riscv64_set_one_reg_to_vcpu!(a3);
        riscv64_set_one_reg_to_vcpu!(a4);
        riscv64_set_one_reg_to_vcpu!(a5);
        riscv64_set_one_reg_to_vcpu!(a6);
        riscv64_set_one_reg_to_vcpu!(a7);
        riscv64_set_one_reg_to_vcpu!(s2);
        riscv64_set_one_reg_to_vcpu!(s3);
        riscv64_set_one_reg_to_vcpu!(s4);
        riscv64_set_one_reg_to_vcpu!(s5);
        riscv64_set_one_reg_to_vcpu!(s6);
        riscv64_set_one_reg_to_vcpu!(s7);
        riscv64_set_one_reg_to_vcpu!(s8);
        riscv64_set_one_reg_to_vcpu!(s9);
        riscv64_set_one_reg_to_vcpu!(s10);
        riscv64_set_one_reg_to_vcpu!(s11);
        riscv64_set_one_reg_to_vcpu!(t3);
        riscv64_set_one_reg_to_vcpu!(t4);
        riscv64_set_one_reg_to_vcpu!(t5);
        riscv64_set_one_reg_to_vcpu!(t6);
        riscv64_set_one_reg_to_vcpu!(mode);

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .lock()
            .unwrap()
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
        let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        let kvm_cpuid = <CpuId>::from_entries(&cpuid)
            .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;

        self.fd
            .lock()
            .unwrap()
            .set_cpuid2(&kvm_cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        // Update the information about Hyper-V SynIC being enabled and
        // emulated as it will influence later which MSRs should be saved.
        self.hyperv_synic.store(true, Ordering::Release);

        let cap = kvm_enable_cap {
            cap: KVM_CAP_HYPERV_SYNIC,
            ..Default::default()
        };
        self.fd
            .lock()
            .unwrap()
            .enable_cap(&cap)
            .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into()))
    }

    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
    #[cfg(target_arch = "x86_64")]
    fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .fd
            .lock()
            .unwrap()
            .get_cpuid2(num_entries)
            .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
        let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_lapic(&klapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
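    ///
    /// A minimal sketch, assuming `vcpu` implements this trait; the MSR index
    /// (0x174, IA32_SYSENTER_CS) is only an illustrative choice:
    ///
    /// ```ignore
    /// let mut msrs = vec![MsrEntry { index: 0x174, ..Default::default() }];
    /// let nread = vcpu.get_msrs(&mut msrs)?;
    /// ```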
    ///
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
        let succ = self
            .fd
            .lock()
            .unwrap()
            .get_msrs(&mut kvm_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &kvm_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set up the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
        self.fd
            .lock()
            .unwrap()
            .set_msrs(&kvm_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    ///
    /// Returns the vCPU's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_mp_state()
            .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))?
            .into())
    }

    ///
    /// Sets the vCPU's current "multiprocessing state".
    ///
    fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_mp_state(mp_state.into())
            .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Translates a guest virtual address to a guest physical address using the
    /// `KVM_TRANSLATE` ioctl.
    ///
    fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> {
        let tr = self
            .fd
            .lock()
            .unwrap()
            .translate_gva(gva)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
        // tr.valid is set if the GVA is mapped to a valid GPA.
        match tr.valid {
            0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!(
                "Invalid GVA: {:#x}",
                gva
            ))),
            _ => Ok((tr.physical_address, 0)),
        }
    }

    ///
    /// Triggers the running of the current virtual CPU, returning an exit reason.
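    ///
    /// # Example
    ///
    /// A hedged sketch of a minimal dispatch loop. No guest memory or boot
    /// code is configured here, so this is illustrative only:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// loop {
    ///     match vcpu.run().unwrap() {
    ///         hypervisor::cpu::VmExit::Reset => break,
    ///         _ => continue,
    ///     }
    /// }
    /// ```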
    ///
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.lock().unwrap().run() {
            Ok(run) => match run {
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoIn(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .pio_read(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoOut(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .pio_write(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
                #[cfg(target_arch = "x86_64")]
                VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),

                #[cfg(target_arch = "aarch64")]
                VcpuExit::SystemEvent(event_type, flags) => {
                    use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
                    // On AArch64, when the VM is shut down, run() returns
                    // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN.
                    if event_type == KVM_SYSTEM_EVENT_RESET {
                        Ok(cpu::VmExit::Reset)
                    } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
                        Ok(cpu::VmExit::Shutdown)
                    } else {
                        Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unexpected system event with type 0x{:x}, flags 0x{:x?}",
                            event_type,
                            flags
                        )))
                    }
                }

                VcpuExit::MmioRead(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .mmio_read(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                VcpuExit::MmioWrite(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .mmio_write(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),
                #[cfg(feature = "tdx")]
                VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx),
                VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug),

                r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unexpected exit reason on vcpu run: {:?}",
                    r
                ))),
            },

            Err(ref e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Let the guest know that it has been paused, which prevents potential
    /// soft lockups when it is resumed.
    ///
    fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
        if let Err(e) = self.fd.lock().unwrap().kvmclock_ctrl() {
            // The Linux kernel returns -EINVAL if the PV clock isn't yet initialised,
            // which could be because we're still in firmware or the guest doesn't
            // use the KVM clock.
            if e.errno() != libc::EINVAL {
                return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()));
            }
        }

        Ok(())
    }

    #[cfg(not(target_arch = "riscv64"))]
    ///
    /// Sets debug registers to set hardware breakpoints and/or enable single step.
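    ///
    /// # Example
    ///
    /// A minimal sketch placing one hardware breakpoint; `0x1000` is an
    /// arbitrary placeholder guest address:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// # use vm_memory::GuestAddress;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// vcpu.set_guest_debug(&[GuestAddress(0x1000)], false).unwrap();
    /// ```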
    ///
    fn set_guest_debug(
        &self,
        addrs: &[vm_memory::GuestAddress],
        singlestep: bool,
    ) -> cpu::Result<()> {
        let mut dbg = kvm_guest_debug {
            #[cfg(target_arch = "x86_64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
            #[cfg(target_arch = "aarch64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW,
            ..Default::default()
        };
        if singlestep {
            dbg.control |= KVM_GUESTDBG_SINGLESTEP;
        }

        // Set the debug registers.
        // Here we assume that the number of addresses does not exceed what
        // `Hypervisor::get_guest_debug_hw_bps()` specifies.
        #[cfg(target_arch = "x86_64")]
        {
            // Set bits 9 and 10.
            // bit 9: GE (global exact breakpoint enable) flag.
            // bit 10: always 1.
            dbg.arch.debugreg[7] = 0x0600;

            for (i, addr) in addrs.iter().enumerate() {
                dbg.arch.debugreg[i] = addr.0;
                // Set global breakpoint enable flag
                dbg.arch.debugreg[7] |= 2 << (i * 2);
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            for (i, addr) in addrs.iter().enumerate() {
                // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2):
                // bit 0: 1 (Enabled)
                // bit 1~2: 0b11 (PMC = EL1/EL0)
                // bit 5~8: 0b1111 (BAS = AArch64)
                // others: 0
                dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64;
                // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3):
                // bit 2~52: VA[2:52]
                dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0;
            }
        }
        self.fd
            .lock()
            .unwrap()
            .set_guest_debug(&dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_get_finalized_features(&self) -> i32 {
        kvm_bindings::KVM_ARM_VCPU_SVE as i32
    }

    #[cfg(target_arch = "aarch64")]
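    ///
    /// Set up the processor feature flags (PSCI, PMU, SVE and the power-off
    /// state for non-boot CPUs) in `kvi` before the vCPU is initialized.
    ///
    /// # Example
    ///
    /// A hedged sketch of the full aarch64 initialization sequence; the
    /// final `vcpu_finalize()` call is only strictly required when SVE is
    /// enabled:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let mut kvi = vcpu.create_vcpu_init();
    /// vcpu.vcpu_set_processor_features(&vm, &mut kvi, 0).unwrap();
    /// vcpu.vcpu_init(&kvi).unwrap();
    /// vcpu.vcpu_finalize(vcpu.vcpu_get_finalized_features()).unwrap();
    /// ```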
    fn vcpu_set_processor_features(
        &self,
        vm: &Arc<dyn crate::Vm>,
        kvi: &mut crate::VcpuInit,
        id: u8,
    ) -> cpu::Result<()> {
        use std::arch::is_aarch64_feature_detected;
        #[allow(clippy::nonminimal_bool)]
        let sve_supported =
            is_aarch64_feature_detected!("sve") || is_aarch64_feature_detected!("sve2");

        let mut kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();

        // We already checked that the capability is supported.
        kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2;
        if vm
            .as_any()
            .downcast_ref::<crate::kvm::KvmVm>()
            .unwrap()
            .check_extension(Cap::ArmPmuV3)
        {
            kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PMU_V3;
        }

        if sve_supported
            && vm
                .as_any()
                .downcast_ref::<crate::kvm::KvmVm>()
                .unwrap()
                .check_extension(Cap::ArmSve)
        {
            kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_SVE;
        }

        // Non-boot CPUs are powered off initially.
        if id > 0 {
            kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_POWER_OFF;
        }

        *kvi = kvm_kvi.into();

        Ok(())
    }

    ///
    /// Returns a `VcpuInit` with default values set.
    ///
    #[cfg(target_arch = "aarch64")]
    fn create_vcpu_init(&self) -> crate::VcpuInit {
        kvm_bindings::kvm_vcpu_init::default().into()
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_init(&self, kvi: &crate::VcpuInit) -> cpu::Result<()> {
        let kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();
        self.fd
            .lock()
            .unwrap()
            .vcpu_init(&kvm_kvi)
            .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_finalize(&self, feature: i32) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .vcpu_finalize(&feature)
            .map_err(|e| cpu::HypervisorCpuError::VcpuFinalize(e.into()))
    }

    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
    ///
    /// Gets a list of the guest registers that are supported for the
    /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
    ///
    fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
        let mut kvm_reg_list: kvm_bindings::RegList = reg_list.clone().into();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut kvm_reg_list)
            .map_err(|e: kvm_ioctls::Error| cpu::HypervisorCpuError::GetRegList(e.into()))?;
        *reg_list = kvm_reg_list.into();
        Ok(())
    }

    ///
    /// Gets the value of a system register.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
        //
        // The Arm Architecture Reference Manual defines the encoding of
        // AArch64 system registers, see
        // https://developer.arm.com/documentation/ddi0487 (chapter D12).
        // KVM defines its own ID for each AArch64 system register, which is
        // used when calling `KVM_GET/SET_ONE_REG` to access a system register
        // of a guest.
        // A mapping exists between the Arm standard encoding and the KVM ID.
        // This function takes the standard u32 ID as input parameter, converts
        // it to the corresponding KVM ID, and calls `KVM_GET_ONE_REG` to
        // get the value of the system register.
        //
        let id: u64 = KVM_REG_ARM64
            | KVM_REG_SIZE_U64
            | KVM_REG_ARM64_SYSREG as u64
            | ((((sys_reg) >> 5)
                & (KVM_REG_ARM64_SYSREG_OP0_MASK
                    | KVM_REG_ARM64_SYSREG_OP1_MASK
                    | KVM_REG_ARM64_SYSREG_CRN_MASK
                    | KVM_REG_ARM64_SYSREG_CRM_MASK
                    | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(id, &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
        Ok(u64::from_le_bytes(bytes))
    }

    ///
    /// Gets the value of a non-core register.
    ///
    #[cfg(target_arch = "riscv64")]
    fn get_non_core_reg(&self, _non_core_reg: u32) -> cpu::Result<u64> {
        unimplemented!()
    }

    ///
    /// Configure core registers for a given CPU.
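    ///
    /// # Example
    ///
    /// A hedged sketch for the boot vCPU; the kernel entry point and FDT
    /// addresses below are arbitrary placeholders:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// vcpu.setup_regs(0, 0x8008_0000, 0x8000_0000).unwrap();
    /// ```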
    ///
    #[cfg(target_arch = "aarch64")]
    fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
        let kreg_off = offset_of!(kvm_regs, regs);

        // Get the register index of the PSTATE (Processor State) register.
        let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
                &regs::PSTATE_FAULT_BITS_64.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        // Other vCPUs are powered off initially, awaiting PSCI wakeup.
        if cpu_id == 0 {
            // Setting the PC (Program Counter) to the current program address (kernel address).
            let pc = offset_of!(user_pt_regs, pc) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
                    &boot_ip.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

            // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
            // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
            // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
            // We are choosing to place it at the end of DRAM. See `get_fdt_addr`.
            let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
                    &fdt_start.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
        }
        Ok(())
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Configure registers for a given RISC-V CPU.
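    ///
    /// # Example
    ///
    /// A hedged sketch for hart 0; the kernel entry point and FDT addresses
    /// below are arbitrary placeholders:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// vcpu.setup_regs(0, 0x8020_0000, 0x8800_0000).unwrap();
    /// ```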
    ///
    fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
        // Setting register A0 to the hartid of this CPU.
        let a0 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a0);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                riscv64_reg_id!(KVM_REG_RISCV_CORE, a0),
                &u64::from(cpu_id).to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;

        // Setting the PC (Program Counter) to the current program address (kernel address).
        let pc = offset_of!(kvm_riscv_core, regs, user_regs_struct, pc);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                riscv64_reg_id!(KVM_REG_RISCV_CORE, pc),
                &boot_ip.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;

        // Last mandatory thing to set -> the address pointing to the FDT (also called DTB).
        // "The device tree blob (dtb) must be placed on an 8-byte boundary and must
        // not exceed 64 kilobytes in size." -> https://www.kernel.org/doc/Documentation/arch/riscv/boot.txt.
        let a1 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                riscv64_reg_id!(KVM_REG_RISCV_CORE, a1),
                &fdt_start.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before almost everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change state of LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save. It looks like it
    /// could well be affected by internal state modifications of the other
    /// GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a prepopulated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?.into();
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fall back on a slower method by getting MSRs
        // by chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
                0x400000b5, 0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr.into());
            }
        }

        let expected_num_msrs = msr_entries.len();
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;
        let tsc_khz = self.tsc_khz()?;

        Ok(VcpuKvmState {
            cpuid,
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
            tsc_khz,
        }
        .into())
    }

    ///
    /// Get the current AArch64 CPU state
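    ///
    /// # Example
    ///
    /// A hedged sketch; the vCPU is assumed to have been initialized via the
    /// `vcpu_init()` sequence first (elided here):
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```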
    ///
    #[cfg(target_arch = "aarch64")]
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = VcpuKvmState {
            mp_state: self.get_mp_state()?.into(),
            ..Default::default()
        };
        // Get core registers
        state.core_regs = self.get_regs()?.into();

        // Get system registers
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For Armv8 there are around 500 registers.
        let mut sys_regs: Vec<kvm_bindings::kvm_one_reg> = Vec::new();
        let mut reg_list = kvm_bindings::RegList::new(500).unwrap();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain: core registers and system
        // registers.
        // The register list contains the number of registers and their ids. We
        // will need to call KVM_GET_ONE_REG on each id in order to save
        // all of them. We carve out from the list the core registers which are
        // represented in the kernel by the kvm_regs structure and for which we
        // can calculate the id based on the offset in the structure.
        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we are simply calling KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(*index, &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
            sys_regs.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: u64::from_le_bytes(bytes),
            });
        }

        state.sys_regs = sys_regs;

        Ok(state.into())
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Get the current RISC-V 64-bit CPU state
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = VcpuKvmState {
            mp_state: self.get_mp_state()?.into(),
            ..Default::default()
        };
        // Get core registers
        state.core_regs = self.get_regs()?.into();

        // Get non-core registers
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For RISC-V 64-bit there are around 200 registers.
        let mut sys_regs: Vec<kvm_bindings::kvm_one_reg> = Vec::new();
        let mut reg_list = kvm_bindings::RegList::new(200).unwrap();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain:
        // - core registers
        // - config registers
        // - timer registers
        // - control and status registers
        // - AIA control and status registers
        // - smstateen control and status registers
        // - sbi_sta control and status registers.
        //
        // The register list contains the number of registers and their ids. We
        // will need to call KVM_GET_ONE_REG on each id in order to save
        // all of them. We carve out from the list the core registers which are
        // represented in the kernel by the `kvm_riscv_core` structure and for
        // which we can calculate the id based on the offset in the structure.
        reg_list.retain(|regid| is_non_core_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we are simply calling KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(*index, &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
            sys_regs.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: u64::from_le_bytes(bytes),
            });
        }

        state.non_core_regs = sys_regs;

        Ok(state.into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state.into())?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        if let Some(freq) = state.tsc_khz {
            self.set_tsc_khz(freq)?;
        }

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fall back on a slower method by setting MSRs
        // by chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.len();
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let sub_msr_entries = state.msrs[start_pos..].to_vec();

                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }

    ///
    /// Restore the previously saved AArch64 CPU state
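    ///
    /// # Example
    ///
    /// A hedged sketch, pairing with the `state()` example above:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```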
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        // Set core registers
        self.set_regs(&state.core_regs.into())?;
        // Set system registers
        for reg in &state.sys_regs {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(reg.id, &reg.addr.to_le_bytes())
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }

        self.set_mp_state(state.mp_state.into())?;

        Ok(())
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Restore the previously saved RISC-V 64-bit CPU state
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        // Set core registers
        self.set_regs(&state.core_regs.into())?;
        // Set non-core registers
        for reg in &state.non_core_regs {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(reg.id, &reg.addr.to_le_bytes())
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }

        self.set_mp_state(state.mp_state.into())?;

        Ok(())
    }

    ///
    /// Initialize TDX for this CPU
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> {
        tdx_command(
            &self.fd.lock().unwrap().as_raw_fd(),
            TdxCommand::InitVcpu,
            0,
            hob_address,
        )
        .map_err(cpu::HypervisorCpuError::InitializeTdx)
    }

    ///
    /// Set the "immediate_exit" state
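    ///
    /// # Example
    ///
    /// A minimal sketch: request that the next `run()` returns to userspace
    /// immediately, then clear the flag again:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// vcpu.set_immediate_exit(true);
    /// vcpu.set_immediate_exit(false);
    /// ```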
    ///
    fn set_immediate_exit(&self, exit: bool) {
        self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into());
    }

    ///
    /// Returns the details about TDX exit reason
    ///
    #[cfg(feature = "tdx")]
    fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
        // SAFETY: accessing a union field in a valid structure
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND;

        if tdx_vmcall.type_ != 0 {
            return Err(cpu::HypervisorCpuError::UnknownTdxVmCall);
        }

        match tdx_vmcall.subfunction {
            TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote),
            TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => {
                Ok(TdxExitDetails::SetupEventNotifyInterrupt)
            }
            _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall),
        }
    }

    ///
    /// Set the status code for TDX exit
    ///
    #[cfg(feature = "tdx")]
    fn set_tdx_status(&mut self, status: TdxExitStatus) {
        let mut fd = self.fd.as_ref().lock().unwrap();
        let kvm_run = fd.get_kvm_run();
        // SAFETY: accessing a union field in a valid structure
        let tdx_vmcall = unsafe {
            &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1
                as *mut KvmTdxExit))
                .u
                .vmcall
        };

        tdx_vmcall.status_code = match status {
            TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS,
            TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND,
        };
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Return the list of initial MSR entries for a VCPU
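    ///
    /// # Example
    ///
    /// A minimal sketch, assuming the usual vCPU setup:
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let entries = vcpu.boot_msr_entries();
    /// assert!(!entries.is_empty());
    /// ```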
    ///
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr!(msr_index::MSR_IA32_TSC),
            msr_data!(
                msr_index::MSR_IA32_MISC_ENABLE,
                msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
            ),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }

    #[cfg(target_arch = "aarch64")]
    fn has_pmu_support(&self) -> bool {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        self.fd.lock().unwrap().has_device_attr(&cpu_attr).is_ok()
    }

    #[cfg(target_arch = "aarch64")]
    fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        let cpu_attr_irq = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
            addr: &irq as *const u32 as u64,
            flags: 0,
        };
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr_irq)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the frequency of the TSC if available
    ///
    fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
        match self.fd.lock().unwrap().get_tsc_khz() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(None)
                } else {
                    Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
                }
            }
            Ok(v) => Ok(Some(v)),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the frequency of the TSC if available
    ///
    fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
        match self.fd.lock().unwrap().set_tsc_khz(freq) {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Trigger NMI interrupt
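    ///
    /// # Example
    ///
    /// A hedged sketch: inject an NMI into a freshly created vCPU:
    ///
    /// ```no_run
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().unwrap();
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// vcpu.nmi().unwrap();
    /// ```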
    ///
    fn nmi(&self) -> cpu::Result<()> {
        match self.fd.lock().unwrap().nmi() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::Nmi(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }
}

impl KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<XsaveState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
        let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_xsave(&xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .lock()
            .unwrap()
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs as well as related
    /// states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .lock()
            .unwrap()
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs as well as related states
    /// of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
}

#[cfg(test)]
mod tests {
    #[test]
    #[cfg(target_arch = "riscv64")]
    fn test_get_and_set_regs() {
        use super::*;

        let kvm = KvmHypervisor::new().unwrap();
        let hypervisor = Arc::new(kvm);
        let vm = hypervisor.create_vm().expect("new VM fd creation failed");
        let vcpu0 = vm.create_vcpu(0, None).unwrap();

        let core_regs = StandardRegisters::from(kvm_riscv_core {
            regs: user_regs_struct {
                pc: 0x00,
                ra: 0x01,
                sp: 0x02,
                gp: 0x03,
                tp: 0x04,
                t0: 0x05,
                t1: 0x06,
                t2: 0x07,
                s0: 0x08,
                s1: 0x09,
                a0: 0x0a,
                a1: 0x0b,
                a2: 0x0c,
                a3: 0x0d,
                a4: 0x0e,
                a5: 0x0f,
                a6: 0x10,
                a7: 0x11,
                s2: 0x12,
                s3: 0x13,
                s4: 0x14,
                s5: 0x15,
                s6: 0x16,
                s7: 0x17,
                s8: 0x18,
                s9: 0x19,
                s10: 0x1a,
                s11: 0x1b,
                t3: 0x1c,
                t4: 0x1d,
                t5: 0x1e,
                t6: 0x1f,
            },
            mode: 0x00,
        });

        vcpu0.set_regs(&core_regs).unwrap();
        assert_eq!(vcpu0.get_regs().unwrap(), core_regs);
    }
}