// Copyright © 2024 Institute of Software, CAS. All rights reserved.
//
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//
// Copyright © 2020, Microsoft Corporation
//
// Copyright 2018-2019 CrowdStrike, Inc.
//
//

use std::any::Any;
use std::collections::HashMap;
#[cfg(target_arch = "x86_64")]
use std::fs::File;
#[cfg(target_arch = "x86_64")]
use std::os::unix::io::AsRawFd;
#[cfg(feature = "tdx")]
use std::os::unix::io::RawFd;
use std::result;
#[cfg(target_arch = "x86_64")]
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex, RwLock};

use kvm_ioctls::{NoDatamatch, VcpuFd, VmFd};
use vmm_sys_util::eventfd::EventFd;

#[cfg(target_arch = "aarch64")]
use crate::aarch64::gic::KvmGicV3Its;
#[cfg(target_arch = "aarch64")]
pub use crate::aarch64::{
    check_required_kvm_extensions, gic::Gicv3ItsState as GicState, is_system_register, VcpuKvmState,
};
#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::gic::{Vgic, VgicConfig};
#[cfg(target_arch = "riscv64")]
use crate::arch::riscv64::aia::{Vaia, VaiaConfig};
#[cfg(target_arch = "riscv64")]
use crate::riscv64::aia::KvmAiaImsics;
#[cfg(target_arch = "riscv64")]
pub use crate::riscv64::{
    aia::AiaImsicsState as AiaState, check_required_kvm_extensions, is_non_core_register,
    VcpuKvmState,
};
use crate::vm::{self, InterruptSourceConfig, VmOps};
#[cfg(target_arch = "aarch64")]
use crate::{arm64_core_reg_id, offset_of};
use crate::{cpu, hypervisor, vec_with_array_field, HypervisorType};
#[cfg(target_arch = "riscv64")]
use crate::{offset_of, riscv64_reg_id};
// x86_64 dependencies
#[cfg(target_arch = "x86_64")]
pub mod x86_64;
#[cfg(target_arch = "x86_64")]
use kvm_bindings::{
    kvm_enable_cap, kvm_msr_entry, MsrList, KVM_CAP_HYPERV_SYNIC, KVM_CAP_SPLIT_IRQCHIP,
    KVM_GUESTDBG_USE_HW_BP,
};
#[cfg(target_arch = "x86_64")]
use x86_64::check_required_kvm_extensions;
#[cfg(target_arch = "x86_64")]
pub use x86_64::{CpuId, ExtendedControlRegisters, MsrEntries, VcpuKvmState};

#[cfg(target_arch = "x86_64")]
use crate::arch::x86::{
    CpuIdEntry, FpuState, LapicState, MsrEntry, SpecialRegisters, XsaveState, NUM_IOAPIC_PINS,
};
#[cfg(target_arch = "x86_64")]
use crate::ClockData;
use crate::{
    CpuState, IoEventAddress, IrqRoutingEntry, MpState, StandardRegisters, UserMemoryRegion,
    USER_MEMORY_REGION_LOG_DIRTY, USER_MEMORY_REGION_READ, USER_MEMORY_REGION_WRITE,
};
// aarch64 dependencies
#[cfg(target_arch = "aarch64")]
pub mod aarch64;
// riscv64 dependencies
#[cfg(target_arch = "riscv64")]
pub mod riscv64;
#[cfg(target_arch = "aarch64")]
use std::mem;

///
/// Export generically-named wrappers of kvm-bindings for Unix-based platforms
///
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
pub use kvm_bindings::kvm_vcpu_events as VcpuEvents;
pub use kvm_bindings::{
    kvm_clock_data, kvm_create_device, kvm_create_device as CreateDevice,
    kvm_device_attr as DeviceAttr, kvm_device_type_KVM_DEV_TYPE_VFIO, kvm_guest_debug,
    kvm_irq_routing, kvm_irq_routing_entry, kvm_mp_state, kvm_run, kvm_userspace_memory_region,
    KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI,
    KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_READONLY, KVM_MSI_VALID_DEVID,
};
#[cfg(target_arch = "aarch64")]
use kvm_bindings::{
    kvm_regs, user_fpsimd_state, user_pt_regs, KVM_GUESTDBG_USE_HW, KVM_NR_SPSR, KVM_REG_ARM64,
    KVM_REG_ARM64_SYSREG, KVM_REG_ARM64_SYSREG_CRM_MASK, KVM_REG_ARM64_SYSREG_CRN_MASK,
    KVM_REG_ARM64_SYSREG_OP0_MASK, KVM_REG_ARM64_SYSREG_OP1_MASK, KVM_REG_ARM64_SYSREG_OP2_MASK,
    KVM_REG_ARM_CORE, KVM_REG_SIZE_U128, KVM_REG_SIZE_U32, KVM_REG_SIZE_U64,
};
#[cfg(target_arch = "riscv64")]
use kvm_bindings::{kvm_riscv_core, user_regs_struct, KVM_REG_RISCV_CORE};
#[cfg(feature = "tdx")]
use kvm_bindings::{kvm_run__bindgen_ty_1, KVMIO};
pub use kvm_ioctls::{Cap, Kvm, VcpuExit};
use thiserror::Error;
use vfio_ioctls::VfioDeviceFd;
#[cfg(feature = "tdx")]
use vmm_sys_util::{ioctl::ioctl_with_val, ioctl_ioc_nr, ioctl_iowr_nr};
pub use {kvm_bindings, kvm_ioctls};

#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
use crate::RegList;

#[cfg(target_arch = "x86_64")]
const KVM_CAP_SGX_ATTRIBUTE: u32 = 196;

#[cfg(target_arch = "x86_64")]
use vmm_sys_util::ioctl_io_nr;
#[cfg(all(not(feature = "tdx"), target_arch = "x86_64"))]
use vmm_sys_util::ioctl_ioc_nr;

#[cfg(target_arch = "x86_64")]
ioctl_io_nr!(KVM_NMI, kvm_bindings::KVMIO, 0x9a);

#[cfg(feature = "tdx")]
const KVM_EXIT_TDX: u32 = 50;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_GET_QUOTE: u64 = 0x10002;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT: u64 = 0x10004;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_SUCCESS: u64 = 0;
#[cfg(feature = "tdx")]
const TDG_VP_VMCALL_INVALID_OPERAND: u64 = 0x8000000000000000;

#[cfg(feature = "tdx")]
ioctl_iowr_nr!(KVM_MEMORY_ENCRYPT_OP, KVMIO, 0xba, std::os::raw::c_ulong);

#[cfg(feature = "tdx")]
#[repr(u32)]
enum TdxCommand {
    Capabilities = 0,
    InitVm,
    InitVcpu,
    InitMemRegion,
    Finalize,
}

#[cfg(feature = "tdx")]
pub enum TdxExitDetails {
    GetQuote,
    SetupEventNotifyInterrupt,
}

#[cfg(feature = "tdx")]
pub enum TdxExitStatus {
    Success,
    InvalidOperand,
}

#[cfg(feature = "tdx")]
const TDX_MAX_NR_CPUID_CONFIGS: usize = 6;

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCpuidConfig {
    pub leaf: u32,
    pub sub_leaf: u32,
    pub eax: u32,
    pub ebx: u32,
    pub ecx: u32,
    pub edx: u32,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default)]
pub struct TdxCapabilities {
    pub attrs_fixed0: u64,
    pub attrs_fixed1: u64,
    pub xfam_fixed0: u64,
    pub xfam_fixed1: u64,
    pub nr_cpuid_configs: u32,
    pub padding: u32,
    pub cpuid_configs: [TdxCpuidConfig; TDX_MAX_NR_CPUID_CONFIGS],
}

// This struct mirrors a C layout shared with the kernel, so `#[repr(C)]` is
// required (it was missing here) to keep the field order and padding stable.
#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Copy, Clone)]
pub struct KvmTdxExit {
    pub type_: u32,
    pub pad: u32,
    pub u: KvmTdxExitU,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Copy, Clone)]
pub union KvmTdxExitU {
    pub vmcall: KvmTdxExitVmcall,
}

#[cfg(feature = "tdx")]
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct KvmTdxExitVmcall {
    pub type_: u64,
    pub subfunction: u64,
    pub reg_mask: u64,
    pub in_r12: u64,
    pub in_r13: u64,
    pub in_r14: u64,
    pub in_r15: u64,
    pub in_rbx: u64,
    pub in_rdi: u64,
    pub in_rsi: u64,
    pub in_r8: u64,
    pub in_r9: u64,
    pub in_rdx: u64,
    pub status_code: u64,
    pub out_r11: u64,
    pub out_r12: u64,
    pub out_r13: u64,
    pub out_r14: u64,
    pub out_r15: u64,
    pub out_rbx: u64,
    pub out_rdi: u64,
    pub out_rsi: u64,
    pub out_r8: u64,
    pub out_r9: u64,
    pub out_rdx: u64,
}

impl From<kvm_userspace_memory_region> for UserMemoryRegion {
    fn from(region: kvm_userspace_memory_region) -> Self {
        let mut flags = USER_MEMORY_REGION_READ;
        if region.flags & KVM_MEM_READONLY == 0 {
            flags |= USER_MEMORY_REGION_WRITE;
        }
        if region.flags & KVM_MEM_LOG_DIRTY_PAGES != 0 {
            flags |= USER_MEMORY_REGION_LOG_DIRTY;
        }

        UserMemoryRegion {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}

impl From<UserMemoryRegion> for kvm_userspace_memory_region {
    fn from(region: UserMemoryRegion) -> Self {
        assert!(
            region.flags & USER_MEMORY_REGION_READ != 0,
            "KVM mapped memory is always readable"
        );

        let mut flags = 0;
        if region.flags & USER_MEMORY_REGION_WRITE == 0 {
            flags |= KVM_MEM_READONLY;
        }
        if region.flags & USER_MEMORY_REGION_LOG_DIRTY != 0 {
            flags |= KVM_MEM_LOG_DIRTY_PAGES;
        }

        kvm_userspace_memory_region {
            slot: region.slot,
            guest_phys_addr: region.guest_phys_addr,
            memory_size: region.memory_size,
            userspace_addr: region.userspace_addr,
            flags,
        }
    }
}
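
// Sketch of the flag mapping above (illustrative, not part of the API): a
// writable region with dirty logging enabled round-trips to KVM flags as
// follows. `host_va` is a placeholder for an already-mapped host address.
//
//     let region = UserMemoryRegion {
//         slot: 0,
//         guest_phys_addr: 0x1_0000,
//         memory_size: 0x1000,
//         userspace_addr: host_va,
//         flags: USER_MEMORY_REGION_READ
//             | USER_MEMORY_REGION_WRITE
//             | USER_MEMORY_REGION_LOG_DIRTY,
//     };
//     let kvm_region: kvm_userspace_memory_region = region.into();
//     // WRITE set => KVM_MEM_READONLY cleared; LOG_DIRTY set => logging flag set.
//     assert_eq!(kvm_region.flags, KVM_MEM_LOG_DIRTY_PAGES);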

impl From<kvm_mp_state> for MpState {
    fn from(s: kvm_mp_state) -> Self {
        MpState::Kvm(s)
    }
}

impl From<MpState> for kvm_mp_state {
    fn from(ms: MpState) -> Self {
        match ms {
            MpState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("MpState is not valid"),
        }
    }
}

impl From<kvm_ioctls::IoEventAddress> for IoEventAddress {
    fn from(a: kvm_ioctls::IoEventAddress) -> Self {
        match a {
            kvm_ioctls::IoEventAddress::Pio(x) => Self::Pio(x),
            kvm_ioctls::IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<IoEventAddress> for kvm_ioctls::IoEventAddress {
    fn from(a: IoEventAddress) -> Self {
        match a {
            IoEventAddress::Pio(x) => Self::Pio(x),
            IoEventAddress::Mmio(x) => Self::Mmio(x),
        }
    }
}

impl From<VcpuKvmState> for CpuState {
    fn from(s: VcpuKvmState) -> Self {
        CpuState::Kvm(s)
    }
}

impl From<CpuState> for VcpuKvmState {
    fn from(s: CpuState) -> Self {
        match s {
            CpuState::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("CpuState is not valid"),
        }
    }
}

#[cfg(target_arch = "x86_64")]
impl From<kvm_clock_data> for ClockData {
    fn from(d: kvm_clock_data) -> Self {
        ClockData::Kvm(d)
    }
}

#[cfg(target_arch = "x86_64")]
impl From<ClockData> for kvm_clock_data {
    fn from(ms: ClockData) -> Self {
        match ms {
            ClockData::Kvm(s) => s,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("ClockData is not valid"),
        }
    }
}

impl From<kvm_bindings::kvm_one_reg> for crate::Register {
    fn from(s: kvm_bindings::kvm_one_reg) -> Self {
        crate::Register::Kvm(s)
    }
}

impl From<crate::Register> for kvm_bindings::kvm_one_reg {
    fn from(e: crate::Register) -> Self {
        match e {
            crate::Register::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("Register is not valid"),
        }
    }
}

#[cfg(target_arch = "aarch64")]
impl From<kvm_bindings::kvm_vcpu_init> for crate::VcpuInit {
    fn from(s: kvm_bindings::kvm_vcpu_init) -> Self {
        crate::VcpuInit::Kvm(s)
    }
}

#[cfg(target_arch = "aarch64")]
impl From<crate::VcpuInit> for kvm_bindings::kvm_vcpu_init {
    fn from(e: crate::VcpuInit) -> Self {
        match e {
            crate::VcpuInit::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("VcpuInit is not valid"),
        }
    }
}

#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
impl From<kvm_bindings::RegList> for crate::RegList {
    fn from(s: kvm_bindings::RegList) -> Self {
        crate::RegList::Kvm(s)
    }
}

#[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
impl From<crate::RegList> for kvm_bindings::RegList {
    fn from(e: crate::RegList) -> Self {
        match e {
            crate::RegList::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("RegList is not valid"),
        }
    }
}

#[cfg(not(target_arch = "riscv64"))]
impl From<kvm_bindings::kvm_regs> for crate::StandardRegisters {
    fn from(s: kvm_bindings::kvm_regs) -> Self {
        crate::StandardRegisters::Kvm(s)
    }
}

#[cfg(not(target_arch = "riscv64"))]
impl From<crate::StandardRegisters> for kvm_bindings::kvm_regs {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

#[cfg(target_arch = "riscv64")]
impl From<kvm_bindings::kvm_riscv_core> for crate::StandardRegisters {
    fn from(s: kvm_bindings::kvm_riscv_core) -> Self {
        crate::StandardRegisters::Kvm(s)
    }
}

#[cfg(target_arch = "riscv64")]
impl From<crate::StandardRegisters> for kvm_bindings::kvm_riscv_core {
    fn from(e: crate::StandardRegisters) -> Self {
        match e {
            crate::StandardRegisters::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("StandardRegisters are not valid"),
        }
    }
}

impl From<kvm_irq_routing_entry> for IrqRoutingEntry {
    fn from(s: kvm_irq_routing_entry) -> Self {
        IrqRoutingEntry::Kvm(s)
    }
}

impl From<IrqRoutingEntry> for kvm_irq_routing_entry {
    fn from(e: IrqRoutingEntry) -> Self {
        match e {
            IrqRoutingEntry::Kvm(e) => e,
            /* Needed in case other hypervisors are enabled */
            #[allow(unreachable_patterns)]
            _ => panic!("IrqRoutingEntry is not valid"),
        }
    }
}

struct KvmDirtyLogSlot {
    slot: u32,
    guest_phys_addr: u64,
    memory_size: u64,
    userspace_addr: u64,
}

/// Wrapper over KVM VM ioctls.
pub struct KvmVm {
    fd: Arc<VmFd>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    dirty_log_slots: Arc<RwLock<HashMap<u32, KvmDirtyLogSlot>>>,
}

impl KvmVm {
    ///
    /// Creates an emulated device in the kernel.
    ///
    /// See the documentation for `KVM_CREATE_DEVICE`.
    fn create_device(&self, device: &mut CreateDevice) -> vm::Result<vfio_ioctls::VfioDeviceFd> {
        let device_fd = self
            .fd
            .create_device(device)
            .map_err(|e| vm::HypervisorVmError::CreateDevice(e.into()))?;
        Ok(VfioDeviceFd::new_from_kvm(device_fd))
    }
    /// Checks if a particular `Cap` is available.
    pub fn check_extension(&self, c: Cap) -> bool {
        self.fd.check_extension(c)
    }
}

/// Implementation of Vm trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl vm::Vm for KvmVm {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the one-page region in the VM's address space.
    ///
    fn set_identity_map_address(&self, address: u64) -> vm::Result<()> {
        self.fd
            .set_identity_map_address(address)
            .map_err(|e| vm::HypervisorVmError::SetIdentityMapAddress(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the address of the three-page region in the VM's address space.
    ///
    fn set_tss_address(&self, offset: usize) -> vm::Result<()> {
        self.fd
            .set_tss_address(offset)
            .map_err(|e| vm::HypervisorVmError::SetTssAddress(e.into()))
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    ///
    /// Creates an in-kernel interrupt controller.
    ///
    fn create_irq_chip(&self) -> vm::Result<()> {
        self.fd
            .create_irq_chip()
            .map_err(|e| vm::HypervisorVmError::CreateIrq(e.into()))
    }

    ///
    /// Registers an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn register_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .register_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::RegisterIrqFd(e.into()))
    }

    ///
    /// Unregisters an event that will, when signaled, trigger the `gsi` IRQ.
    ///
    fn unregister_irqfd(&self, fd: &EventFd, gsi: u32) -> vm::Result<()> {
        self.fd
            .unregister_irqfd(fd, gsi)
            .map_err(|e| vm::HypervisorVmError::UnregisterIrqFd(e.into()))
    }

    ///
    /// Creates a VcpuFd object from a vcpu RawFd.
    ///
    fn create_vcpu(
        &self,
        id: u8,
        vm_ops: Option<Arc<dyn VmOps>>,
    ) -> vm::Result<Arc<dyn cpu::Vcpu>> {
        let fd = self
            .fd
            .create_vcpu(id as u64)
            .map_err(|e| vm::HypervisorVmError::CreateVcpu(e.into()))?;
        let vcpu = KvmVcpu {
            fd: Arc::new(Mutex::new(fd)),
            #[cfg(target_arch = "x86_64")]
            msrs: self.msrs.clone(),
            vm_ops,
            #[cfg(target_arch = "x86_64")]
            hyperv_synic: AtomicBool::new(false),
        };
        Ok(Arc::new(vcpu))
    }
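
    // Interrupt wiring sketch (illustrative, not from this file): a virtio
    // device typically pairs an `EventFd` with a GSI so the kernel injects
    // the interrupt directly when the fd is signaled. `gsi` is a placeholder
    // allocated by the VMM's interrupt manager.
    //
    //     let call = EventFd::new(libc::EFD_NONBLOCK).unwrap();
    //     vm.register_irqfd(&call, gsi)?;
    //     // ... device signals `call` on completion ...
    //     vm.unregister_irqfd(&call, gsi)?;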

    #[cfg(target_arch = "aarch64")]
    ///
    /// Creates a virtual GIC device.
    ///
    fn create_vgic(&self, config: VgicConfig) -> vm::Result<Arc<Mutex<dyn Vgic>>> {
        let gic_device = KvmGicV3Its::new(self, config)
            .map_err(|e| vm::HypervisorVmError::CreateVgic(anyhow!("Vgic error {:?}", e)))?;
        Ok(Arc::new(Mutex::new(gic_device)))
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Creates a virtual AIA device.
    ///
    fn create_vaia(&self, config: VaiaConfig) -> vm::Result<Arc<Mutex<dyn Vaia>>> {
        let aia_device = KvmAiaImsics::new(self, config)
            .map_err(|e| vm::HypervisorVmError::CreateVaia(anyhow!("Vaia error {:?}", e)))?;
        Ok(Arc::new(Mutex::new(aia_device)))
    }

    ///
    /// Registers an event to be signaled whenever a certain address is written to.
    ///
    fn register_ioevent(
        &self,
        fd: &EventFd,
        addr: &IoEventAddress,
        datamatch: Option<vm::DataMatch>,
    ) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        if let Some(dm) = datamatch {
            match dm {
                vm::DataMatch::DataMatch32(kvm_dm32) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm32)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
                vm::DataMatch::DataMatch64(kvm_dm64) => self
                    .fd
                    .register_ioevent(fd, addr, kvm_dm64)
                    .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into())),
            }
        } else {
            self.fd
                .register_ioevent(fd, addr, NoDatamatch)
                .map_err(|e| vm::HypervisorVmError::RegisterIoEvent(e.into()))
        }
    }

    ///
    /// Unregisters an event from a certain address it has been previously registered to.
    ///
    fn unregister_ioevent(&self, fd: &EventFd, addr: &IoEventAddress) -> vm::Result<()> {
        let addr = &kvm_ioctls::IoEventAddress::from(*addr);
        self.fd
            .unregister_ioevent(fd, addr, NoDatamatch)
            .map_err(|e| vm::HypervisorVmError::UnregisterIoEvent(e.into()))
    }

    ///
    /// Constructs a routing entry
    ///
    fn make_routing_entry(&self, gsi: u32, config: &InterruptSourceConfig) -> IrqRoutingEntry {
        match &config {
            InterruptSourceConfig::MsiIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_MSI,
                    ..Default::default()
                };

                kvm_route.u.msi.address_lo = cfg.low_addr;
                kvm_route.u.msi.address_hi = cfg.high_addr;
                kvm_route.u.msi.data = cfg.data;

                if self.check_extension(crate::kvm::Cap::MsiDevid) {
                    // On AArch64 there is a limitation on the range of the
                    // 'devid': it cannot be greater than 65535 (the maximum
                    // value of a u16).
                    //
                    // The BDF cannot be used directly, because the 'segment'
                    // occupies the high 16 bits. The layout of the u32 BDF is:
                    // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --|
                    // |     segment     |     bus    |   device   |  function  |
                    //
                    // Since we support only one bus per segment, we can build a
                    // 'devid' by replacing the 'bus' bits with the low 8 bits
                    // of the 'segment' data.
                    // This resolves the range-checking problem and gives a
                    // distinct `devid` to every device. The limitation is that
                    // at most 256 segments can be supported.
                    //
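                    // Worked example (illustrative values): devid 0x0203_1a08
                    // has segment 0x0203, bus 0x1a, devfn 0x08. The transform
                    // keeps the devfn byte and swaps the bus byte for the low
                    // byte of the segment:
                    // ((0x0203_1a08 & 0x00ff_0000) >> 8) | (0x0203_1a08 & 0xff) = 0x0308.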
                    let modified_devid = ((cfg.devid & 0x00ff_0000) >> 8) | cfg.devid & 0xff;

                    kvm_route.flags = KVM_MSI_VALID_DEVID;
                    kvm_route.u.msi.__bindgen_anon_1.devid = modified_devid;
                }
                kvm_route.into()
            }
            InterruptSourceConfig::LegacyIrq(cfg) => {
                let mut kvm_route = kvm_irq_routing_entry {
                    gsi,
                    type_: KVM_IRQ_ROUTING_IRQCHIP,
                    ..Default::default()
                };
                kvm_route.u.irqchip.irqchip = cfg.irqchip;
                kvm_route.u.irqchip.pin = cfg.pin;

                kvm_route.into()
            }
        }
    }

    ///
    /// Sets the GSI routing table entries, overwriting any previously set
    /// entries, as per the `KVM_SET_GSI_ROUTING` ioctl.
    ///
    fn set_gsi_routing(&self, entries: &[IrqRoutingEntry]) -> vm::Result<()> {
        let mut irq_routing =
            vec_with_array_field::<kvm_irq_routing, kvm_irq_routing_entry>(entries.len());
        irq_routing[0].nr = entries.len() as u32;
        irq_routing[0].flags = 0;
        let entries: Vec<kvm_irq_routing_entry> = entries
            .iter()
            .map(|entry| match entry {
                IrqRoutingEntry::Kvm(e) => *e,
                #[allow(unreachable_patterns)]
                _ => panic!("IrqRoutingEntry type is wrong"),
            })
            .collect();

        // SAFETY: irq_routing initialized with entries.len() and now it is being turned into
        // entries_slice with entries.len() again. It is guaranteed to be large enough to hold
        // everything from entries.
        unsafe {
            let entries_slice: &mut [kvm_irq_routing_entry] =
                irq_routing[0].entries.as_mut_slice(entries.len());
            entries_slice.copy_from_slice(&entries);
        }

        self.fd
            .set_gsi_routing(&irq_routing[0])
            .map_err(|e| vm::HypervisorVmError::SetGsiRouting(e.into()))
    }

    ///
    /// Creates a memory region structure that can be used with {create/remove}_user_memory_region
    ///
    fn make_user_memory_region(
        &self,
        slot: u32,
        guest_phys_addr: u64,
        memory_size: u64,
        userspace_addr: u64,
        readonly: bool,
        log_dirty_pages: bool,
    ) -> UserMemoryRegion {
        kvm_userspace_memory_region {
            slot,
            guest_phys_addr,
            memory_size,
            userspace_addr,
            flags: if readonly { KVM_MEM_READONLY } else { 0 }
                | if log_dirty_pages {
                    KVM_MEM_LOG_DIRTY_PAGES
                } else {
                    0
                },
        }
        .into()
    }

    ///
    /// Creates a guest physical memory region.
    ///
    fn create_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        if (region.flags & KVM_MEM_LOG_DIRTY_PAGES) != 0 {
            if (region.flags & KVM_MEM_READONLY) != 0 {
                return Err(vm::HypervisorVmError::CreateUserMemory(anyhow!(
                    "Error creating regions with both 'dirty-pages-log' and 'read-only'."
                )));
            }

            // Keep track of the regions that need dirty pages log
            self.dirty_log_slots.write().unwrap().insert(
                region.slot,
                KvmDirtyLogSlot {
                    slot: region.slot,
                    guest_phys_addr: region.guest_phys_addr,
                    memory_size: region.memory_size,
                    userspace_addr: region.userspace_addr,
                },
            );

            // Always create guest physical memory region without `KVM_MEM_LOG_DIRTY_PAGES`.
            // For regions that need this flag, dirty pages log will be turned on in `start_dirty_log`.
            region.flags = 0;
        }

        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::CreateUserMemory(e.into()))
        }
    }
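
    // Usage sketch (illustrative, not from this file): registering a 2 MiB
    // writable, dirty-logged region. `host_va` is a placeholder for a
    // host-mapped address and `vm` an `Arc<dyn vm::Vm>`.
    //
    //     let region = vm.make_user_memory_region(
    //         0,         // slot
    //         0x10_0000, // guest physical address
    //         0x20_0000, // size (2 MiB)
    //         host_va,   // host userspace address
    //         false,     // readonly
    //         true,      // log_dirty_pages
    //     );
    //     vm.create_user_memory_region(region)?;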

    ///
    /// Removes a guest physical memory region.
    ///
    fn remove_user_memory_region(&self, user_memory_region: UserMemoryRegion) -> vm::Result<()> {
        let mut region: kvm_userspace_memory_region = user_memory_region.into();

        // Remove the corresponding entry from "self.dirty_log_slots" if needed
        self.dirty_log_slots.write().unwrap().remove(&region.slot);

        // Setting the size to 0 means "remove"
        region.memory_size = 0;
        // SAFETY: Safe because guest regions are guaranteed not to overlap.
        unsafe {
            self.fd
                .set_user_memory_region(region)
                .map_err(|e| vm::HypervisorVmError::RemoveUserMemory(e.into()))
        }
    }

    ///
    /// Returns the preferred CPU target type which can be emulated by KVM on underlying host.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_preferred_target(&self, kvi: &mut crate::VcpuInit) -> vm::Result<()> {
        let mut kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();
        self.fd
            .get_preferred_target(&mut kvm_kvi)
            .map_err(|e| vm::HypervisorVmError::GetPreferredTarget(e.into()))?;
        *kvi = kvm_kvi.into();
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_split_irq(&self) -> vm::Result<()> {
        // Create split irqchip
        // Only the local APIC is emulated in kernel, both PICs and IOAPIC
        // are not.
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SPLIT_IRQCHIP,
            ..Default::default()
        };
        cap.args[0] = NUM_IOAPIC_PINS as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSplitIrq(e.into()))?;
        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    fn enable_sgx_attribute(&self, file: File) -> vm::Result<()> {
        let mut cap = kvm_enable_cap {
            cap: KVM_CAP_SGX_ATTRIBUTE,
            ..Default::default()
        };
        cap.args[0] = file.as_raw_fd() as u64;
        self.fd
            .enable_cap(&cap)
            .map_err(|e| vm::HypervisorVmError::EnableSgxAttribute(e.into()))?;
        Ok(())
    }

    /// Retrieve guest clock.
    #[cfg(target_arch = "x86_64")]
    fn get_clock(&self) -> vm::Result<ClockData> {
        Ok(self
            .fd
            .get_clock()
            .map_err(|e| vm::HypervisorVmError::GetClock(e.into()))?
            .into())
    }

    /// Set guest clock.
    #[cfg(target_arch = "x86_64")]
    fn set_clock(&self, data: &ClockData) -> vm::Result<()> {
        let data = (*data).into();
        self.fd
            .set_clock(&data)
            .map_err(|e| vm::HypervisorVmError::SetClock(e.into()))
    }

    /// Create a device that is used for passthrough
    fn create_passthrough_device(&self) -> vm::Result<VfioDeviceFd> {
        let mut vfio_dev = kvm_create_device {
            type_: kvm_device_type_KVM_DEV_TYPE_VFIO,
            fd: 0,
            flags: 0,
        };

        self.create_device(&mut vfio_dev)
            .map_err(|e| vm::HypervisorVmError::CreatePassthroughDevice(e.into()))
    }

    ///
    /// Start logging dirty pages
    ///
    fn start_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: KVM_MEM_LOG_DIRTY_PAGES,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StartDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }
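
    // Dirty-page tracking flow (sketch): a live-migration loop would enable
    // logging once, repeatedly harvest the per-slot bitmaps while copying
    // memory, then disable logging. `slot`, `base_gpa` and `size` are
    // placeholders.
    //
    //     vm.start_dirty_log()?;
    //     let bitmap = vm.get_dirty_log(slot, base_gpa, size)?; // one bit per page
    //     vm.stop_dirty_log()?;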

    ///
    /// Stop logging dirty pages
    ///
    fn stop_dirty_log(&self) -> vm::Result<()> {
        let dirty_log_slots = self.dirty_log_slots.read().unwrap();
        for (_, s) in dirty_log_slots.iter() {
            let region = kvm_userspace_memory_region {
                slot: s.slot,
                guest_phys_addr: s.guest_phys_addr,
                memory_size: s.memory_size,
                userspace_addr: s.userspace_addr,
                flags: 0,
            };
            // SAFETY: Safe because guest regions are guaranteed not to overlap.
            unsafe {
                self.fd
                    .set_user_memory_region(region)
                    .map_err(|e| vm::HypervisorVmError::StopDirtyLog(e.into()))?;
            }
        }

        Ok(())
    }

    ///
    /// Get dirty pages bitmap (one bit per page)
    ///
    fn get_dirty_log(&self, slot: u32, _base_gpa: u64, memory_size: u64) -> vm::Result<Vec<u64>> {
        self.fd
            .get_dirty_log(slot, memory_size as usize)
            .map_err(|e| vm::HypervisorVmError::GetDirtyLog(e.into()))
    }

    ///
    /// Initialize TDX for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init(&self, cpuid: &[CpuIdEntry], max_vcpus: u32) -> vm::Result<()> {
        const TDX_ATTR_SEPT_VE_DISABLE: usize = 28;

        let mut cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        cpuid.resize(256, kvm_bindings::kvm_cpuid_entry2::default());

        #[repr(C)]
        struct TdxInitVm {
            attributes: u64,
            max_vcpus: u32,
            padding: u32,
            mrconfigid: [u64; 6],
            mrowner: [u64; 6],
            mrownerconfig: [u64; 6],
            cpuid_nent: u32,
            cpuid_padding: u32,
            cpuid_entries: [kvm_bindings::kvm_cpuid_entry2; 256],
        }
        let data = TdxInitVm {
            attributes: 1 << TDX_ATTR_SEPT_VE_DISABLE,
            max_vcpus,
            padding: 0,
            mrconfigid: [0; 6],
            mrowner: [0; 6],
            mrownerconfig: [0; 6],
            cpuid_nent: cpuid.len() as u32,
            cpuid_padding: 0,
            cpuid_entries: cpuid.as_slice().try_into().unwrap(),
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitVm,
            0,
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitializeTdx)
    }

    ///
    /// Finalize the TDX setup for this VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_finalize(&self) -> vm::Result<()> {
        tdx_command(&self.fd.as_raw_fd(), TdxCommand::Finalize, 0, 0)
            .map_err(vm::HypervisorVmError::FinalizeTdx)
    }

    ///
    /// Initialize memory regions for the TDX VM
    ///
    #[cfg(feature = "tdx")]
    fn tdx_init_memory_region(
        &self,
        host_address: u64,
        guest_address: u64,
        size: u64,
        measure: bool,
    ) -> vm::Result<()> {
        #[repr(C)]
        struct TdxInitMemRegion {
            host_address: u64,
            guest_address: u64,
            pages: u64,
        }
        let data = TdxInitMemRegion {
            host_address,
            guest_address,
            pages: size / 4096,
        };

        tdx_command(
            &self.fd.as_raw_fd(),
            TdxCommand::InitMemRegion,
            u32::from(measure),
            &data as *const _ as u64,
        )
        .map_err(vm::HypervisorVmError::InitMemRegionTdx)
    }

    /// Downcast to the underlying KvmVm type
    fn as_any(&self) -> &dyn Any {
        self
    }
}

#[cfg(feature = "tdx")]
fn tdx_command(
    fd: &RawFd,
    command: TdxCommand,
    flags: u32,
    data: u64,
) -> std::result::Result<(), std::io::Error> {
    #[repr(C)]
    struct TdxIoctlCmd {
        command: TdxCommand,
        flags: u32,
        data: u64,
        error: u64,
        unused: u64,
    }
    let cmd = TdxIoctlCmd {
        command,
        flags,
        data,
        error: 0,
        unused: 0,
    };
    // SAFETY: FFI call. All input parameters are valid.
    let ret = unsafe {
        ioctl_with_val(
            fd,
            KVM_MEMORY_ENCRYPT_OP(),
            &cmd as *const TdxIoctlCmd as std::os::raw::c_ulong,
        )
    };

    if ret < 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(())
}

/// Wrapper over KVM system ioctls.
pub struct KvmHypervisor {
    kvm: Kvm,
}

impl KvmHypervisor {
    #[cfg(target_arch = "x86_64")]
    ///
    /// Retrieve the list of MSRs supported by the hypervisor.
    ///
    fn get_msr_list(&self) -> hypervisor::Result<MsrList> {
        self.kvm
            .get_msr_index_list()
            .map_err(|e| hypervisor::HypervisorError::GetMsrList(e.into()))
    }
}

/// Enum for KVM related error
#[derive(Debug, Error)]
pub enum KvmError {
    #[error("Capability missing: {0:?}")]
    CapabilityMissing(Cap),
}

pub type KvmResult<T> = result::Result<T, KvmError>;

impl KvmHypervisor {
    /// Create a hypervisor based on Kvm
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> hypervisor::Result<Arc<dyn hypervisor::Hypervisor>> {
        let kvm_obj = Kvm::new().map_err(|e| hypervisor::HypervisorError::VmCreate(e.into()))?;
        let api_version = kvm_obj.get_api_version();

        if api_version != kvm_bindings::KVM_API_VERSION as i32 {
            return Err(hypervisor::HypervisorError::IncompatibleApiVersion);
        }

        Ok(Arc::new(KvmHypervisor { kvm: kvm_obj }))
    }

    /// Check if the hypervisor is available
    pub fn is_available() -> hypervisor::Result<bool> {
        match std::fs::metadata("/dev/kvm") {
            Ok(_) => Ok(true),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
            Err(err) => Err(hypervisor::HypervisorError::HypervisorAvailableCheck(
                err.into(),
            )),
        }
    }
}
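
// Bootstrap sketch (illustrative): probe for /dev/kvm before constructing
// the hypervisor object, then hand out the trait object.
//
//     if hypervisor::kvm::KvmHypervisor::is_available()? {
//         let hv = hypervisor::kvm::KvmHypervisor::new()?;
//         let vm = hv.create_vm()?;
//     }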

/// Implementation of Hypervisor trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// ```
impl hypervisor::Hypervisor for KvmHypervisor {
    ///
    /// Returns the type of the hypervisor
    ///
    fn hypervisor_type(&self) -> HypervisorType {
        HypervisorType::Kvm
    }

    ///
    /// Create a Vm of a specific type using the underlying hypervisor, passing memory size
    /// Return a hypervisor-agnostic Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type_and_memory(0).unwrap();
    /// ```
    fn create_vm_with_type_and_memory(
        &self,
        vm_type: u64,
        #[cfg(feature = "sev_snp")] _mem_size: u64,
    ) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        self.create_vm_with_type(vm_type)
    }

    /// Create a KVM vm object of a specific VM type and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm_with_type(0).unwrap();
    /// ```
    fn create_vm_with_type(&self, vm_type: u64) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        let fd: VmFd;
        loop {
            match self.kvm.create_vm_with_type(vm_type) {
                Ok(res) => fd = res,
                Err(e) => {
                    if e.errno() == libc::EINTR {
                        // If the error returned is EINTR, which means the
                        // ioctl has been interrupted, we have to retry as
                        // this can't be considered as a regular error.
                        continue;
                    } else {
                        return Err(hypervisor::HypervisorError::VmCreate(e.into()));
                    }
                }
            }
            break;
        }

        let vm_fd = Arc::new(fd);

        #[cfg(target_arch = "x86_64")]
        {
            let msr_list = self.get_msr_list()?;
            let num_msrs = msr_list.as_fam_struct_ref().nmsrs as usize;
            let mut msrs: Vec<MsrEntry> = vec![
                MsrEntry {
                    ..Default::default()
                };
                num_msrs
            ];
            let indices = msr_list.as_slice();
            for (pos, index) in indices.iter().enumerate() {
                msrs[pos].index = *index;
            }

            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                msrs,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }

        #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
        {
            Ok(Arc::new(KvmVm {
                fd: vm_fd,
                dirty_log_slots: Arc::new(RwLock::new(HashMap::new())),
            }))
        }
    }

    /// Create a KVM vm object and return the object as Vm trait object
    ///
    /// # Examples
    ///
    /// ```
    /// # use hypervisor::kvm::KvmHypervisor;
    /// use hypervisor::kvm::KvmVm;
    /// let hypervisor = KvmHypervisor::new().unwrap();
    /// let vm = hypervisor.create_vm().unwrap();
    /// ```
    fn create_vm(&self) -> hypervisor::Result<Arc<dyn vm::Vm>> {
        #[allow(unused_mut)]
        let mut vm_type: u64 = 0; // Create with default platform type

        // When KVM supports Cap::ArmVmIPASize, it is better to get the IPA
        // size from the host and use that when creating the VM, which may
        // avoid unnecessary VM creation failures.
        #[cfg(target_arch = "aarch64")]
        if self.kvm.check_extension(Cap::ArmVmIPASize) {
            vm_type = self.kvm.get_host_ipa_limit().try_into().unwrap();
        }

        self.create_vm_with_type(vm_type)
    }

    fn check_required_extensions(&self) -> hypervisor::Result<()> {
        check_required_kvm_extensions(&self.kvm)
            .map_err(|e| hypervisor::HypervisorError::CheckExtensions(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to get the system supported CPUID values.
    ///
    fn get_supported_cpuid(&self) -> hypervisor::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .kvm
            .get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)
            .map_err(|e| hypervisor::HypervisorError::GetCpuId(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }
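
    // Usage sketch (illustrative): the returned entries are hypervisor-agnostic
    // and are typically filtered or patched before being handed to
    // `Vcpu::set_cpuid2`. The filter below is a made-up example.
    //
    //     let mut cpuid = hv.get_supported_cpuid()?;
    //     cpuid.retain(|e| e.function != 0x4000_0000);
    //     vcpu.set_cpuid2(&cpuid)?;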

    #[cfg(target_arch = "aarch64")]
    ///
    /// Retrieve AArch64 host maximum IPA size supported by KVM.
    ///
    fn get_host_ipa_limit(&self) -> i32 {
        self.kvm.get_host_ipa_limit()
    }

    ///
    /// Retrieve TDX capabilities
    ///
    #[cfg(feature = "tdx")]
    fn tdx_capabilities(&self) -> hypervisor::Result<TdxCapabilities> {
        let data = TdxCapabilities {
            nr_cpuid_configs: TDX_MAX_NR_CPUID_CONFIGS as u32,
            ..Default::default()
        };

        tdx_command(
            &self.kvm.as_raw_fd(),
            TdxCommand::Capabilities,
            0,
            &data as *const _ as u64,
        )
        .map_err(|e| hypervisor::HypervisorError::TdxCapabilities(e.into()))?;

        Ok(data)
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    ///
    /// Get the number of supported hardware breakpoints
    ///
    fn get_guest_debug_hw_bps(&self) -> usize {
        #[cfg(target_arch = "x86_64")]
        {
            4
        }
        #[cfg(target_arch = "aarch64")]
        {
            self.kvm.get_guest_debug_hw_bps() as usize
        }
    }

    /// Get maximum number of vCPUs
    fn get_max_vcpus(&self) -> u32 {
        self.kvm.get_max_vcpus().min(u32::MAX as usize) as u32
    }
}

/// Vcpu struct for KVM
pub struct KvmVcpu {
    fd: Arc<Mutex<VcpuFd>>,
    #[cfg(target_arch = "x86_64")]
    msrs: Vec<MsrEntry>,
    vm_ops: Option<Arc<dyn vm::VmOps>>,
    #[cfg(target_arch = "x86_64")]
    hyperv_synic: AtomicBool,
}

/// Implementation of Vcpu trait for KVM
///
/// # Examples
///
/// ```
/// # use hypervisor::kvm::KvmHypervisor;
/// # use std::sync::Arc;
/// let kvm = KvmHypervisor::new().unwrap();
/// let hypervisor = Arc::new(kvm);
/// let vm = hypervisor.create_vm().expect("new VM fd creation failed");
/// let vcpu = vm.create_vcpu(0, None).unwrap();
/// ```
impl cpu::Vcpu for KvmVcpu {
    ///
    /// Returns StandardRegisters with default value set
    ///
    fn create_standard_regs(&self) -> StandardRegisters {
        #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
        {
            kvm_bindings::kvm_regs::default().into()
        }
        #[cfg(target_arch = "riscv64")]
        {
            kvm_bindings::kvm_riscv_core::default().into()
        }
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU general purpose registers.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_regs()
            .map_err(|e| cpu::HypervisorCpuError::GetStandardRegs(e.into()))?
            .into())
    }

    ///
    /// Returns the vCPU general purpose registers.
    /// The `KVM_GET_REGS` ioctl is not available on AArch64; `KVM_GET_ONE_REG`
    /// is used to get registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        let mut state = kvm_regs::default();
        let mut off = offset_of!(user_pt_regs, regs);
        // There are 31 user_pt_regs:
        // https://elixir.free-electrons.com/linux/v4.14.174/source/arch/arm64/include/uapi/asm/ptrace.h#L72
        // These actually are the general-purpose registers of the Armv8-A
        // architecture (i.e. x0-x30 when used as 64-bit registers, or w0-w30
        // when used as 32-bit registers).
        for i in 0..31 {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
            state.regs.regs[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // We are now entering the "Other register" section of the Armv8-A architecture.
        // First one, the stack pointer.
        let off = offset_of!(user_pt_regs, sp);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.regs.sp = u64::from_le_bytes(bytes);

        // Second one, the program counter.
        let off = offset_of!(user_pt_regs, pc);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.regs.pc = u64::from_le_bytes(bytes);

        // Next is the processor state.
        let off = offset_of!(user_pt_regs, pstate);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.regs.pstate = u64::from_le_bytes(bytes);

        // The stack pointer associated with EL1.
        let off = offset_of!(kvm_regs, sp_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.sp_el1 = u64::from_le_bytes(bytes);

        // Exception Link Register for EL1. When taking an exception to EL1, this
        // register holds the address to which to return afterwards.
        let off = offset_of!(kvm_regs, elr_el1);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.elr_el1 = u64::from_le_bytes(bytes);

        // Saved Program Status Registers; there are 5 of them used in the kernel.
        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U64, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
            state.spsr[i] = u64::from_le_bytes(bytes);
            off += std::mem::size_of::<u64>();
        }

        // Now moving on to the floating-point registers, which are stored in the
        // user_fpsimd_state in the kernel:
        // https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/include/uapi/asm/kvm.h#L53
        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            let mut bytes = [0_u8; 16];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U128, off), &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
            state.fp_regs.vregs[i] = u128::from_le_bytes(bytes);
            off += mem::size_of::<u128>();
        }

        // Floating-point Status Register.
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        let mut bytes = [0_u8; 4];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.fp_regs.fpsr = u32::from_le_bytes(bytes);

        // Floating-point Control Register.
        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        let mut bytes = [0_u8; 4];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(arm64_core_reg_id!(KVM_REG_SIZE_U32, off), &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetAarchCoreRegister(e.into()))?;
        state.fp_regs.fpcr = u32::from_le_bytes(bytes);
        Ok(state.into())
    }
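
    // Note on register IDs (sketch): `arm64_core_reg_id!` builds the u64 ID
    // that KVM_{GET,SET}_ONE_REG expects for core registers, roughly
    // KVM_REG_ARM64 | size | KVM_REG_ARM_CORE | (offset-in-kvm_regs / 4),
    // which is why each access above only needs a size constant and a byte
    // offset computed with `offset_of!`.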

    #[cfg(target_arch = "riscv64")]
    ///
    /// Returns the RISC-V vCPU core registers.
    /// The `KVM_GET_REGS` ioctl is not available on RISC-V 64-bit;
    /// `KVM_GET_ONE_REG` is used to get registers one by one.
    ///
    fn get_regs(&self) -> cpu::Result<StandardRegisters> {
        let mut state = kvm_riscv_core::default();

        /// Macro used to extract the RISC-V register selected by `$reg_name`
        /// from the KVM vCPU into `state`.
riscv64_get_one_reg_from_vcpu { 1499 (mode) => { 1500 let off = offset_of!(kvm_riscv_core, mode); 1501 let mut bytes = [0_u8; 8]; 1502 self.fd 1503 .lock() 1504 .unwrap() 1505 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1506 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1507 state.mode = u64::from_le_bytes(bytes); 1508 }; 1509 ($reg_name:ident) => { 1510 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, $reg_name); 1511 let mut bytes = [0_u8; 8]; 1512 self.fd 1513 .lock() 1514 .unwrap() 1515 .get_one_reg(riscv64_reg_id!(KVM_REG_RISCV_CORE, off), &mut bytes) 1516 .map_err(|e| cpu::HypervisorCpuError::GetRiscvCoreRegister(e.into()))?; 1517 state.regs.$reg_name = u64::from_le_bytes(bytes); 1518 }; 1519 } 1520 1521 riscv64_get_one_reg_from_vcpu!(pc); 1522 riscv64_get_one_reg_from_vcpu!(ra); 1523 riscv64_get_one_reg_from_vcpu!(sp); 1524 riscv64_get_one_reg_from_vcpu!(gp); 1525 riscv64_get_one_reg_from_vcpu!(tp); 1526 riscv64_get_one_reg_from_vcpu!(t0); 1527 riscv64_get_one_reg_from_vcpu!(t1); 1528 riscv64_get_one_reg_from_vcpu!(t2); 1529 riscv64_get_one_reg_from_vcpu!(s0); 1530 riscv64_get_one_reg_from_vcpu!(s1); 1531 riscv64_get_one_reg_from_vcpu!(a0); 1532 riscv64_get_one_reg_from_vcpu!(a1); 1533 riscv64_get_one_reg_from_vcpu!(a2); 1534 riscv64_get_one_reg_from_vcpu!(a3); 1535 riscv64_get_one_reg_from_vcpu!(a4); 1536 riscv64_get_one_reg_from_vcpu!(a5); 1537 riscv64_get_one_reg_from_vcpu!(a6); 1538 riscv64_get_one_reg_from_vcpu!(a7); 1539 riscv64_get_one_reg_from_vcpu!(s2); 1540 riscv64_get_one_reg_from_vcpu!(s3); 1541 riscv64_get_one_reg_from_vcpu!(s4); 1542 riscv64_get_one_reg_from_vcpu!(s5); 1543 riscv64_get_one_reg_from_vcpu!(s6); 1544 riscv64_get_one_reg_from_vcpu!(s7); 1545 riscv64_get_one_reg_from_vcpu!(s8); 1546 riscv64_get_one_reg_from_vcpu!(s9); 1547 riscv64_get_one_reg_from_vcpu!(s10); 1548 riscv64_get_one_reg_from_vcpu!(s11); 1549 riscv64_get_one_reg_from_vcpu!(t3); 1550 riscv64_get_one_reg_from_vcpu!(t4); 1551 riscv64_get_one_reg_from_vcpu!(t5); 1552 riscv64_get_one_reg_from_vcpu!(t6); 1553 riscv64_get_one_reg_from_vcpu!(mode); 1554 1555 Ok(state.into()) 1556 } 1557 1558 #[cfg(target_arch = "x86_64")] 1559 /// 1560 /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl. 1561 /// 1562 fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> { 1563 let regs = (*regs).into(); 1564 self.fd 1565 .lock() 1566 .unwrap() 1567 .set_regs(®s) 1568 .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into())) 1569 } 1570 1571 /// 1572 /// Sets the vCPU general purpose registers. 1573 /// The `KVM_SET_REGS` ioctl is not available on AArch64, `KVM_SET_ONE_REG` 1574 /// is used to set registers one by one. 1575 /// 1576 #[cfg(target_arch = "aarch64")] 1577 fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> { 1578 // The function follows the exact identical order from `state`. Look there 1579 // for some additional info on registers. 

        riscv64_get_one_reg_from_vcpu!(pc);
        riscv64_get_one_reg_from_vcpu!(ra);
        riscv64_get_one_reg_from_vcpu!(sp);
        riscv64_get_one_reg_from_vcpu!(gp);
        riscv64_get_one_reg_from_vcpu!(tp);
        riscv64_get_one_reg_from_vcpu!(t0);
        riscv64_get_one_reg_from_vcpu!(t1);
        riscv64_get_one_reg_from_vcpu!(t2);
        riscv64_get_one_reg_from_vcpu!(s0);
        riscv64_get_one_reg_from_vcpu!(s1);
        riscv64_get_one_reg_from_vcpu!(a0);
        riscv64_get_one_reg_from_vcpu!(a1);
        riscv64_get_one_reg_from_vcpu!(a2);
        riscv64_get_one_reg_from_vcpu!(a3);
        riscv64_get_one_reg_from_vcpu!(a4);
        riscv64_get_one_reg_from_vcpu!(a5);
        riscv64_get_one_reg_from_vcpu!(a6);
        riscv64_get_one_reg_from_vcpu!(a7);
        riscv64_get_one_reg_from_vcpu!(s2);
        riscv64_get_one_reg_from_vcpu!(s3);
        riscv64_get_one_reg_from_vcpu!(s4);
        riscv64_get_one_reg_from_vcpu!(s5);
        riscv64_get_one_reg_from_vcpu!(s6);
        riscv64_get_one_reg_from_vcpu!(s7);
        riscv64_get_one_reg_from_vcpu!(s8);
        riscv64_get_one_reg_from_vcpu!(s9);
        riscv64_get_one_reg_from_vcpu!(s10);
        riscv64_get_one_reg_from_vcpu!(s11);
        riscv64_get_one_reg_from_vcpu!(t3);
        riscv64_get_one_reg_from_vcpu!(t4);
        riscv64_get_one_reg_from_vcpu!(t5);
        riscv64_get_one_reg_from_vcpu!(t6);
        riscv64_get_one_reg_from_vcpu!(mode);

        Ok(state.into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU general purpose registers using the `KVM_SET_REGS` ioctl.
    ///
    fn set_regs(&self, regs: &StandardRegisters) -> cpu::Result<()> {
        let regs = (*regs).into();
        self.fd
            .lock()
            .unwrap()
            .set_regs(&regs)
            .map_err(|e| cpu::HypervisorCpuError::SetStandardRegs(e.into()))
    }

    ///
    /// Sets the vCPU general purpose registers.
    /// The `KVM_SET_REGS` ioctl is not available on AArch64; `KVM_SET_ONE_REG`
    /// is used to set registers one by one.
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> {
        // The function follows the exact same order as `get_regs`. Look there
        // for some additional info on registers.
        let kvm_regs_state: kvm_regs = (*state).into();
        let mut off = offset_of!(user_pt_regs, regs);
        for i in 0..31 {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &kvm_regs_state.regs.regs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let off = offset_of!(user_pt_regs, sp);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &kvm_regs_state.regs.sp.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pc);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &kvm_regs_state.regs.pc.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let off = offset_of!(user_pt_regs, pstate);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &kvm_regs_state.regs.pstate.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, sp_el1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &kvm_regs_state.sp_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, elr_el1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                &kvm_regs_state.elr_el1.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let mut off = offset_of!(kvm_regs, spsr);
        for i in 0..KVM_NR_SPSR as usize {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
                    &kvm_regs_state.spsr[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
            off += std::mem::size_of::<u64>();
        }

        let mut off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, vregs);
        for i in 0..32 {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U128, off),
                    &kvm_regs_state.fp_regs.vregs[i].to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
            off += mem::size_of::<u128>();
        }

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpsr);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &kvm_regs_state.fp_regs.fpsr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        let off = offset_of!(kvm_regs, fp_regs) + offset_of!(user_fpsimd_state, fpcr);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U32, off),
                &kvm_regs_state.fp_regs.fpcr.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
        Ok(())
    }
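
    // Boot-time usage sketch (illustrative): a VMM usually seeds only a few of
    // these registers before the first run. `kernel_entry` and `fdt_start` are
    // placeholders.
    //
    //     let mut regs = vcpu.create_standard_regs();
    //     if let StandardRegisters::Kvm(ref mut r) = regs {
    //         r.regs.pc = kernel_entry; // guest entry point
    //         r.regs.regs[0] = fdt_start; // x0 = device tree address
    //     }
    //     vcpu.set_regs(&regs)?;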
1697 /// 1698 fn set_regs(&self, state: &StandardRegisters) -> cpu::Result<()> { 1699 // The function follows the exact identical order from `state`. Look there 1700 // for some additional info on registers. 1701 let kvm_regs_state: kvm_riscv_core = (*state).into(); 1702 1703 /// Macro used to set value of specific RISC-V `$reg_name` stored in 1704 /// `state` to KVM Vcpu. 1705 macro_rules! riscv64_set_one_reg_to_vcpu { 1706 (mode) => { 1707 let off = offset_of!(kvm_riscv_core, mode); 1708 self.fd 1709 .lock() 1710 .unwrap() 1711 .set_one_reg( 1712 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1713 &kvm_regs_state.mode.to_le_bytes(), 1714 ) 1715 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1716 }; 1717 ($reg_name:ident) => { 1718 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, $reg_name); 1719 self.fd 1720 .lock() 1721 .unwrap() 1722 .set_one_reg( 1723 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1724 &kvm_regs_state.regs.$reg_name.to_le_bytes(), 1725 ) 1726 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1727 }; 1728 } 1729 1730 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, pc); 1731 self.fd 1732 .lock() 1733 .unwrap() 1734 .set_one_reg( 1735 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1736 &kvm_regs_state.regs.pc.to_le_bytes(), 1737 ) 1738 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1739 1740 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, ra); 1741 self.fd 1742 .lock() 1743 .unwrap() 1744 .set_one_reg( 1745 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1746 &kvm_regs_state.regs.ra.to_le_bytes(), 1747 ) 1748 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1749 1750 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, sp); 1751 self.fd 1752 .lock() 1753 .unwrap() 1754 .set_one_reg( 1755 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1756 &kvm_regs_state.regs.sp.to_le_bytes(), 1757 ) 1758 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1759 1760 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, gp); 1761 self.fd 1762 .lock() 1763 .unwrap() 1764 .set_one_reg( 1765 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1766 &kvm_regs_state.regs.gp.to_le_bytes(), 1767 ) 1768 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1769 1770 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, tp); 1771 self.fd 1772 .lock() 1773 .unwrap() 1774 .set_one_reg( 1775 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1776 &kvm_regs_state.regs.tp.to_le_bytes(), 1777 ) 1778 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1779 1780 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t0); 1781 self.fd 1782 .lock() 1783 .unwrap() 1784 .set_one_reg( 1785 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1786 &kvm_regs_state.regs.t0.to_le_bytes(), 1787 ) 1788 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1789 1790 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t1); 1791 self.fd 1792 .lock() 1793 .unwrap() 1794 .set_one_reg( 1795 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1796 &kvm_regs_state.regs.t1.to_le_bytes(), 1797 ) 1798 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1799 1800 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, t2); 1801 self.fd 1802 .lock() 1803 .unwrap() 1804 .set_one_reg( 1805 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1806 &kvm_regs_state.regs.t2.to_le_bytes(), 1807 ) 1808 .map_err(|e| 
cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1809 1810 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s0); 1811 self.fd 1812 .lock() 1813 .unwrap() 1814 .set_one_reg( 1815 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1816 &kvm_regs_state.regs.s0.to_le_bytes(), 1817 ) 1818 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1819 1820 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s1); 1821 self.fd 1822 .lock() 1823 .unwrap() 1824 .set_one_reg( 1825 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1826 &kvm_regs_state.regs.s1.to_le_bytes(), 1827 ) 1828 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1829 1830 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a0); 1831 self.fd 1832 .lock() 1833 .unwrap() 1834 .set_one_reg( 1835 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1836 &kvm_regs_state.regs.a0.to_le_bytes(), 1837 ) 1838 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1839 1840 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a1); 1841 self.fd 1842 .lock() 1843 .unwrap() 1844 .set_one_reg( 1845 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1846 &kvm_regs_state.regs.a1.to_le_bytes(), 1847 ) 1848 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1849 1850 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a2); 1851 self.fd 1852 .lock() 1853 .unwrap() 1854 .set_one_reg( 1855 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1856 &kvm_regs_state.regs.a2.to_le_bytes(), 1857 ) 1858 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1859 1860 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a3); 1861 self.fd 1862 .lock() 1863 .unwrap() 1864 .set_one_reg( 1865 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1866 &kvm_regs_state.regs.a3.to_le_bytes(), 1867 ) 1868 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1869 1870 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a4); 1871 self.fd 1872 .lock() 1873 .unwrap() 1874 .set_one_reg( 1875 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1876 &kvm_regs_state.regs.a4.to_le_bytes(), 1877 ) 1878 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1879 1880 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a5); 1881 self.fd 1882 .lock() 1883 .unwrap() 1884 .set_one_reg( 1885 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1886 &kvm_regs_state.regs.a5.to_le_bytes(), 1887 ) 1888 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1889 1890 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a6); 1891 self.fd 1892 .lock() 1893 .unwrap() 1894 .set_one_reg( 1895 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1896 &kvm_regs_state.regs.a6.to_le_bytes(), 1897 ) 1898 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1899 1900 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, a7); 1901 self.fd 1902 .lock() 1903 .unwrap() 1904 .set_one_reg( 1905 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1906 &kvm_regs_state.regs.a7.to_le_bytes(), 1907 ) 1908 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1909 1910 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, s2); 1911 self.fd 1912 .lock() 1913 .unwrap() 1914 .set_one_reg( 1915 riscv64_reg_id!(KVM_REG_RISCV_CORE, off), 1916 &kvm_regs_state.regs.s2.to_le_bytes(), 1917 ) 1918 .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?; 1919 1920 let off = offset_of!(kvm_riscv_core, regs, user_regs_struct, 
        riscv64_set_one_reg_to_vcpu!(s3);
        riscv64_set_one_reg_to_vcpu!(s4);
        riscv64_set_one_reg_to_vcpu!(s5);
        riscv64_set_one_reg_to_vcpu!(s6);
        riscv64_set_one_reg_to_vcpu!(s7);
        riscv64_set_one_reg_to_vcpu!(s8);
        riscv64_set_one_reg_to_vcpu!(s9);
        riscv64_set_one_reg_to_vcpu!(s10);
        riscv64_set_one_reg_to_vcpu!(s11);
        riscv64_set_one_reg_to_vcpu!(t3);
        riscv64_set_one_reg_to_vcpu!(t4);
        riscv64_set_one_reg_to_vcpu!(t5);
        riscv64_set_one_reg_to_vcpu!(t6);
        riscv64_set_one_reg_to_vcpu!(mode);

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the vCPU special registers.
    ///
    fn get_sregs(&self) -> cpu::Result<SpecialRegisters> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_sregs()
            .map_err(|e| cpu::HypervisorCpuError::GetSpecialRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the vCPU special registers using the `KVM_SET_SREGS` ioctl.
    ///
    fn set_sregs(&self, sregs: &SpecialRegisters) -> cpu::Result<()> {
        let sregs = (*sregs).into();
        self.fd
            .lock()
            .unwrap()
            .set_sregs(&sregs)
            .map_err(|e| cpu::HypervisorCpuError::SetSpecialRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the floating point state (FPU) from the vCPU.
    ///
    fn get_fpu(&self) -> cpu::Result<FpuState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_fpu()
            .map_err(|e| cpu::HypervisorCpuError::GetFloatingPointRegs(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the floating point state (FPU) of a vCPU using the `KVM_SET_FPU` ioctl.
    ///
    fn set_fpu(&self, fpu: &FpuState) -> cpu::Result<()> {
        let fpu: kvm_bindings::kvm_fpu = (*fpu).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_fpu(&fpu)
            .map_err(|e| cpu::HypervisorCpuError::SetFloatingPointRegs(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to setup the CPUID registers.
    ///
    fn set_cpuid2(&self, cpuid: &[CpuIdEntry]) -> cpu::Result<()> {
        let cpuid: Vec<kvm_bindings::kvm_cpuid_entry2> =
            cpuid.iter().map(|e| (*e).into()).collect();
        let kvm_cpuid = <CpuId>::from_entries(&cpuid)
            .map_err(|_| cpu::HypervisorCpuError::SetCpuid(anyhow!("failed to create CpuId")))?;

        self.fd
            .lock()
            .unwrap()
            .set_cpuid2(&kvm_cpuid)
            .map_err(|e| cpu::HypervisorCpuError::SetCpuid(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call to enable HyperV SynIC
    ///
    fn enable_hyperv_synic(&self) -> cpu::Result<()> {
        // Update the information about Hyper-V SynIC being enabled and
        // emulated, as it will influence which MSRs should be saved later.
        self.hyperv_synic.store(true, Ordering::Release);

        let cap = kvm_enable_cap {
            cap: KVM_CAP_HYPERV_SYNIC,
            ..Default::default()
        };
        self.fd
            .lock()
            .unwrap()
            .enable_cap(&cap)
            .map_err(|e| cpu::HypervisorCpuError::EnableHyperVSyncIc(e.into()))
    }

    ///
    /// X86 specific call to retrieve the CPUID registers.
    ///
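    /// # Example
    ///
    /// A minimal sketch, assuming a KVM-capable host (same setup as the
    /// `state()` example below):
    ///
    /// ```rust
    /// # use hypervisor::kvm::{kvm_bindings, KvmHypervisor};
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// // KVM_MAX_CPUID_ENTRIES bounds how many entries KVM may return.
    /// let cpuid = vcpu.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES).unwrap();
    /// assert!(!cpuid.is_empty());
    /// ```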
    #[cfg(target_arch = "x86_64")]
    fn get_cpuid2(&self, num_entries: usize) -> cpu::Result<Vec<CpuIdEntry>> {
        let kvm_cpuid = self
            .fd
            .lock()
            .unwrap()
            .get_cpuid2(num_entries)
            .map_err(|e| cpu::HypervisorCpuError::GetCpuid(e.into()))?;

        let v = kvm_cpuid.as_slice().iter().map(|e| (*e).into()).collect();

        Ok(v)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn get_lapic(&self) -> cpu::Result<LapicState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_lapic()
            .map_err(|e| cpu::HypervisorCpuError::GetlapicState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets the state of the LAPIC (Local Advanced Programmable Interrupt Controller).
    ///
    fn set_lapic(&self, klapic: &LapicState) -> cpu::Result<()> {
        let klapic: kvm_bindings::kvm_lapic_state = (*klapic).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_lapic(&klapic)
            .map_err(|e| cpu::HypervisorCpuError::SetLapicState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually read.
    ///
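    /// # Example
    ///
    /// A minimal sketch, assuming a KVM-capable host. `KVM_GET_MSRS` needs a
    /// prepopulated list of MSR indices, seeded here from `boot_msr_entries()`:
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let mut msrs = vcpu.boot_msr_entries();
    /// // `read` is how many entries KVM actually filled in.
    /// let read = vcpu.get_msrs(&mut msrs).unwrap();
    /// assert!(read <= msrs.len());
    /// ```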
    fn get_msrs(&self, msrs: &mut Vec<MsrEntry>) -> cpu::Result<usize> {
        let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let mut kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
        let succ = self
            .fd
            .lock()
            .unwrap()
            .get_msrs(&mut kvm_msrs)
            .map_err(|e| cpu::HypervisorCpuError::GetMsrEntries(e.into()))?;

        msrs[..succ].copy_from_slice(
            &kvm_msrs.as_slice()[..succ]
                .iter()
                .map(|e| (*e).into())
                .collect::<Vec<MsrEntry>>(),
        );

        Ok(succ)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets up the model-specific registers (MSR) for this vCPU.
    /// Returns the number of MSR entries actually written.
    ///
    fn set_msrs(&self, msrs: &[MsrEntry]) -> cpu::Result<usize> {
        let kvm_msrs: Vec<kvm_msr_entry> = msrs.iter().map(|e| (*e).into()).collect();
        let kvm_msrs = MsrEntries::from_entries(&kvm_msrs).unwrap();
        self.fd
            .lock()
            .unwrap()
            .set_msrs(&kvm_msrs)
            .map_err(|e| cpu::HypervisorCpuError::SetMsrEntries(e.into()))
    }

    ///
    /// Returns the vcpu's current "multiprocessing state".
    ///
    fn get_mp_state(&self) -> cpu::Result<MpState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_mp_state()
            .map_err(|e| cpu::HypervisorCpuError::GetMpState(e.into()))?
            .into())
    }

    ///
    /// Sets the vcpu's current "multiprocessing state".
    ///
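    /// # Example
    ///
    /// A minimal round-trip sketch, assuming a KVM-capable host; reading the
    /// state back and restoring it unchanged is always valid:
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let mp_state = vcpu.get_mp_state().unwrap();
    /// vcpu.set_mp_state(mp_state).unwrap();
    /// ```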
    fn set_mp_state(&self, mp_state: MpState) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_mp_state(mp_state.into())
            .map_err(|e| cpu::HypervisorCpuError::SetMpState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Translates guest virtual address to guest physical address using the `KVM_TRANSLATE` ioctl.
    ///
    fn translate_gva(&self, gva: u64, _flags: u64) -> cpu::Result<(u64, u32)> {
        let tr = self
            .fd
            .lock()
            .unwrap()
            .translate_gva(gva)
            .map_err(|e| cpu::HypervisorCpuError::TranslateVirtualAddress(e.into()))?;
        // tr.valid is set if the GVA is mapped to a valid GPA.
        match tr.valid {
            0 => Err(cpu::HypervisorCpuError::TranslateVirtualAddress(anyhow!(
                "Invalid GVA: {:#x}",
                gva
            ))),
            _ => Ok((tr.physical_address, 0)),
        }
    }

    ///
    /// Triggers the running of the current virtual CPU, returning an exit reason.
    ///
    fn run(&self) -> std::result::Result<cpu::VmExit, cpu::HypervisorCpuError> {
        match self.fd.lock().unwrap().run() {
            Ok(run) => match run {
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoIn(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .pio_read(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoOut(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .pio_write(addr.into(), data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                #[cfg(target_arch = "x86_64")]
                VcpuExit::IoapicEoi(vector) => Ok(cpu::VmExit::IoapicEoi(vector)),
                #[cfg(target_arch = "x86_64")]
                VcpuExit::Shutdown | VcpuExit::Hlt => Ok(cpu::VmExit::Reset),

                #[cfg(target_arch = "aarch64")]
                VcpuExit::SystemEvent(event_type, flags) => {
                    use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN};
                    // On AArch64, when the VM is shut down, run() returns
                    // VcpuExit::SystemEvent with reason KVM_SYSTEM_EVENT_SHUTDOWN.
                    if event_type == KVM_SYSTEM_EVENT_RESET {
                        Ok(cpu::VmExit::Reset)
                    } else if event_type == KVM_SYSTEM_EVENT_SHUTDOWN {
                        Ok(cpu::VmExit::Shutdown)
                    } else {
                        Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                            "Unexpected system event with type 0x{:x}, flags 0x{:x?}",
                            event_type,
                            flags
                        )))
                    }
                }

                VcpuExit::MmioRead(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .mmio_read(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                VcpuExit::MmioWrite(addr, data) => {
                    if let Some(vm_ops) = &self.vm_ops {
                        return vm_ops
                            .mmio_write(addr, data)
                            .map(|_| cpu::VmExit::Ignore)
                            .map_err(|e| cpu::HypervisorCpuError::RunVcpu(e.into()));
                    }

                    Ok(cpu::VmExit::Ignore)
                }
                VcpuExit::Hyperv => Ok(cpu::VmExit::Hyperv),
                #[cfg(feature = "tdx")]
                VcpuExit::Unsupported(KVM_EXIT_TDX) => Ok(cpu::VmExit::Tdx),
                VcpuExit::Debug(_) => Ok(cpu::VmExit::Debug),

                r => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "Unexpected exit reason on vcpu run: {:?}",
                    r
                ))),
            },

            Err(ref e) => match e.errno() {
                libc::EAGAIN | libc::EINTR => Ok(cpu::VmExit::Ignore),
                _ => Err(cpu::HypervisorCpuError::RunVcpu(anyhow!(
                    "VCPU error {:?}",
                    e
                ))),
            },
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Let the guest know that it has been paused, which prevents potential
    /// soft lockups when it is resumed.
    ///
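    /// # Example
    ///
    /// A minimal sketch, assuming a KVM-capable host. On a freshly created
    /// vCPU the kernel answers -EINVAL (no PV clock yet), which this method
    /// tolerates:
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// vcpu.notify_guest_clock_paused().unwrap();
    /// ```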
    fn notify_guest_clock_paused(&self) -> cpu::Result<()> {
        if let Err(e) = self.fd.lock().unwrap().kvmclock_ctrl() {
            // The Linux kernel returns -EINVAL if the PV clock isn't yet
            // initialised, which could be because we're still in firmware or
            // the guest doesn't use the KVM clock.
            if e.errno() != libc::EINVAL {
                return Err(cpu::HypervisorCpuError::NotifyGuestClockPaused(e.into()));
            }
        }

        Ok(())
    }

    #[cfg(not(target_arch = "riscv64"))]
    ///
    /// Sets debug registers to set hardware breakpoints and/or enable single step.
    ///
    fn set_guest_debug(
        &self,
        addrs: &[vm_memory::GuestAddress],
        singlestep: bool,
    ) -> cpu::Result<()> {
        let mut dbg = kvm_guest_debug {
            #[cfg(target_arch = "x86_64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP,
            #[cfg(target_arch = "aarch64")]
            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW,
            ..Default::default()
        };
        if singlestep {
            dbg.control |= KVM_GUESTDBG_SINGLESTEP;
        }

        // Set the debug registers.
        // Here we assume that the number of addresses does not exceed what
        // `Hypervisor::get_guest_debug_hw_bps()` specifies.
        #[cfg(target_arch = "x86_64")]
        {
            // Set bits 9 and 10.
            // bit 9: GE (global exact breakpoint enable) flag.
            // bit 10: always 1.
            dbg.arch.debugreg[7] = 0x0600;

            for (i, addr) in addrs.iter().enumerate() {
                dbg.arch.debugreg[i] = addr.0;
                // Set the global breakpoint enable flag for breakpoint i
                // (bit 2i + 1). For example, with two addresses DR7 becomes
                // 0x0600 | 0b10 | 0b1000 = 0x060a.
                dbg.arch.debugreg[7] |= 2 << (i * 2);
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            for (i, addr) in addrs.iter().enumerate() {
                // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2):
                // bit 0: 1 (Enabled)
                // bit 1~2: 0b11 (PMC = EL1/EL0)
                // bit 5~8: 0b1111 (BAS = AArch64)
                // others: 0
                dbg.arch.dbg_bcr[i] = 0b1u64 | 0b110u64 | 0b1_1110_0000u64;
                // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3):
                // bit 2~52: VA[2:52]
                dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0;
            }
        }
        self.fd
            .lock()
            .unwrap()
            .set_guest_debug(&dbg)
            .map_err(|e| cpu::HypervisorCpuError::SetDebugRegs(e.into()))
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_get_finalized_features(&self) -> i32 {
        kvm_bindings::KVM_ARM_VCPU_SVE as i32
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_set_processor_features(
        &self,
        vm: &Arc<dyn crate::Vm>,
        kvi: &mut crate::VcpuInit,
        id: u8,
    ) -> cpu::Result<()> {
        use std::arch::is_aarch64_feature_detected;
        #[allow(clippy::nonminimal_bool)]
        let sve_supported =
            is_aarch64_feature_detected!("sve") || is_aarch64_feature_detected!("sve2");

        let mut kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();

        // We already checked that the capability is supported.
        kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PSCI_0_2;
        if vm
            .as_any()
            .downcast_ref::<crate::kvm::KvmVm>()
            .unwrap()
            .check_extension(Cap::ArmPmuV3)
        {
            kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_PMU_V3;
        }

        if sve_supported
            && vm
                .as_any()
                .downcast_ref::<crate::kvm::KvmVm>()
                .unwrap()
                .check_extension(Cap::ArmSve)
        {
            kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_SVE;
        }

        // Non-boot cpus are powered off initially.
        if id > 0 {
            kvm_kvi.features[0] |= 1 << kvm_bindings::KVM_ARM_VCPU_POWER_OFF;
        }

        *kvi = kvm_kvi.into();

        Ok(())
    }

    ///
    /// Return VcpuInit with default values set
    ///
    #[cfg(target_arch = "aarch64")]
    fn create_vcpu_init(&self) -> crate::VcpuInit {
        kvm_bindings::kvm_vcpu_init::default().into()
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_init(&self, kvi: &crate::VcpuInit) -> cpu::Result<()> {
        let kvm_kvi: kvm_bindings::kvm_vcpu_init = (*kvi).into();
        self.fd
            .lock()
            .unwrap()
            .vcpu_init(&kvm_kvi)
            .map_err(|e| cpu::HypervisorCpuError::VcpuInit(e.into()))
    }

    #[cfg(target_arch = "aarch64")]
    fn vcpu_finalize(&self, feature: i32) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .vcpu_finalize(&feature)
            .map_err(|e| cpu::HypervisorCpuError::VcpuFinalize(e.into()))
    }
    #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))]
    ///
    /// Gets a list of the guest registers that are supported for the
    /// KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
    ///
    fn get_reg_list(&self, reg_list: &mut RegList) -> cpu::Result<()> {
        let mut kvm_reg_list: kvm_bindings::RegList = reg_list.clone().into();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut kvm_reg_list)
            .map_err(|e: kvm_ioctls::Error| cpu::HypervisorCpuError::GetRegList(e.into()))?;
        *reg_list = kvm_reg_list.into();
        Ok(())
    }

    ///
    /// Gets the value of a system register
    ///
    #[cfg(target_arch = "aarch64")]
    fn get_sys_reg(&self, sys_reg: u32) -> cpu::Result<u64> {
        //
        // The Arm Architecture Reference Manual defines the encoding of
        // AArch64 system registers, see
        // https://developer.arm.com/documentation/ddi0487 (chapter D12),
        // while KVM defines its own ID for each AArch64 system register,
        // which is used when calling `KVM_G/SET_ONE_REG` to access a system
        // register of a guest.
        // A mapping exists between the Arm standard encoding and the KVM ID.
        // This function takes the standard u32 ID as input parameter, converts
        // it to the corresponding KVM ID, and calls `KVM_GET_ONE_REG` to get
        // the value of the system register.
        //
        let id: u64 = KVM_REG_ARM64
            | KVM_REG_SIZE_U64
            | KVM_REG_ARM64_SYSREG as u64
            | ((((sys_reg) >> 5)
                & (KVM_REG_ARM64_SYSREG_OP0_MASK
                    | KVM_REG_ARM64_SYSREG_OP1_MASK
                    | KVM_REG_ARM64_SYSREG_CRN_MASK
                    | KVM_REG_ARM64_SYSREG_CRM_MASK
                    | KVM_REG_ARM64_SYSREG_OP2_MASK)) as u64);
        let mut bytes = [0_u8; 8];
        self.fd
            .lock()
            .unwrap()
            .get_one_reg(id, &mut bytes)
            .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
        Ok(u64::from_le_bytes(bytes))
    }

    ///
    /// Gets the value of a non-core register
    ///
    #[cfg(target_arch = "riscv64")]
    fn get_non_core_reg(&self, _non_core_reg: u32) -> cpu::Result<u64> {
        unimplemented!()
    }

    ///
    /// Configure core registers for a given CPU.
    ///
    #[cfg(target_arch = "aarch64")]
    fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
        #[allow(non_upper_case_globals)]
        // PSR (Processor State Register) bits.
        // Taken from arch/arm64/include/uapi/asm/ptrace.h.
        const PSR_MODE_EL1h: u64 = 0x0000_0005;
        const PSR_F_BIT: u64 = 0x0000_0040;
        const PSR_I_BIT: u64 = 0x0000_0080;
        const PSR_A_BIT: u64 = 0x0000_0100;
        const PSR_D_BIT: u64 = 0x0000_0200;
        // Taken from arch/arm64/kvm/inject_fault.c.
        const PSTATE_FAULT_BITS_64: u64 =
            PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT;

        let kreg_off = offset_of!(kvm_regs, regs);

        // Get the register index of the PSTATE (Processor State) register.
        let pstate = offset_of!(user_pt_regs, pstate) + kreg_off;
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate),
                &PSTATE_FAULT_BITS_64.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

        // Other vCPUs are powered off initially, awaiting PSCI wakeup.
        if cpu_id == 0 {
            // Set the PC (Program Counter) to the current program address
            // (kernel address).
            let pc = offset_of!(user_pt_regs, pc) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, pc),
                    &boot_ip.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;

            // Last mandatory thing to set -> the address pointing to the FDT
            // (also called DTB).
            // "The device tree blob (dtb) must be placed on an 8-byte boundary
            // and must not exceed 2 megabytes in size."
            // -> https://www.kernel.org/doc/Documentation/arm64/booting.txt.
            // We are choosing to place it at the end of DRAM. See `get_fdt_addr`.
            let regs0 = offset_of!(user_pt_regs, regs) + kreg_off;
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(
                    arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0),
                    &fdt_start.to_le_bytes(),
                )
                .map_err(|e| cpu::HypervisorCpuError::SetAarchCoreRegister(e.into()))?;
        }
        Ok(())
    }
    #[cfg(target_arch = "riscv64")]
    ///
    /// Configure registers for a given RISC-V CPU.
    ///
    fn setup_regs(&self, cpu_id: u8, boot_ip: u64, fdt_start: u64) -> cpu::Result<()> {
        // Set the A0 register to the hartid of this CPU.
        let a0 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a0);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                riscv64_reg_id!(KVM_REG_RISCV_CORE, a0),
                &u64::from(cpu_id).to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;

        // Set the PC (Program Counter) to the current program address
        // (kernel address).
        let pc = offset_of!(kvm_riscv_core, regs, user_regs_struct, pc);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                riscv64_reg_id!(KVM_REG_RISCV_CORE, pc),
                &boot_ip.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;

        // Last mandatory thing to set -> the address pointing to the FDT
        // (also called DTB).
        // "The device tree blob (dtb) must be placed on an 8-byte boundary and
        // must not exceed 64 kilobytes in size."
        // -> https://www.kernel.org/doc/Documentation/arch/riscv/boot.txt.
        let a1 = offset_of!(kvm_riscv_core, regs, user_regs_struct, a1);
        self.fd
            .lock()
            .unwrap()
            .set_one_reg(
                riscv64_reg_id!(KVM_REG_RISCV_CORE, a1),
                &fdt_start.to_le_bytes(),
            )
            .map_err(|e| cpu::HypervisorCpuError::SetRiscvCoreRegister(e.into()))?;

        Ok(())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the current CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify
    /// vCPU/LAPIC state. As such, it must be done before most everything
    /// else, otherwise we cannot restore everything and expect it to work.
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// KVM_GET_LAPIC may change state of LAPIC before returning it.
    ///
    /// GET_VCPU_EVENTS should probably be last to save. The code looks as
    /// though it might be affected by internal state modifications of the
    /// GET ioctls.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// GET_MSRS requires a prepopulated data structure to do something
    /// meaningful. For SET_MSRS it will then contain good data.
    ///
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// ```
    fn state(&self) -> cpu::Result<CpuState> {
        let cpuid = self.get_cpuid2(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
        let mp_state = self.get_mp_state()?.into();
        let regs = self.get_regs()?;
        let sregs = self.get_sregs()?;
        let xsave = self.get_xsave()?;
        let xcrs = self.get_xcrs()?;
        let lapic_state = self.get_lapic()?;
        let fpu = self.get_fpu()?;

        // Try to get all MSRs based on the list previously retrieved from KVM.
        // If the number of MSRs obtained from GET_MSRS is different from the
        // expected amount, we fall back to a slower method of getting MSRs
        // by chunks. This is the only way to make sure we try to get as many
        // MSRs as possible, even if some MSRs are not supported.
        let mut msr_entries = self.msrs.clone();

        // Save extra MSRs if the Hyper-V synthetic interrupt controller is
        // emulated.
        if self.hyperv_synic.load(Ordering::Acquire) {
            let hyperv_synic_msrs = vec![
                0x40000020, 0x40000021, 0x40000080, 0x40000081, 0x40000082, 0x40000083, 0x40000084,
                0x40000090, 0x40000091, 0x40000092, 0x40000093, 0x40000094, 0x40000095, 0x40000096,
                0x40000097, 0x40000098, 0x40000099, 0x4000009a, 0x4000009b, 0x4000009c, 0x4000009d,
                0x4000009e, 0x4000009f, 0x400000b0, 0x400000b1, 0x400000b2, 0x400000b3, 0x400000b4,
                0x400000b5, 0x400000b6, 0x400000b7,
            ];
            for index in hyperv_synic_msrs {
                let msr = kvm_msr_entry {
                    index,
                    ..Default::default()
                };
                msr_entries.push(msr.into());
            }
        }

        let expected_num_msrs = msr_entries.len();
        let num_msrs = self.get_msrs(&mut msr_entries)?;
        let msrs = if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;
            let mut msr_entries_tmp = msr_entries[..faulty_msr_index].to_vec();

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while getting MSRs",
                    msr_entries[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let mut sub_msr_entries = msr_entries[start_pos..].to_vec();
                let num_msrs = self.get_msrs(&mut sub_msr_entries)?;

                msr_entries_tmp.extend(&sub_msr_entries[..num_msrs]);

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }

            msr_entries_tmp
        } else {
            msr_entries
        };

        let vcpu_events = self.get_vcpu_events()?;
        let tsc_khz = self.tsc_khz()?;

        Ok(VcpuKvmState {
            cpuid,
            msrs,
            vcpu_events,
            regs: regs.into(),
            sregs: sregs.into(),
            fpu,
            lapic_state,
            xsave,
            xcrs,
            mp_state,
            tsc_khz,
        }
        .into())
    }
    ///
    /// Get the current AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = VcpuKvmState {
            mp_state: self.get_mp_state()?.into(),
            ..Default::default()
        };
        // Get core registers
        state.core_regs = self.get_regs()?.into();

        // Get system registers.
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For Armv8 there are around 500 registers.
        let mut sys_regs: Vec<kvm_bindings::kvm_one_reg> = Vec::new();
        let mut reg_list = kvm_bindings::RegList::new(500).unwrap();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain: core registers and system
        // registers.
        // The register list contains the number of registers and their ids. We
        // will need to call KVM_GET_ONE_REG on each id in order to save all of
        // them. We carve out from the list the core registers, which are
        // represented in the kernel by the kvm_regs structure and for which we
        // can calculate the id based on the offset in the structure.
        reg_list.retain(|regid| is_system_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we simply call KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(*index, &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
            sys_regs.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: u64::from_le_bytes(bytes),
            });
        }

        state.sys_regs = sys_regs;

        Ok(state.into())
    }
    #[cfg(target_arch = "riscv64")]
    ///
    /// Get the current RISC-V 64-bit CPU state
    ///
    fn state(&self) -> cpu::Result<CpuState> {
        let mut state = VcpuKvmState {
            mp_state: self.get_mp_state()?.into(),
            ..Default::default()
        };
        // Get core registers
        state.core_regs = self.get_regs()?.into();

        // Get non-core registers.
        // Call KVM_GET_REG_LIST to get all registers available to the guest.
        // For RISC-V 64-bit there are around 200 registers.
        let mut sys_regs: Vec<kvm_bindings::kvm_one_reg> = Vec::new();
        let mut reg_list = kvm_bindings::RegList::new(200).unwrap();
        self.fd
            .lock()
            .unwrap()
            .get_reg_list(&mut reg_list)
            .map_err(|e| cpu::HypervisorCpuError::GetRegList(e.into()))?;

        // At this point reg_list should contain:
        // - core registers
        // - config registers
        // - timer registers
        // - control and status registers
        // - AIA control and status registers
        // - smstateen control and status registers
        // - sbi_sta control and status registers.
        //
        // The register list contains the number of registers and their ids. We
        // will need to call KVM_GET_ONE_REG on each id in order to save all of
        // them. We carve out from the list the core registers, which are
        // represented in the kernel by the `kvm_riscv_core` structure and for
        // which we can calculate the id based on the offset in the structure.
        reg_list.retain(|regid| is_non_core_register(*regid));

        // Now, for the rest of the registers left in the previously fetched
        // register list, we simply call KVM_GET_ONE_REG.
        let indices = reg_list.as_slice();
        for index in indices.iter() {
            let mut bytes = [0_u8; 8];
            self.fd
                .lock()
                .unwrap()
                .get_one_reg(*index, &mut bytes)
                .map_err(|e| cpu::HypervisorCpuError::GetSysRegister(e.into()))?;
            sys_regs.push(kvm_bindings::kvm_one_reg {
                id: *index,
                addr: u64::from_le_bytes(bytes),
            });
        }

        state.non_core_regs = sys_regs;

        Ok(state.into())
    }
    #[cfg(target_arch = "x86_64")]
    ///
    /// Restore the previously saved CPU state
    ///
    /// Ordering requirements:
    ///
    /// KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are
    /// still running.
    ///
    /// Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so
    /// if we ever change the BSP, we have to do that before restoring anything.
    /// The same seems to be true for CPUID stuff.
    ///
    /// SREGS saves/restores a pending interrupt, similar to what
    /// VCPU_EVENTS also does.
    ///
    /// SET_REGS clears pending exceptions unconditionally, thus, it must be
    /// done before SET_VCPU_EVENTS, which restores it.
    ///
    /// SET_LAPIC must come after SET_SREGS, because the latter restores
    /// the apic base msr.
    ///
    /// SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR
    /// only restores successfully when the LAPIC is correctly configured.
    ///
    /// Arguments: CpuState
    /// # Example
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// vm.enable_split_irq().unwrap();
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let state = vcpu.state().unwrap();
    /// vcpu.set_state(&state).unwrap();
    /// ```
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        self.set_cpuid2(&state.cpuid)?;
        self.set_mp_state(state.mp_state.into())?;
        self.set_regs(&state.regs.into())?;
        self.set_sregs(&state.sregs.into())?;
        self.set_xsave(&state.xsave)?;
        self.set_xcrs(&state.xcrs)?;
        self.set_lapic(&state.lapic_state)?;
        self.set_fpu(&state.fpu)?;

        if let Some(freq) = state.tsc_khz {
            self.set_tsc_khz(freq)?;
        }

        // Try to set all MSRs previously stored.
        // If the number of MSRs set from SET_MSRS is different from the
        // expected amount, we fall back to a slower method of setting MSRs
        // by chunks. This is the only way to make sure we try to set as many
        // MSRs as possible, even if some MSRs are not supported.
        let expected_num_msrs = state.msrs.len();
        let num_msrs = self.set_msrs(&state.msrs)?;
        if num_msrs != expected_num_msrs {
            let mut faulty_msr_index = num_msrs;

            loop {
                warn!(
                    "Detected faulty MSR 0x{:x} while setting MSRs",
                    state.msrs[faulty_msr_index].index
                );

                // Skip the first bad MSR
                let start_pos = faulty_msr_index + 1;

                let sub_msr_entries = state.msrs[start_pos..].to_vec();

                let num_msrs = self.set_msrs(&sub_msr_entries)?;

                if num_msrs == sub_msr_entries.len() {
                    break;
                }

                faulty_msr_index = start_pos + num_msrs;
            }
        }

        self.set_vcpu_events(&state.vcpu_events)?;

        Ok(())
    }

    ///
    /// Restore the previously saved AArch64 CPU state
    ///
    #[cfg(target_arch = "aarch64")]
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        // Set core registers
        self.set_regs(&state.core_regs.into())?;
        // Set system registers
        for reg in &state.sys_regs {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(reg.id, &reg.addr.to_le_bytes())
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }

        self.set_mp_state(state.mp_state.into())?;

        Ok(())
    }

    #[cfg(target_arch = "riscv64")]
    ///
    /// Restore the previously saved RISC-V 64-bit CPU state
    ///
    fn set_state(&self, state: &CpuState) -> cpu::Result<()> {
        let state: VcpuKvmState = state.clone().into();
        // Set core registers
        self.set_regs(&state.core_regs.into())?;
        // Set non-core registers
        for reg in &state.non_core_regs {
            self.fd
                .lock()
                .unwrap()
                .set_one_reg(reg.id, &reg.addr.to_le_bytes())
                .map_err(|e| cpu::HypervisorCpuError::SetSysRegister(e.into()))?;
        }

        self.set_mp_state(state.mp_state.into())?;

        Ok(())
    }

    ///
    /// Initialize TDX for this CPU
    ///
    #[cfg(feature = "tdx")]
= "tdx")] 3063 fn tdx_init(&self, hob_address: u64) -> cpu::Result<()> { 3064 tdx_command( 3065 &self.fd.lock().unwrap().as_raw_fd(), 3066 TdxCommand::InitVcpu, 3067 0, 3068 hob_address, 3069 ) 3070 .map_err(cpu::HypervisorCpuError::InitializeTdx) 3071 } 3072 3073 /// 3074 /// Set the "immediate_exit" state 3075 /// 3076 fn set_immediate_exit(&self, exit: bool) { 3077 self.fd.lock().unwrap().set_kvm_immediate_exit(exit.into()); 3078 } 3079 3080 /// 3081 /// Returns the details about TDX exit reason 3082 /// 3083 #[cfg(feature = "tdx")] 3084 fn get_tdx_exit_details(&mut self) -> cpu::Result<TdxExitDetails> { 3085 let mut fd = self.fd.as_ref().lock().unwrap(); 3086 let kvm_run = fd.get_kvm_run(); 3087 // SAFETY: accessing a union field in a valid structure 3088 let tdx_vmcall = unsafe { 3089 &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1 3090 as *mut KvmTdxExit)) 3091 .u 3092 .vmcall 3093 }; 3094 3095 tdx_vmcall.status_code = TDG_VP_VMCALL_INVALID_OPERAND; 3096 3097 if tdx_vmcall.type_ != 0 { 3098 return Err(cpu::HypervisorCpuError::UnknownTdxVmCall); 3099 } 3100 3101 match tdx_vmcall.subfunction { 3102 TDG_VP_VMCALL_GET_QUOTE => Ok(TdxExitDetails::GetQuote), 3103 TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT => { 3104 Ok(TdxExitDetails::SetupEventNotifyInterrupt) 3105 } 3106 _ => Err(cpu::HypervisorCpuError::UnknownTdxVmCall), 3107 } 3108 } 3109 3110 /// 3111 /// Set the status code for TDX exit 3112 /// 3113 #[cfg(feature = "tdx")] 3114 fn set_tdx_status(&mut self, status: TdxExitStatus) { 3115 let mut fd = self.fd.as_ref().lock().unwrap(); 3116 let kvm_run = fd.get_kvm_run(); 3117 // SAFETY: accessing a union field in a valid structure 3118 let tdx_vmcall = unsafe { 3119 &mut (*((&mut kvm_run.__bindgen_anon_1) as *mut kvm_run__bindgen_ty_1 3120 as *mut KvmTdxExit)) 3121 .u 3122 .vmcall 3123 }; 3124 3125 tdx_vmcall.status_code = match status { 3126 TdxExitStatus::Success => TDG_VP_VMCALL_SUCCESS, 3127 TdxExitStatus::InvalidOperand => TDG_VP_VMCALL_INVALID_OPERAND, 3128 }; 3129 } 3130 3131 #[cfg(target_arch = "x86_64")] 3132 /// 3133 /// Return the list of initial MSR entries for a VCPU 3134 /// 3135 fn boot_msr_entries(&self) -> Vec<MsrEntry> { 3136 use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB}; 3137 3138 [ 3139 msr!(msr_index::MSR_IA32_SYSENTER_CS), 3140 msr!(msr_index::MSR_IA32_SYSENTER_ESP), 3141 msr!(msr_index::MSR_IA32_SYSENTER_EIP), 3142 msr!(msr_index::MSR_STAR), 3143 msr!(msr_index::MSR_CSTAR), 3144 msr!(msr_index::MSR_LSTAR), 3145 msr!(msr_index::MSR_KERNEL_GS_BASE), 3146 msr!(msr_index::MSR_SYSCALL_MASK), 3147 msr!(msr_index::MSR_IA32_TSC), 3148 msr_data!( 3149 msr_index::MSR_IA32_MISC_ENABLE, 3150 msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64 3151 ), 3152 msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB), 3153 ] 3154 .to_vec() 3155 } 3156 3157 #[cfg(target_arch = "aarch64")] 3158 fn has_pmu_support(&self) -> bool { 3159 let cpu_attr = kvm_bindings::kvm_device_attr { 3160 group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL, 3161 attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT), 3162 addr: 0x0, 3163 flags: 0, 3164 }; 3165 self.fd.lock().unwrap().has_device_attr(&cpu_attr).is_ok() 3166 } 3167 3168 #[cfg(target_arch = "aarch64")] 3169 fn init_pmu(&self, irq: u32) -> cpu::Result<()> { 3170 let cpu_attr = kvm_bindings::kvm_device_attr { 3171 group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL, 3172 attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT), 3173 addr: 0x0, 3174 flags: 0, 3175 }; 3176 let cpu_attr_irq = 
    fn boot_msr_entries(&self) -> Vec<MsrEntry> {
        use crate::arch::x86::{msr_index, MTRR_ENABLE, MTRR_MEM_TYPE_WB};

        [
            msr!(msr_index::MSR_IA32_SYSENTER_CS),
            msr!(msr_index::MSR_IA32_SYSENTER_ESP),
            msr!(msr_index::MSR_IA32_SYSENTER_EIP),
            msr!(msr_index::MSR_STAR),
            msr!(msr_index::MSR_CSTAR),
            msr!(msr_index::MSR_LSTAR),
            msr!(msr_index::MSR_KERNEL_GS_BASE),
            msr!(msr_index::MSR_SYSCALL_MASK),
            msr!(msr_index::MSR_IA32_TSC),
            msr_data!(
                msr_index::MSR_IA32_MISC_ENABLE,
                msr_index::MSR_IA32_MISC_ENABLE_FAST_STRING as u64
            ),
            msr_data!(msr_index::MSR_MTRRdefType, MTRR_ENABLE | MTRR_MEM_TYPE_WB),
        ]
        .to_vec()
    }

    #[cfg(target_arch = "aarch64")]
    fn has_pmu_support(&self) -> bool {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        self.fd.lock().unwrap().has_device_attr(&cpu_attr).is_ok()
    }

    #[cfg(target_arch = "aarch64")]
    fn init_pmu(&self, irq: u32) -> cpu::Result<()> {
        let cpu_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_INIT),
            addr: 0x0,
            flags: 0,
        };
        let cpu_attr_irq = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_ARM_VCPU_PMU_V3_CTRL,
            attr: u64::from(kvm_bindings::KVM_ARM_VCPU_PMU_V3_IRQ),
            addr: &irq as *const u32 as u64,
            flags: 0,
        };
        // The PMU IRQ must be set before the PMU itself is initialized.
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr_irq)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)?;
        self.fd
            .lock()
            .unwrap()
            .set_device_attr(&cpu_attr)
            .map_err(|_| cpu::HypervisorCpuError::InitializePmu)
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Get the frequency of the TSC if available
    ///
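    /// # Example
    ///
    /// A minimal sketch, assuming a KVM-capable host; `None` is returned when
    /// KVM cannot report the frequency (the underlying ioctl fails with EIO):
    ///
    /// ```rust
    /// # use hypervisor::kvm::KvmHypervisor;
    /// # use std::sync::Arc;
    /// let kvm = KvmHypervisor::new().unwrap();
    /// let hv = Arc::new(kvm);
    /// let vm = hv.create_vm().expect("new VM fd creation failed");
    /// let vcpu = vm.create_vcpu(0, None).unwrap();
    /// let _freq: Option<u32> = vcpu.tsc_khz().unwrap();
    /// ```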
    fn tsc_khz(&self) -> cpu::Result<Option<u32>> {
        match self.fd.lock().unwrap().get_tsc_khz() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(None)
                } else {
                    Err(cpu::HypervisorCpuError::GetTscKhz(e.into()))
                }
            }
            Ok(v) => Ok(Some(v)),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Set the frequency of the TSC if available
    ///
    fn set_tsc_khz(&self, freq: u32) -> cpu::Result<()> {
        match self.fd.lock().unwrap().set_tsc_khz(freq) {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::SetTscKhz(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Trigger an NMI
    ///
    fn nmi(&self) -> cpu::Result<()> {
        match self.fd.lock().unwrap().nmi() {
            Err(e) => {
                if e.errno() == libc::EIO {
                    Ok(())
                } else {
                    Err(cpu::HypervisorCpuError::Nmi(e.into()))
                }
            }
            Ok(_) => Ok(()),
        }
    }
}

impl KvmVcpu {
    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xsave struct".
    ///
    fn get_xsave(&self) -> cpu::Result<XsaveState> {
        Ok(self
            .fd
            .lock()
            .unwrap()
            .get_xsave()
            .map_err(|e| cpu::HypervisorCpuError::GetXsaveState(e.into()))?
            .into())
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xsave struct".
    ///
    fn set_xsave(&self, xsave: &XsaveState) -> cpu::Result<()> {
        let xsave: kvm_bindings::kvm_xsave = (*xsave).clone().into();
        self.fd
            .lock()
            .unwrap()
            .set_xsave(&xsave)
            .map_err(|e| cpu::HypervisorCpuError::SetXsaveState(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that returns the vcpu's current "xcrs".
    ///
    fn get_xcrs(&self) -> cpu::Result<ExtendedControlRegisters> {
        self.fd
            .lock()
            .unwrap()
            .get_xcrs()
            .map_err(|e| cpu::HypervisorCpuError::GetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// X86 specific call that sets the vcpu's current "xcrs".
    ///
    fn set_xcrs(&self, xcrs: &ExtendedControlRegisters) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_xcrs(xcrs)
            .map_err(|e| cpu::HypervisorCpuError::SetXcsr(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Returns currently pending exceptions, interrupts, and NMIs, as well as
    /// related states of the vcpu.
    ///
    fn get_vcpu_events(&self) -> cpu::Result<VcpuEvents> {
        self.fd
            .lock()
            .unwrap()
            .get_vcpu_events()
            .map_err(|e| cpu::HypervisorCpuError::GetVcpuEvents(e.into()))
    }

    #[cfg(target_arch = "x86_64")]
    ///
    /// Sets pending exceptions, interrupts, and NMIs, as well as related
    /// states of the vcpu.
    ///
    fn set_vcpu_events(&self, events: &VcpuEvents) -> cpu::Result<()> {
        self.fd
            .lock()
            .unwrap()
            .set_vcpu_events(events)
            .map_err(|e| cpu::HypervisorCpuError::SetVcpuEvents(e.into()))
    }
}

#[cfg(test)]
mod tests {
    #[test]
    #[cfg(target_arch = "riscv64")]
    fn test_get_and_set_regs() {
        use super::*;

        let kvm = KvmHypervisor::new().unwrap();
        let hypervisor = Arc::new(kvm);
        let vm = hypervisor.create_vm().expect("new VM fd creation failed");
        let vcpu0 = vm.create_vcpu(0, None).unwrap();

        let core_regs = StandardRegisters::from(kvm_riscv_core {
            regs: user_regs_struct {
                pc: 0x00,
                ra: 0x01,
                sp: 0x02,
                gp: 0x03,
                tp: 0x04,
                t0: 0x05,
                t1: 0x06,
                t2: 0x07,
                s0: 0x08,
                s1: 0x09,
                a0: 0x0a,
                a1: 0x0b,
                a2: 0x0c,
                a3: 0x0d,
                a4: 0x0e,
                a5: 0x0f,
                a6: 0x10,
                a7: 0x11,
                s2: 0x12,
                s3: 0x13,
                s4: 0x14,
                s5: 0x15,
                s6: 0x16,
                s7: 0x17,
                s8: 0x18,
                s9: 0x19,
                s10: 0x1a,
                s11: 0x1b,
                t3: 0x1c,
                t4: 0x1d,
                t5: 0x1e,
                t6: 0x1f,
            },
            mode: 0x00,
        });

        vcpu0.set_regs(&core_regs).unwrap();
        assert_eq!(vcpu0.get_regs().unwrap(), core_regs);
    }
}